pluribus-context 0.3.35 → 0.3.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +1 -1
  3. package/bin/pluribus.js +12 -0
  4. package/docs/agent-firewall-denial-audit.md +95 -0
  5. package/docs/ai-pr-review-receipts.md +20 -0
  6. package/docs/compaction-resume-receipts.md +43 -0
  7. package/docs/controlled-learning-queue.md +48 -0
  8. package/docs/install-plan-receipts.md +2 -0
  9. package/docs/loaded-resource-boundary.md +97 -0
  10. package/docs/memory-write-policy-receipts.md +41 -0
  11. package/docs/parallel-session-review-ledger.md +103 -0
  12. package/docs/phase-boundary-contracts.md +87 -0
  13. package/docs/review-primitive-gate.md +2 -0
  14. package/docs/skill-install-receipts.md +102 -0
  15. package/docs/skill-use-rate-receipts.md +104 -0
  16. package/examples/agent-firewall-denial-audit/README.md +14 -0
  17. package/examples/agent-firewall-denial-audit/check-denial-audit.mjs +116 -0
  18. package/examples/agent-firewall-denial-audit/denial-envelope.json +9 -0
  19. package/examples/agent-firewall-denial-audit/operator-audit-record.json +20 -0
  20. package/examples/ai-pr-review-receipts/.github/workflows/ai-pr-review-receipt.yml +25 -0
  21. package/examples/ai-pr-review-receipts/README.md +51 -1
  22. package/examples/ai-pr-review-receipts/incomplete-review-primitive-receipt.json +43 -0
  23. package/examples/ai-pr-review-receipts/review-primitive-receipt.json +60 -0
  24. package/examples/compaction-resume-receipts/README.md +12 -0
  25. package/examples/compaction-resume-receipts/check-resume-receipt.mjs +116 -0
  26. package/examples/compaction-resume-receipts/safe-resume-receipt.json +52 -0
  27. package/examples/compaction-resume-receipts/unsafe-resume-receipt.json +41 -0
  28. package/examples/controlled-learning-queue/README.md +26 -0
  29. package/examples/controlled-learning-queue/check-learning-queue.mjs +44 -0
  30. package/examples/controlled-learning-queue/leads/acme-job-card.md +12 -0
  31. package/examples/controlled-learning-queue/learning_queue.md +27 -0
  32. package/examples/controlled-learning-queue/memory/durable.md +10 -0
  33. package/examples/controlled-learning-queue/memory/working-notes.md +5 -0
  34. package/examples/controlled-learning-queue/role/job-contract.md +18 -0
  35. package/examples/controlled-learning-queue/skills/qualify-lead.md +17 -0
  36. package/examples/loaded-resource-boundary/README.md +22 -0
  37. package/examples/loaded-resource-boundary/check-loaded-resource-boundary.mjs +65 -0
  38. package/examples/loaded-resource-boundary/loaded-resource-boundary.json +69 -0
  39. package/examples/memory-write-policy/README.md +28 -0
  40. package/examples/memory-write-policy/approved-memory-update.json +48 -0
  41. package/examples/memory-write-policy/check-memory-update.mjs +120 -0
  42. package/examples/memory-write-policy/quarantined-memory-update.json +43 -0
  43. package/examples/parallel-session-review-ledger/README.md +13 -0
  44. package/examples/parallel-session-review-ledger/check-parallel-session-review-ledger.mjs +69 -0
  45. package/examples/parallel-session-review-ledger/parallel-session-review-ledger.json +72 -0
  46. package/examples/phase-boundary-contract/README.md +23 -0
  47. package/examples/phase-boundary-contract/check-phase-boundary.mjs +73 -0
  48. package/examples/phase-boundary-contract/phase-boundary-contract.json +68 -0
  49. package/examples/skill-install-receipts/README.md +31 -0
  50. package/examples/skill-install-receipts/check-skill-install-receipt.mjs +75 -0
  51. package/examples/skill-install-receipts/skill-install-receipt.json +79 -0
  52. package/examples/skill-use-rate-receipts/README.md +16 -0
  53. package/examples/skill-use-rate-receipts/check-skill-use-rate.mjs +89 -0
  54. package/examples/skill-use-rate-receipts/skill-use-rate-receipt.json +79 -0
  55. package/package.json +1 -1
  56. package/src/commands/demo.js +155 -0
  57. package/src/index.js +1 -0
  58. package/src/utils/version.js +1 -1
@@ -0,0 +1,87 @@
1
+ # Phase-boundary contracts for multi-model coding workflows
2
+
3
+ Use this when a coding workflow routes work through phases such as **Explore → Propose → Spec → Design → Tasks → Apply → Verify**, especially when different tools or models handle different phases.
4
+
5
+ The problem is not only “which model is best for this step”. The failure mode is handoff: a plan agent burns context, a build agent receives a lossy summary, a verifier cannot tell which decisions are current, and stale assumptions leak from one phase into the next.
6
+
7
+ A phase-boundary contract makes every transition explicit:
8
+
9
+ - what input context was allowed into the phase;
10
+ - what artifact the phase had to produce;
11
+ - what evidence is required before the next phase may start;
12
+ - what context must not be carried forward;
13
+ - which stop conditions require human review or a fresh phase run.
14
+
15
+ This keeps Pluribus out of the orchestration layer. The workflow runner can be OpenCode, Claude Code, Cursor, OpenClaw, Codex, a local script, or a human checklist. Pluribus supplies the evidence shape.
16
+
17
+ ## Contract shape
18
+
19
+ ```json
20
+ {
21
+ "schema": "pluribus.phase-boundary-contract.v1",
22
+ "workflowId": "checkout-refactor-2026-06-03",
23
+ "currentPhase": "apply",
24
+ "nextPhase": "verify",
25
+ "allowedInput": [
26
+ {
27
+ "kind": "approved_plan",
28
+ "ref": "plans/checkout-refactor.md",
29
+ "contentHash": "sha256:...",
30
+ "required": true
31
+ }
32
+ ],
33
+ "outputArtifact": {
34
+ "kind": "patch",
35
+ "ref": "git:working-tree",
36
+ "contentHash": "sha256:..."
37
+ },
38
+ "evidenceGate": {
39
+ "requiredBeforeNextPhase": ["changed_files", "tests_run", "open_risks", "stop_conditions"],
40
+ "status": "pass"
41
+ },
42
+ "droppedContext": [
43
+ {
44
+ "kind": "exploration_transcript",
45
+ "reason": "not authoritative after approved plan"
46
+ }
47
+ ],
48
+ "stopConditions": []
49
+ }
50
+ ```
51
+
52
+ ## Minimum fields
53
+
54
+ | Field | Why it exists |
55
+ | --- | --- |
56
+ | `workflowId` | Correlates phase records without storing a transcript. |
57
+ | `currentPhase` / `nextPhase` | Makes the handoff boundary explicit. |
58
+ | `allowedInput[]` | Prevents the next model from inheriting stale scratch context accidentally. |
59
+ | `outputArtifact` | Names the thing this phase produced: plan, spec, task list, patch, review, or verification report. |
60
+ | `evidenceGate.requiredBeforeNextPhase[]` | Forces the phase to prove the minimum facts the next phase depends on. |
61
+ | `droppedContext[]` | Records what intentionally did **not** cross the boundary. |
62
+ | `stopConditions[]` | Lets the workflow stop instead of laundering uncertainty into the next model. |
63
+
64
+ ## Apply → Verify is the strictest boundary
65
+
66
+ For coding workflows, the most useful hard gate is often between Apply and Verify. The verifier should receive a compact evidence packet, not a vague “I implemented it” summary:
67
+
68
+ - decision implemented;
69
+ - source/plan hash used;
70
+ - changed files or file-set hash;
71
+ - tests/commands run with pass/fail state;
72
+ - open risks and skipped checks;
73
+ - whether secrets, schema migrations, data writes, or external calls were touched;
74
+ - explicit stop condition if verification cannot be trusted.
75
+
76
+ ## Privacy boundary
77
+
78
+ Do not put raw source, prompts, transcripts, secrets, full command output, absolute local paths, or customer data in the contract. Use stable refs, hashes, counts, risk classes, and short non-secret labels.
79
+
80
+ ## Try it
81
+
82
+ ```bash
83
+ cd examples/phase-boundary-contract
84
+ node check-phase-boundary.mjs phase-boundary-contract.json
85
+ ```
86
+
87
+ The checker is intentionally small. It is a copyable acceptance test for workflow builders: if a phase handoff cannot pass this gate, the next model should not pretend it has reliable state.
@@ -77,6 +77,8 @@ The copyable demo in [`examples/review-primitive-gate/`](../examples/review-prim
77
77
 
78
78
  If you use Claude Code hooks, the [`examples/claude-code-review-hook/`](../examples/claude-code-review-hook/) bridge shows how to run the same gate from `TaskCompleted`, `PostCompact`, or `SessionEnd` without logging raw prompts, transcripts, tool output, source code, or secrets.
79
79
 
80
+ If you review AI-authored pull requests, the [`examples/ai-pr-review-receipts/`](../examples/ai-pr-review-receipts/) recipe shows the same gate as a GitHub Actions merge/check primitive for PR blast-radius evidence.
81
+
80
82
  ```bash
81
83
  node examples/review-primitive-gate/check-review-receipt.mjs \
82
84
  examples/review-primitive-gate/pass-review-receipt.json
@@ -0,0 +1,102 @@
1
+ # Skill install/load receipts
2
+
3
+ Privacy-safe receipts for answering a narrow setup question:
4
+
5
+ > After a skill installer ran, which agent targets can actually discover and load the installed skill, and what context budget did the install create?
6
+
7
+ This is not a skill marketplace, package manager, or telemetry backend. Use this receipt next to tools such as `npx skills add`, team setup scripts, Claude Code plugins, Codex/Cursor/OpenClaw skill folders, or internal bootstrap scripts when the risky part is crossing several boundaries at once:
8
+
9
+ 1. the installer selected a source package/ref;
10
+ 2. files were written into project or global skill roots;
11
+ 3. each target agent discovered the installed manifest/resource;
12
+ 4. the runtime either injected/read the skill on activation or deferred/skipped it; and
13
+ 5. the new always-loaded or advertised context cost did not make the first session unsafe.
14
+
15
+ The receipt should stay reviewable without raw skill bodies, private paths, prompts, transcripts, environment dumps, secrets, tool outputs, or customer data.
16
+
17
+ ## When to use it
18
+
19
+ Use a skill install/load receipt when:
20
+
21
+ - a setup command installs the same Skill into Claude Code, Codex, Cursor, OpenClaw, Zed/ACP, or another agent client;
22
+ - a plugin/installer claims "cross-agent" support but you need proof per target;
23
+ - a user says the Skill exists on disk but the runtime does not use it;
24
+ - an installer adds MCP, hooks, rules, commands, or skill folders and could increase startup context cost; or
25
+ - CI/review needs a compact proof that install succeeded without dumping the installed content.
26
+
27
+ For mutation planning before any writes begin, use [install-plan receipts](install-plan-receipts.md). For runtime-only debugging where the file already exists but disappears in ACP/Zed/CLI/chat, use [loaded-resource boundary receipts](loaded-resource-boundary.md). This receipt sits between them: installer result + per-target discovery/load/budget proof.
28
+
29
+ ## Minimum contract
30
+
31
+ ```json
32
+ {
33
+ "receipt_type": "agent.skill_install_receipt.v1",
34
+ "run_id": "skill-install-demo-001",
35
+ "installer": {
36
+ "name": "skills-cli",
37
+ "command_class": "skill_package_install",
38
+ "source": {
39
+ "kind": "git_ref",
40
+ "package": "vercel-labs/skills/context-budget-preflight",
41
+ "ref": "sha256:source-package-hash"
42
+ }
43
+ },
44
+ "mode_effective": "post_install_check",
45
+ "writes_completed": true,
46
+ "targets": [
47
+ {
48
+ "agent": "claude-code",
49
+ "scope": "project",
50
+ "required": true,
51
+ "install_status": "installed",
52
+ "discovery_status": "discovered",
53
+ "load_status": "activation_required",
54
+ "activation": "on_demand_skill_description",
55
+ "context_cost_bucket": "0-1k",
56
+ "safe_to_start_session": true
57
+ }
58
+ ],
59
+ "overall_safe_to_start_session": true,
60
+ "privacy_exclusions": ["raw_skill_body", "raw_prompt", "transcript", "secrets", "env_dump", "private_absolute_path"]
61
+ }
62
+ ```
63
+
64
+ ## Fields that matter most
65
+
66
+ - `installer.source` — package/ref/hash identity without embedded credentials.
67
+ - `targets[].agent` and `targets[].scope` — which client and project/global root were targeted.
68
+ - `targets[].install_status` — `installed`, `skipped`, or `failed`.
69
+ - `targets[].discovery_status` — whether the target client can discover the installed manifest/resource.
70
+ - `targets[].load_status` — `injected`, `readable`, `activation_required`, `deferred`, `not_tested`, or `failed`.
71
+ - `targets[].context_cost_bucket` — coarse estimate such as `0-1k`, `1k-5k`, `5k-20k`, `over_budget`, or `unknown`; do not log raw schemas or skill text.
72
+ - `targets[].safe_to_start_session` — false if a required target failed install/discovery, the runtime load test failed, or budget is already over cap.
73
+ - `overall_safe_to_start_session` — false unless every required target is safe.
74
+
75
+ ## Copyable smoke test
76
+
77
+ ```bash
78
+ node examples/skill-install-receipts/check-skill-install-receipt.mjs \
79
+ examples/skill-install-receipts/skill-install-receipt.json
80
+ ```
81
+
82
+ Expected output:
83
+
84
+ ```text
85
+ skill install receipt ok: 3 targets checked
86
+ ```
87
+
88
+ ## What this proves / does not prove
89
+
90
+ Proves:
91
+
92
+ - the installer wrote or skipped the intended targets;
93
+ - each required target has an explicit discovery/load status;
94
+ - context cost is bucketed before a session starts; and
95
+ - raw skill/source content stayed out of the receipt.
96
+
97
+ Does not prove:
98
+
99
+ - the Skill is semantically good;
100
+ - the agent will choose the Skill for every matching task;
101
+ - the source package is trustworthy beyond the pinned ref/hash; or
102
+ - how the runtime behaves in clients not listed in `targets`.
@@ -0,0 +1,104 @@
1
+ # Skill use-rate receipts
2
+
3
+ Agent Skill installers are getting good at the first boundary: download a Skill and attach it to Claude Code, Cursor, Codex, OpenCode, or another harness. The next boundary is harder: **installed is not used**.
4
+
5
+ A skill use-rate receipt is a privacy-safe record that separates four states:
6
+
7
+ 1. **Discovered** — the installer/catalog found the Skill.
8
+ 2. **Installed/attached** — files or symlinks were written for a target agent/scope.
9
+ 3. **Invoked** — a session actually selected or loaded the Skill.
10
+ 4. **Useful enough to keep** — the Skill affected a reviewed action, check, or decision in a defined window.
11
+
12
+ This matters when a team installs many Skills, plugins, commands, or subagents and later cannot tell which ones are just prompt clutter. The receipt should prove lifecycle state and usage counters without logging raw Skill bodies, prompts, source code, transcripts, or tool output.
13
+
14
+ ## Minimal receipt shape
15
+
16
+ ```json
17
+ {
18
+ "schema": "pluribus.skill_use_rate_receipt.v1",
19
+ "run_id": "skills-audit-2026-06-05T13:00Z",
20
+ "generated_at": "2026-06-05T13:00:00Z",
21
+ "installer": {
22
+ "name": "skills",
23
+ "version": "1.5.9",
24
+ "command_digest": "sha256:..."
25
+ },
26
+ "window": {
27
+ "started_at": "2026-05-22T00:00:00Z",
28
+ "ended_at": "2026-06-05T13:00:00Z"
29
+ },
30
+ "skills": [
31
+ {
32
+ "skill_id": "frontend-design",
33
+ "source_ref": "github:vercel-labs/agent-skills/skills/frontend-design@main",
34
+ "target_agent": "claude-code",
35
+ "scope": "project",
36
+ "install_method": "symlink",
37
+ "discovered": true,
38
+ "installed": true,
39
+ "attached": true,
40
+ "invoked_count": 7,
41
+ "acted_on_count": 3,
42
+ "last_invoked_at": "2026-06-05T10:12:08Z",
43
+ "unused_since_install": false,
44
+ "context_cost_bucket": "small",
45
+ "evidence": [
46
+ {
47
+ "kind": "session_log_digest",
48
+ "ref": "sha256:0b7d..."
49
+ }
50
+ ]
51
+ }
52
+ ]
53
+ }
54
+ ```
55
+
56
+ ## Evaluation questions
57
+
58
+ Use this receipt to ask:
59
+
60
+ - Which Skills are installed but never discovered by the harness?
61
+ - Which Skills are discoverable but never invoked?
62
+ - Which Skills are invoked but never acted on?
63
+ - Which Skills are globally installed but only useful in one project?
64
+ - Which Skills should be detached, narrowed, or promoted to a hard policy/check?
65
+
66
+ ## Privacy boundary
67
+
68
+ Do record:
69
+
70
+ - source refs and commit/tag when available;
71
+ - target agent and scope;
72
+ - install method (`copy`, `symlink`, generated file, ephemeral use);
73
+ - boolean lifecycle states;
74
+ - invocation and acted-on counters;
75
+ - timestamps, hashes, and non-sensitive evidence refs.
76
+
77
+ Do **not** record:
78
+
79
+ - full Skill Markdown bodies;
80
+ - raw user prompts or transcripts;
81
+ - source code or tool output;
82
+ - private file paths beyond a reviewed alias;
83
+ - secrets, tokens, customer data, or unredacted environment values.
84
+
85
+ ## Copyable checker
86
+
87
+ The [skill use-rate receipt example](../examples/skill-use-rate-receipts/) includes a small checker that validates required lifecycle fields and prints installed-but-unused Skills as review warnings rather than pretending installation equals adoption.
88
+
89
+ ```bash
90
+ node examples/skill-use-rate-receipts/check-skill-use-rate.mjs \
91
+ examples/skill-use-rate-receipts/skill-use-rate-receipt.json
92
+ ```
93
+
94
+ Expected output:
95
+
96
+ ```text
97
+ skill use-rate receipt ok: 3 skills checked, 1 unused install warning
98
+ ```
99
+
100
+ ## Where this fits
101
+
102
+ This is adjacent to [Skill install/load receipts](skill-install-receipts.md), but it answers a different question. Install/load receipts decide whether it is safe to start a session after an installer runs. Skill use-rate receipts decide whether a Skill actually earned its place after real sessions.
103
+
104
+ The market signal behind this is current Skill/plugin consolidation pressure: teams can install many prompt resources, but the useful metric is not package count. It is `invoked / installed` and, when possible, `acted_on / invoked` over a reviewable window.
@@ -0,0 +1,14 @@
1
+ # Agent firewall denial/audit example
2
+
3
+ This example turns an agent-firewall block into two privacy-safe artifacts:
4
+
5
+ - `denial-envelope.json` — what the model is allowed to see.
6
+ - `operator-audit-record.json` — what the operator/dashboard/CI can audit.
7
+
8
+ Run the checker:
9
+
10
+ ```bash
11
+ node check-denial-audit.mjs .
12
+ ```
13
+
14
+ The checker enforces the core invariant: blocks should be structured enough for the model to stop or ask for approval, but not detailed enough to reveal raw commands, secrets, private paths, or bypassable policy internals.
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env node
2
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
4
+
5
// Directory holding the two fixtures: the first CLI argument, otherwise the
// directory this script lives in. fileURLToPath is used instead of
// URL#pathname because pathname keeps percent-escapes (a space stays "%20")
// and produces "/C:/..." on Windows, neither of which is a usable path.
const dir = process.argv[2] || fileURLToPath(new URL('.', import.meta.url));
const envelopePath = path.join(dir, 'denial-envelope.json');
const auditPath = path.join(dir, 'operator-audit-record.json');

// Both fixtures are read synchronously and parsed up front; a missing file or
// malformed JSON throws here, before any validation runs.
const envelope = JSON.parse(fs.readFileSync(envelopePath, 'utf8'));
const audit = JSON.parse(fs.readFileSync(auditPath, 'utf8'));

// Validation failures are accumulated here and reported together at the end.
const errors = [];
11
+
12
// Strings that look like credentials or verbatim private payloads.
const secretLike = /(api[_-]?key|secret|password|token\s*[:=]|-----BEGIN|bearer\s+[a-z0-9._-]+|raw transcript|verbatim customer|full email)/i;

// Strings that look like a literal shell command. The trailing \b applies only
// to alternatives that end in a word character: "curl https:" ends in ":" and
// is followed by "/", so a \b after it could never match and curl commands
// would go undetected.
const rawCommandLike = /\b(?:(?:rm\s+-rf|git\s+push|git\s+reset|npm\s+publish|gh\s+(?:issue|pr)\s+(?:create|edit))\b|curl\s+https?:)/i;

// Absolute paths under common private roots, at the start of the string or
// after a quote/whitespace.
const absolutePathLike = /(^|["'\s])\/(home|Users|var|etc|tmp)\/[\w./-]+/i;

// "sha256:" followed by exactly 64 lowercase hex characters.
const sha256Like = /^sha256:[a-f0-9]{64}$/;

// Coarse reason classes the model is allowed to see.
const allowedReasonClasses = new Set([
  'destructive_git',
  'filesystem_write_out_of_scope',
  'outbound_after_secret_read',
  'credential_exposure_risk',
  'package_publish_requires_approval',
  'unknown_policy_boundary'
]);

// Permitted values for the retrySafety field on both records.
const retrySafety = new Set(['safe_to_retry', 'unsafe_until_approved', 'unsafe_do_not_retry']);
25
+
26
/**
 * Read a required, non-blank string field from a parsed record.
 *
 * @param {object} object record to read from
 * @param {string} field  property name that must hold a non-blank string
 * @param {string} label  record name used as the error prefix
 * @returns {string} the field value, or '' after recording
 *   "<label>: missing <field>" when the value is absent, not a string,
 *   or whitespace-only
 */
function requireString(object, field, label) {
  const candidate = object[field];
  const usable = typeof candidate === 'string' && candidate.trim() !== '';
  if (usable) return candidate;

  errors.push(`${label}: missing ${field}`);
  return '';
}
33
+
34
/**
 * Recursively scan everything the model will see and record an error for any
 * string that looks like a secret, a raw command, or an absolute private path,
 * and for any object key whose name suggests a raw/secret payload.
 *
 * @param {*} value       value to scan (string, array, object, or other)
 * @param {string} prefix dotted/indexed location used in error messages
 */
function inspectModelVisible(value, prefix = 'envelope') {
  if (typeof value === 'string') {
    // Allow-listed reason classes are fixed tokens, not payload. Without this
    // exemption "outbound_after_secret_read" is always reported as a leak,
    // because secretLike matches the bare word "secret".
    if (allowedReasonClasses.has(value)) return;
    if (secretLike.test(value)) errors.push(`${prefix}: possible secret/private payload leak`);
    if (rawCommandLike.test(value)) errors.push(`${prefix}: raw command leaked to model-visible denial`);
    if (absolutePathLike.test(value)) errors.push(`${prefix}: absolute private path leaked to model-visible denial`);
    return;
  }
  if (Array.isArray(value)) {
    value.forEach((item, index) => inspectModelVisible(item, `${prefix}[${index}]`));
    return;
  }
  if (value && typeof value === 'object') {
    for (const [key, item] of Object.entries(value)) {
      // The key name alone is enough to reject the field; the value is still
      // scanned so every problem is reported in one run.
      if (/raw(command|input|prompt|policy|file|content)|secret|token|password/i.test(key)) {
        errors.push(`${prefix}.${key}: forbidden model-visible field`);
      }
      inspectModelVisible(item, `${prefix}.${key}`);
    }
  }
  // Numbers, booleans and null carry no text and are ignored.
}
54
+
55
// Both records must carry their versioned type tag and a "blocked" decision.
const headerChecks = [
  [envelope.type !== 'agent_firewall_denial.v1', 'envelope: wrong type'],
  [audit.type !== 'agent_firewall_operator_audit.v1', 'audit: wrong type'],
  [envelope.decision !== 'blocked', 'envelope: decision must be blocked'],
  [audit.decision !== 'blocked', 'audit: decision must be blocked']
];
for (const [failed, message] of headerChecks) {
  if (failed) errors.push(message);
}

// The correlation id is the only link between the model-visible envelope and
// the operator audit record, so the two must agree.
const correlationId = requireString(envelope, 'correlationId', 'envelope');
if (audit.correlationId !== correlationId) {
  errors.push('audit: correlationId does not match envelope');
}

// The reason class must come from the coarse allow-list; a missing value has
// already been reported by requireString and is not reported twice.
const reasonClass = requireString(envelope, 'reasonClass', 'envelope');
if (reasonClass !== '' && !allowedReasonClasses.has(reasonClass)) {
  errors.push(`envelope: unknown or too-specific reasonClass ${reasonClass}`);
}

// Remaining envelope fields: a short safe alternative, a boolean approval
// flag, and a retrySafety value from the permitted set.
const alternative = requireString(envelope, 'safeAlternative', 'envelope');
if (alternative.length > 240) {
  errors.push('envelope: safeAlternative should stay short');
}
if (typeof envelope.requiresApproval !== 'boolean') {
  errors.push('envelope: requiresApproval must be boolean');
}
if (!retrySafety.has(envelope.retrySafety)) {
  errors.push('envelope: invalid retrySafety');
}

// Finally scan every model-visible value and key for leaks.
inspectModelVisible(envelope);
73
+
74
// True only for real strings of the form sha256:<64 hex>. The typeof guard
// matters because RegExp#test coerces its argument, so a one-element array
// such as ["sha256:..."] would otherwise pass.
const isSha256 = (value) => typeof value === 'string' && sha256Like.test(value);

// True for non-null, non-array objects; arrays are typeof 'object' too but
// are not acceptable where a keyed record is expected.
const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

// The audit record names a coarse tool class, never the command itself.
if (!['Bash', 'Edit', 'Write', 'WebFetch', 'MCP', 'Task', 'Agent'].includes(audit.tool)) {
  errors.push('audit: unexpected or missing tool class');
}

// Command and working directory are recorded as hashes only.
if (!isSha256(audit.commandHash)) errors.push('audit: commandHash must be sha256:<64 hex>');
if (!isSha256(audit.cwdHash)) errors.push('audit: cwdHash must be sha256:<64 hex>');

if (!Array.isArray(audit.matchedPolicyIds) || audit.matchedPolicyIds.length === 0) {
  errors.push('audit: matchedPolicyIds must be a non-empty array');
}
if (!isRecord(audit.sessionTaint)) errors.push('audit: missing sessionTaint object');
if (!isRecord(audit.approval)) errors.push('audit: missing approval object');
if (!retrySafety.has(audit.retrySafety)) errors.push('audit: invalid retrySafety');

// Only the format of the envelope hash is checked; it is not recomputed from
// the envelope file here.
if (!isSha256(audit.modelEnvelopeHash)) {
  errors.push('audit: modelEnvelopeHash must be sha256:<64 hex>');
}
88
+
89
/**
 * Recursively scan the operator audit record for string values that look like
 * a raw secret assignment or a verbatim private payload. Unlike the
 * model-visible scan, bare words such as "secret" are allowed here; only
 * "key: value"-style assignments and payload markers are rejected.
 *
 * @param {*} value       value to scan (string, array, object, or other)
 * @param {string} prefix dotted/indexed location used in error messages
 */
function inspectAuditValues(value, prefix = 'audit') {
  const rawPayloadLike = /(api[_-]?key\s*[:=]|secret\s*[:=]|password\s*[:=]|token\s*[:=]|-----BEGIN|bearer\s+[a-z0-9._-]+|raw transcript|verbatim customer|full email)/i;

  if (typeof value === 'string') {
    if (rawPayloadLike.test(value)) {
      errors.push(`${prefix}: possible raw secret/private payload`);
    }
    return;
  }

  if (Array.isArray(value)) {
    value.forEach((entry, position) => {
      inspectAuditValues(entry, `${prefix}[${position}]`);
    });
    return;
  }

  // Plain objects: descend into each own enumerable property.
  if (value !== null && typeof value === 'object') {
    Object.keys(value).forEach((name) => {
      inspectAuditValues(value[name], `${prefix}.${name}`);
    });
  }
}
104
+
105
// Scan audit values, then reject top-level fields that would carry raw
// private payloads regardless of their content.
inspectAuditValues(audit);
const forbiddenAuditFields = ['rawCommand', 'rawPrompt', 'rawPolicy', 'rawFileContent'];
if (forbiddenAuditFields.some((field) => field in audit)) {
  errors.push('audit: raw private payload fields are not allowed');
}

// Report every collected failure at once and exit non-zero so CI or a hook
// can gate on the result.
if (errors.length > 0) {
  console.error(`agent firewall denial/audit failed (${errors.length}):`);
  errors.forEach((error) => console.error(`- ${error}`));
  process.exit(1);
}

console.log(`agent firewall denial/audit ok: ${correlationId}, ${reasonClass}, ${audit.matchedPolicyIds.length} policy id(s)`);
@@ -0,0 +1,9 @@
1
+ {
2
+ "type": "agent_firewall_denial.v1",
3
+ "decision": "blocked",
4
+ "reasonClass": "destructive_git",
5
+ "requiresApproval": true,
6
+ "safeAlternative": "Explain the planned git operation and wait for explicit approval.",
7
+ "retrySafety": "unsafe_until_approved",
8
+ "correlationId": "deny_2026_06_02_2200_7f3a"
9
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "type": "agent_firewall_operator_audit.v1",
3
+ "decision": "blocked",
4
+ "correlationId": "deny_2026_06_02_2200_7f3a",
5
+ "tool": "Bash",
6
+ "commandHash": "sha256:0e5751c026e543b2a6f2b4d7a7c8d8e5b81b69c5b9f7db2a5b94f31f987e7f44",
7
+ "cwdHash": "sha256:dcdb704109a454784b81229d2b05f368692e758bfa33cb61d04c1b93791b0273",
8
+ "matchedPolicyIds": ["git.destructive.requires_approval"],
9
+ "sessionTaint": {
10
+ "secretRead": false,
11
+ "privateFileRead": true,
12
+ "networkAccessed": false
13
+ },
14
+ "approval": {
15
+ "state": "missing",
16
+ "requiredFrom": "operator"
17
+ },
18
+ "retrySafety": "unsafe_until_approved",
19
+ "modelEnvelopeHash": "sha256:a1bcaa1cb2572ab0e735c30062a268391d0a9d1b3dd7ff4b14065d8b29513b2a"
20
+ }
@@ -0,0 +1,25 @@
1
# Fails a pull request unless a privacy-safe AI review receipt is present and
# passes the pluribus-context review-primitive checker.
name: AI PR review receipt gate

on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]

# The gate only reads the repository; it needs no write scopes.
permissions:
  contents: read

jobs:
  review-receipt:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Put your agent/bot-generated receipt at this path, or adjust RECEIPT_PATH.
      # Keep the receipt privacy-safe: no raw prompts, transcripts, source code,
      # tool output, secrets, stack traces, or customer data.
      - name: Validate AI PR review receipt
        env:
          RECEIPT_PATH: artifacts/review-primitive-receipt.json
          # Pin the checker so the merge gate is reproducible and cannot change
          # behavior when a new package version is published. Bump deliberately.
          PLURIBUS_VERSION: "0.3.36"
        run: |
          test -f "$RECEIPT_PATH"
          npm install --no-save --ignore-scripts "pluribus-context@${PLURIBUS_VERSION}"
          node node_modules/pluribus-context/examples/review-primitive-gate/check-review-receipt.mjs "$RECEIPT_PATH"
@@ -1,5 +1,55 @@
1
1
  # AI PR review receipts example
2
2
 
3
- This example contains a copyable GitHub PR template for agent-generated or agent-modified pull requests.
3
+ This example contains a copyable GitHub PR template and CI gate for agent-generated or agent-modified pull requests.
4
4
 
5
5
  Use it when review risk depends on blast radius: schema/data contracts, async paths, rollout gates, external side effects, generated/public interfaces, or security-sensitive config.
6
+
7
+ The point is not to make every AI PR small. It is to make the risky boundaries reviewable enough that CI or a maintainer can decide: merge, route to a human owner, or stop.
8
+
9
+ ## Files
10
+
11
+ - [`.github/pull_request_template.md`](.github/pull_request_template.md) — human-readable PR body section for blast-radius review.
12
+ - [`.github/workflows/ai-pr-review-receipt.yml`](.github/workflows/ai-pr-review-receipt.yml) — copyable GitHub Actions gate that validates a machine-readable receipt.
13
+ - [`review-primitive-receipt.json`](review-primitive-receipt.json) — passing receipt fixture.
14
+ - [`incomplete-review-primitive-receipt.json`](incomplete-review-primitive-receipt.json) — failing fixture for partial/unsafe evidence.
15
+
16
+ ## 60-second local smoke
17
+
18
+ From the repository root:
19
+
20
+ ```bash
21
+ node examples/review-primitive-gate/check-review-receipt.mjs \
22
+ examples/ai-pr-review-receipts/review-primitive-receipt.json
23
+ ```
24
+
25
+ Expected: `ok: true`.
26
+
27
+ Then run the incomplete fixture:
28
+
29
+ ```bash
30
+ node examples/review-primitive-gate/check-review-receipt.mjs \
31
+ examples/ai-pr-review-receipts/incomplete-review-primitive-receipt.json
32
+ ```
33
+
34
+ Expected: non-zero exit. The failure is intentional: unapproved scope change, skipped required test, missing evidence, and `partial` resume state should not silently pass a merge gate.
35
+
36
+ ## GitHub Actions usage
37
+
38
+ 1. Copy `.github/workflows/ai-pr-review-receipt.yml` into your repo.
39
+ 2. Have your Claude Code / Codex / Cursor / OpenClaw / review bot emit a privacy-safe receipt at `artifacts/review-primitive-receipt.json`.
40
+ 3. Keep raw prompts, transcripts, source code, secrets, stack traces, customer data, and raw tool output out of the receipt.
41
+ 4. Let the workflow fail if the receipt is partial, unsafe, missing evidence, or outside approved boundaries.
42
+
43
+ The template and JSON receipt can be used together: the PR body explains the blast radius to humans, while the JSON receipt gives CI a hard decision primitive.
44
+
45
+ ## Why this exists
46
+
47
+ Large AI PRs are not automatically unsafe, and small PRs are not automatically reviewable. Diff size is a proxy. This receipt makes the underlying question explicit:
48
+
49
+ - Which assignment did the agent accept?
50
+ - What read/write boundaries were approved?
51
+ - Did scope or access change mid-run, and was it approved?
52
+ - Which required checks actually ran, with evidence?
53
+ - Did the agent refuse unsafe operations?
54
+ - Is the handoff `complete`, `partial`, or `unsafe-to-resume`?
55
+ - What is the next safe action for the reviewer?
@@ -0,0 +1,43 @@
1
+ {
2
+ "type": "agent.review_primitive_receipt.v1",
3
+ "assignment_id": "pr-483-agent-review",
4
+ "run_id": "run-2026-05-31T23-10Z",
5
+ "agent": {
6
+ "tool": "claude-code-github-actions",
7
+ "role": "pr-reviewer"
8
+ },
9
+ "approved_boundaries": {
10
+ "read": ["src/auth/**", "tests/auth/**"],
11
+ "write": ["tests/auth/**"],
12
+ "network": false
13
+ },
14
+ "scope_access_changes": [
15
+ {
16
+ "change": "write src/auth/session.ts",
17
+ "reason": "agent attempted to patch production auth code during review",
18
+ "approved": false,
19
+ "approved_by": ""
20
+ }
21
+ ],
22
+ "commands_and_checks": [
23
+ {
24
+ "name": "npm test -- tests/auth",
25
+ "kind": "required_test",
26
+ "status": "skipped",
27
+ "evidence": "not-run"
28
+ }
29
+ ],
30
+ "refused_operations": [],
31
+ "handoff": {
32
+ "changed_files_bucket": "under_10",
33
+ "evidence_path": "artifacts/pr-483/review-primitive-receipt.json",
34
+ "next_safe_action": "human must inspect attempted auth write and run required tests before merge"
35
+ },
36
+ "resume_state": "partial",
37
+ "privacy": {
38
+ "raw_prompts_logged": false,
39
+ "raw_tool_output_logged": false,
40
+ "source_code_logged": false,
41
+ "secrets_logged": false
42
+ }
43
+ }
@@ -0,0 +1,60 @@
1
+ {
2
+ "type": "agent.review_primitive_receipt.v1",
3
+ "assignment_id": "pr-482-agent-review",
4
+ "run_id": "run-2026-05-31T23-00Z",
5
+ "agent": {
6
+ "tool": "claude-code-github-actions",
7
+ "role": "pr-reviewer"
8
+ },
9
+ "approved_boundaries": {
10
+ "read": ["src/billing/**", "tests/billing/**", "docs/rollout/**"],
11
+ "write": ["tests/billing/**", "docs/review-receipts/**"],
12
+ "network": false
13
+ },
14
+ "scope_access_changes": [
15
+ {
16
+ "change": "read docs/rollout/**",
17
+ "reason": "verify feature-flag and rollback evidence for the PR receipt",
18
+ "approved": true,
19
+ "approved_by": "maintainer"
20
+ }
21
+ ],
22
+ "commands_and_checks": [
23
+ {
24
+ "name": "npm test -- tests/billing",
25
+ "kind": "required_test",
26
+ "status": "passed",
27
+ "evidence": "https://github.com/example/repo/actions/runs/123#billing-tests"
28
+ },
29
+ {
30
+ "name": "npm run lint",
31
+ "kind": "required_check",
32
+ "status": "passed",
33
+ "evidence": "https://github.com/example/repo/actions/runs/123#lint"
34
+ },
35
+ {
36
+ "name": "migration rollback smoke",
37
+ "kind": "required_check",
38
+ "status": "passed",
39
+ "evidence": "artifacts/pr-482/rollback-smoke.txt"
40
+ }
41
+ ],
42
+ "refused_operations": [
43
+ {
44
+ "operation": "write src/billing/charge-customer.ts",
45
+ "reason": "outside approved write boundary; reviewer requested tests/docs only"
46
+ }
47
+ ],
48
+ "handoff": {
49
+ "changed_files_bucket": "under_5",
50
+ "evidence_path": "artifacts/pr-482/review-primitive-receipt.json",
51
+ "next_safe_action": "review billing test assertions and rollback evidence before merge"
52
+ },
53
+ "resume_state": "complete",
54
+ "privacy": {
55
+ "raw_prompts_logged": false,
56
+ "raw_tool_output_logged": false,
57
+ "source_code_logged": false,
58
+ "secrets_logged": false
59
+ }
60
+ }
@@ -0,0 +1,12 @@
1
+ # Compaction resume receipt gate
2
+
3
+ This example validates a privacy-safe receipt for `PostCompact`, `SessionStart(compact)`, or any workflow that resumes an AI coding session after summarization.
4
+
5
+ Run:
6
+
7
+ ```bash
8
+ node check-resume-receipt.mjs safe-resume-receipt.json
9
+ node check-resume-receipt.mjs unsafe-resume-receipt.json
10
+ ```
11
+
12
+ Use it as a tiny CI/hook check before an agent continues work after compaction. The receipt records hashes, refs, and verdicts — not raw transcripts or raw instruction bodies.