pluribus-context 0.3.40 → 0.3.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +3 -2
  3. package/bin/pluribus.js +3 -1
  4. package/docs/.nojekyll +0 -0
  5. package/docs/.well-known/agent-skills/context-receipts/SKILL.md +206 -0
  6. package/docs/.well-known/agent-skills/index.json +19 -0
  7. package/docs/.well-known/agent-skills/skill-policy-receipts/SKILL.md +77 -0
  8. package/docs/agent-surface-proof-chain.md +176 -0
  9. package/docs/cursor-claude-context-handoff.md +68 -0
  10. package/docs/index.html +38 -0
  11. package/docs/receipt-playground.html +304 -0
  12. package/docs/session-preflight-receipts.md +77 -0
  13. package/examples/claude-md-read-receipts/README.md +70 -0
  14. package/examples/claude-md-read-receipts/check-read-receipt.mjs +119 -0
  15. package/examples/claude-md-read-receipts/sample-read-receipt.json +45 -0
  16. package/examples/claude-md-read-receipts/stale-read-receipt.json +18 -0
  17. package/examples/context-attention-receipts/README.md +41 -0
  18. package/examples/context-attention-receipts/attention-receipt-fail.json +49 -0
  19. package/examples/context-attention-receipts/attention-receipt-pass.json +53 -0
  20. package/examples/context-attention-receipts/check-attention-receipt.mjs +97 -0
  21. package/examples/context-sufficiency-trace/README.md +22 -0
  22. package/examples/context-sufficiency-trace/check-context-sufficiency.mjs +40 -0
  23. package/examples/context-sufficiency-trace/context-trace-pass.json +28 -0
  24. package/examples/context-sufficiency-trace/context-trace.json +47 -0
  25. package/examples/context-sufficiency-trace/ground-truth.json +13 -0
  26. package/examples/provider-degradation-canaries/README.md +79 -0
  27. package/examples/provider-degradation-canaries/check-degradation-receipt.mjs +64 -0
  28. package/examples/provider-degradation-canaries/healthy-decision.json +27 -0
  29. package/examples/provider-degradation-canaries/unsafe-write-decision.json +26 -0
  30. package/examples/semantic-anchor-receipts/README.md +49 -0
  31. package/examples/semantic-anchor-receipts/check-semantic-anchors.mjs +153 -0
  32. package/examples/semantic-anchor-receipts/cleaned-paste.md +17 -0
  33. package/examples/semantic-anchor-receipts/original-paste.md +19 -0
  34. package/examples/semantic-anchor-receipts/sample-receipt.json +62 -0
  35. package/examples/session-preflight-receipts/README.md +25 -0
  36. package/examples/session-preflight-receipts/session-preflight-receipt.json +39 -0
  37. package/examples/session-preflight-receipts/session-preflight.mdc +18 -0
  38. package/examples/task-scoped-mcp-config/README.md +60 -0
  39. package/examples/task-scoped-mcp-config/mcp-catalog.json +46 -0
  40. package/examples/task-scoped-mcp-config/select-mcp-config.mjs +64 -0
  41. package/examples/task-scoped-mcp-config/tasks/browser-debug.json +7 -0
  42. package/package.json +5 -2
  43. package/skills/context-receipts/README.md +13 -2
  44. package/skills/context-receipts/SKILL.md +65 -0
  45. package/src/commands/demo.js +81 -1
  46. package/src/utils/version.js +1 -1
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env node
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+
5
// Session states a receipt is allowed to declare.
const VALID_STATES = new Set([
  'fresh',
  'compacted',
  'topic_switched',
  'resumed',
]);

// States for which validate() additionally requires a topic authority/spec/doc source.
const REGROUNDING_STATES = new Set([
  'compacted',
  'topic_switched',
  'resumed',
]);
7
+
8
/**
 * Parse the command-line arguments of the receipt checker.
 *
 * Recognized flags are `--receipt <path>` and `--help` / `-h`.
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {{receipt?: string, help?: boolean}} Parsed options.
 * @throws {Error} On an unknown flag, or when `--receipt` is not followed by a path.
 */
function parseArgs(argv) {
  const knownFlags = new Set(['--receipt', '--help', '-h']);
  const args = {};
  for (let i = 0; i < argv.length; i += 1) {
    const key = argv[i];
    if (key === '--help' || key === '-h') {
      args.help = true;
      continue;
    }
    if (key === '--receipt') {
      const value = argv[i + 1];
      // Without this guard `--receipt --help` would treat "--help" as the file path,
      // and a trailing `--receipt` would silently leave the path undefined.
      if (value === undefined || knownFlags.has(value)) {
        throw new Error('Missing value for --receipt');
      }
      args.receipt = value;
      i += 1; // skip the consumed value
      continue;
    }
    throw new Error(`Unknown argument: ${key}`);
  }
  return args;
}
19
+
20
/**
 * Build the newline-terminated usage line for this script.
 *
 * @returns {string} Usage text.
 */
function usage() {
  const command = 'node check-read-receipt.mjs --receipt sample-read-receipt.json';
  return `Usage: ${command}\n`;
}
23
+
24
/**
 * Tell whether a value is a string containing at least one non-whitespace character.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True only for non-blank strings.
 */
function hasText(value) {
  if (typeof value !== 'string') {
    return false;
  }
  return value.trim() !== '';
}
27
+
28
/**
 * Return the value unchanged when it is an array, otherwise a new empty array.
 *
 * @param {unknown} value - Candidate value.
 * @returns {Array} The same array instance, or `[]`.
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
31
+
32
/**
 * Validate a CLAUDE.md read receipt and report whether edits may proceed.
 *
 * Checks the schema id, the session state, the current task, the reloaded
 * files (each needs `path` and `why`), at least three non-empty active
 * constraints, and the `not_loaded_files` array. Receipts in a regrounding
 * state must also name a topic authority/spec/doc source. Malformed input
 * (a non-object receipt, or null entries in the file lists) is reported
 * through `errors` instead of throwing.
 *
 * @param {unknown} receipt - Parsed receipt JSON.
 * @returns {object} Check summary with `errors`, `warnings`, counts, and
 *   `safe_to_edit`, which is true only when no errors were found and the
 *   receipt itself sets `safe_to_edit: true`.
 */
function validate(receipt) {
  const errors = [];
  const warnings = [];

  // JSON.parse can legitimately yield null or a primitive; validate against an
  // empty object in that case so every field check still reports its own error.
  const isObject = receipt !== null && typeof receipt === 'object';
  if (!isObject) {
    errors.push('receipt must be a JSON object');
  }
  const data = isObject ? receipt : {};

  if (data.schema !== 'pluribus.claude_md_read_receipt.v1') {
    errors.push('schema must be pluribus.claude_md_read_receipt.v1');
  }
  if (!VALID_STATES.has(data.session_state)) {
    errors.push(`session_state must be one of: ${[...VALID_STATES].join(', ')}`);
  }
  if (!hasText(data.current_task)) {
    errors.push('current_task is required');
  }

  const reloadedFiles = asArray(data.reloaded_files);
  if (reloadedFiles.length === 0) {
    errors.push('reloaded_files must name at least one file/source');
  }
  for (const [index, file] of reloadedFiles.entries()) {
    // `?.` keeps a null/undefined entry from throwing; it is reported as missing fields.
    if (!hasText(file?.path)) errors.push(`reloaded_files[${index}].path is required`);
    if (!hasText(file?.why)) errors.push(`reloaded_files[${index}].why is required`);
  }

  // Only non-blank string constraints count toward the minimum of three.
  const activeConstraints = asArray(data.active_constraints).filter(hasText);
  if (activeConstraints.length < 3) {
    errors.push('active_constraints must include at least 3 concrete constraints');
  }

  const notLoadedFiles = Array.isArray(data.not_loaded_files) ? data.not_loaded_files : null;
  if (notLoadedFiles === null) {
    errors.push('not_loaded_files must be present as an array; use [] only when nothing relevant was skipped');
  } else {
    for (const [index, file] of notLoadedFiles.entries()) {
      if (!hasText(file?.path)) errors.push(`not_loaded_files[${index}].path is required`);
      if (!hasText(file?.why)) errors.push(`not_loaded_files[${index}].why is required`);
    }
  }

  // A router source is a path ending in CLAUDE.md or a role mentioning router/index.
  const routerLoaded = reloadedFiles.some(
    (file) => /(^|\/)CLAUDE\.md$/i.test(file?.path || '') || /router|index/i.test(file?.role || '')
  );
  if (!routerLoaded) warnings.push('no CLAUDE.md/router/index source named in reloaded_files');

  // Keyword heuristic over role and path; a missing router is only a warning,
  // but a missing topic authority is an error for regrounding states.
  const topicAuthorityLoaded = reloadedFiles.some(
    (file) => /topic|authority|migration|spec|docs?\//i.test(`${file?.role || ''} ${file?.path || ''}`)
  );
  if (REGROUNDING_STATES.has(data.session_state) && !topicAuthorityLoaded) {
    errors.push(`${data.session_state} receipts must name a topic authority/spec/doc reloaded after the boundary`);
  }

  if (typeof data.safe_to_edit !== 'boolean') {
    errors.push('safe_to_edit must be a boolean');
  }
  if (data.safe_to_edit === true && errors.length > 0) {
    warnings.push('safe_to_edit=true is ignored because the receipt failed validation');
  }

  return {
    schema: 'pluribus.claude_md_read_receipt_check.v1',
    source_receipt_schema: data.schema || null,
    session_state: data.session_state || null,
    current_task_present: hasText(data.current_task),
    reloaded_files_count: reloadedFiles.length,
    active_constraints_count: activeConstraints.length,
    skipped_relevant_files_count: notLoadedFiles === null ? null : notLoadedFiles.length,
    router_or_index_named: routerLoaded,
    topic_authority_named: topicAuthorityLoaded,
    stale_notes_named: asArray(data.stale_or_historical_notes).length,
    errors,
    warnings,
    // The receipt's own claim only counts when validation found no errors.
    safe_to_edit: errors.length === 0 && data.safe_to_edit === true
  };
}
101
+
102
/**
 * CLI driver: parse arguments, load the receipt file, print the check result
 * as pretty-printed JSON on stdout, and set exit code 1 when the receipt is
 * not safe to edit.
 *
 * @throws {Error} When no receipt path was given, or when reading/parsing the file fails.
 */
function main() {
  const { help, receipt: receiptArg } = parseArgs(process.argv.slice(2));
  if (help) {
    process.stdout.write(usage());
    return;
  }
  if (!receiptArg) {
    throw new Error(usage().trim());
  }

  const absolutePath = path.resolve(receiptArg);
  const rawJson = fs.readFileSync(absolutePath, 'utf8');
  const result = validate(JSON.parse(rawJson));

  process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
  if (!result.safe_to_edit) {
    process.exitCode = 1;
  }
}

// Script entry point: any thrown error is reduced to its message on stderr and exit code 1.
try {
  main();
} catch (failure) {
  console.error(failure.message);
  process.exit(1);
}
@@ -0,0 +1,45 @@
1
+ {
2
+ "schema": "pluribus.claude_md_read_receipt.v1",
3
+ "session_state": "topic_switched",
4
+ "current_task": "Update upload API retry handling without regressing the v2.4.0 migration",
5
+ "reloaded_files": [
6
+ {
7
+ "path": "CLAUDE.md",
8
+ "role": "router",
9
+ "why": "Confirms the project uses topic-specific docs and points upload work to docs/upload-api.md"
10
+ },
11
+ {
12
+ "path": "docs/upload-api.md",
13
+ "role": "topic_authority",
14
+ "why": "Current source for upload retries, deprecated helpers, and safety constraints"
15
+ },
16
+ {
17
+ "path": "docs/migrations/v2.4.0.md",
18
+ "role": "migration_note",
19
+ "why": "Current migration note for uploadFile behavior and legacy helper removal"
20
+ }
21
+ ],
22
+ "active_constraints": [
23
+ "Preserve max 3 retry attempts with exponential backoff",
24
+ "Never log raw file contents or customer uploads",
25
+ "Treat v2.4.0 uploadFile migration notes as current authority",
26
+ "Do not reintroduce uploadLegacy(path); it is removed after v2.5.0"
27
+ ],
28
+ "not_loaded_files": [
29
+ {
30
+ "path": "docs/payments.md",
31
+ "why": "Previous topic only; not authority for upload API edits"
32
+ },
33
+ {
34
+ "path": "notes/old-upload-spike.md",
35
+ "why": "Historical spike; superseded by docs/upload-api.md"
36
+ }
37
+ ],
38
+ "stale_or_historical_notes": [
39
+ {
40
+ "ref": "notes/old-upload-spike.md",
41
+ "reason": "Mentions uploadLegacy(path), which is no longer current"
42
+ }
43
+ ],
44
+ "safe_to_edit": true
45
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "schema": "pluribus.claude_md_read_receipt.v1",
3
+ "session_state": "compacted",
4
+ "current_task": "Continue whatever we were doing before compaction",
5
+ "reloaded_files": [
6
+ {
7
+ "path": "CLAUDE.md",
8
+ "role": "router",
9
+ "why": "It is usually loaded at startup"
10
+ }
11
+ ],
12
+ "active_constraints": [
13
+ "Use the normal project rules"
14
+ ],
15
+ "not_loaded_files": [],
16
+ "stale_or_historical_notes": [],
17
+ "safe_to_edit": true
18
+ }
@@ -0,0 +1,41 @@
1
+ # Context attention receipts
2
+
3
+ This example is for the `r/ClaudeCode` / GraphRAG failure mode: a graph, memory, RAG, or MCP retrieval system finds the right context, but the coding agent behaves as if it never saw it.
4
+
5
+ Pluribus should not replace graph memory, RAG, or MCP search. The useful boundary is smaller: emit a privacy-safe receipt proving whether selected context actually crossed the agent boundary and was treated as required before planning or editing.
6
+
7
+ ## What the receipt proves
8
+
9
+ `pluribus.context_attention_receipt.v1` records low-cardinality evidence only:
10
+
11
+ - which retrieval/memory/tool context IDs were required for the task;
12
+ - which surface delivered them (`mcp_tool_response`, `claude_hook`, `CLAUDE.md`, `AGENTS.md`, etc.);
13
+ - whether the agent acknowledged those IDs before plan/edit;
14
+ - whether the final plan cites the IDs it depended on;
15
+ - whether missing context forced a stop instead of a best-effort edit;
16
+ - whether raw documents, prompts, source code, transcripts, tokens, or customer data were omitted.
17
+
18
+ It intentionally does **not** store the retrieved chunks themselves. Use stable IDs, hashes, coarse labels, and evidence paths instead.
19
+
20
+ ## Smoke test
21
+
22
+ ```bash
23
+ node examples/context-attention-receipts/check-attention-receipt.mjs \
24
+ examples/context-attention-receipts/attention-receipt-pass.json
25
+
26
+ node examples/context-attention-receipts/check-attention-receipt.mjs \
27
+ examples/context-attention-receipts/attention-receipt-fail.json
28
+ ```
29
+
30
+ The first command should pass. The second should fail because the graph/memory result was retrieved but not acknowledged or cited before editing.
31
+
32
+ ## Why this exists
33
+
34
+ A high-quality retrieval layer is still weak if the next agent turn can ignore it. The receipt makes that failure visible:
35
+
36
+ - retrieval succeeded;
37
+ - required context was or was not delivered to the agent surface;
38
+ - the agent did or did not acknowledge it before changing files;
39
+ - the wrapper/hook did or did not stop the run when required context was missing.
40
+
41
+ That is the narrow Pluribus angle: not more memory, not a new graph database, and not another router — evidence that the context boundary was actually crossed.
@@ -0,0 +1,49 @@
1
+ {
2
+ "schema": "pluribus.context_attention_receipt.v1",
3
+ "receipt_id": "attn_2026_06_09_demo_fail",
4
+ "task_id": "claude-code-refactor-185",
5
+ "agent_surface": "claude_code",
6
+ "retrieval_system": "graph_memory_mcp",
7
+ "required_context": [
8
+ {
9
+ "id": "ctx:architecture:tenant-routing",
10
+ "source_type": "knowledge_graph",
11
+ "source_hash": "sha256:7bf3e0c1-redacted",
12
+ "why_required": "routes tenant-scoped writes through the policy gateway"
13
+ },
14
+ {
15
+ "id": "ctx:decision:readonly-audit-first",
16
+ "source_type": "decision_log",
17
+ "source_hash": "sha256:11ab91d4-redacted",
18
+ "why_required": "requires dry-run/audit before writes"
19
+ }
20
+ ],
21
+ "delivery": {
22
+ "surface": "mcp_tool_response",
23
+ "delivered_context_ids": [
24
+ "ctx:architecture:tenant-routing",
25
+ "ctx:decision:readonly-audit-first"
26
+ ],
27
+ "raw_context_omitted": true,
28
+ "evidence_path": ".pluribus/receipts/context-attention.ndjson"
29
+ },
30
+ "attention": {
31
+ "acknowledged_before_plan": [
32
+ "ctx:architecture:tenant-routing"
33
+ ],
34
+ "cited_in_plan": [],
35
+ "missing_context_stop": false
36
+ },
37
+ "privacy": {
38
+ "raw_prompts_omitted": true,
39
+ "raw_documents_omitted": true,
40
+ "source_code_omitted": true,
41
+ "tool_outputs_omitted": true,
42
+ "tokens_omitted": true,
43
+ "customer_data_omitted": true
44
+ },
45
+ "result": {
46
+ "status": "unsafe_to_continue",
47
+ "next_safe_action": "stop and ask the retrieval wrapper to re-deliver required context before editing"
48
+ }
49
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "schema": "pluribus.context_attention_receipt.v1",
3
+ "receipt_id": "attn_2026_06_09_demo_pass",
4
+ "task_id": "claude-code-refactor-184",
5
+ "agent_surface": "claude_code",
6
+ "retrieval_system": "graph_memory_mcp",
7
+ "required_context": [
8
+ {
9
+ "id": "ctx:architecture:tenant-routing",
10
+ "source_type": "knowledge_graph",
11
+ "source_hash": "sha256:7bf3e0c1-redacted",
12
+ "why_required": "routes tenant-scoped writes through the policy gateway"
13
+ },
14
+ {
15
+ "id": "ctx:decision:readonly-audit-first",
16
+ "source_type": "decision_log",
17
+ "source_hash": "sha256:11ab91d4-redacted",
18
+ "why_required": "requires dry-run/audit before writes"
19
+ }
20
+ ],
21
+ "delivery": {
22
+ "surface": "mcp_tool_response",
23
+ "delivered_context_ids": [
24
+ "ctx:architecture:tenant-routing",
25
+ "ctx:decision:readonly-audit-first"
26
+ ],
27
+ "raw_context_omitted": true,
28
+ "evidence_path": ".pluribus/receipts/context-attention.ndjson"
29
+ },
30
+ "attention": {
31
+ "acknowledged_before_plan": [
32
+ "ctx:architecture:tenant-routing",
33
+ "ctx:decision:readonly-audit-first"
34
+ ],
35
+ "cited_in_plan": [
36
+ "ctx:architecture:tenant-routing",
37
+ "ctx:decision:readonly-audit-first"
38
+ ],
39
+ "missing_context_stop": false
40
+ },
41
+ "privacy": {
42
+ "raw_prompts_omitted": true,
43
+ "raw_documents_omitted": true,
44
+ "source_code_omitted": true,
45
+ "tool_outputs_omitted": true,
46
+ "tokens_omitted": true,
47
+ "customer_data_omitted": true
48
+ },
49
+ "result": {
50
+ "status": "safe_to_continue",
51
+ "next_safe_action": "continue with dry-run refactor plan"
52
+ }
53
+ }
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env node
2
+ import { readFileSync } from 'node:fs'
3
+ import { resolve } from 'node:path'
4
+
5
// CLI body: validate the context-attention receipt named by the first positional argument.
// Exit codes: 0 = receipt passes, 1 = receipt fails validation,
// 2 = usage error or a receipt that cannot be read as a JSON object.
const [receiptPathArg] = process.argv.slice(2)

if (!receiptPathArg) {
  fail(['usage: node check-attention-receipt.mjs <receipt.json>'], 2)
}

const receiptPath = resolve(process.cwd(), receiptPathArg)
let receipt
try {
  receipt = JSON.parse(readFileSync(receiptPath, 'utf8'))
} catch (error) {
  fail([`could not read receipt JSON: ${error.message}`], 2)
}

// Valid JSON can still be null or a primitive; property access below would throw on null.
if (receipt === null || typeof receipt !== 'object') {
  fail(['receipt must be a JSON object'], 2)
}

const errors = []
const warnings = []

if (receipt.schema !== 'pluribus.context_attention_receipt.v1') {
  errors.push('schema must be pluribus.context_attention_receipt.v1')
}

// Only map when required_context really is an array, and tolerate null items,
// so a malformed receipt produces validation errors instead of a TypeError.
const requiredContext = Array.isArray(receipt.required_context) ? receipt.required_context : []
const requiredIds = ids(requiredContext.map((item) => item?.id))
const deliveredIds = ids(receipt.delivery?.delivered_context_ids)
const acknowledgedIds = ids(receipt.attention?.acknowledged_before_plan)
const citedIds = ids(receipt.attention?.cited_in_plan)

if (requiredIds.length === 0) errors.push('required_context must name at least one context id')

// Sets give constant-time membership checks; the arrays are kept for the reported counts.
const deliveredSet = new Set(deliveredIds)
const acknowledgedSet = new Set(acknowledgedIds)
const citedSet = new Set(citedIds)

for (const id of requiredIds) {
  if (!deliveredSet.has(id)) errors.push(`required context not delivered: ${id}`)
  if (!acknowledgedSet.has(id)) errors.push(`required context not acknowledged before plan: ${id}`)
  if (!citedSet.has(id)) errors.push(`required context not cited in plan: ${id}`)
}

if (receipt.delivery?.raw_context_omitted !== true) {
  errors.push('delivery.raw_context_omitted must be true')
}

// Every privacy flag must be literally `true`; a missing flag counts as a failure.
const privacy = receipt.privacy || {}
for (const field of [
  'raw_prompts_omitted',
  'raw_documents_omitted',
  'source_code_omitted',
  'tool_outputs_omitted',
  'tokens_omitted',
  'customer_data_omitted'
]) {
  if (privacy[field] !== true) errors.push(`privacy.${field} must be true`)
}

// Any earlier error means the run should have stopped rather than continued.
if (errors.length > 0 && receipt.attention?.missing_context_stop !== true) {
  errors.push('missing_context_stop must be true when required context attention evidence is incomplete')
}

if (receipt.result?.status === 'safe_to_continue' && errors.length > 0) {
  errors.push('result.status cannot be safe_to_continue when required evidence is incomplete')
}

if (!receipt.delivery?.evidence_path) {
  warnings.push('delivery.evidence_path is missing; reviewers need a pointer to the audit trail')
}

const output = {
  ok: errors.length === 0,
  receipt_id: receipt.receipt_id || null,
  task_id: receipt.task_id || null,
  agent_surface: receipt.agent_surface || null,
  required_count: requiredIds.length,
  delivered_count: deliveredIds.length,
  acknowledged_count: acknowledgedIds.length,
  cited_count: citedIds.length,
  status: receipt.result?.status || null,
  next_safe_action: receipt.result?.next_safe_action || null,
  errors,
  warnings
}

// Passing reports go to stdout, failing reports to stderr.
if (output.ok) {
  console.log(JSON.stringify(output, null, 2))
  process.exit(0)
}

console.error(JSON.stringify(output, null, 2))
process.exit(1)
89
+
90
/**
 * Keep only the non-empty string entries of an array.
 *
 * @param {unknown} value - Candidate list of ids.
 * @returns {string[]} Filtered ids, or an empty array when `value` is not an array.
 */
function ids(value) {
  if (!Array.isArray(value)) {
    return []
  }
  return value.filter((candidate) => typeof candidate === 'string' && candidate !== '')
}
93
+
94
/**
 * Print a failure report as pretty-printed JSON on stderr and terminate the process.
 *
 * @param {string[]} errors - Messages to include in the report.
 * @param {number} code - Process exit code.
 */
function fail(errors, code) {
  const report = { ok: false, errors }
  console.error(JSON.stringify(report, null, 2))
  process.exit(code)
}
@@ -0,0 +1,22 @@
1
+ # Context sufficiency trace
2
+
3
+ Tiny fixture for the market signal that token-saving context tools need a second metric: did the agent receive the files it later needed?
4
+
5
+ The example compares a retrieval/context-bundle trace against task ground truth and reports:
6
+
7
+ - `gold_context_recall`: fraction of required files returned before the edit;
8
+ - `missed_required_file_rate`: fraction of required files not returned by the bundle;
9
+ - `late_context_rate`: fraction of required files that were missing from the bundle and first discovered after the edit started;
10
+ - `frontier_cut_misses`: required files that were seen as candidates but cut from the bundle.
11
+
12
+ Run it:
13
+
14
+ ```bash
15
+ node examples/context-sufficiency-trace/check-context-sufficiency.mjs \
16
+ examples/context-sufficiency-trace/ground-truth.json \
17
+ examples/context-sufficiency-trace/context-trace.json
18
+ ```
19
+
20
+ Expected result for the included fixture: fail. The bundle saved tokens, but it missed `src/auth/session.ts`, which was required for the task and only appeared after editing began.
21
+
22
+ Why this exists: score + token savings can hide a bad context bundle. A sufficiency trace turns the hidden failure into something benchmarkable before a team trusts compression.
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env node
2
+ import fs from 'node:fs';
3
+
4
// CLI body: compare a context-bundle trace against task ground truth and print a report.
// Exit codes: 0 = verdict "pass", 1 = verdict "fail",
// 2 = usage error or an input file that cannot be read as a JSON object.
const [truthPath, tracePath] = process.argv.slice(2);
if (!truthPath || !tracePath) {
  console.error('Usage: node check-context-sufficiency.mjs <ground-truth.json> <context-trace.json>');
  process.exit(2);
}

// Read one JSON input. Failures exit with code 2 so that exit code 1 always
// means a "fail" verdict and never an unreadable or malformed input file.
const readJson = (filePath) => {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  } catch (error) {
    console.error(`Could not read JSON from ${filePath}: ${error.message}`);
    process.exit(2);
  }
  if (parsed === null || typeof parsed !== 'object') {
    console.error(`Expected a JSON object in ${filePath}`);
    process.exit(2);
  }
  return parsed;
};

// Treat a missing or non-array field as an empty list.
const listOf = (value) => (Array.isArray(value) ? value : []);
// Collect the `path` of each entry; `?.` tolerates null entries.
const pathsOf = (entries) => new Set(listOf(entries).map((entry) => entry?.path));

const truth = readJson(truthPath);
const trace = readJson(tracePath);

const requiredList = [...new Set(listOf(truth.required_files))];
const returned = pathsOf(trace.returned_files);
const frontierCut = pathsOf(trace.frontier_cut);
const late = pathsOf(trace.late_files);

const returnedRequired = requiredList.filter((file) => returned.has(file));
const missedRequired = requiredList.filter((file) => !returned.has(file));
// Both breakdowns are subsets of the missed files, not of all required files.
const frontierCutMisses = missedRequired.filter((file) => frontierCut.has(file));
const lateMisses = missedRequired.filter((file) => late.has(file));

// Ratios are rounded to four decimals; an empty denominator yields 0.
const ratio = (count, total) => (total === 0 ? 0 : Number((count / total).toFixed(4)));
const report = {
  task_id: truth.task_id,
  trace_id: trace.trace_id,
  required_files: requiredList.length,
  returned_files: returned.size,
  gold_context_recall: ratio(returnedRequired.length, requiredList.length),
  missed_required_file_rate: ratio(missedRequired.length, requiredList.length),
  late_context_rate: ratio(lateMisses.length, requiredList.length),
  missed_required_files: missedRequired,
  frontier_cut_misses: frontierCutMisses,
  verdict: missedRequired.length === 0 ? 'pass' : 'fail'
};

console.log(JSON.stringify(report, null, 2));
process.exit(report.verdict === 'pass' ? 0 : 1);
@@ -0,0 +1,28 @@
1
+ {
2
+ "trace_id": "ctxtrace_2026_06_17_demo_pass",
3
+ "query": "fix stale session redirect after token refresh",
4
+ "token_budget": 12000,
5
+ "index_revision": "sha256:demo-index-revision",
6
+ "returned_files": [
7
+ {
8
+ "path": "src/auth/session.ts",
9
+ "hash": "sha256:demo-session",
10
+ "tokens": 2200,
11
+ "reason": "session refresh state owns the redirect freshness condition"
12
+ },
13
+ {
14
+ "path": "src/routes/redirect.ts",
15
+ "hash": "sha256:demo-redirect",
16
+ "tokens": 1400,
17
+ "reason": "direct redirect logic match"
18
+ },
19
+ {
20
+ "path": "test/auth-refresh.test.ts",
21
+ "hash": "sha256:demo-test",
22
+ "tokens": 900,
23
+ "reason": "failing test references token refresh redirect"
24
+ }
25
+ ],
26
+ "frontier_cut": [],
27
+ "late_files": []
28
+ }
@@ -0,0 +1,47 @@
1
+ {
2
+ "trace_id": "ctxtrace_2026_06_17_demo",
3
+ "query": "fix stale session redirect after token refresh",
4
+ "token_budget": 12000,
5
+ "index_revision": "sha256:demo-index-revision",
6
+ "returned_files": [
7
+ {
8
+ "path": "src/routes/redirect.ts",
9
+ "hash": "sha256:demo-redirect",
10
+ "tokens": 1400,
11
+ "reason": "direct redirect logic match"
12
+ },
13
+ {
14
+ "path": "test/auth-refresh.test.ts",
15
+ "hash": "sha256:demo-test",
16
+ "tokens": 900,
17
+ "reason": "failing test references token refresh redirect"
18
+ },
19
+ {
20
+ "path": "docs/auth-flow.md",
21
+ "hash": "sha256:demo-doc",
22
+ "tokens": 1300,
23
+ "reason": "architecture notes for auth flow"
24
+ }
25
+ ],
26
+ "frontier_cut": [
27
+ {
28
+ "path": "src/auth/session.ts",
29
+ "hash": "sha256:demo-session",
30
+ "tokens": 2200,
31
+ "reason": "ranked 4th; cut to preserve budget"
32
+ },
33
+ {
34
+ "path": "src/auth/cookies.ts",
35
+ "hash": "sha256:demo-cookies",
36
+ "tokens": 1800,
37
+ "reason": "ranked 5th; cut to preserve budget"
38
+ }
39
+ ],
40
+ "late_files": [
41
+ {
42
+ "path": "src/auth/session.ts",
43
+ "first_seen_after": "edit_started",
44
+ "reason": "test failure showed refresh state was stored outside redirect.ts"
45
+ }
46
+ ]
47
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "task_id": "tsbench-style-auth-redirect",
3
+ "task": "Fix the stale session redirect after token refresh.",
4
+ "required_files": [
5
+ "src/auth/session.ts",
6
+ "src/routes/redirect.ts",
7
+ "test/auth-refresh.test.ts"
8
+ ],
9
+ "edit_files": [
10
+ "src/routes/redirect.ts",
11
+ "test/auth-refresh.test.ts"
12
+ ]
13
+ }
@@ -0,0 +1,79 @@
1
+ # Provider degradation canary receipt
2
+
3
+ A tiny gate for agent runs when the model/provider may be silently degraded: slower than normal, timing out, drifting on tool calls, breaking JSON, or producing weaker code edits while the status page still looks green.
4
+
5
+ Use this before side-effecting agent actions such as patches, PRs, migrations, shell commands, deploys, or external writes. The receipt does **not** log prompts or model outputs. It records transport health, cheap capability canaries, and the decision to continue, fallback, pause writes, or stop.
6
+
7
+ ## Prompt / harness pattern
8
+
9
+ ```text
10
+ Before this agent run writes anything, record a degradation decision:
11
+ - provider/model/region and prompt template hash
12
+ - latency/error summary for the run window
13
+ - capability canaries that match this app: JSON schema, tool choice, patch format, refusal/over-refusal, citation grounding
14
+ - failed canaries and severity
15
+ - fallback chosen, if any
16
+ - write gate: continue, read_only, fallback_model, pause_writes, or stop
17
+ - confidence: provider_degraded, app_bug, network, unknown, or healthy
18
+ ```
19
+
20
+ If tool-choice or patch-format canaries fail, keep read-only analysis alive but pause writes until a fallback or human review confirms the agent can still act safely.
21
+
22
+ ## Run the sample checker
23
+
24
+ ```bash
25
+ cd examples/provider-degradation-canaries
26
+ node check-degradation-receipt.mjs --receipt healthy-decision.json
27
+ ```
28
+
29
+ The bundled healthy receipt passes because transport is stable, app-critical canaries pass, and the write gate is allowed.
30
+
31
+ A degraded write attempt should fail:
32
+
33
+ ```bash
34
+ node check-degradation-receipt.mjs --receipt unsafe-write-decision.json
35
+ ```
36
+
37
+ ## Receipt shape
38
+
39
+ ```json
40
+ {
41
+ "schema": "pluribus.provider_degradation_decision.v1",
42
+ "run_id": "agent-run-2026-06-15T20:02Z",
43
+ "provider": "anthropic",
44
+ "model": "claude-sonnet-4",
45
+ "region": "us-east-1",
46
+ "prompt_template_hash": "sha256:...",
47
+ "canary_suite_version": "coding-agent-smoke-2026-06-15",
48
+ "transport": {
49
+ "window_minutes": 10,
50
+ "ttft_p95_ms": 1400,
51
+ "total_latency_p95_ms": 9200,
52
+ "timeout_rate": 0.01,
53
+ "error_rate": 0,
54
+ "retry_count": 1,
55
+ "status_incident_url": null
56
+ },
57
+ "capability_canaries": [
58
+ { "name": "json_schema", "status": "pass", "severity": "write_blocking" },
59
+ { "name": "tool_choice", "status": "pass", "severity": "write_blocking" },
60
+ { "name": "patch_format", "status": "pass", "severity": "write_blocking" }
61
+ ],
62
+ "fallback": { "chosen": false, "reason": null },
63
+ "confidence": "healthy",
64
+ "write_gate": "continue"
65
+ }
66
+ ```
67
+
68
+ ## Why this exists
69
+
70
+ The market signal from Claude Code / API builders is practical: when an LLM silently degrades, teams lose time deciding whether provider behavior, network health, prompt changes, or their own code caused the failure.
71
+
72
+ This receipt keeps that decision falsifiable:
73
+
74
+ - latency alerts and provider status are separated from capability drift;
75
+ - canaries are app-critical, not generic benchmarks;
76
+ - side-effecting actions get stricter gates than read-only analysis;
77
+ - fallback and pause decisions are recorded as evidence, not hidden in transcripts.
78
+
79
+ Pair this with Pluribus context receipts when runtime inputs are the problem. Use this receipt when the question is whether the model/provider is currently reliable enough to let an agent keep writing.