pluribus-context 0.3.40 → 0.3.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +3 -2
- package/bin/pluribus.js +3 -1
- package/docs/.nojekyll +0 -0
- package/docs/.well-known/agent-skills/context-receipts/SKILL.md +206 -0
- package/docs/.well-known/agent-skills/index.json +19 -0
- package/docs/.well-known/agent-skills/skill-policy-receipts/SKILL.md +77 -0
- package/docs/agent-surface-proof-chain.md +176 -0
- package/docs/cursor-claude-context-handoff.md +68 -0
- package/docs/index.html +38 -0
- package/docs/receipt-playground.html +304 -0
- package/docs/session-preflight-receipts.md +77 -0
- package/examples/claude-md-read-receipts/README.md +70 -0
- package/examples/claude-md-read-receipts/check-read-receipt.mjs +119 -0
- package/examples/claude-md-read-receipts/sample-read-receipt.json +45 -0
- package/examples/claude-md-read-receipts/stale-read-receipt.json +18 -0
- package/examples/context-attention-receipts/README.md +41 -0
- package/examples/context-attention-receipts/attention-receipt-fail.json +49 -0
- package/examples/context-attention-receipts/attention-receipt-pass.json +53 -0
- package/examples/context-attention-receipts/check-attention-receipt.mjs +97 -0
- package/examples/context-sufficiency-trace/README.md +22 -0
- package/examples/context-sufficiency-trace/check-context-sufficiency.mjs +40 -0
- package/examples/context-sufficiency-trace/context-trace-pass.json +28 -0
- package/examples/context-sufficiency-trace/context-trace.json +47 -0
- package/examples/context-sufficiency-trace/ground-truth.json +13 -0
- package/examples/provider-degradation-canaries/README.md +79 -0
- package/examples/provider-degradation-canaries/check-degradation-receipt.mjs +64 -0
- package/examples/provider-degradation-canaries/healthy-decision.json +27 -0
- package/examples/provider-degradation-canaries/unsafe-write-decision.json +26 -0
- package/examples/semantic-anchor-receipts/README.md +49 -0
- package/examples/semantic-anchor-receipts/check-semantic-anchors.mjs +153 -0
- package/examples/semantic-anchor-receipts/cleaned-paste.md +17 -0
- package/examples/semantic-anchor-receipts/original-paste.md +19 -0
- package/examples/semantic-anchor-receipts/sample-receipt.json +62 -0
- package/examples/session-preflight-receipts/README.md +25 -0
- package/examples/session-preflight-receipts/session-preflight-receipt.json +39 -0
- package/examples/session-preflight-receipts/session-preflight.mdc +18 -0
- package/examples/task-scoped-mcp-config/README.md +60 -0
- package/examples/task-scoped-mcp-config/mcp-catalog.json +46 -0
- package/examples/task-scoped-mcp-config/select-mcp-config.mjs +64 -0
- package/examples/task-scoped-mcp-config/tasks/browser-debug.json +7 -0
- package/package.json +5 -2
- package/skills/context-receipts/README.md +13 -2
- package/skills/context-receipts/SKILL.md +65 -0
- package/src/commands/demo.js +81 -1
- package/src/utils/version.js +1 -1
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFileSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import { dirname, resolve } from 'node:path';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
|
|
6
|
+
// Directory that contains this script; the bundled catalog and task profile
// are resolved against it when no explicit path is given.
const here = dirname(fileURLToPath(import.meta.url));

// Collect `--key value` pairs from the command line. Every flag must be
// followed by a value that is not itself a flag.
const args = new Map();
for (let i = 2; i < process.argv.length; i += 2) {
  const key = process.argv[i];
  const value = process.argv[i + 1];
  if (!key.startsWith('--') || value === undefined || value.startsWith('--')) {
    throw new Error(`Expected --key value, got ${key}`);
  }
  args.set(key.slice(2), value);
}

/**
 * Resolve an optional path flag against the current working directory.
 *
 * @param {string} name Flag name without the leading `--`.
 * @param {string|null} fallback Value returned when the flag was not given.
 * @returns {string|null} Absolute path, or `fallback`.
 */
const pathArg = (name, fallback) => (args.has(name) ? resolve(process.cwd(), args.get(name)) : fallback);

const taskPath = pathArg('task', resolve(here, 'tasks/browser-debug.json'));
const catalogPath = pathArg('catalog', resolve(here, 'mcp-catalog.json'));
const outPath = pathArg('out', null);
const receiptPath = pathArg('receipt', null);

const catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
const task = JSON.parse(readFileSync(taskPath, 'utf8'));

// Fail with a message that names the offending file instead of an opaque
// "Cannot read properties of undefined" further down.
if (!Array.isArray(catalog?.servers)) {
  throw new Error(`Catalog ${catalogPath} must contain a "servers" array`);
}
if (!Array.isArray(task?.includeServerIds)) {
  throw new Error(`Task ${taskPath} must contain an "includeServerIds" array`);
}

const servers = new Map(catalog.servers.map((server) => [server.id, server]));

// De-duplicate requested ids (first occurrence wins the ordering) so a server
// listed twice is neither reported twice nor counted twice in the token total.
const includeIds = new Set(task.includeServerIds);

const missing = [...includeIds].filter((id) => !servers.has(id));
if (missing.length) {
  throw new Error(`Task references unknown server ids: ${missing.join(', ')}`);
}

const selected = [...includeIds].map((id) => servers.get(id));
const withheld = catalog.servers.filter((server) => !includeIds.has(server.id));

// Only `command` and `args` are copied into the generated config.
const mcpServers = Object.fromEntries(
  selected.map((server) => [
    server.id,
    {
      command: server.command,
      args: server.args,
    },
  ]),
);

// NOTE(review): assumes every catalog entry carries a numeric
// `estimatedSchemaTokens`; a missing value turns the total into NaN, which
// JSON.stringify writes as null.
const sumSchemaTokens = (list) => list.reduce((sum, server) => sum + server.estimatedSchemaTokens, 0);

const config = { mcpServers };
const receipt = {
  schema: 'pluribus.task_scoped_mcp_config_receipt.v1',
  task_id: task.taskId,
  catalog_id: catalog.catalogId,
  selected_server_ids: selected.map((server) => server.id),
  withheld_server_ids: withheld.map((server) => server.id),
  selected_estimated_schema_tokens: sumSchemaTokens(selected),
  withheld_estimated_schema_tokens: sumSchemaTokens(withheld),
  selection_reason: task.description,
  withheld_reason: task.excludeReason,
  raw_tool_schemas_logged: false,
  raw_prompts_logged: false,
  raw_tool_outputs_logged: false,
  adoption_claim_allowed: false,
  note: 'This proves only the task-scoped MCP config surface. It does not prove that the agent later called or adopted the selected tools.',
};

// Files are written only when the corresponding flag was supplied; the combined
// result is always printed to stdout.
if (outPath) writeFileSync(outPath, `${JSON.stringify(config, null, 2)}\n`);
if (receiptPath) writeFileSync(receiptPath, `${JSON.stringify(receipt, null, 2)}\n`);

console.log(JSON.stringify({ ok: true, config, receipt }, null, 2));
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema": "pluribus.mcp_task_profile.v1",
|
|
3
|
+
"taskId": "browser-debug",
|
|
4
|
+
"description": "Debug a failing browser flow and look up one library API while keeping unrelated memory/observability/repo tools out of the initial context.",
|
|
5
|
+
"includeServerIds": ["playwright", "context7"],
|
|
6
|
+
"excludeReason": "Not needed for this task's first pass; load a different --mcp-config if the task changes."
|
|
7
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pluribus-context",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.42",
|
|
4
4
|
"description": "AI context and rules sync CLI for Claude.md, Claude Code, Cursor, and Copilot instructions, with privacy-safe context receipts that prove what memory, tools, skills, compactions, and security findings crossed agent boundaries without logging raw content.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"homepage": "https://github.com/caioribeiroclw-pixel/pluribus#readme",
|
|
@@ -89,7 +89,10 @@
|
|
|
89
89
|
"agent-context-audit",
|
|
90
90
|
"agent-memory",
|
|
91
91
|
"bob",
|
|
92
|
-
"bob-rules"
|
|
92
|
+
"bob-rules",
|
|
93
|
+
"agent-skill",
|
|
94
|
+
"skillpm",
|
|
95
|
+
"agent-skills-registry"
|
|
93
96
|
],
|
|
94
97
|
"author": "Caio Ribeiro",
|
|
95
98
|
"license": "MIT",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Context receipts Agent Skill recipe
|
|
2
2
|
|
|
3
|
-
This is a small, copyable Agent Skill recipe for context-engineering users who are adopting Tool Search, lazy MCP loading, skills, memory, compaction, or subagents and need to verify what actually crossed the context boundary.
|
|
3
|
+
This is a small, copyable Agent Skill recipe for context-engineering users who are adopting Tool Search, lazy MCP loading, dynamic tool discovery, skills, memory, compaction, GraphRAG/code search, transcript review, or subagents and need to verify what actually crossed the context boundary.
|
|
4
4
|
|
|
5
5
|
It is intentionally markdown-only so it can be copied into a local skills directory such as:
|
|
6
6
|
|
|
@@ -8,6 +8,11 @@ It is intentionally markdown-only so it can be copied into a local skills direct
|
|
|
8
8
|
- `.opencode/skills/context-receipts/SKILL.md`
|
|
9
9
|
- `.agents/skills/context-receipts/SKILL.md`
|
|
10
10
|
|
|
11
|
+
The two newest smoke paths are:
|
|
12
|
+
|
|
13
|
+
- **Runtime tool-surface diff:** prove which MCP tools were discovered, activated, withheld, or blocked without copying raw schemas/prompts/results.
|
|
14
|
+
- **Context attention:** prove that retrieved/baseline context was delivered, acknowledged before planning, and cited before edits/tool calls.
|
|
15
|
+
|
|
11
16
|
## Quick smoke
|
|
12
17
|
|
|
13
18
|
Ask an agent or harness using the skill to emit a receipt for one workflow and verify these constraints:
|
|
@@ -19,9 +24,15 @@ grep -E 'raw_(schema|query|args|result|output|transcript|text)_copied":false|raw
|
|
|
19
24
|
|
|
20
25
|
Then manually check that the receipt contains counts, hashes, ids, buckets, and `audit_gap`, but does **not** contain private prompts, raw schemas, tool args/results, skill bodies, memory bodies, customer names, secrets, or transcript text.
|
|
21
26
|
|
|
22
|
-
For executable fixture examples, see
|
|
27
|
+
For executable fixture examples, see:
|
|
28
|
+
|
|
29
|
+
- [`../../examples/tool-surface-diff-receipts/`](../../examples/tool-surface-diff-receipts/) for runtime MCP tool-surface diff receipts.
|
|
30
|
+
- [`../../examples/context-attention-receipts/`](../../examples/context-attention-receipts/) for retrieved-context attention receipts.
|
|
31
|
+
- [`../../examples/context-input-evidence/`](../../examples/context-input-evidence/) for ToolSearch propagation, pruning, and compaction transaction smokes.
|
|
23
32
|
|
|
24
33
|
```bash
|
|
34
|
+
node ../../examples/context-attention-receipts/check-attention-receipt.mjs \
|
|
35
|
+
../../examples/context-attention-receipts/attention-receipt-pass.json
|
|
25
36
|
node ../../examples/context-input-evidence/convert-subagent-toolsearch-propagation-log.mjs
|
|
26
37
|
node ../../examples/context-input-evidence/convert-pruning-log.mjs
|
|
27
38
|
node ../../examples/context-input-evidence/convert-compaction-transaction-log.mjs
|
|
@@ -45,6 +45,71 @@ Minimal JSONL event names:
|
|
|
45
45
|
{"event":"mcp.tool_call.completed","tool_id":"github.search_code","args_hash":"sha256:...","result_token_bucket":"2k_4k","raw_args_copied":false,"raw_result_copied":false,"status":"ok"}
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
+
## Runtime tool-surface diff smoke
|
|
49
|
+
|
|
50
|
+
For MCP dynamic discovery, gateways, admin/Purview-style audit trails, or runtime tool catalogs, separate discovery from activation:
|
|
51
|
+
|
|
52
|
+
- which platform/gateway/audit sink observed the runtime catalog change;
|
|
53
|
+
- which catalog/version/hash was active before and after discovery;
|
|
54
|
+
- which tools were discovered, activated, withheld, or blocked;
|
|
55
|
+
- which validation outcome applied, such as `accepted`, `blocked_by_rai`, `blocked_by_xpia`, `schema_invalid`, or `entitlement_filtered`;
|
|
56
|
+
- whether only low-cardinality ids, hashes, counts, and outcome codes entered the receipt;
|
|
57
|
+
- the audit gap, such as not proving the tool was semantically right for the user task.
|
|
58
|
+
|
|
59
|
+
Minimal JSON shape:
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"receipt_type": "pluribus.mcp_tool_surface_diff_receipt.v1",
|
|
64
|
+
"runtime_discovery": {
|
|
65
|
+
"trigger": "turn_start|admin_refresh|tool_search|manual_refresh",
|
|
66
|
+
"before_catalog_hash": "sha256:...",
|
|
67
|
+
"after_catalog_hash": "sha256:..."
|
|
68
|
+
},
|
|
69
|
+
"summary": {
|
|
70
|
+
"discovered_count": 3,
|
|
71
|
+
"activated_count": 1,
|
|
72
|
+
"withheld_count": 1,
|
|
73
|
+
"blocked_count": 1
|
|
74
|
+
},
|
|
75
|
+
"privacy": {
|
|
76
|
+
"raw_schemas_copied": false,
|
|
77
|
+
"raw_prompts_copied": false,
|
|
78
|
+
"raw_results_copied": false
|
|
79
|
+
},
|
|
80
|
+
"audit_gap": "proves tool-surface boundary, not semantic usefulness"
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Context attention smoke
|
|
85
|
+
|
|
86
|
+
For GraphRAG, memory, code search, transcript review, or baseline-first workflows, separate retrieval from attention:
|
|
87
|
+
|
|
88
|
+
- which required context ids were selected or retrieved;
|
|
89
|
+
- where they were delivered, such as prompt, tool result, memory result, subagent packet, or file read;
|
|
90
|
+
- which ids were acknowledged before planning;
|
|
91
|
+
- which ids were cited before edits/tool calls;
|
|
92
|
+
- what the agent must stop on if a required id is missing;
|
|
93
|
+
- whether raw docs, prompts, results, paths, customer text, and full transcript snippets stayed out of the receipt.
|
|
94
|
+
|
|
95
|
+
Minimal JSON shape:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"receipt_type": "pluribus.context_attention_receipt.v1",
|
|
100
|
+
"required_context_ids": ["ctx:auth-boundary", "ctx:migration-plan"],
|
|
101
|
+
"delivered_context_ids": ["ctx:auth-boundary", "ctx:migration-plan"],
|
|
102
|
+
"acknowledged_before_plan_ids": ["ctx:auth-boundary", "ctx:migration-plan"],
|
|
103
|
+
"cited_before_edit_ids": ["ctx:auth-boundary"],
|
|
104
|
+
"missing_context_stop": "stop_before_edit",
|
|
105
|
+
"privacy": {
|
|
106
|
+
"raw_context_copied": false,
|
|
107
|
+
"raw_transcript_copied": false
|
|
108
|
+
},
|
|
109
|
+
"audit_gap": "proves required context was acknowledged/cited, not that the edit is correct"
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
48
113
|
## Skill / prompt context smoke
|
|
49
114
|
|
|
50
115
|
For skills, rules, AGENTS.md overlays, or instruction files, answer:
|
package/src/commands/demo.js
CHANGED
|
@@ -12,7 +12,8 @@ const SKILL_USE_RATE_DEMO = 'skill-use-rate'
|
|
|
12
12
|
const MCP_AUDIT_RECEIPT_DEMO = 'mcp-audit-receipt'
|
|
13
13
|
const MCP_TELEMETRY_IMPORT_DEMO = 'mcp-telemetry-import'
|
|
14
14
|
const TOOL_SURFACE_DIFF_DEMO = 'tool-surface-diff'
|
|
15
|
-
const
|
|
15
|
+
const CONTEXT_SUFFICIENCY_TRACE_DEMO = 'context-sufficiency-trace'
|
|
16
|
+
const AVAILABLE_DEMOS = [SKILL_USE_RATE_DEMO, MCP_AUDIT_RECEIPT_DEMO, MCP_TELEMETRY_IMPORT_DEMO, TOOL_SURFACE_DIFF_DEMO, CONTEXT_SUFFICIENCY_TRACE_DEMO]
|
|
16
17
|
const SKILL_USE_RATE_SCHEMA = 'pluribus.skill_use_rate_receipt.v1'
|
|
17
18
|
const MCP_AUDIT_RECEIPT_SCHEMA = 'pluribus.mcp_tool_call_audit_receipt.v1'
|
|
18
19
|
const TOOL_SURFACE_DIFF_SCHEMA = 'pluribus.mcp_tool_surface_diff_receipt.v1'
|
|
@@ -33,6 +34,8 @@ export async function runDemo(args, positional = []) {
|
|
|
33
34
|
return runMcpTelemetryImportDemo(args)
|
|
34
35
|
case TOOL_SURFACE_DIFF_DEMO:
|
|
35
36
|
return runToolSurfaceDiffDemo(args)
|
|
37
|
+
case CONTEXT_SUFFICIENCY_TRACE_DEMO:
|
|
38
|
+
return runContextSufficiencyTraceDemo(args)
|
|
36
39
|
default:
|
|
37
40
|
console.error(`❌ Unknown demo: ${demoName}`)
|
|
38
41
|
console.error(` Available demos: ${AVAILABLE_DEMOS.join(', ')}`)
|
|
@@ -196,6 +199,18 @@ function bundledToolSurfaceDiffReceiptPath() {
|
|
|
196
199
|
return fileURLToPath(new URL('../../examples/tool-surface-diff-receipts/tool-surface-diff-receipt.json', import.meta.url))
|
|
197
200
|
}
|
|
198
201
|
|
|
202
|
+
/**
 * Locate the ground-truth fixture of the context-sufficiency-trace example.
 *
 * @returns {string} Filesystem path of `ground-truth.json`, resolved relative to this module's URL.
 */
function bundledContextSufficiencyGroundTruthPath() {
  const fixtureUrl = new URL('../../examples/context-sufficiency-trace/ground-truth.json', import.meta.url)
  return fileURLToPath(fixtureUrl)
}
|
|
205
|
+
|
|
206
|
+
/**
 * Locate the default trace fixture of the context-sufficiency-trace example.
 *
 * @returns {string} Filesystem path of `context-trace.json`, resolved relative to this module's URL.
 */
function bundledContextSufficiencyTracePath() {
  const fixtureUrl = new URL('../../examples/context-sufficiency-trace/context-trace.json', import.meta.url)
  return fileURLToPath(fixtureUrl)
}
|
|
209
|
+
|
|
210
|
+
/**
 * Locate the passing trace fixture of the context-sufficiency-trace example.
 *
 * @returns {string} Filesystem path of `context-trace-pass.json`, resolved relative to this module's URL.
 */
function bundledContextSufficiencyPassTracePath() {
  const fixtureUrl = new URL('../../examples/context-sufficiency-trace/context-trace-pass.json', import.meta.url)
  return fileURLToPath(fixtureUrl)
}
|
|
213
|
+
|
|
199
214
|
function runToolSurfaceDiffDemo(args) {
|
|
200
215
|
const receiptPath = selectedReceiptPath(args, bundledToolSurfaceDiffReceiptPath())
|
|
201
216
|
const receipt = readReceipt(receiptPath, 'tool-surface diff')
|
|
@@ -230,6 +245,71 @@ function runToolSurfaceDiffDemo(args) {
|
|
|
230
245
|
if (result.errors.length > 0) process.exit(1)
|
|
231
246
|
}
|
|
232
247
|
|
|
248
|
+
/**
 * Run the `context-sufficiency-trace` demo.
 *
 * Loads a ground-truth file and a context trace, scores the trace with
 * `validateContextSufficiencyTrace`, and prints the outcome either as JSON or
 * as a human-readable summary. Exits the process with status 1 when the
 * verdict is anything other than `pass`.
 *
 * @param {object} args Parsed CLI flags. Reads `receipt` (ground-truth path),
 *   `input` (trace path), `pass` (use the bundled passing trace when no
 *   `input` is given) and `json` (emit JSON instead of text).
 * @returns {void}
 */
function runContextSufficiencyTraceDemo(args) {
  // A flag counts as supplied only when it is a non-blank string.
  const hasText = (value) => typeof value === 'string' && value.trim() !== ''
  // Prefer a cwd-relative path for display; fall back to the full path when
  // the relative form is empty.
  const displayPath = (filePath) => path.relative(process.cwd(), filePath) || filePath

  const truthPath = hasText(args.receipt)
    ? path.resolve(process.cwd(), args.receipt)
    : bundledContextSufficiencyGroundTruthPath()

  let tracePath
  if (hasText(args.input)) {
    tracePath = path.resolve(process.cwd(), args.input)
  } else if (args.pass) {
    tracePath = bundledContextSufficiencyPassTracePath()
  } else {
    tracePath = bundledContextSufficiencyTracePath()
  }

  const truth = readReceipt(truthPath, 'context sufficiency ground-truth')
  const trace = readReceipt(tracePath, 'context trace')
  const result = validateContextSufficiencyTrace(truth, trace)

  const shownTruthPath = displayPath(truthPath)
  const shownTracePath = displayPath(tracePath)

  if (args.json) {
    console.log(JSON.stringify({
      ok: result.verdict === 'pass',
      demo: CONTEXT_SUFFICIENCY_TRACE_DEMO,
      groundTruth: shownTruthPath,
      trace: shownTracePath,
      summary: result,
    }, null, 2))
  } else {
    console.log('🧪 Pluribus demo: context sufficiency trace')
    console.log(` Ground truth: ${shownTruthPath}`)
    console.log(` Trace: ${shownTracePath}`)
    console.log('')

    const mark = result.verdict === 'pass' ? '✅' : '❌'
    console.log(`${mark} context sufficiency ${result.verdict}: gold_context_recall=${result.gold_context_recall}, missed_required_file_rate=${result.missed_required_file_rate}, late_context_rate=${result.late_context_rate}`)
    if (result.missed_required_files.length > 0) console.log(` • missed_required_files: ${result.missed_required_files.join(', ')}`)
    if (result.frontier_cut_misses.length > 0) console.log(` • frontier_cut_misses: ${result.frontier_cut_misses.join(', ')}`)
    console.log('')
    console.log('Why this matters: context compression is only safe if the reduced bundle still contains the files/symbols the task ground truth requires before editing starts.')
    console.log('Try your own trace: pluribus demo context-sufficiency-trace --receipt ground-truth.json --input context-trace.json --json')
  }

  // A non-passing verdict is reported through the exit status in both output modes.
  if (result.verdict !== 'pass') process.exit(1)
}
|
|
285
|
+
|
|
286
|
+
/**
 * Score a context trace against the files a task's ground truth requires.
 *
 * A required file counts as covered only when it appears in
 * `trace.returned_files`. Missed files are further classified by whether they
 * appear in `trace.frontier_cut` or `trace.late_files`. Rates are rounded to
 * four decimals. When no files are required every rate is 0 and the verdict
 * is `pass`.
 *
 * @param {object} truth Ground truth; reads `task_id` and `required_files`
 *   (array of paths). A non-object value is treated as empty.
 * @param {object} trace Context trace; reads `trace_id`, `returned_files`,
 *   `frontier_cut` and `late_files`. Entries of those lists may be `{ path }`
 *   records or bare path strings. A non-object value is treated as empty.
 * @returns {object} Summary with counts, rates, the missed paths and a
 *   `verdict` of `'pass'` or `'fail'`.
 */
export function validateContextSufficiencyTrace(truth, trace) {
  // Tolerate a JSON document that is `null` or a primitive instead of throwing.
  const truthRecord = truth !== null && typeof truth === 'object' ? truth : {}
  const traceRecord = trace !== null && typeof trace === 'object' ? trace : {}

  // Build a set of paths from a list of `{ path }` records or bare strings.
  // Null entries and entries without a usable path are ignored rather than
  // crashing the whole validation.
  const pathSet = (entries) => new Set(
    (Array.isArray(entries) ? entries : [])
      .map((entry) => (typeof entry === 'string' ? entry : entry?.path))
      .filter(Boolean),
  )

  const required = new Set(Array.isArray(truthRecord.required_files) ? truthRecord.required_files : [])
  const returned = pathSet(traceRecord.returned_files)
  const frontierCut = pathSet(traceRecord.frontier_cut)
  const late = pathSet(traceRecord.late_files)

  const requiredList = [...required]
  const returnedRequired = requiredList.filter((filePath) => returned.has(filePath))
  const missedRequired = requiredList.filter((filePath) => !returned.has(filePath))
  const frontierCutMisses = missedRequired.filter((filePath) => frontierCut.has(filePath))
  const lateMisses = missedRequired.filter((filePath) => late.has(filePath))

  // Avoid dividing by zero when the ground truth lists no required files.
  const ratio = (count, total) => (total === 0 ? 0 : Number((count / total).toFixed(4)))

  return {
    task_id: truthRecord.task_id || 'unknown-task',
    trace_id: traceRecord.trace_id || 'unknown-trace',
    required_files: requiredList.length,
    returned_files: returned.size,
    gold_context_recall: ratio(returnedRequired.length, requiredList.length),
    missed_required_file_rate: ratio(missedRequired.length, requiredList.length),
    late_context_rate: ratio(lateMisses.length, requiredList.length),
    missed_required_files: missedRequired,
    frontier_cut_misses: frontierCutMisses,
    verdict: missedRequired.length === 0 ? 'pass' : 'fail',
  }
}
|
|
312
|
+
|
|
233
313
|
export function validateSkillUseRateReceipt(receipt) {
|
|
234
314
|
const errors = []
|
|
235
315
|
const warnings = []
|
package/src/utils/version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '0.3.40'
|
|
1
|
+
export const VERSION = '0.3.42'
|