@openwop/openwop-conformance 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +70 -0
  4. package/api/openapi.yaml +268 -1
  5. package/coverage.md +30 -2
  6. package/fixtures/oauth-providers/synthetic.json +38 -0
  7. package/fixtures.md +10 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +12 -0
  10. package/schemas/agent-deployment-transition.schema.json +49 -0
  11. package/schemas/agent-deployment.schema.json +54 -0
  12. package/schemas/agent-eval-suite.schema.json +140 -0
  13. package/schemas/agent-inventory-response.schema.json +25 -0
  14. package/schemas/agent-manifest.schema.json +5 -0
  15. package/schemas/agent-org-chart.schema.json +82 -0
  16. package/schemas/agent-ref.schema.json +12 -2
  17. package/schemas/agent-roster-entry.schema.json +81 -0
  18. package/schemas/agent-roster-response.schema.json +21 -0
  19. package/schemas/budget-policy.schema.json +18 -0
  20. package/schemas/capabilities.schema.json +277 -0
  21. package/schemas/credential-provenance.schema.json +18 -0
  22. package/schemas/eval-summary.schema.json +92 -0
  23. package/schemas/node-pack-manifest.schema.json +17 -0
  24. package/schemas/org-chart-responsibility-view.schema.json +26 -0
  25. package/schemas/run-event-payloads.schema.json +286 -3
  26. package/schemas/run-event.schema.json +19 -0
  27. package/schemas/tool-descriptor.schema.json +63 -0
  28. package/schemas/trigger-subscription.schema.json +26 -0
  29. package/src/lib/agentRoster.ts +76 -0
  30. package/src/lib/liveRuntime.ts +59 -0
  31. package/src/lib/profiles.ts +157 -0
  32. package/src/lib/runtimeRequires.ts +38 -0
  33. package/src/lib/safeFetch.ts +87 -0
  34. package/src/scenarios/agent-deployment-shape.test.ts +139 -0
  35. package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
  36. package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
  37. package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
  38. package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
  39. package/src/scenarios/agent-live-structured-output.test.ts +58 -0
  40. package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
  41. package/src/scenarios/agent-platform-profile.test.ts +158 -0
  42. package/src/scenarios/agent-roster-attribution.test.ts +179 -0
  43. package/src/scenarios/agent-roster-shape.test.ts +146 -0
  44. package/src/scenarios/budget-policy-shape.test.ts +136 -0
  45. package/src/scenarios/egress-provenance-shape.test.ts +137 -0
  46. package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
  47. package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
  48. package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
  49. package/src/scenarios/runtime-requires-shape.test.ts +134 -0
  50. package/src/scenarios/safefetch-behavior.test.ts +99 -0
  51. package/src/scenarios/safefetch-live-audit.test.ts +175 -0
  52. package/src/scenarios/spec-corpus-validity.test.ts +19 -3
  53. package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
  54. package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
  55. package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
@@ -0,0 +1,92 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/eval-summary.schema.json",
4
+ "title": "EvalSummary",
5
+ "description": "RFC 0081 §C. The terminal scorecard of an eval run (the `mode: \"eval\"` projection over `POST /v1/runs`, RFC 0081 §B): the aggregate + per-task scores, cost, latency, schema-validity, and safety findings, plus the suite provenance and (for `regression` mode) the score delta vs a baseline. Set as the eval run's output and served by `GET /v1/runs/{runId}/eval-summary`. SECURITY invariant `eval-summary-no-content-leak`: the summary carries scores, ids, counts, and redaction-safe safety descriptors only — NEVER task output bodies, rubric prose, model completions, prompts, or credential material (SR-1). A consumer reads the run's normal projection for any body.",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["suiteId", "suiteVersion", "aggregateScore", "passed", "taskCount", "passedCount", "tasks"],
9
+ "properties": {
10
+ "suiteId": {
11
+ "type": "string",
12
+ "pattern": "^[a-z0-9.-]+\\.evals\\.[a-z0-9-]+$",
13
+ "description": "The `agent-eval-suite.schema.json#suiteId` this summary scores."
14
+ },
15
+ "suiteVersion": {
16
+ "type": "string",
17
+ "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$",
18
+ "description": "The pinned suite version the run executed (mirrors `eval.started.suiteVersion`)."
19
+ },
20
+ "evaluatedModelClass": {
21
+ "type": "string",
22
+ "enum": ["reasoning", "writing", "coding", "research", "classification", "general"],
23
+ "description": "MAY. The `AgentManifest.modelClass` (RFC 0002) the run was evaluated against, so a score is read against its model. Present when the host resolves a concrete class."
24
+ },
25
+ "aggregateScore": {
26
+ "type": "number",
27
+ "minimum": 0,
28
+ "maximum": 1,
29
+ "description": "The suite-level score (0.0–1.0): the aggregation (host-defined, typically the mean) of per-task scores."
30
+ },
31
+ "passed": {
32
+ "type": "boolean",
33
+ "description": "Whether the run cleared the suite's `thresholds` (RFC 0081 §A) — `aggregateScore >= passScore` AND, when declared, `totalCostUsd <= maxCostUsd` AND the p95 latency bar. The load-bearing flag an RFC 0082 deployment gate may require (`requiredEval`)."
34
+ },
35
+ "taskCount": {
36
+ "type": "integer",
37
+ "minimum": 0,
38
+ "description": "Number of tasks executed."
39
+ },
40
+ "passedCount": {
41
+ "type": "integer",
42
+ "minimum": 0,
43
+ "description": "Number of tasks that individually passed."
44
+ },
45
+ "totalCostUsd": {
46
+ "type": "number",
47
+ "minimum": 0,
48
+ "description": "MAY. Total cost of the run, summed from the per-task RFC 0026 `provider.usage` events (the scalar only — never a pricing breakdown or rate card; `eval-summary-no-content-leak`)."
49
+ },
50
+ "tasks": {
51
+ "type": "array",
52
+ "description": "Per-task results, in suite order. Each entry is content-free: scores, scalars, ids, and redaction-safe safety descriptors only.",
53
+ "items": {
54
+ "type": "object",
55
+ "additionalProperties": false,
56
+ "required": ["taskId", "score", "passed"],
57
+ "properties": {
58
+ "taskId": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-]*$", "description": "The `agent-eval-suite` task id." },
59
+ "score": { "type": "number", "minimum": 0, "maximum": 1, "description": "Task score (0.0–1.0)." },
60
+ "passed": { "type": "boolean", "description": "Whether this task individually met its bar." },
61
+ "costUsd": { "type": "number", "minimum": 0, "description": "MAY. Task cost (scalar)." },
62
+ "latencyMs": { "type": "integer", "minimum": 0, "description": "MAY. Task wall-clock latency." },
63
+ "schemaValid": { "type": "boolean", "description": "MAY. Whether the task output validated against the agent's `handoff.returnSchemaRef` (when structured-output enforcement is in effect)." },
64
+ "safetyFindings": {
65
+ "type": "array",
66
+ "description": "MAY. Redaction-safe safety findings (primarily `adversarial` mode). Each is a `{kind, severity}` descriptor — NO excerpted content, prompt, or completion text (`eval-summary-no-content-leak`).",
67
+ "items": {
68
+ "type": "object",
69
+ "additionalProperties": false,
70
+ "required": ["kind", "severity"],
71
+ "properties": {
72
+ "kind": { "type": "string", "minLength": 1, "description": "Finding category (e.g. `jailbreak`, `pii-leak`, `unsafe-tool-call`) — a category label, not excerpted content." },
73
+ "severity": { "type": "string", "enum": ["low", "medium", "high", "critical"], "description": "Finding severity." }
74
+ }
75
+ }
76
+ }
77
+ }
78
+ }
79
+ },
80
+ "regression": {
81
+ "type": "object",
82
+ "additionalProperties": false,
83
+ "description": "MAY. Present for `regression` mode. The score delta vs a baseline eval run, plus a pointer to the RFC 0054 `:diff` for the structural divergence. Content-free.",
84
+ "required": ["baselineRunId", "scoreDelta"],
85
+ "properties": {
86
+ "baselineRunId": { "type": "string", "minLength": 1, "description": "The prior eval run this run is compared against." },
87
+ "scoreDelta": { "type": "number", "minimum": -1, "maximum": 1, "description": "`aggregateScore` minus the baseline's (negative ⇒ regression)." },
88
+ "diffRef": { "type": "string", "description": "MAY. A pointer to `GET /v1/runs/{runId}:diff?against={baselineRunId}` (RFC 0054) for the structural delta." }
89
+ }
90
+ }
91
+ }
92
+ }
@@ -366,6 +366,23 @@
366
366
  "minRuntimeVersion": {
367
367
  "type": "string",
368
368
  "description": "Minimum host runtime version (e.g., `node>=20`, `python>=3.10`, `go>=1.22`)."
369
+ },
370
+ "requires": {
371
+ "type": "array",
372
+ "uniqueItems": true,
373
+ "description": "RFC 0076. Abstract platform primitives the pack's runtime code exercises, for install-time sandbox gating. Runtime-agnostic (not language builtin names). Absent or [] ⇒ no elevated platform needs. A sandbox host MUST evaluate this at install time and refuse (`pack_runtime_requirement_unmet`) any primitive it will not grant; see node-packs.md §\"Runtime platform requirements\".",
374
+ "items": {
375
+ "oneOf": [
376
+ { "const": "net.dns", "description": "Resolves hostnames (e.g., SSRF pre-flight)." },
377
+ { "const": "net.outbound", "description": "Opens outbound network connections / fetch." },
378
+ { "const": "crypto", "description": "Uses cryptographic primitives beyond the standard hashing the host already provides." },
379
+ { "const": "subprocess", "description": "Spawns a child process (composes with the RFC 0069 exec-class contract when the host advertises it)." },
380
+ { "const": "fs.read", "description": "Reads the local filesystem." },
381
+ { "const": "fs.write", "description": "Writes the local filesystem." },
382
+ { "const": "env.read", "description": "Reads the process environment (may expose deployment secrets if the host does not scrub it)." },
383
+ { "const": "clock", "description": "Reads wall-clock time as a behavioral input — gated for REPLAY determinism, not access control. A pack that branches on the clock is non-deterministic on replay (replay.md)." }
384
+ ]
385
+ }
369
386
  }
370
387
  },
371
388
  "additionalProperties": false
@@ -0,0 +1,26 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/org-chart-responsibility-view.schema.json",
4
+ "title": "OrgChartResponsibilityView",
5
+ "description": "RFC 0087 §D. Response body for `GET /v1/agents/org-chart/{departmentId}` — one department's subtree + the derived responsibility roll-up (the union of its members' RFC 0086 workflow portfolios, recursively through sub-departments unless `?recursive=false`). A read-only VIEW computed from live roster entries: it grants nothing (§B `org-position-no-authority-escalation`) and stores nothing. Tenant-scoped per RFC 0074.",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["department", "members", "responsibilities"],
9
+ "properties": {
10
+ "department": {
11
+ "$ref": "./agent-org-chart.schema.json#/$defs/Department",
12
+ "description": "The department this view is rooted at."
13
+ },
14
+ "members": {
15
+ "type": "array",
16
+ "items": { "$ref": "./agent-org-chart.schema.json#/$defs/Member" },
17
+ "description": "The department's members (and, by default, its sub-departments' members)."
18
+ },
19
+ "responsibilities": {
20
+ "type": "array",
21
+ "items": { "type": "string", "minLength": 1 },
22
+ "uniqueItems": true,
23
+ "description": "The union of the members' RFC 0086 `workflows[]` portfolios — 'what this department is collectively responsible for'. Deduped; descriptive only."
24
+ }
25
+ }
26
+ }
@@ -2,7 +2,7 @@
2
2
  "$schema": "https://json-schema.org/draft/2020-12/schema",
3
3
  "$id": "https://openwop.dev/spec/v1/run-event-payloads.schema.json",
4
4
  "title": "RunEventPayloads",
5
- "description": "Per-RunEventType payload schemas. The base RunEventDoc shape (run-event.schema.json) leaves `payload` permissive for forward-compat. This schema defines the canonical payload contract for each known RunEventType. Consumers MAY pin strict payload validation via `$defs.<typeId>` and `ajv.validate(schema.$defs[event.type], event.payload)`. Unknown event types MUST be tolerated (no $defs match → fold best-effort).\n\n75 variants from `run-event.schema.json#$defs.RunEventType` are covered, grouped into ~20 shape families with shared $defs. Naming convention: camelCase keys mirror dotted RunEventType names (e.g., `run.started` → `runStarted`).",
5
+ "description": "Per-RunEventType payload schemas. The base RunEventDoc shape (run-event.schema.json) leaves `payload` permissive for forward-compat. This schema defines the canonical payload contract for each known RunEventType. Consumers MAY pin strict payload validation via `$defs.<typeId>` and `ajv.validate(schema.$defs[event.type], event.payload)`. Unknown event types MUST be tolerated (no $defs match → fold best-effort).\n\n94 variants from `run-event.schema.json#$defs.RunEventType` are covered, grouped into ~20 shape families with shared $defs. Naming convention: camelCase keys mirror dotted RunEventType names (e.g., `run.started` → `runStarted`).",
6
6
  "type": "object",
7
7
  "$defs": {
8
8
  "_typeIndex": {
@@ -79,12 +79,31 @@
79
79
  "memory.written": { "$ref": "#/$defs/memoryWritten" },
80
80
  "agent.memory.consolidated": { "$ref": "#/$defs/agentMemoryConsolidated" },
81
81
  "commitment.fired": { "$ref": "#/$defs/commitmentFired" },
82
+ "agent.invocation.started": { "$ref": "#/$defs/agentInvocationStarted" },
83
+ "agent.invocation.completed":{ "$ref": "#/$defs/agentInvocationCompleted" },
82
84
  "workspace.updated": { "$ref": "#/$defs/workspaceUpdated" },
83
85
  "core.workflowChain.event": { "$ref": "#/$defs/coreWorkflowChainEvent" },
84
86
  "core.workflowChain.confidence-escalated": { "$ref": "#/$defs/coreWorkflowChainConfidenceEscalated" },
85
87
  "connector.authorized": { "$ref": "#/$defs/connectorAuthorized" },
86
88
  "connector.auth_expired": { "$ref": "#/$defs/connectorAuthExpired" },
87
- "authorization.decided": { "$ref": "#/$defs/authorizationDecided" }
89
+ "authorization.decided": { "$ref": "#/$defs/authorizationDecided" },
90
+ "eval.started": { "$ref": "#/$defs/evalStarted" },
91
+ "eval.scored": { "$ref": "#/$defs/evalScored" },
92
+ "eval.completed": { "$ref": "#/$defs/evalCompleted" },
93
+ "deployment.promoted": { "$ref": "#/$defs/deploymentPromoted" },
94
+ "deployment.rolled-back": { "$ref": "#/$defs/deploymentRolledBack" },
95
+ "deployment.canary.adjusted":{ "$ref": "#/$defs/deploymentCanaryAdjusted" },
96
+ "deployment.state.changed": { "$ref": "#/$defs/deploymentStateChanged" },
97
+ "roster.run.initiated": { "$ref": "#/$defs/rosterRunInitiated" },
98
+ "tool.session.opened": { "$ref": "#/$defs/toolSessionOpened" },
99
+ "tool.session.closed": { "$ref": "#/$defs/toolSessionClosed" },
100
+ "egress.decided": { "$ref": "#/$defs/egressDecided" },
101
+ "trigger.subscription.state.changed": { "$ref": "#/$defs/triggerSubscriptionStateChanged" },
102
+ "trigger.delivery.attempted": { "$ref": "#/$defs/triggerDeliveryAttempted" },
103
+ "budget.reserved": { "$ref": "#/$defs/budgetReserved" },
104
+ "budget.consumed": { "$ref": "#/$defs/budgetConsumed" },
105
+ "budget.threshold.crossed": { "$ref": "#/$defs/budgetThresholdCrossed" },
106
+ "budget.exhausted": { "$ref": "#/$defs/budgetExhausted" }
88
107
  }
89
108
  },
90
109
 
@@ -744,7 +763,7 @@
744
763
  "description": "Emitted when a CapabilityLimit is exceeded. Protocol-level limits use the engine kinds (clarificationRounds / schemaRounds / envelopesPerTurn / maxNodeExecutions). RFC 0008 §K WASM-runtime caps use the wasm-* kinds (memory ceiling, fuel exhaustion, execution-time wall-clock). RFC 0058 run-execution bounds use the run-scoped run-duration (wall-clock timeout; limit=resolvedMs, observed=elapsedMs) and loop-iterations (agent-loop ceiling; limit=resolvedMax, observed=iterationCount) kinds.",
745
764
  "required": ["kind", "limit", "observed"],
746
765
  "properties": {
747
- "kind": { "type": "string", "enum": ["clarification", "schema", "envelopes", "node-executions", "wasm-memory", "wasm-fuel", "wasm-execution-time", "run-duration", "loop-iterations"] },
766
+ "kind": { "type": "string", "enum": ["clarification", "schema", "envelopes", "node-executions", "wasm-memory", "wasm-fuel", "wasm-execution-time", "run-duration", "loop-iterations", "budget-tokens", "budget-cost", "budget-tool-calls", "budget-retries"] },
748
767
  "limit": { "type": "integer", "minimum": 0 },
749
768
  "observed": { "type": "integer", "minimum": 0 },
750
769
  "nodeId": { "type": "string" }
@@ -1360,6 +1379,40 @@
1360
1379
  "additionalProperties": true
1361
1380
  },
1362
1381
 
1382
+ "agentInvocationStarted": {
1383
+ "type": "object",
1384
+ "description": "RFC 0077. Emitted by a host advertising `capabilities.agents.liveRuntime.supported: true` as the FIRST agent-scoped event of a live manifest invocation, bracketing the existing `agent.*` family with `agent.invocation.completed`. Content-free: identifiers + selection metadata only — prompt/task content is served by the run's normal projection, never on this event. A recorded-fact event per `replay.md` §\"Recorded-fact events\": on replay it is re-emitted from the log and the host MUST NOT regenerate its `invocationId` (or any identifier). Distinct from the deterministic RFC 0070 sample floor.",
1385
+ "required": ["invocationId", "agentId", "source"],
1386
+ "properties": {
1387
+ "invocationId": { "type": "string", "minLength": 1, "description": "Host-defined id correlating this agent invocation within its run, UNIQUE-WITHIN-RUN (not a mandated global id-space). Distinct from `runId` — one run MAY dispatch several invocations (multiple agent nodes, or a handoff chain) — but a host MAY derive it from an existing per-node-execution receipt id (e.g. `runId:nodeId:seq`) or mint a UUID; a single-invocation run MAY reuse `runId`. Recorded-fact: re-read from the log on replay, never regenerated (`replay.md` §\"Recorded-fact events\"). Correlates to the matching `agent.invocation.completed`." },
1388
+ "agentId": { "type": "string", "minLength": 1, "description": "The manifest agentId being invoked (matches `AgentManifest.agentId`)." },
1389
+ "source": { "type": "string", "enum": ["workflow-node", "run-api", "chat-mention"], "description": "Which entry point launched the invocation. All sources emit this identical family." },
1390
+ "modelClass": { "type": "string", "description": "MAY — the manifest's abstract `modelClass`. Always populatable at start." },
1391
+ "resolvedModel": { "type": "string", "description": "MAY — the concrete model the host selected. OPTIONAL: modelClass→concrete resolution MAY happen downstream (with capability-gated fallback substitution), so a dispatch-time start event genuinely may not know it; a host MAY also omit it for deployment privacy." },
1392
+ "resolvedProvider": { "type": "string", "description": "MAY — the concrete provider the host routed to (aligns with `capabilities.aiProviders.supported` / RFC 0067 `authModes`). OPTIONAL, same rationale as `resolvedModel`." },
1393
+ "resolvedAgentVersion": { "type": "string", "description": "MAY — RFC 0082 §B. When this invocation's `AgentRef` bound a `channel` (rather than an exact `version`), the concrete agent-definition version the host pinned per-(run, agentId, channel) at first resolution. A RECORDED FACT: re-read from the log on replay and NEVER re-resolved against a moved channel (the whole event is recorded-fact). Absent when the ref used an exact `version` or host-default resolution." },
1394
+ "resolvedChannel": { "type": "string", "description": "MAY — RFC 0082 §B. The named `channel` the `AgentRef` bound (mirrors `AgentRef.channel`), for which `resolvedAgentVersion` is the pinned resolution. Content-free label; present only when the ref bound a channel." },
1395
+ "toolSurfaceCount": { "type": "integer", "minimum": 0, "description": "MAY — number of tools in the constructed surface after `toolAllowlist` filtering. Content-free count, not the tool ids." },
1396
+ "memoryBound": { "type": "boolean", "description": "MAY — whether a long-term memory backend was bound for the invocation." }
1397
+ },
1398
+ "additionalProperties": true
1399
+ },
1400
+
1401
+ "agentInvocationCompleted": {
1402
+ "type": "object",
1403
+ "description": "RFC 0077. Emitted by a host advertising `capabilities.agents.liveRuntime.supported: true` as the LAST agent-scoped event of a live manifest invocation (after the terminal `agent.decided`). Content-free: identifiers + outcome metadata only — the result body is served by the run's normal projection. A recorded-fact event per `replay.md` §\"Recorded-fact events\": re-read from the log on replay, never regenerated.",
1404
+ "required": ["invocationId", "agentId", "outcome"],
1405
+ "properties": {
1406
+ "invocationId": { "type": "string", "minLength": 1, "description": "Correlates to the `agent.invocation.started` `invocationId`. Recorded-fact: never regenerated on replay." },
1407
+ "agentId": { "type": "string", "minLength": 1, "description": "The manifest agentId." },
1408
+ "outcome": { "type": "string", "enum": ["completed", "handed-off", "escalated", "refused", "failed"], "description": "Terminal disposition. `completed`: produced a (schema-valid, if enforced) terminal result. `handed-off`: control passed to another agent (`agent.handoff`). `escalated`: confidence escalation fired (RFC 0002 §F). `refused`: the model refused (RFC 0032 refusal envelope) — never a silent substitution. `failed`: an error terminated the invocation." },
1409
+ "schemaValidated": { "type": "boolean", "description": "MAY — whether the terminal result was validated against `handoff.returnSchemaRef` (true only when `liveRuntime.structuredOutput` is advertised and a `returnSchemaRef` exists)." },
1410
+ "confidence": { "type": "number", "minimum": 0, "maximum": 1, "description": "MAY — the terminal `agent.decided` confidence, mirrored for convenience. Content-free scalar." },
1411
+ "enqueuedRunId": { "type": "string", "minLength": 1, "description": "MAY — id of a follow-on run the invocation enqueued (e.g. a sub-run), when applicable." }
1412
+ },
1413
+ "additionalProperties": true
1414
+ },
1415
+
1363
1416
  "workspaceUpdated": {
1364
1417
  "description": "RFC 0059. Emitted by a host advertising `capabilities.workspace.supported: true` on each successful `PUT`/`DELETE` of a workspace file, attributing the change to the file `path` + resulting `version`. Content-free: carries the path + version only — the file body is served by the read-side (`GET /v1/host/workspace/files/{path}`), already SR-1-redacted (RFC 0059 §E WSR-1). On replay the event is re-read from the log, never regenerated. MUST NOT be emitted unless `capabilities.workspace.supported: true`. The proposed SECURITY invariant `workspace-cross-tenant-isolation` (WCT-1) applies to the read-side that resolves these paths.",
1365
1418
  "type": "object",
@@ -1369,6 +1422,236 @@
1369
1422
  "path": { "type": "string", "minLength": 1, "description": "Workspace-relative path of the file that changed (matches `workspace-file.schema.json#path`)." },
1370
1423
  "version": { "type": "integer", "minimum": 1, "description": "The file's resulting monotonic version after the write. On delete, the tombstone version when `versioned: true`." }
1371
1424
  }
1425
+ },
1426
+
1427
+ "evalStarted": {
1428
+ "type": "object",
1429
+ "description": "RFC 0081 §C. Emitted ONCE at the start of an eval run (a `mode: \"eval\"` run, RFC 0081 §B) by a host advertising `capabilities.agents.evalSuite.supported: true`. Content-free: suite provenance + counts only. A recorded-fact event per `replay.md` §\"Recorded-fact events\".",
1430
+ "required": ["suiteId", "suiteVersion", "taskCount", "modes"],
1431
+ "properties": {
1432
+ "suiteId": { "type": "string", "minLength": 1, "description": "The `agent-eval-suite.schema.json#suiteId` being run." },
1433
+ "suiteVersion": { "type": "string", "minLength": 1, "description": "The pinned suite SemVer." },
1434
+ "taskCount": { "type": "integer", "minimum": 0, "description": "Number of tasks the run will execute." },
1435
+ "modes": { "type": "array", "uniqueItems": true, "items": { "type": "string", "enum": ["golden", "rubric", "adversarial", "regression", "live-shadow"] }, "description": "The eval modes this run exercises (RFC 0081 §D)." },
1436
+ "baselineRunId": { "type": "string", "minLength": 1, "description": "MAY — for `regression` mode, the prior eval run this run is scored against." }
1437
+ },
1438
+ "additionalProperties": true
1439
+ },
1440
+
1441
+ "evalScored": {
1442
+ "type": "object",
1443
+ "description": "RFC 0081 §C. Emitted ONCE PER TASK, after that task's terminal `agent.decided`, so a streaming consumer sees results land incrementally. Content-free: the task id + scalars + counts ONLY — NEVER the task output, rubric prose, or model completion (SECURITY invariant `eval-summary-no-content-leak`). A recorded-fact event.",
1444
+ "required": ["taskId", "score", "passed"],
1445
+ "properties": {
1446
+ "taskId": { "type": "string", "minLength": 1, "description": "The `agent-eval-suite` task id." },
1447
+ "score": { "type": "number", "minimum": 0, "maximum": 1, "description": "Task score (0.0–1.0)." },
1448
+ "passed": { "type": "boolean", "description": "Whether the task met its bar." },
1449
+ "costUsd": { "type": "number", "minimum": 0, "description": "MAY — task cost (scalar; summed from RFC 0026 `provider.usage`). NEVER a pricing breakdown." },
1450
+ "latencyMs": { "type": "integer", "minimum": 0, "description": "MAY — task wall-clock latency." },
1451
+ "schemaValid": { "type": "boolean", "description": "MAY — whether the output validated against `handoff.returnSchemaRef`." },
1452
+ "safetyFindingCount": { "type": "integer", "minimum": 0, "description": "MAY — count of safety findings for the task (the redaction-safe descriptors live on `EvalSummary`; the event carries the count only)." }
1453
+ },
1454
+ "additionalProperties": true
1455
+ },
1456
+
1457
+ "evalCompleted": {
1458
+ "type": "object",
1459
+ "description": "RFC 0081 §C. Emitted ONCE, after all tasks and before `run.completed`. Content-free: aggregate scalars only. A recorded-fact event. The full scorecard is the run's output (`eval-summary.schema.json`), served by `GET /v1/runs/{runId}/eval-summary`.",
1460
+ "required": ["aggregateScore", "passed", "taskCount", "passedCount"],
1461
+ "properties": {
1462
+ "aggregateScore": { "type": "number", "minimum": 0, "maximum": 1, "description": "The suite-level score (0.0–1.0)." },
1463
+ "passed": { "type": "boolean", "description": "Whether the run cleared the suite thresholds — the flag an RFC 0082 deployment gate MAY require." },
1464
+ "taskCount": { "type": "integer", "minimum": 0, "description": "Tasks executed." },
1465
+ "passedCount": { "type": "integer", "minimum": 0, "description": "Tasks that individually passed." },
1466
+ "regressionVsBaseline": { "type": "number", "minimum": -1, "maximum": 1, "description": "MAY — for `regression` mode, `aggregateScore` minus the baseline's (negative ⇒ regression)." }
1467
+ },
1468
+ "additionalProperties": true
1469
+ },
1470
+ "toolSessionOpened": {
1471
+ "type": "object",
1472
+ "description": "RFC 0078 §D. Emitted when the host opens a multi-step tool session, bracketing one or more RFC 0064 `agent.toolCalled`/`agent.toolReturned` call events. Content-free: identifiers only — never tool arguments, results, or credential material (SR-1). Emitted only when `capabilities.toolCatalog.sessionLifecycle: true`.",
1473
+ "required": ["sessionId", "toolId"],
1474
+ "properties": {
1475
+ "sessionId": { "type": "string", "minLength": 1, "description": "Host-unique id correlating the opened/closed pair + the bracketed call events." },
1476
+ "toolId": { "type": "string", "minLength": 1, "description": "The `ToolDescriptor.toolId` the session is for (`<scope>:<tool-id>`)." }
1477
+ },
1478
+ "additionalProperties": true
1479
+ },
1480
+ "toolSessionClosed": {
1481
+ "type": "object",
1482
+ "description": "RFC 0078 §D. Emitted when the host closes a multi-step tool session opened by `tool.session.opened`. Content-free: identifiers + a closed-enum outcome only — never tool arguments, results, or credential material (SR-1).",
1483
+ "required": ["sessionId", "toolId", "outcome"],
1484
+ "properties": {
1485
+ "sessionId": { "type": "string", "minLength": 1, "description": "Matches the `tool.session.opened` `sessionId`." },
1486
+ "toolId": { "type": "string", "minLength": 1, "description": "The `ToolDescriptor.toolId` the session was for." },
1487
+ "outcome": { "type": "string", "enum": ["completed", "failed", "cancelled"], "description": "Terminal disposition of the session. Content-free — no failure detail/result on the payload." }
1488
+ },
1489
+ "additionalProperties": true
1490
+ },
1491
+ "egressDecided": {
1492
+ "type": "object",
1493
+ "description": "RFC 0079 §B. Emitted when a host evaluates a credentialed egress (via `ctx.http.safeFetch` / a tool) against credential provenance + the SSRF guard. Content-free: identifiers + decision only — no credential value, no request/response body (SR-1). On replay re-read from the log, never regenerated (the decision is a recorded fact). Emitted only when `capabilities.httpClient.egressPolicy.supported: true`.",
1494
+ "required": ["decision", "destination"],
1495
+ "properties": {
1496
+ "decision": { "type": "string", "enum": ["allowed", "denied", "downgraded", "approval-required"], "description": "`allowed`: egress proceeds with the credential; `denied`: blocked (out-of-audience / expired / SSRF / unevaluable provenance — fail-closed); `downgraded`: proceeds WITHOUT the credential (anonymous egress, when host policy permits); `approval-required`: suspended pending an RFC 0051 approval interrupt." },
1497
+ "destination": { "type": "string", "minLength": 1, "description": "The egress destination **host/authority ONLY** (`api.stripe.com`) — NOT a path, query, or full URL (SR-1: a path/query can carry secrets; the host MUST strip them for this canonical content-free event). A host whose internal audit retains the path keeps that on its vendor `x-host-*` variant, never here." },
1498
+ "credentialId": { "type": "string", "minLength": 1, "description": "MAY — the provenance `credentialId` considered (absent for an anonymous / denied-pre-credential egress)." },
1499
+ "reason": { "type": "string", "enum": ["ok", "out-of-audience", "expired", "ssrf-blocked", "provenance-unevaluable", "scope-denied", "policy-denied"], "description": "MAY — a machine-stable reason code (a CLOSED enum — a free-form reason is a leak vector that would let a host spill the blocked URL/host/header into it, defeating `egress-decision-no-secret-leak`). The load-bearing field is `decision`." },
1500
+ "auditCorrelationId": { "type": "string", "minLength": 1, "description": "MAY — correlates to the provenance descriptor + host audit log." }
1501
+ },
1502
+ "additionalProperties": true
1503
+ },
1504
+ "triggerSubscriptionStateChanged": {
1505
+ "type": "object",
1506
+ "description": "RFC 0083 §C. Emitted when a trigger subscription changes state (the §B four-state machine). Content-free: identifiers + states only — no inbound payload, headers, or credential material (SR-1). Emitted only when `capabilities.triggerBridge.supported: true`.",
1507
+ "required": ["subscriptionId", "source", "fromState", "toState"],
1508
+ "properties": {
1509
+ "subscriptionId": { "type": "string", "minLength": 1, "description": "The TriggerSubscription this state change is for." },
1510
+ "source": { "type": "string", "enum": ["webhook", "schedule", "queue", "email", "form"], "description": "The subscription source." },
1511
+ "fromState": { "type": "string", "enum": ["active", "paused", "failed", "dead-lettered"], "description": "Prior state." },
1512
+ "toState": { "type": "string", "enum": ["active", "paused", "failed", "dead-lettered"], "description": "New state." },
1513
+ "reason": { "type": "string", "enum": ["retry-exhausted", "operator-paused", "signature-invalid", "backpressure", "source-removed", "provenance-unevaluable"], "description": "MAY — a machine-stable reason code (a CLOSED enum — a free-form reason is a leak vector that would let a host spill an inbound URL/host/header into it). The load-bearing field is `toState`." }
1514
+ },
1515
+ "additionalProperties": true
1516
+ },
1517
+ "triggerDeliveryAttempted": {
1518
+ "type": "object",
1519
+ "description": "RFC 0083 §C. Emitted on each inbound-delivery attempt against an active subscription. Content-free: the subscription id, the dedup key, the attempt counter, the outcome, and the resulting runId only — NEVER the inbound body, headers, or credential material (SR-1).",
1520
+ "required": ["subscriptionId", "dedupKey", "attempt", "outcome"],
1521
+ "properties": {
1522
+ "subscriptionId": { "type": "string", "minLength": 1, "description": "The TriggerSubscription the delivery is for." },
1523
+ "dedupKey": { "type": "string", "minLength": 1, "description": "The de-duplication key (§C-1). MUST be a **host-opaque** stable key (e.g. `hash(subscriptionId + inbound-event-id)`); it MUST NOT embed inbound body / path / header content in cleartext (SR-1 — a key like `POST /webhook/orders/12345?token=…` would leak). A repeat within retention is a no-op returning the prior runId." },
1524
+ "attempt": { "type": "integer", "minimum": 1, "description": "1-based attempt counter." },
1525
+ "outcome": { "type": "string", "enum": ["delivered", "retrying", "dead-lettered"], "description": "`delivered`: the run started; `retrying`: failed, will retry per policy; `dead-lettered`: retries exhausted, routed to the RFC 0053 sink (no run)." },
1526
+ "runId": { "type": "string", "minLength": 1, "description": "MAY — the run started by a `delivered` outcome (the run's `run.started` carries this delivery's id as `causationId`, RFC 0040). Absent for `retrying` / `dead-lettered`." }
1527
+ },
1528
+ "additionalProperties": true
1529
+ },
1530
+ "budgetReserved": {
1531
+ "type": "object",
1532
+ "description": "RFC 0084 §C. Emitted once at run start with the resolved effective budget (`min` across run/workflow/agent/project scopes, clamped to the host ceiling). A recorded fact — replay re-reads it, never re-resolves (§B). Content-free: dimension ceilings + scope only, no pricing breakdown (SR-1, `budget-no-pricing-leak`).",
1533
+ "required": ["effectiveBudget", "scope"],
1534
+ "properties": {
1535
+ "effectiveBudget": {
1536
+ "type": "object",
1537
+ "additionalProperties": false,
1538
+ "description": "The resolved per-dimension ceilings (a subset of the BudgetPolicy dimensions).",
1539
+ "properties": {
1540
+ "maxTokens": { "type": "integer", "minimum": 0 },
1541
+ "maxCostUsd": { "type": "number", "minimum": 0 },
1542
+ "maxToolCalls": { "type": "integer", "minimum": 0 },
1543
+ "maxRetries": { "type": "integer", "minimum": 0 }
1544
+ }
1545
+ },
1546
+ "scope": { "type": "string", "enum": ["run", "workflow", "agent", "project"], "description": "The binding scope the effective budget resolved from (§B)." }
1547
+ },
1548
+ "additionalProperties": false
1549
+ },
1550
+ "budgetConsumed": {
1551
+ "type": "object",
1552
+ "description": "RFC 0084 §C. A running projection of spend, derived from the existing events (`provider.usage` tokens/cost, `agent.toolCalled`, `node.retried`) — NOT a new measurement (no double-counting). The host MAY coalesce. Content-free: dimension name + numeric amounts only (integer counts for tokens / tool calls / retries, a decimal amount for cost), no pricing breakdown / rate card (SR-1).",
1553
+ "required": ["dimension", "consumed", "limit"],
1554
+ "properties": {
1555
+ "dimension": { "type": "string", "enum": ["tokens", "cost", "toolCalls", "retries"], "description": "Which budget dimension this projection is for." },
1556
+ "consumed": { "type": "number", "minimum": 0, "description": "Amount consumed so far (tokens/calls/retries are integers; cost is a number in the host's `costEstimateUsd` units)." },
1557
+ "limit": { "type": "number", "minimum": 0, "description": "The effective ceiling for the dimension." },
1558
+ "remaining": { "type": "number", "description": "MAY — `limit - consumed`." }
1559
+ },
1560
+ "additionalProperties": false
1561
+ },
1562
+ "budgetThresholdCrossed": {
1563
+ "type": "object",
1564
+ "description": "RFC 0084 §C. Emitted once per dimension when consumption crosses `thresholdPercent`. Content-free.",
1565
+ "required": ["dimension", "consumed", "limit", "percent"],
1566
+ "properties": {
1567
+ "dimension": { "type": "string", "enum": ["tokens", "cost", "toolCalls", "retries"] },
1568
+ "consumed": { "type": "number", "minimum": 0 },
1569
+ "limit": { "type": "number", "minimum": 0 },
1570
+ "percent": { "type": "number", "minimum": 0, "maximum": 100, "description": "The percent-of-limit threshold crossed." }
1571
+ },
1572
+ "additionalProperties": false
1573
+ },
1574
+ "budgetExhausted": {
1575
+ "type": "object",
1576
+ "description": "RFC 0084 §C. Emitted when a dimension hits its limit. For a hard dimension this is followed by `cap.breached{kind:\"budget-*\"}` → `run.failed{budget_exhausted}` (or, with `onExhaustion:\"interrupt\"`, an approval interrupt). Content-free.",
1577
+ "required": ["dimension", "consumed", "limit"],
1578
+ "properties": {
1579
+ "dimension": { "type": "string", "enum": ["tokens", "cost", "toolCalls", "retries"] },
1580
+ "consumed": { "type": "number", "minimum": 0 },
1581
+ "limit": { "type": "number", "minimum": 0 }
1582
+ },
1583
+ "additionalProperties": false
1584
+ },
1585
+
1586
+ "deploymentPromoted": {
1587
+ "type": "object",
1588
+ "description": "RFC 0082 §D. Emitted on the deployment-management run when a version is promoted into a new lifecycle state (the §E contract: authorized via RFC 0049 `deploy:*`, gated via RFC 0051 `approvalGate`, eval-verified via RFC 0081 when `requiredEval` is configured). Content-free: ids / state / scalars / content-free references only — NEVER a manifest body, prompt, or credential (SECURITY invariant `deployment-event-no-content-leak`; `additionalProperties:false` enforces it). A recorded-fact event per `replay.md` §\"Recorded-fact events\". Audit-logged with the acting principal (`auth.md`).",
1589
+ "additionalProperties": false,
1590
+ "required": ["agentId", "toVersion", "toState"],
1591
+ "properties": {
1592
+ "agentId": { "type": "string", "minLength": 1, "description": "The promoted agent's id." },
1593
+ "fromVersion": { "type": "string", "minLength": 1, "description": "MAY — the version previously occupying the target channel/state (absent on a first promotion)." },
1594
+ "toVersion": { "type": "string", "minLength": 1, "description": "The version promoted." },
1595
+ "toState": { "type": "string", "enum": ["draft", "test", "staged", "active", "paused", "deprecated", "rolled-back"], "description": "The lifecycle state entered." },
1596
+ "channel": { "type": "string", "minLength": 1, "description": "MAY — the named channel this promotion targets (when promoting to a channel)." },
1597
+ "canaryPercent": { "type": "integer", "minimum": 0, "maximum": 100, "description": "MAY — the canary traffic share, in percent, assigned to the promoted version (when promoting to `active` at less than 100%)." },
1598
+ "evalRunId": { "type": "string", "minLength": 1, "description": "MAY — the RFC 0081 eval run whose `EvalSummary.passed` gated this promotion (the §E evidence)." },
1599
+ "approvalGateId": { "type": "string", "minLength": 1, "description": "MAY — the RFC 0051 approvalGate that authorized this promotion." }
1600
+ }
1601
+ },
1602
+
1603
+ "deploymentRolledBack": {
1604
+ "type": "object",
1605
+ "description": "RFC 0082 §D. Emitted when an `active` version is rolled back and a prior version restored to `active`. Content-free (`deployment-event-no-content-leak`). Recorded-fact; audit-logged.",
1606
+ "additionalProperties": false,
1607
+ "required": ["agentId", "fromVersion", "toVersion", "rollbackPointer"],
1608
+ "properties": {
1609
+ "agentId": { "type": "string", "minLength": 1, "description": "The agent's id." },
1610
+ "fromVersion": { "type": "string", "minLength": 1, "description": "The version that was rolled back, i.e. the version that exited the `active` state." },
1611
+ "toVersion": { "type": "string", "minLength": 1, "description": "The version restored to `active`." },
1612
+ "rollbackPointer": { "type": "string", "minLength": 1, "description": "The recovery target the rolled-back record points to (= `toVersion`)." },
1613
+ "reason": { "type": "string", "minLength": 1, "description": "MAY — a short redaction-safe rollback reason label (NOT free-text content; same posture as `run.dead_lettered.reason`)." }
1614
+ }
1615
+ },
1616
+
1617
+ "deploymentCanaryAdjusted": {
1618
+ "type": "object",
1619
+ "description": "RFC 0082 §D. Emitted when an `active` version's canary traffic share changes. Content-free (`deployment-event-no-content-leak`). Recorded-fact; audit-logged.",
1620
+ "additionalProperties": false,
1621
+ "required": ["agentId", "version", "fromPercent", "toPercent"],
1622
+ "properties": {
1623
+ "agentId": { "type": "string", "minLength": 1, "description": "The agent's id." },
1624
+ "version": { "type": "string", "minLength": 1, "description": "The version whose canary share changed." },
1625
+ "fromPercent": { "type": "integer", "minimum": 0, "maximum": 100, "description": "The prior canary percent." },
1626
+ "toPercent": { "type": "integer", "minimum": 0, "maximum": 100, "description": "The new canary percent." }
1627
+ }
1628
+ },
1629
+
1630
+ "deploymentStateChanged": {
1631
+ "type": "object",
1632
+ "description": "RFC 0082 §D. Emitted on any non-promotion lifecycle transition (pause / resume / deprecate). Content-free (`deployment-event-no-content-leak`). Recorded-fact; audit-logged.",
1633
+ "additionalProperties": false,
1634
+ "required": ["agentId", "version", "fromState", "toState"],
1635
+ "properties": {
1636
+ "agentId": { "type": "string", "minLength": 1, "description": "The agent's id." },
1637
+ "version": { "type": "string", "minLength": 1, "description": "The version whose state changed." },
1638
+ "fromState": { "type": "string", "enum": ["draft", "test", "staged", "active", "paused", "deprecated", "rolled-back"], "description": "The prior state." },
1639
+ "toState": { "type": "string", "enum": ["draft", "test", "staged", "active", "paused", "deprecated", "rolled-back"], "description": "The new state." }
1640
+ }
1641
+ },
1642
+ "rosterRunInitiated": {
1643
+ "type": "object",
1644
+ "description": "RFC 0086 §C. Emitted once, immediately after `run.started` and before any agent invocation, when a trigger (RFC 0052 schedule / RFC 0083 durable work item) fires a workflow in a roster member's portfolio — attributing the run to the standing agent. Content-free (`roster-attribution-no-content`): ids + persona + trigger source ONLY — never the work-item body, prompt, or credential material (SR-1). A recorded-fact event (replay.md §'Recorded-fact events'): re-emitted from the log on replay, identifiers never regenerated, so the run stays attributed to the same member even if the entry was since renamed or deleted.",
1645
+ "additionalProperties": false,
1646
+ "required": ["rosterId", "persona", "agentId", "workflowId", "triggerSource"],
1647
+ "properties": {
1648
+ "rosterId": { "type": "string", "minLength": 1, "description": "The standing agent instance the run is attributed to (a `host:<id>` AgentRef agentId)." },
1649
+ "persona": { "type": "string", "minLength": 1, "description": "The member's human display name (e.g. \"Sally\")." },
1650
+ "agentId": { "type": "string", "minLength": 1, "description": "The manifest agentId the member instantiates (`agentRef.agentId`)." },
1651
+ "workflowId": { "type": "string", "minLength": 1, "description": "The portfolio workflow this run executes." },
1652
+ "triggerSource": { "type": "string", "minLength": 1, "description": "What fired the run: an RFC 0083 source (`schedule`/`webhook`/`queue`/`email`/`form`) or a host-extension source name (e.g. a vendor Kanban bridge)." },
1653
+ "triggerSubscriptionId": { "type": "string", "minLength": 1, "description": "MAY. The RFC 0083 trigger-subscription id when the fire came through the durable trigger bridge, so trigger → run → roster is traceable via /ancestry (RFC 0040)." }
1654
+ }
1372
1655
  }
1373
1656
  }
1374
1657
  }
@@ -123,6 +123,25 @@
123
123
  "agent.toolReturned",
124
124
  "agent.handoff",
125
125
  "agent.decided",
126
+ "agent.invocation.started",
127
+ "agent.invocation.completed",
128
+ "eval.started",
129
+ "eval.scored",
130
+ "eval.completed",
131
+ "deployment.promoted",
132
+ "deployment.rolled-back",
133
+ "deployment.canary.adjusted",
134
+ "deployment.state.changed",
135
+ "roster.run.initiated",
136
+ "tool.session.opened",
137
+ "tool.session.closed",
138
+ "egress.decided",
139
+ "trigger.subscription.state.changed",
140
+ "trigger.delivery.attempted",
141
+ "budget.reserved",
142
+ "budget.consumed",
143
+ "budget.threshold.crossed",
144
+ "budget.exhausted",
126
145
  "runOrchestrator.decided",
127
146
  "node.dispatched",
128
147
  "conversation.opened",
@@ -0,0 +1,63 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/tool-descriptor.schema.json",
4
+ "title": "ToolDescriptor",
5
+ "description": "RFC 0078. A portable, read-only description of one tool, unifying the five openwop tool surfaces (node-pack / workflow / MCP / connector / host-extension) behind one stable shape. Returned by `GET /v1/tools` + `GET /v1/tools/{toolId}` when the host advertises `capabilities.toolCatalog.supported: true`. The descriptor DESCRIBES a tool; it never carries credential material (SR-1) and is not an invocation path (tools are invoked on their existing surfaces).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["toolId", "source", "safetyTier"],
9
+ "properties": {
10
+ "toolId": {
11
+ "type": "string",
12
+ "minLength": 1,
13
+ "description": "Stable, host-unique tool identifier in the `<scope>:<tool-id>` form RFC 0077 `toolAllowlist` references (`openwop:` core, `mcp:` MCP-namespaced, `connector:` connector, `<vendor>.<host>` / `x-host-<vendor>-*` host-extension). The keyspace SPANS sources so a single allowlist entry resolves to exactly one descriptor; the `<scope>` prefix disambiguates by construction (RFC 0078 §UQ1). MUST be stable across catalog reads for a given host version so an agent's `toolAllowlist` keeps resolving."
14
+ },
15
+ "source": {
16
+ "type": "string",
17
+ "enum": ["node-pack", "workflow", "mcp", "connector", "host-extension"],
18
+ "description": "Which surface backs the tool. `node-pack`: a pack typeId (RFC 0003); `workflow`: a workflow-as-tool (`core.subWorkflow` / RFC 0013); `mcp`: an MCP server tool (`host.mcp`); `connector`: an RFC 0045 connector action; `host-extension`: an `x-host-<vendor>-*` scope (RFC 0069)."
19
+ },
20
+ "title": { "type": "string", "description": "MAY — human-readable name for UI." },
21
+ "description": { "type": "string", "description": "MAY — one-line summary for a tool picker." },
22
+ "inputSchema": { "type": "object", "description": "MAY — JSON Schema (2020-12) for the tool's arguments. Absent ⇒ opaque/host-interpreted args. RECOMMENDED (not required) to be the RFC 0030 Tier-1 universal subset for tools that feed an LLM tool-call (RFC 0078 §UQ2)." },
23
+ "outputSchema": { "type": "object", "description": "MAY — JSON Schema for the tool's result." },
24
+ "auth": {
25
+ "type": "object",
26
+ "additionalProperties": false,
27
+ "description": "MAY — what the caller must supply/hold to invoke. Composes RFC 0049 (scopes) + RFC 0046 (credentials).",
28
+ "properties": {
29
+ "scopes": { "type": "array", "items": { "type": "string" }, "uniqueItems": true, "description": "RFC 0049 scopes the principal MUST hold; per-tool authorization fails closed (RFC 0064 §C)." },
30
+ "credentialRef": { "type": "boolean", "description": "true ⇒ the tool needs a host-stored credential reference (RFC 0046); the catalog NEVER carries credential material (SR-1)." }
31
+ }
32
+ },
33
+ "egress": {
34
+ "type": "string",
35
+ "enum": ["none", "safe-fetch", "host-mediated", "host-owned"],
36
+ "description": "MAY — outbound-network posture. `none`: no egress; `safe-fetch`: via the host's RFC 0076 §B `ctx.http.safeFetch` (SSRF-guarded); `host-mediated`: egress proxied by the host through a path other than `safeFetch`; `host-owned`: the host owns the egress story (e.g. a host-extension)."
37
+ },
38
+ "approval": {
39
+ "type": "string",
40
+ "enum": ["never", "conditional", "always"],
41
+ "description": "MAY — whether invocation requires an RFC 0051 approval interrupt. `conditional` ⇒ host-policy (e.g. over a cost/scope threshold)."
42
+ },
43
+ "replayPolicy": {
44
+ "type": "string",
45
+ "enum": ["deterministic", "idempotent", "non-deterministic"],
46
+ "description": "MAY — replay posture (`replay.md`). `deterministic`: pure/replay-safe; `idempotent`: safe under the Layer-2 idempotency key (`idempotency.md`); `non-deterministic`: the host MUST cache the observable result (RFC 0041 §C) so replay reproduces the sequence."
47
+ },
48
+ "safetyTier": {
49
+ "type": "string",
50
+ "enum": ["pure", "read", "write", "exec"],
51
+ "description": "REQUIRED. The tool's DATA-EFFECT classification: `pure`: no external side effects; `read`: reads external state; `write`: mutates external state; `exec`: arbitrary-command/`exec`-class — per RFC 0069 this MUST have `source: \"host-extension\"` (exec is never protocol-tier). A consumer uses this to gate/warn. The host MUST ASSIGN `safetyTier` explicitly as per-tool metadata; it is NOT derivable from a permission / approval / risk tier — those are an ORTHOGONAL authorization axis (a read-only tool can be high-approval/restricted while being `safetyTier:\"read\"`). A host that mechanically maps its risk tier onto `safetyTier` mis-advertises."
52
+ },
53
+ "costHint": { "type": "string", "enum": ["low", "medium", "high"], "description": "MAY — advisory cost magnitude for planning UX. Non-normative." },
54
+ "latencyHint": { "type": "string", "enum": ["low", "medium", "high"], "description": "MAY — advisory latency magnitude. Non-normative." }
55
+ },
56
+ "allOf": [
57
+ {
58
+ "$comment": "RFC 0078 §C-1 / §F-4: an exec-tier tool MUST be host-extension-sourced (RFC 0069 — exec is never protocol-tier).",
59
+ "if": { "properties": { "safetyTier": { "const": "exec" } }, "required": ["safetyTier"] },
60
+ "then": { "properties": { "source": { "const": "host-extension" } }, "required": ["source"] }
61
+ }
62
+ ]
63
+ }