@openwop/openwop-conformance 1.6.1 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/CHANGELOG.md +44 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +127 -0
  4. package/api/openapi.yaml +518 -1
  5. package/coverage.md +44 -2
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures/oauth-providers/synthetic.json +38 -0
  8. package/fixtures.md +29 -0
  9. package/package.json +1 -1
  10. package/schemas/README.md +22 -0
  11. package/schemas/agent-deployment-transition.schema.json +49 -0
  12. package/schemas/agent-deployment.schema.json +54 -0
  13. package/schemas/agent-eval-suite.schema.json +140 -0
  14. package/schemas/agent-inventory-response.schema.json +115 -0
  15. package/schemas/agent-manifest.schema.json +5 -0
  16. package/schemas/agent-org-chart.schema.json +82 -0
  17. package/schemas/agent-ref.schema.json +12 -2
  18. package/schemas/agent-roster-entry.schema.json +81 -0
  19. package/schemas/agent-roster-response.schema.json +21 -0
  20. package/schemas/ai-envelope.schema.json +28 -0
  21. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  22. package/schemas/budget-policy.schema.json +18 -0
  23. package/schemas/capabilities.schema.json +448 -4
  24. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  25. package/schemas/credential-provenance.schema.json +18 -0
  26. package/schemas/envelopes/media.audio.schema.json +38 -0
  27. package/schemas/envelopes/media.file.schema.json +37 -0
  28. package/schemas/envelopes/media.image.schema.json +33 -0
  29. package/schemas/eval-summary.schema.json +92 -0
  30. package/schemas/heartbeat-evaluated.schema.json +14 -0
  31. package/schemas/heartbeat-state-changed.schema.json +14 -0
  32. package/schemas/node-pack-manifest.schema.json +33 -1
  33. package/schemas/org-chart-responsibility-view.schema.json +26 -0
  34. package/schemas/run-event-payloads.schema.json +380 -6
  35. package/schemas/run-event.schema.json +23 -0
  36. package/schemas/tool-descriptor.schema.json +63 -0
  37. package/schemas/trigger-subscription.schema.json +26 -0
  38. package/schemas/workflow-definition.schema.json +5 -0
  39. package/schemas/workspace-file-create.schema.json +20 -0
  40. package/schemas/workspace-file.schema.json +39 -0
  41. package/src/lib/agentLoop.ts +44 -0
  42. package/src/lib/agentRoster.ts +76 -0
  43. package/src/lib/agentRuntime.ts +45 -0
  44. package/src/lib/artifactTypes.ts +96 -0
  45. package/src/lib/cardPacks.ts +52 -0
  46. package/src/lib/discovery-capabilities.ts +50 -0
  47. package/src/lib/distillation.ts +38 -0
  48. package/src/lib/feedback.ts +3 -3
  49. package/src/lib/heartbeat.ts +31 -0
  50. package/src/lib/liveRuntime.ts +59 -0
  51. package/src/lib/memoryAttribution.ts +48 -0
  52. package/src/lib/profiles.ts +157 -0
  53. package/src/lib/runtimeRequires.ts +38 -0
  54. package/src/lib/safeFetch.ts +87 -0
  55. package/src/lib/subRunAttestation.ts +35 -0
  56. package/src/lib/toolHooks.ts +33 -0
  57. package/src/scenarios/agent-deployment-shape.test.ts +139 -0
  58. package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
  59. package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
  60. package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
  61. package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
  62. package/src/scenarios/agent-live-structured-output.test.ts +58 -0
  63. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  64. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  65. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  66. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  67. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  68. package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
  69. package/src/scenarios/agent-platform-profile.test.ts +158 -0
  70. package/src/scenarios/agent-roster-attribution.test.ts +179 -0
  71. package/src/scenarios/agent-roster-shape.test.ts +146 -0
  72. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  73. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  74. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  75. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  76. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  77. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  78. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  79. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  80. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  81. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  82. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  83. package/src/scenarios/auth-mtls.test.ts +2 -1
  84. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  85. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  86. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  87. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  88. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  89. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  90. package/src/scenarios/budget-policy-shape.test.ts +136 -0
  91. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  92. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  93. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  94. package/src/scenarios/commitment-fired.test.ts +83 -0
  95. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  96. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  97. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  98. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  99. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  100. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  101. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  102. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  103. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  104. package/src/scenarios/distillation-shape.test.ts +41 -0
  105. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  106. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  107. package/src/scenarios/egress-provenance-shape.test.ts +137 -0
  108. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  109. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  110. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  111. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  112. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  113. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  114. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  115. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  116. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  117. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  118. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  119. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  120. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  121. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  122. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  123. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  124. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  125. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  126. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  127. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  128. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  129. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  130. package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
  131. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  132. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  133. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  134. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  135. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  136. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  137. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  138. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  139. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  140. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  141. package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
  142. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  143. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  144. package/src/scenarios/pause-resume.test.ts +3 -3
  145. package/src/scenarios/production-backpressure.test.ts +2 -2
  146. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  147. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  148. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  149. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  150. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  151. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  152. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  153. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  154. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  155. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  156. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  157. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  158. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  159. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  160. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  161. package/src/scenarios/provider-usage.test.ts +2 -1
  162. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  163. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  164. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  165. package/src/scenarios/replayDeterminism.test.ts +3 -1
  166. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  167. package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
  168. package/src/scenarios/runtime-requires-shape.test.ts +134 -0
  169. package/src/scenarios/safefetch-behavior.test.ts +99 -0
  170. package/src/scenarios/safefetch-live-audit.test.ts +175 -0
  171. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  172. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  173. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  174. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  175. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  176. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  177. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  178. package/src/scenarios/spec-corpus-validity.test.ts +20 -4
  179. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  180. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  181. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  182. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  183. package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
  184. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  185. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  186. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  187. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  188. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  189. package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
  190. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  191. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  192. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  193. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  194. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  195. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  196. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  197. package/src/scenarios/workspace-behavior.test.ts +134 -0
  198. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  199. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
  200. package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
@@ -0,0 +1,140 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/agent-eval-suite.schema.json",
4
+ "title": "AgentEvalSuite",
5
+ "description": "RFC 0081 §A. A portable, host-agnostic evaluation suite for a manifest agent (RFC 0003/0070): the tasks, the expected outputs or rubrics, the deterministic tool/memory fixtures, the allowed model classes, and the pass/fail thresholds that answer \"is this agent good enough to deploy?\". Distributed inside a pack tarball and referenced by URI exactly like `systemPromptRef` / `handoff.*SchemaRef` (RFC 0003 §C/§D) — NOT embedded in `AgentManifest`. A host advertising `capabilities.agents.evalSuite.supported: true` executes a suite as an eval run (a `mode: \"eval\"` projection over `POST /v1/runs`, RFC 0081 §B) and terminates with an `eval-summary.schema.json` scorecard. The suite carries NO secret material and NO host-internal identifiers (it is authored offline and shipped in a signed pack).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["suiteId", "version", "modes", "tasks"],
9
+ "properties": {
10
+ "suiteId": {
11
+ "type": "string",
12
+ "pattern": "^[a-z0-9.-]+\\.evals\\.[a-z0-9-]+$",
13
+ "description": "Globally unique suite identifier in the `<scope>.<org>.evals.<name>` convention (e.g. `core.openwop.evals.support-resolver`), mirroring the pack `<scope>.<author>.<pack>` namespace (RFC 0003). The `.evals.` infix distinguishes a suite from an agent/pack id."
14
+ },
15
+ "version": {
16
+ "type": "string",
17
+ "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$",
18
+ "description": "SemVer of the suite. A suite version is pinned on an eval run (carried on `eval.started.suiteVersion` and `eval-summary.suiteVersion`) so a regression comparison (§D `regression` mode) is between like versions."
19
+ },
20
+ "targetAgentId": {
21
+ "type": "string",
22
+ "minLength": 1,
23
+ "description": "MAY. The `AgentManifest.id` this suite is authored for. Absent ⇒ the suite is agent-agnostic and MAY be pointed at any `agentId` at run time (the run request carries the `agentId`). When present, a host SHOULD reject an eval run whose target `agentId` differs, unless the caller explicitly overrides."
24
+ },
25
+ "modes": {
26
+ "type": "array",
27
+ "minItems": 1,
28
+ "uniqueItems": true,
29
+ "items": { "type": "string", "enum": ["golden", "rubric", "adversarial", "regression", "live-shadow"] },
30
+ "description": "The eval modes this suite exercises (RFC 0081 §D closed vocabulary). `golden`: exact / contains / json-match against each task's `expected`. `rubric`: a host-chosen judge scores against weighted criteria (nondeterministic — a recorded-fact score). `adversarial`: tasks probe for unsafe / jailbreak behavior; `safetyFindings` is the primary output. `regression`: re-run against a new agent/model/prompt version and diff scores vs a `baselineRunId` (composes RFC 0054 `:diff`). `live-shadow`: run against LIVE tools/memory instead of `fixtures` — the only mode that bypasses fixture injection; explicitly nondeterministic. A run MUST request only modes the suite declares here AND the host advertises (`capabilities.agents.evalSuite.modes`); an unadvertised mode is rejected at run-create with `400 validation_error`."
31
+ },
32
+ "allowedModels": {
33
+ "type": "array",
34
+ "uniqueItems": true,
35
+ "items": { "type": "string", "enum": ["reasoning", "writing", "coding", "research", "classification", "general"] },
36
+ "description": "MAY. The `AgentManifest.modelClass` values (RFC 0002) the suite is valid for. Absent ⇒ valid for any class. A host SHOULD record the `evaluatedModelClass` on the summary so a score is interpreted against the model it was produced with."
37
+ },
38
+ "thresholds": {
39
+ "type": "object",
40
+ "additionalProperties": false,
41
+ "description": "MAY. The pass/fail bar for the suite. A task or the aggregate `passed` flag is computed against these. Absent ⇒ the host's default bar (the summary still carries raw scores).",
42
+ "properties": {
43
+ "passScore": {
44
+ "type": "number",
45
+ "minimum": 0,
46
+ "maximum": 1,
47
+ "description": "The minimum aggregate score (0.0–1.0) for `EvalSummary.passed: true`."
48
+ },
49
+ "maxCostUsd": {
50
+ "type": "number",
51
+ "minimum": 0,
52
+ "description": "MAY. The maximum total cost (summed from RFC 0026 `provider.usage`) for a passing run. A run that exceeds it MUST NOT report `passed: true` even if `passScore` is met."
53
+ },
54
+ "maxP95LatencyMs": {
55
+ "type": "integer",
56
+ "minimum": 0,
57
+ "description": "MAY. The maximum p95 per-task latency for a passing run."
58
+ }
59
+ }
60
+ },
61
+ "tasks": {
62
+ "type": "array",
63
+ "minItems": 1,
64
+ "description": "The eval tasks. Each is executed as one child agent invocation (the RFC 0077 `agent.invocation.*` bracket + the existing `agent.*` / `provider.usage` events), scored, and reported via a per-task `eval.scored` event + an `EvalSummary` entry.",
65
+ "items": {
66
+ "type": "object",
67
+ "additionalProperties": false,
68
+ "required": ["taskId", "input", "expected"],
69
+ "properties": {
70
+ "taskId": {
71
+ "type": "string",
72
+ "pattern": "^[a-z0-9][a-z0-9-]*$",
73
+ "description": "Suite-unique task identifier (kebab-case). Carried verbatim on `eval.scored.taskId` and the per-task summary entry."
74
+ },
75
+ "input": {
76
+ "description": "The run input for the task, validated against the agent's input schema by the host. An opaque object/value — content is task-defined."
77
+ },
78
+ "expected": {
79
+ "type": "object",
80
+ "additionalProperties": false,
81
+ "required": ["kind"],
82
+ "description": "How the task is scored. `golden`: deterministic match against `match`. `rubric`: a judge scores against weighted `rubric` criteria.",
83
+ "properties": {
84
+ "kind": { "type": "string", "enum": ["golden", "rubric"], "description": "Scoring mode for this task. A suite declaring a non-`golden`/`rubric` `modes` entry (e.g. `adversarial`) still scores each task via one of these two `kind`s." },
85
+ "match": {
86
+ "type": "object",
87
+ "additionalProperties": false,
88
+ "description": "Present when `kind: \"golden\"`. The deterministic expectation.",
89
+ "properties": {
90
+ "strategy": { "type": "string", "enum": ["exact", "contains", "json-match"], "description": "`exact`: stringified output equals `value`. `contains`: output contains `value`. `json-match`: output JSON-deep-equals `value` (key order / whitespace insensitive)." },
91
+ "value": { "description": "The expected value for the strategy. Opaque." }
92
+ },
93
+ "required": ["strategy", "value"]
94
+ },
95
+ "rubric": {
96
+ "type": "array",
97
+ "minItems": 1,
98
+ "description": "Present when `kind: \"rubric\"`. Weighted criteria a judge scores the output against; the task score is the weighted sum of met criteria, normalized to 0.0–1.0. Judge selection + scoring is host-internal (nondeterministic — the score is a recorded fact).",
99
+ "items": {
100
+ "type": "object",
101
+ "additionalProperties": false,
102
+ "required": ["criterion", "weight"],
103
+ "properties": {
104
+ "criterion": { "type": "string", "minLength": 1, "description": "A human-readable scoring criterion (e.g. \"cites the 30-day refund window\")." },
105
+ "weight": { "type": "number", "minimum": 0, "maximum": 1, "description": "Relative weight of this criterion (criteria weights SHOULD sum to 1.0 across the task)." }
106
+ }
107
+ }
108
+ }
109
+ }
110
+ },
111
+ "fixtures": {
112
+ "type": "object",
113
+ "additionalProperties": false,
114
+ "description": "MAY. Deterministic substitutes for live tool/memory I/O so a `golden`/`regression` eval is reproducible. When present, the eval host MUST inject `toolResponses` in place of live tool calls and seed `memorySeed` before the invocation. The `live-shadow` mode is the explicit exception — it ignores `fixtures` and runs against live tools/memory.",
115
+ "properties": {
116
+ "toolResponses": {
117
+ "type": "array",
118
+ "description": "Canned tool results keyed by tool invocation, injected in place of live tool calls.",
119
+ "items": {
120
+ "type": "object",
121
+ "additionalProperties": false,
122
+ "required": ["tool"],
123
+ "properties": {
124
+ "tool": { "type": "string", "minLength": 1, "description": "The `<scope>:<tool-id>` (RFC 0077 `toolAllowlist` / RFC 0078) the response stands in for." },
125
+ "response": { "description": "The canned result the host returns for that tool. Opaque." }
126
+ }
127
+ }
128
+ },
129
+ "memorySeed": {
130
+ "type": "array",
131
+ "description": "Memory entries seeded into the agent's read snapshot before the invocation (RFC 0004 `MemoryAdapter` shape). Tenant-scoped + SR-1-redacted on the host side exactly like any memory write.",
132
+ "items": { "type": "object" }
133
+ }
134
+ }
135
+ }
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
@@ -0,0 +1,115 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/agent-inventory-response.schema.json",
4
+ "title": "AgentInventoryResponse",
5
+ "description": "Response body for `GET /v1/agents` (RFC 0072 §A). A read-only projection of the manifest agents a host has installed into its AgentRegistry (RFC 0003 / RFC 0070). Served only when the host advertises `capabilities.agents.manifestRuntime.supported: true`. Each entry MUST NOT carry the agent's system-prompt body, resolved handoff schemas, or any credential material (SR-1).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["agents", "total"],
9
+ "properties": {
10
+ "agents": {
11
+ "type": "array",
12
+ "items": { "$ref": "#/$defs/AgentInventoryEntry" },
13
+ "description": "Installed manifest agents, in a stable (agentId-sorted) order."
14
+ },
15
+ "total": {
16
+ "type": "integer",
17
+ "minimum": 0,
18
+ "description": "Count of installed manifest agents (== agents.length)."
19
+ }
20
+ },
21
+ "$defs": {
22
+ "AgentInventoryEntry": {
23
+ "type": "object",
24
+ "title": "AgentInventoryEntry",
25
+ "description": "Read projection of an installed `AgentManifest` (agent-manifest.schema.json). Also the body of `GET /v1/agents/{agentId}`.",
26
+ "additionalProperties": false,
27
+ "required": ["agentId", "persona", "label", "modelClass", "packName", "packVersion", "toolAllowlist", "hasHandoffSchemas"],
28
+ "properties": {
29
+ "agentId": {
30
+ "type": "string",
31
+ "description": "The manifest agentId (matches `AgentManifest.agentId` / `AgentRef.agentId`)."
32
+ },
33
+ "persona": {
34
+ "type": "string",
35
+ "description": "Human-readable agent name from the manifest."
36
+ },
37
+ "label": {
38
+ "type": "string",
39
+ "description": "Short UI label; falls back to `persona` when the manifest omits `label`."
40
+ },
41
+ "description": {
42
+ "type": "string",
43
+ "description": "Optional one-line catalog summary from the manifest."
44
+ },
45
+ "modelClass": {
46
+ "type": "string",
47
+ "description": "The manifest's `modelClass` (see agent-manifest.schema.json)."
48
+ },
49
+ "packName": {
50
+ "type": "string",
51
+ "description": "The pack this agent was installed from."
52
+ },
53
+ "packVersion": {
54
+ "type": "string",
55
+ "description": "The installed pack version."
56
+ },
57
+ "toolAllowlist": {
58
+ "type": "array",
59
+ "items": { "type": "string" },
60
+ "description": "Tool identifiers the agent MAY invoke (RFC 0002 §A14). The host MUST enforce this at dispatch; an empty array means no tools."
61
+ },
62
+ "hasHandoffSchemas": {
63
+ "type": "boolean",
64
+ "description": "Whether the manifest declares handoff task/return schemas (the host validates dispatch payloads against them when it advertises `agents.manifestRuntime.handoffValidation`). The schemas themselves are NOT exposed here."
65
+ },
66
+ "memoryShape": {
67
+ "type": "object",
68
+ "additionalProperties": false,
69
+ "description": "The manifest's declared memory shape, when present.",
70
+ "properties": {
71
+ "scratchpad": { "type": "boolean" },
72
+ "conversation": { "type": "boolean" },
73
+ "longTerm": { "type": "boolean" }
74
+ }
75
+ },
76
+ "confidenceThreshold": {
77
+ "type": "number",
78
+ "minimum": 0,
79
+ "maximum": 1,
80
+ "description": "The manifest's `confidence.defaultThreshold`, when present (RFC 0002 §F)."
81
+ },
82
+ "degraded": {
83
+ "type": "array",
84
+ "items": { "type": "string" },
85
+ "description": "Capability keys this agent declared as `peerDependenciesMeta.optional` that this host does NOT satisfy, and which are therefore inert for this installation (RFC 0072 §C). Absent or empty means the agent runs at full declared capability here."
86
+ },
87
+ "memoryDegraded": {
88
+ "type": "boolean",
89
+ "description": "RFC 0080 §C. `true` when this agent's `memoryShape` declares a memory dimension the host's reconciled memory model (RFC 0080 §A) does NOT satisfy — the agent MAY still dispatch at the RFC 0070 floor, but the degradation MUST be observable here (a silent satisfied-looking entry for an unsatisfiable `memoryShape` is non-conformant). Absent ⇒ memory fully satisfied, or an older host that does not compute the projection (consumers treat absence as not-degraded/unknown and MAY probe)."
90
+ },
91
+ "degradedMemoryDimensions": {
92
+ "type": "array",
93
+ "items": { "type": "string", "enum": ["read", "write", "search", "long-term-durability", "compaction", "attribution", "replay-snapshot", "retention"] },
94
+ "uniqueItems": true,
95
+ "description": "RFC 0080 §C. The RFC 0080 §A dimension names (NOT the `memoryShape` keys) this host cannot satisfy for this agent. Present (non-empty) iff `memoryDegraded: true`; OPTIONAL/absent when `memoryDegraded` is false or absent. The dimension name `long-term-durability` is deliberately distinct from the `agents.memoryBackends` *value* `long-term` (a backend id) to avoid wire ambiguity. The `memoryShape`→§A mapping: `longTerm`⇒`long-term-durability`, `scratchpad`/`conversation`⇒`write`+`read` as applicable."
96
+ },
97
+ "roster": {
98
+ "type": "array",
99
+ "uniqueItems": true,
100
+ "description": "RFC 0086 §B. OPTIONAL/additive. The standing roster INSTANCES of this manifest agent visible to the caller (tenant-scoped per RFC 0074), each with its persona + owned workflow portfolio — so a single GET /v1/agents call surfaces responsibilities without a second round-trip. Present only when the host advertises `capabilities.agents.roster.supported: true`; absent ⇒ no roster surface (today's default).",
101
+ "items": {
102
+ "type": "object",
103
+ "additionalProperties": false,
104
+ "required": ["rosterId", "persona", "workflows"],
105
+ "properties": {
106
+ "rosterId": { "type": "string", "minLength": 1, "description": "The roster entry's `host:<id>` instance id (agent-roster-entry.schema.json)." },
107
+ "persona": { "type": "string", "minLength": 1, "description": "The instance's human display name." },
108
+ "workflows": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true, "description": "The workflow ids this instance owns by role (its portfolio)." }
109
+ }
110
+ }
111
+ }
112
+ }
113
+ }
114
+ }
115
+ }
@@ -41,6 +41,11 @@
41
41
  "description": "URI-reference to a prompt file inside the pack tarball (e.g., `prompts/supervisor.md`). Mutually exclusive with `systemPrompt`. Useful when the prompt body is large enough that inlining would bloat the manifest.",
42
42
  "minLength": 1
43
43
  },
44
+ "evalSuiteRef": {
45
+ "type": "string",
46
+ "description": "RFC 0081 §A. MAY. URI-reference to an `agent-eval-suite.schema.json` file inside the pack tarball (e.g., `evals/support-resolver.json`), resolved at install exactly like `systemPromptRef` / `handoff.*SchemaRef`. Declares the agent's portable evaluation suite (golden/rubric/adversarial/regression/live-shadow tasks + thresholds) so a host advertising `capabilities.agents.evalSuite.supported: true` can run it as a `mode: \"eval\"` run. Absent ⇒ the agent ships no suite (a suite MAY still be authored independently and pointed at this `agentId` at run time). Does NOT embed the suite in the manifest — the suite evolves on its own cadence and is often authored by a different role.",
47
+ "minLength": 1
48
+ },
44
49
  "toolAllowlist": {
45
50
  "type": "array",
46
51
  "items": { "type": "string", "minLength": 1 },
@@ -0,0 +1,82 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/agent-org-chart.schema.json",
4
+ "title": "AgentOrgChart",
5
+ "description": "RFC 0087 §A. A tenant-scoped, DESCRIPTIVE grouping of RFC 0086 standing roster members (agent-roster-entry.schema.json) into departments + roles with acyclic `reportsTo` edges. The load-bearing constraint (§B `org-position-no-authority-escalation`): an org edge confers NO authority — there is NO `permissions`/`scopes`/`canDispatch`/`authority` field anywhere in this schema BY DESIGN, and every object is `additionalProperties:false` so a host cannot smuggle one in. Authority stays in `toolAllowlist` (RFC 0002 §A14), RBAC (RFC 0049, the sole authority source, fail-closed), and approval gates (RFC 0051); org position MUST NOT be consulted as an authorization input. Position describes; it never authorizes. Tenant-scoped (RFC 0074): a chart is served only to its `owner` triple. The record is host-internal; this is the canonical wire shape behind GET /v1/agents/org-chart (the endpoint lands at Active → Accepted per RFC 0087 §Conformance). The `Department`/`Role`/`Member` subschemas are published as named `$defs` so dependent schemas (e.g. org-chart-responsibility-view.schema.json) reference a stable anchor rather than a positional `properties/.../items` pointer.",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["owner", "departments", "members"],
9
+ "properties": {
10
+ "owner": {
11
+ "type": "object",
12
+ "additionalProperties": false,
13
+ "required": ["tenantId"],
14
+ "description": "RFC 0048 owner triple the chart is scoped by (RFC 0074). A chart is served only to a principal within this triple; a member/department/edge outside it is never disclosed (CTI-1 carry-forward).",
15
+ "properties": {
16
+ "tenantId": { "type": "string", "minLength": 1, "maxLength": 256, "description": "Owning tenant." },
17
+ "workspaceId": { "type": "string", "minLength": 1, "maxLength": 256, "description": "MAY. Owning workspace within the tenant." }
18
+ }
19
+ },
20
+ "departments": {
21
+ "type": "array",
22
+ "description": "The departments, forming a tree via `parentDepartmentId`.",
23
+ "items": { "$ref": "#/$defs/Department" }
24
+ },
25
+ "members": {
26
+ "type": "array",
27
+ "description": "The members — each is an RFC 0086 roster instance placed in a department + role, with an optional reporting edge. Every field is descriptive; there is NO authority-bearing field (§B).",
28
+ "items": { "$ref": "#/$defs/Member" }
29
+ }
30
+ },
31
+ "$defs": {
32
+ "Department": {
33
+ "type": "object",
34
+ "additionalProperties": false,
35
+ "required": ["departmentId", "name", "roles"],
36
+ "properties": {
37
+ "departmentId": { "type": "string", "minLength": 1, "maxLength": 128, "description": "Stable department id, unique within the chart." },
38
+ "name": { "type": "string", "minLength": 1, "maxLength": 200, "description": "Human department name (e.g. \"Marketing\")." },
39
+ "parentDepartmentId": {
40
+ "type": ["string", "null"],
41
+ "maxLength": 128,
42
+ "description": "MAY. The parent department id (department nesting); `null` for a top-level department. A host advertising `agents.orgChart.departmentNesting: false` MUST reject a non-null value."
43
+ },
44
+ "roles": {
45
+ "type": "array",
46
+ "description": "The roles defined in this department.",
47
+ "items": { "$ref": "#/$defs/Role" }
48
+ }
49
+ }
50
+ },
51
+ "Role": {
52
+ "type": "object",
53
+ "additionalProperties": false,
54
+ "required": ["roleId", "name"],
55
+ "properties": {
56
+ "roleId": { "type": "string", "minLength": 1, "maxLength": 128, "description": "Stable role id, unique within the chart." },
57
+ "name": { "type": "string", "minLength": 1, "maxLength": 200, "description": "Human role name (e.g. \"Campaign Manager\"). DESCRIPTIVE — a role grants no authority (§B)." }
58
+ }
59
+ },
60
+ "Member": {
61
+ "type": "object",
62
+ "additionalProperties": false,
63
+ "required": ["rosterId", "departmentId", "roleId", "reportsTo"],
64
+ "properties": {
65
+ "rosterId": {
66
+ "type": "string",
67
+ "pattern": "^host:[a-z0-9][a-z0-9._-]*$",
68
+ "minLength": 6,
69
+ "maxLength": 128,
70
+ "description": "An RFC 0086 roster entry id (`agent-roster-entry.schema.json` `rosterId`). MUST reference a roster entry in the same `owner` tenant (no cross-tenant membership — §C)."
71
+ },
72
+ "departmentId": { "type": "string", "minLength": 1, "maxLength": 128, "description": "The department this member belongs to (MUST exist in `departments[]`)." },
73
+ "roleId": { "type": "string", "minLength": 1, "maxLength": 128, "description": "The member's role (MUST match a `roleId` defined in some `departments[].roles[]` entry of this chart)." },
74
+ "reportsTo": {
75
+ "type": ["string", "null"],
76
+ "maxLength": 128,
77
+ "description": "Another member's `rosterId` (the manager), or `null` for the root. The edge set MUST be acyclic (a cycle is a `validation_error`, §A). A `reportsTo` edge is METADATA ONLY — it confers no authority over the report (§B `org-position-no-authority-escalation`)."
78
+ }
79
+ }
80
+ }
81
+ }
82
+ }
@@ -41,7 +41,13 @@
41
41
  "version": {
42
42
  "type": "string",
43
43
  "maxLength": 64,
44
- "description": "Optional version pin for the agent definition (matches `AgentManifest.version`). Lets audit consumers trace which version of an agent definition was active for a given run. Pinning is encouraged for replay determinism; absent values mean 'host's current resolution of the agent'."
44
+ "description": "Optional EXACT version pin for the agent definition (matches `AgentManifest.version`). Lets audit consumers trace which version of an agent definition was active for a given run. Pinning is encouraged for replay determinism; absent values mean 'host's current resolution of the agent'. Mutually exclusive with `channel` (RFC 0082 §A) — set at most one."
45
+ },
46
+ "channel": {
47
+ "type": "string",
48
+ "minLength": 1,
49
+ "maxLength": 64,
50
+ "description": "RFC 0082 §A. Optional NAMED deployment-channel binding (e.g. `stable`, `canary`, or the reserved `latest` = highest active semver), as an alternative to the exact `version` pin. A host advertising `capabilities.agents.deployment.supported: true` resolves the channel to a concrete version and pins it per-(run, agentId, channel) at first resolution (RFC 0082 §B) — the resolved version is a recorded fact carried on `agent.invocation.started.resolvedAgentVersion`, re-read on replay and NEVER re-resolved against a moved channel. Mutually exclusive with `version` (the `not` clause below). A channel that resolves to no `active` version fails the run with `no_active_deployment`. Hosts that omit `agents.deployment` MUST reject a `channel`-bearing ref with `validation_error` (the channel has nowhere to resolve)."
45
51
  },
46
52
  "sourceManifestId": {
47
53
  "type": "string",
@@ -49,5 +55,9 @@
49
55
  "description": "Optional provenance pointer back to the `AgentManifest.agentId` this AgentRef was projected from. Lets pack-aware hosts trace runtime AgentRefs back to their distribution origin. Absent for host-internal `host:<id>` agents (which have no manifest)."
50
56
  }
51
57
  },
52
- "additionalProperties": false
58
+ "additionalProperties": false,
59
+ "not": {
60
+ "required": ["version", "channel"],
61
+ "$comment": "RFC 0082 §A. `version` (exact pin) and `channel` (named deployment channel) are mutually exclusive — a ref MAY set at most one. Setting both is a validation_error. Setting neither is valid (host-default resolution). Additive-safe: no pre-RFC-0082 ref can carry `channel`, so this clause never invalidates an existing AgentRef."
62
+ }
53
63
  }
@@ -0,0 +1,81 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/agent-roster-entry.schema.json",
4
+ "title": "AgentRosterEntry",
5
+ "description": "RFC 0086 §A. A standing agent INSTANCE — a named, tenant-scoped, mutable agent (the 'digital-twin employee', e.g. \"Sally\") that REFERENCES a manifest/deployment (RFC 0070/0082) and OWNS a workflow portfolio (the workflows it is responsible for by role). Distinct from the immutable AgentManifest (the pack-distribution class) and from the RFC 0082 deployment record (the per-version channel). `rosterId` IS a dispatchable `host:<id>` AgentRef agentId — the runtime-synthesis namespace RFC 0002 reserves for host-internal agents that don't ship as packs (NOT a parallel id space): dispatching it resolves the bound `agentRef` and projects `persona`. Content-free of any system-prompt body or credential material (SR-1). The record is host-internal + mutable; this schema is the canonical wire shape a host exposes via GET /v1/agents/roster (the endpoint lands at Active → Accepted per RFC 0086 §Conformance).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["rosterId", "persona", "agentRef", "owner"],
9
+ "properties": {
10
+ "rosterId": {
11
+ "type": "string",
12
+ "pattern": "^host:[a-z0-9][a-z0-9._-]*$",
13
+ "minLength": 6,
14
+ "maxLength": 128,
15
+ "description": "Host-issued stable instance id in the reserved `host:<id>` AgentRef form (RFC 0002 / RFC 0086 §A). The dispatch handle for 'run as this agent': a `WorkflowNode.agent: { agentId: rosterId }` resolves to this entry's `agentRef` + stamps `persona`."
16
+ },
17
+ "persona": {
18
+ "type": "string",
19
+ "minLength": 1,
20
+ "maxLength": 200,
21
+ "description": "Human display name (e.g. \"Sally\"). Reuses `AgentRef.persona` semantics (RFC 0002) — projected onto the dispatch AgentRef. Free-form; MAY collide within a tenant (`rosterId` is the uniqueness key)."
22
+ },
23
+ "agentRef": {
24
+ "type": "object",
25
+ "additionalProperties": false,
26
+ "required": ["agentId"],
27
+ "description": "The manifest/deployment this instance instantiates (a trimmed AgentRef — RFC 0002). At most one of `version` (exact pin) or `channel` (RFC 0082) MAY be set — never both; when both are absent ⇒ host-default resolution (RFC 0070).",
28
+ "properties": {
29
+ "agentId": {
30
+ "type": "string",
31
+ "minLength": 3,
32
+ "maxLength": 256,
33
+ "description": "The manifest agentId this instance runs (matches `AgentManifest.agentId`). MUST be resolvable by the host (RFC 0070)."
34
+ },
35
+ "version": {
36
+ "type": "string",
37
+ "maxLength": 64,
38
+ "description": "Exact agent-definition version pin (RFC 0002). Mutually exclusive with `channel`."
39
+ },
40
+ "channel": {
41
+ "type": "string",
42
+ "minLength": 1,
43
+ "maxLength": 64,
44
+ "description": "Named deployment channel (RFC 0082 §A), e.g. `stable`. Resolved + pinned per run at first resolution. Mutually exclusive with `version`."
45
+ }
46
+ },
47
+ "not": { "required": ["version", "channel"] }
48
+ },
49
+ "workflows": {
50
+ "type": "array",
51
+ "uniqueItems": true,
52
+ "items": { "type": "string", "minLength": 1, "maxLength": 128 },
53
+ "description": "The standing portfolio: workflow ids this agent owns by role (RFC 0086 §A/§B). Each MUST be resolvable by the host and within the entry's `owner` tenant scope (the WCT/CTI carry-forward). Absent ⇒ empty portfolio."
54
+ },
55
+ "owner": {
56
+ "type": "object",
57
+ "additionalProperties": false,
58
+ "required": ["tenantId"],
59
+ "description": "RFC 0048 owner triple the entry is scoped by. On a `'tenant'`-install host (RFC 0074), GET /v1/agents/roster returns only entries within the caller's owner triple; a cross-tenant entry 404s.",
60
+ "properties": {
61
+ "tenantId": { "type": "string", "minLength": 1, "maxLength": 256, "description": "Owning tenant." },
62
+ "workspaceId": { "type": "string", "minLength": 1, "maxLength": 256, "description": "MAY. Owning workspace within the tenant." }
63
+ }
64
+ },
65
+ "enabled": {
66
+ "type": "boolean",
67
+ "description": "When `false`, the entry's portfolio triggers are inert (no run fires) — the member is paused but still discoverable (RFC 0086 §A). Absent ⇒ `true`."
68
+ },
69
+ "label": {
70
+ "type": "string",
71
+ "minLength": 1,
72
+ "maxLength": 100,
73
+ "description": "MAY. Short UI label; falls back to `persona`."
74
+ },
75
+ "description": {
76
+ "type": "string",
77
+ "maxLength": 500,
78
+ "description": "MAY. One-line summary for catalog / console surfaces."
79
+ }
80
+ }
81
+ }
@@ -0,0 +1,21 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/agent-roster-response.schema.json",
4
+ "title": "AgentRosterResponse",
5
+ "description": "RFC 0086 §B. Response body for `GET /v1/agents/roster` — the standing agent roster (named instances + their workflow portfolios) visible to the authenticated principal. Tenant-scoped per RFC 0074 on an `installScope: 'tenant'` host: only the caller's owner-triple entries are returned. Each entry is the canonical `agent-roster-entry.schema.json` shape; content-free of any system-prompt body or credential material (SR-1).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["roster", "total"],
9
+ "properties": {
10
+ "roster": {
11
+ "type": "array",
12
+ "items": { "$ref": "./agent-roster-entry.schema.json" },
13
+ "description": "The standing roster entries, in a stable (rosterId-sorted) order."
14
+ },
15
+ "total": {
16
+ "type": "integer",
17
+ "minimum": 0,
18
+ "description": "Count of entries returned (== roster.length)."
19
+ }
20
+ }
21
+ }
@@ -75,6 +75,34 @@
75
75
  "type": "string",
76
76
  "maxLength": 256,
77
77
  "description": "Optional human-readable label for ops dashboards (e.g., `\"Draft PRD #2\"`). Never used for routing; never persisted into event payloads in a security-relevant way. (in-flight)"
78
+ },
79
+ "rendering": {
80
+ "type": "object",
81
+ "description": "RFC 0055. Optional hint for how a consumer SHOULD render this envelope's payload. Non-normative w.r.t. payload validation; a consumer that doesn't recognize the hint (or a `display` value) MUST fall back to its default rendering (text / raw JSON). Carries no secret material (SR-1 applies as for the rest of `meta`).",
82
+ "properties": {
83
+ "display": {
84
+ "type": "string",
85
+ "enum": ["markdown", "code", "card", "image", "audio", "file"],
86
+ "description": "Renderer family the producer suggests."
87
+ },
88
+ "mimeType": {
89
+ "type": "string",
90
+ "description": "IANA media type when `display` is `image`/`audio`/`file`."
91
+ },
92
+ "lang": {
93
+ "type": "string",
94
+ "description": "Programming-language identifier when `display` is `code` (e.g. `ts`, `python`); not a natural-language (BCP 47) tag."
95
+ },
96
+ "alt": {
97
+ "type": "string",
98
+ "description": "Text alternative for accessibility when `display` is `image`/`audio`/`file`. SHOULD be present for those families."
99
+ },
100
+ "title": {
101
+ "type": "string",
102
+ "description": "Optional caption / card header."
103
+ }
104
+ },
105
+ "additionalProperties": false
78
106
  }
79
107
  },
80
108
  "additionalProperties": false,