@openwop/openwop-conformance 1.6.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +74 -1
  4. package/api/openapi.yaml +316 -0
  5. package/coverage.md +16 -0
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures.md +19 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +12 -0
  10. package/schemas/agent-inventory-response.schema.json +90 -0
  11. package/schemas/ai-envelope.schema.json +28 -0
  12. package/schemas/annotation-create.schema.json +37 -0
  13. package/schemas/annotation.schema.json +56 -0
  14. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  15. package/schemas/capabilities.schema.json +195 -4
  16. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  17. package/schemas/envelopes/media.audio.schema.json +38 -0
  18. package/schemas/envelopes/media.file.schema.json +37 -0
  19. package/schemas/envelopes/media.image.schema.json +33 -0
  20. package/schemas/heartbeat-evaluated.schema.json +14 -0
  21. package/schemas/heartbeat-state-changed.schema.json +14 -0
  22. package/schemas/node-pack-manifest.schema.json +16 -1
  23. package/schemas/run-event-payloads.schema.json +96 -5
  24. package/schemas/run-event.schema.json +4 -0
  25. package/schemas/workflow-definition.schema.json +5 -0
  26. package/schemas/workspace-file-create.schema.json +20 -0
  27. package/schemas/workspace-file.schema.json +39 -0
  28. package/src/lib/agentLoop.ts +44 -0
  29. package/src/lib/agentRuntime.ts +45 -0
  30. package/src/lib/artifactTypes.ts +96 -0
  31. package/src/lib/cardPacks.ts +52 -0
  32. package/src/lib/discovery-capabilities.ts +50 -0
  33. package/src/lib/distillation.ts +38 -0
  34. package/src/lib/feedback.ts +31 -0
  35. package/src/lib/heartbeat.ts +31 -0
  36. package/src/lib/memoryAttribution.ts +48 -0
  37. package/src/lib/subRunAttestation.ts +35 -0
  38. package/src/lib/toolHooks.ts +33 -0
  39. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  40. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  41. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  42. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  43. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  44. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  45. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  46. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  47. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  48. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  49. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  50. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  51. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  52. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  53. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  54. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  55. package/src/scenarios/auth-mtls.test.ts +2 -1
  56. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  57. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  58. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  59. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  60. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  61. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  62. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  63. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  64. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  65. package/src/scenarios/commitment-fired.test.ts +83 -0
  66. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  67. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  68. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  69. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  70. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  71. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  72. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  73. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  74. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  75. package/src/scenarios/distillation-shape.test.ts +41 -0
  76. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  77. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  78. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  79. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  80. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  81. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  82. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  83. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  84. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  85. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  86. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  87. package/src/scenarios/feedback-capability-shape.test.ts +35 -0
  88. package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
  89. package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
  90. package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
  91. package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
  92. package/src/scenarios/feedback-record-and-list.test.ts +32 -0
  93. package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
  94. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  95. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  96. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  97. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  98. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  99. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  100. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  101. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  102. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  103. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  104. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  105. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  106. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  107. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  108. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  109. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  110. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  111. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  112. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  113. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  114. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  115. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  116. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  117. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  118. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  119. package/src/scenarios/pause-resume.test.ts +3 -3
  120. package/src/scenarios/production-backpressure.test.ts +2 -2
  121. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  122. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  123. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  124. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  125. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  126. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  127. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  128. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  129. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  130. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  131. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  132. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  133. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  134. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  135. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  136. package/src/scenarios/provider-usage.test.ts +2 -1
  137. package/src/scenarios/redaction.test.ts +4 -1
  138. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  139. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  140. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  141. package/src/scenarios/replayDeterminism.test.ts +3 -1
  142. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  143. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  144. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  145. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  146. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  147. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  148. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  149. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  150. package/src/scenarios/spec-corpus-validity.test.ts +4 -1
  151. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  152. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  153. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  154. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  155. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  156. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  157. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  158. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  159. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  160. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  161. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  162. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  163. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  164. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  165. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  166. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  167. package/src/scenarios/workspace-behavior.test.ts +134 -0
  168. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  169. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
@@ -0,0 +1,37 @@
1
+ /**
2
+ * distillation-stable-archive — RFC 0062 §B(4). The distilled archive is an
3
+ * immutable, addressable artifact: the same source set + budget MUST yield a
4
+ * byte-stable archive checksum (reproducible + auditable).
5
+ *
6
+ * Gated on `capabilities.memory.distillation.supported` + the host memory-
7
+ * distillation seam; soft-skips when either is absent.
8
+ *
9
+ * @see RFCS/0062-scheduled-memory-distillation.md §B
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
15
+
16
+ describe('distillation-stable-archive (RFC 0062 §B)', () => {
17
+ it('identical sources + budget produce an identical archive checksum', async () => {
18
+ if ((await readDistillationCap())?.supported !== true) return;
19
+ const req = {
20
+ memoryRef: 'conformance-distill',
21
+ tokenBudget: 8000,
22
+ sources: ['s1', 's2', 's3'],
23
+ };
24
+ const a = await invokeDistill(req);
25
+ if (a === null) return; // seam absent — soft-skip
26
+ const b = await invokeDistill(req);
27
+ if (b === null) return;
28
+ expect(
29
+ typeof a.body.archiveChecksum === 'string' && (a.body.archiveChecksum as string).length > 0,
30
+ driver.describe('RFC 0062 §B', 'a distillation run MUST produce a non-empty archive checksum'),
31
+ ).toBe(true);
32
+ expect(
33
+ b.body.archiveChecksum,
34
+ driver.describe('RFC 0062 §B', 'the same source set + budget MUST yield a byte-stable archive'),
35
+ ).toBe(a.body.archiveChecksum);
36
+ });
37
+ });
@@ -0,0 +1,45 @@
1
+ /**
2
+ * distillation-token-budget — RFC 0062 §B. A distillation run stays within its
3
+ * token budget (`memory.compacted.distillation.tokensUsed ≤ tokenBudget`); an
4
+ * un-meetable budget fails with `token_budget_exceeded` and writes no partial
5
+ * archive (atomic).
6
+ *
7
+ * Gated on `capabilities.memory.distillation.supported` + the host memory-
8
+ * distillation seam; soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0062-scheduled-memory-distillation.md §B
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
16
+
17
+ describe('distillation-token-budget (RFC 0062 §B)', () => {
18
+ it('within budget tokensUsed ≤ tokenBudget; an un-meetable budget fails atomically', async () => {
19
+ if ((await readDistillationCap())?.supported !== true) return;
20
+
21
+ const ok = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 8000 });
22
+ if (ok === null) return; // seam absent — soft-skip
23
+ const dist = ok.body.event?.distillation ?? {};
24
+ expect(
25
+ typeof dist.tokenBudget === 'number' && typeof dist.tokensUsed === 'number',
26
+ driver.describe('RFC 0062 §B', 'memory.compacted MUST carry distillation.tokenBudget + tokensUsed on a budgeted run'),
27
+ ).toBe(true);
28
+ expect(
29
+ (dist.tokensUsed as number) <= (dist.tokenBudget as number),
30
+ driver.describe('RFC 0062 §B', 'a successful distillation MUST consume ≤ its tokenBudget'),
31
+ ).toBe(true);
32
+
33
+ // A budget too small to distill the corpus MUST fail closed, no partial archive.
34
+ const tooSmall = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 1 });
35
+ if (tooSmall === null) return;
36
+ expect(
37
+ tooSmall.status >= 400 && tooSmall.body.error === 'token_budget_exceeded',
38
+ driver.describe('RFC 0062 §B', 'an un-meetable budget MUST fail with token_budget_exceeded'),
39
+ ).toBe(true);
40
+ expect(
41
+ tooSmall.body.archiveChecksum,
42
+ driver.describe('RFC 0062 §B', 'a token_budget_exceeded run MUST write no partial archive (atomic)'),
43
+ ).toBeUndefined();
44
+ });
45
+ });
@@ -31,6 +31,7 @@ import { describe, it, expect } from 'vitest';
31
31
  import { driver } from '../lib/driver.js';
32
32
  import { pollUntilTerminal } from '../lib/polling.js';
33
33
  import { isFixtureAdvertised } from '../lib/fixtures.js';
34
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
34
35
 
35
36
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
36
37
  const NODE_ID = 'structured-call';
@@ -91,7 +92,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: advert
91
92
  it('capabilities.envelopes.reliability.completion (when present) conforms to RFC 0033 §E', async () => {
92
93
  const d = await readDiscovery();
93
94
  if (d === null) return;
94
- const completion = d.capabilities?.envelopes?.reliability?.completion;
95
+ const completion = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion;
95
96
  if (completion === undefined) return;
96
97
  expect(
97
98
  typeof completion.distinguishesTruncation,
@@ -114,7 +115,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
114
115
  it('truncation: emits envelope.truncated + envelope.retry.attempted with reason: "truncation"', async () => {
115
116
  if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
116
117
  const d = await readDiscovery();
117
- if (d?.capabilities?.envelopes?.reliability?.completion?.distinguishesTruncation !== true) return;
118
+ if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
118
119
  const seed = await programMock([
119
120
  { stopReason: 'max_tokens', content: '{"partial' },
120
121
  { stopReason: 'end_turn', content: '{"valid":true}' },
@@ -139,7 +140,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
139
140
  it('truncation: retry budget strictly greater than initial (RFC 0033 §B truncationBudgetMultiplier)', async () => {
140
141
  if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
141
142
  const d = await readDiscovery();
142
- if (d?.capabilities?.envelopes?.reliability?.completion?.distinguishesTruncation !== true) return;
143
+ if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
143
144
  const seed = await programMock([
144
145
  { stopReason: 'max_tokens', content: '{"partial' },
145
146
  { stopReason: 'end_turn', content: '{"valid":true}' },
@@ -35,6 +35,7 @@
35
35
 
36
36
  import { describe, it, expect } from 'vitest';
37
37
  import { driver } from '../lib/driver.js';
38
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
38
39
 
39
40
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
40
41
 
@@ -97,8 +98,8 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: advertisement s
97
98
  it('hosts advertising envelope reasoning + BYOK honor SR-1 carry-forward for the reasoning field', async () => {
98
99
  const d = await readDiscovery();
99
100
  if (d === null) return;
100
- const reasoning = d.capabilities?.envelopes?.reasoning?.supported;
101
- const secrets = d.capabilities?.secrets?.supported;
101
+ const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning?.supported;
102
+ const secrets = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported;
102
103
  if (reasoning !== true || secrets !== true) return; // soft-skip when either is absent
103
104
  // The contract is invariant-based, not capability-flag-based — the
104
105
  // advertisement-shape check here just confirms both surfaces are claimed.
@@ -257,7 +258,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
257
258
  // RFC 0034 §B: gate on capabilities.observability.testSeams.otelScrape.
258
259
  // Hosts that don't advertise it soft-skip; hosts that DO advertise MUST serve a valid response.
259
260
  const d = await readDiscovery();
260
- const otelScrapeAdvertised = d?.capabilities?.observability?.testSeams?.otelScrape === true;
261
+ const otelScrapeAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
261
262
  if (!otelScrapeAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
262
263
 
263
264
  const r = await acceptForRun(
@@ -291,7 +292,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
291
292
  it("debug-bundle export MUST NOT include plaintext `secret:`-prefixed substrings from envelope.reasoning", async () => {
292
293
  // RFC 0034 §B: gate on capabilities.observability.testSeams.debugBundleExport.
293
294
  const d = await readDiscovery();
294
- const debugBundleAdvertised = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
295
+ const debugBundleAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
295
296
  if (!debugBundleAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
296
297
 
297
298
  const r = await acceptForRun(
@@ -32,6 +32,7 @@ import { readFileSync } from 'node:fs';
32
32
  import { join } from 'node:path';
33
33
  import { driver } from '../lib/driver.js';
34
34
  import { SCHEMAS_DIR } from '../lib/paths.js';
35
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
35
36
 
36
37
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
37
38
 
@@ -163,7 +164,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
163
164
  it('capabilities.envelopes.reasoning (when present) conforms to RFC 0030 §C', async () => {
164
165
  const d = await readDiscovery();
165
166
  if (d === null) return;
166
- const reasoning = d.capabilities?.envelopes?.reasoning;
167
+ const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning;
167
168
  if (reasoning === undefined) return; // optional block; host MAY omit
168
169
  expect(
169
170
  typeof reasoning.supported,
@@ -180,7 +181,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
180
181
  it('capabilities.envelopes.tierOneSubsetCompliance (when present) conforms to RFC 0030 §B', async () => {
181
182
  const d = await readDiscovery();
182
183
  if (d === null) return;
183
- const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
184
+ const compliance = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.tierOneSubsetCompliance;
184
185
  if (compliance === undefined) return; // optional; host MAY omit
185
186
  expect(
186
187
  ['strict', 'warn', 'off'],
@@ -64,7 +64,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: seam emission (RFC 0032 §B.
64
64
  it('accepts a well-formed `envelope.refusal` payload + writes it to the test event log', async () => {
65
65
  const d = await readDiscovery();
66
66
  if (d === null) return;
67
- const reliability = d.capabilities?.envelopes?.reliability;
67
+ const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
68
68
  if (!reliability || reliability.supported !== true) return;
69
69
  if (!Array.isArray(reliability.events) || !(reliability.events as unknown[]).includes('envelope.refusal')) return;
70
70
 
@@ -154,7 +154,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
154
154
  it('capabilities.envelopes.reliability (when supported: true with non-empty events[]) MUST list both MUST-tier events', async () => {
155
155
  const d = await readDiscovery();
156
156
  if (d === null) return;
157
- const reliability = d.capabilities?.envelopes?.reliability;
157
+ const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
158
158
  if (!reliability || reliability.supported !== true) return;
159
159
  // Hosts running the legacy undifferentiated retry loop advertise
160
160
  // `events: []` (per the OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=false
@@ -190,6 +190,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
190
190
 
191
191
  import { pollUntilTerminal } from '../lib/polling.js';
192
192
  import { isFixtureAdvertised } from '../lib/fixtures.js';
193
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
193
194
 
194
195
  const E2E_FIXTURE = 'conformance-envelope-refusal';
195
196
  const E2E_NODE_ID = 'structured-call';
@@ -0,0 +1,95 @@
1
+ /**
2
+ * envelope-rendering-hint — RFC 0055 §B `meta.rendering` shape conformance.
3
+ *
4
+ * Server-free schema assertions that the optional rendering hint is exactly
5
+ * that — optional and additive:
6
+ * 1. An envelope WITH a well-formed `meta.rendering` validates.
7
+ * 2. An envelope WITHOUT `meta.rendering` still validates (proves the
8
+ * property is optional — existing envelopes are unaffected).
9
+ * 3. An unknown `display` value is rejected by the closed enum (the
10
+ * vocabulary is fixed; consumers fall back, producers don't invent).
11
+ * 4. An unknown property under `rendering` is rejected
12
+ * (additionalProperties:false on the hint object).
13
+ *
14
+ * Always runs (pure on-disk Ajv2020 validation).
15
+ *
16
+ * @see RFCS/0055-multimodal-envelope-variants-and-rendering-hints.md §B
17
+ * @see spec/v1/ai-envelope.md §"Rendering hints"
18
+ * @see schemas/ai-envelope.schema.json ($defs.EnvelopeMeta.rendering)
19
+ */
20
+
21
+ import { describe, it, expect } from 'vitest';
22
+ import Ajv2020 from 'ajv/dist/2020.js';
23
+ import addFormats from 'ajv-formats';
24
+ import { readFileSync } from 'node:fs';
25
+ import { join } from 'node:path';
26
+ import { SCHEMAS_DIR } from '../lib/paths.js';
27
+
28
+ function compileEnvelope(): ReturnType<Ajv2020['compile']> {
29
+ const ajv = new Ajv2020({ strict: false, allErrors: true });
30
+ addFormats(ajv);
31
+ const schema = JSON.parse(
32
+ readFileSync(join(SCHEMAS_DIR, 'ai-envelope.schema.json'), 'utf8'),
33
+ ) as Record<string, unknown>;
34
+ return ajv.compile(schema);
35
+ }
36
+
37
+ const baseEnvelope = {
38
+ type: 'error',
39
+ schemaVersion: 1,
40
+ envelopeId: 'env-rendering-1',
41
+ correlationId: 'run-1:node-2:turn-0:abc123',
42
+ payload: { code: 'x', message: 'y' },
43
+ meta: { source: 'ai-generation' as const, ts: '2026-05-25T10:00:00Z' },
44
+ };
45
+
46
+ describe('envelope-rendering-hint: meta.rendering shape (RFC 0055 §B)', () => {
47
+ const validate = compileEnvelope();
48
+
49
+ it('accepts an envelope carrying a well-formed meta.rendering hint', () => {
50
+ const env = {
51
+ ...baseEnvelope,
52
+ meta: {
53
+ ...baseEnvelope.meta,
54
+ rendering: { display: 'image', mimeType: 'image/png', alt: 'Q3 revenue chart', title: 'Revenue' },
55
+ },
56
+ };
57
+ const ok = validate(env);
58
+ expect(
59
+ ok,
60
+ 'ai-envelope.md §"Rendering hints": ' + `meta.rendering MUST validate; errors: ${JSON.stringify(validate.errors)}`,
61
+ ).toBe(true);
62
+ });
63
+
64
+ it('accepts an envelope with NO meta.rendering (proves the property is optional)', () => {
65
+ const ok = validate(baseEnvelope);
66
+ expect(
67
+ ok,
68
+ 'ai-envelope.md §"Rendering hints": ' + 'meta.rendering MUST be optional — envelopes omitting it still validate',
69
+ ).toBe(true);
70
+ });
71
+
72
+ it('rejects an unknown display value (closed enum)', () => {
73
+ const env = {
74
+ ...baseEnvelope,
75
+ meta: { ...baseEnvelope.meta, rendering: { display: 'hologram' } },
76
+ };
77
+ const ok = validate(env);
78
+ expect(
79
+ ok,
80
+ 'ai-envelope.md §"Rendering hints": ' + 'display is a closed enum — unknown families MUST be rejected',
81
+ ).toBe(false);
82
+ });
83
+
84
+ it('rejects an unknown property under rendering (additionalProperties:false)', () => {
85
+ const env = {
86
+ ...baseEnvelope,
87
+ meta: { ...baseEnvelope.meta, rendering: { display: 'markdown', wat: true } },
88
+ };
89
+ const ok = validate(env);
90
+ expect(
91
+ ok,
92
+ 'ai-envelope.md §"Rendering hints": ' + 'rendering is additionalProperties:false',
93
+ ).toBe(false);
94
+ });
95
+ });
@@ -59,7 +59,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
59
59
  it('capabilities.envelopes.reliability (when present) conforms to RFC 0032 §C', async () => {
60
60
  const d = await readDiscovery();
61
61
  if (d === null) return;
62
- const reliability = d.capabilities?.envelopes?.reliability;
62
+ const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
63
63
  if (reliability === undefined) return;
64
64
  expect(typeof reliability.supported, 'reliability.supported MUST be boolean').toBe('boolean');
65
65
  if (reliability.events !== undefined) {
@@ -114,6 +114,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
114
114
 
115
115
  import { pollUntilTerminal } from '../lib/polling.js';
116
116
  import { isFixtureAdvertised } from '../lib/fixtures.js';
117
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
117
118
 
118
119
  const FIXTURE = 'conformance-envelope-retry-attempted';
119
120
  const NODE_ID = 'structured-call';
@@ -34,6 +34,7 @@ import { readFileSync, existsSync } from 'node:fs';
34
34
  import { join } from 'node:path';
35
35
  import { driver } from '../lib/driver.js';
36
36
  import { SCHEMAS_DIR } from '../lib/paths.js';
37
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
37
38
 
38
39
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
39
40
 
@@ -179,9 +180,9 @@ describe.skipIf(HTTP_SKIP)('envelope-tier-one-subset-static (RFC 0030 §B)', ()
179
180
  it('hosts advertising tierOneSubsetCompliance: "strict" have payload schemas that satisfy the Tier-1 intersection', async () => {
180
181
  const d = await readDiscovery();
181
182
  if (d === null) return; // host unreachable; soft-skip
182
- const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
183
+ const compliance = capabilityFamily(d, 'envelopes')?.tierOneSubsetCompliance;
183
184
  if (compliance !== 'strict') return; // gated on "strict" only
184
- const advertised = (d.capabilities?.supportedEnvelopes ?? []) as string[];
185
+ const advertised = (capabilityFamily(d, 'supportedEnvelopes') ?? []) as string[];
185
186
  if (advertised.length === 0) return;
186
187
 
187
188
  const violationsByKind: Record<string, Violation[]> = {};
@@ -0,0 +1,137 @@
1
+ /**
2
+ * exec-class tools MUST NOT be protocol-tier (RFC 0069, `Draft`).
3
+ *
4
+ * Always-on, server-free structural assertion over the spec corpus. Verifies
5
+ * the SECURITY invariant `exec-must-not-be-protocol-tier`: the protocol
6
+ * defines NO arbitrary-command (`exec`-class) primitive under a
7
+ * protocol-owned namespace (`core.*` / `openwop.*`), NO exec capability
8
+ * flag in `capabilities.schema.json`, and NO exec-class entry in the
9
+ * canonical RunEventType vocabulary.
10
+ *
11
+ * This guards against an independent implementer reading the protocol's
12
+ * silence as permission to ship a `core.exec` RCE primitive other hosts
13
+ * would treat as canonical. The assertion is against the protocol's OWN
14
+ * surface — it must hold for every release of the corpus regardless of
15
+ * which host runs it. A `vendor.acme.exec` / `x-host-acme-exec` identifier
16
+ * is allowed (host-extension namespace); the check fires only on
17
+ * protocol-owned namespaces.
18
+ *
19
+ * Spec references:
20
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/host-extensions.md §"exec-class tools"
21
+ * - https://github.com/openwop/openwop/blob/main/SECURITY/threat-model-prompt-injection.md §"exec tools"
22
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md
23
+ */
24
+
25
+ import { describe, it, expect } from 'vitest';
26
+ import { readFileSync, readdirSync } from 'node:fs';
27
+ import { join } from 'node:path';
28
+ import { SCHEMAS_DIR } from '../lib/paths.js';
29
+
30
/**
 * Formats an assertion message as "<specRef> — <requirement>".
 *
 * Used by the server-free tests in this file so they can label failures
 * without going through the HTTP driver (no OPENWOP_BASE_URL needed).
 *
 * @param specRef - Spec document / section the assertion is anchored to.
 * @param requirement - The normative statement being checked.
 * @returns The two parts joined by an em dash surrounded by spaces.
 */
const why = (specRef: string, requirement: string): string => {
  return [specRef, requirement].join(' — ');
};
32
+
33
/**
 * Closed denylist of exec-class identifier segments, stored lower-case and
 * without hyphens. Lookups are whole-token: a candidate segment must equal
 * one of these entries exactly, so words that merely contain an entry
 * (e.g. `execution` in `multi-agent-execution`) are not matched.
 */
const EXEC_SEGMENTS = new Set(
  ['exec', 'shell', 'spawn', 'runcommand', 'runscript', 'subprocess', 'systemcall', 'eval'],
);

/**
 * Namespace prefixes owned by the protocol itself
 * (host-extensions.md §"Canonical prefixes").
 */
const PROTOCOL_PREFIXES = [
  'core.',
  'openwop.',
];
52
+
53
/**
 * Extracts every quoted identifier that starts with `core.` or `openwop.`
 * from a corpus file's text.
 *
 * An identifier counts when it is wrapped in a double quote, single quote
 * or backtick on each side; the surrounding quote characters are stripped
 * from the returned values.
 *
 * @param text - Raw file contents to scan.
 * @returns The matched identifiers in order of appearance (duplicates kept).
 */
function protocolOwnedIds(text: string): string[] {
  const quotedId = /["'`](core|openwop)\.[a-zA-Z0-9_.-]+["'`]/g;
  // hit[0] is the full match including both quote characters; drop them.
  return Array.from(text.matchAll(quotedId), (hit) => hit[0].slice(1, -1));
}
63
+
64
/**
 * Reports whether an identifier is a protocol-owned exec-class primitive.
 *
 * Only identifiers under a protocol-owned prefix (`PROTOCOL_PREFIXES`) are
 * considered; host-extension namespaces always return `false`. The final
 * dot-separated segment is lower-cased and stripped of word separators
 * before being looked up in `EXEC_SEGMENTS`, so `runCommand`, `run-command`
 * and `run_command` all normalise to `runcommand`.
 *
 * @param id - Fully-qualified identifier, e.g. `core.exec`.
 * @returns `true` when the id is protocol-owned and its last segment is on
 *   the exec-class denylist.
 */
function isExecClass(id: string): boolean {
  if (!PROTOCOL_PREFIXES.some((p) => id.startsWith(p))) return false;
  // Strip underscores as well as hyphens: the identifier grammar scanned by
  // this suite admits `_`, so `core.run_command` must not slip past.
  const lastSegment = id.split('.').pop()?.toLowerCase().replace(/[-_]/g, '') ?? '';
  return EXEC_SEGMENTS.has(lastSegment);
}
69
+
70
/**
 * Server-free structural checks for the `exec-must-not-be-protocol-tier`
 * invariant. Each case reads schema files from SCHEMAS_DIR and collects
 * offending names into a list that must be empty, so a failure prints every
 * offender at once.
 */
describe('exec-not-protocol-tier: no exec-class primitive in the protocol corpus (RFC 0069, server-free)', () => {
  /** Lower-cases a token and drops `-` / `_` so it can be compared against EXEC_SEGMENTS. */
  const normalize = (token: string): string => token.toLowerCase().replace(/[-_]/g, '');

  /**
   * Lists every `*.schema.json` under `dir`, descending into subdirectories
   * (e.g. `envelopes/`). Returned paths are relative to the starting
   * directory and use `/` as the separator.
   */
  const listSchemaFiles = (dir: string, prefix = ''): string[] => {
    const found: string[] = [];
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      if (entry.isDirectory()) {
        found.push(...listSchemaFiles(join(dir, entry.name), `${prefix}${entry.name}/`));
      } else if (entry.name.endsWith('.schema.json')) {
        found.push(`${prefix}${entry.name}`);
      }
    }
    return found;
  };

  it('no protocol-owned (core.* / openwop.*) identifier denotes arbitrary command execution', () => {
    // Recursive on purpose: schemas nested in subdirectories are part of the
    // corpus too and would be missed by a flat directory listing.
    const schemaFiles = listSchemaFiles(SCHEMAS_DIR);
    const offenders: string[] = [];
    for (const f of schemaFiles) {
      const text = readFileSync(join(SCHEMAS_DIR, f), 'utf8');
      for (const id of protocolOwnedIds(text)) {
        if (isExecClass(id)) offenders.push(`${f}: ${id}`);
      }
    }
    expect(
      offenders,
      why(
        'host-extensions.md §exec-class tools',
        'the protocol corpus MUST NOT define a core.*/openwop.* exec-class identifier',
      ),
    ).toEqual([]);
  });

  it('no capabilities.schema.json property name denotes arbitrary command execution', () => {
    const caps = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'capabilities.schema.json'), 'utf8')) as Record<string, unknown>;
    const offenders: string[] = [];

    // Keywords whose value is a sub-schema or an array of sub-schemas.
    const SCHEMA_KEYWORDS = [
      'items',
      'prefixItems',
      'additionalProperties',
      'unevaluatedProperties',
      'contains',
      'allOf',
      'anyOf',
      'oneOf',
      'not',
      'if',
      'then',
      'else',
    ];
    // Keywords whose value is a name -> sub-schema map. The map keys are
    // definition names / patterns, not property names, so they are not
    // checked against the denylist themselves.
    const SCHEMA_MAP_KEYWORDS = ['$defs', 'definitions', 'patternProperties', 'dependentSchemas'];

    const walkProps = (node: unknown, path: string): void => {
      if (!node || typeof node !== 'object') return;
      if (Array.isArray(node)) {
        node.forEach((child, i) => walkProps(child, `${path}[${i}]`));
        return;
      }
      const obj = node as Record<string, unknown>;
      const props = obj.properties as Record<string, unknown> | undefined;
      if (props && typeof props === 'object' && !Array.isArray(props)) {
        for (const key of Object.keys(props)) {
          if (EXEC_SEGMENTS.has(normalize(key))) {
            offenders.push(`${path}.${key}`);
          }
          walkProps(props[key], `${path}.${key}`);
        }
      }
      // A flag can also be declared outside a plain `properties` chain
      // (shared definitions, array items, composition branches); visit those.
      for (const keyword of SCHEMA_KEYWORDS) {
        walkProps(obj[keyword], `${path}.${keyword}`);
      }
      for (const keyword of SCHEMA_MAP_KEYWORDS) {
        const map = obj[keyword];
        if (!map || typeof map !== 'object' || Array.isArray(map)) continue;
        for (const [name, sub] of Object.entries(map as Record<string, unknown>)) {
          walkProps(sub, `${path}.${keyword}.${name}`);
        }
      }
    };
    walkProps(caps, 'capabilities');
    expect(
      offenders,
      why('host-extensions.md §exec-class tools', 'capabilities.schema.json MUST NOT declare an exec-class capability flag'),
    ).toEqual([]);
  });

  it('the canonical RunEventType vocabulary contains no exec-class event', () => {
    const runEvent = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'run-event.schema.json'), 'utf8')) as {
      $defs?: { RunEventType?: { enum?: string[] } };
    };
    const enumVals = runEvent.$defs?.RunEventType?.enum ?? [];
    // Only the final dot-separated segment of each event type is compared.
    const offenders = enumVals.filter((v) => EXEC_SEGMENTS.has(normalize(v.split('.').pop() ?? '')));
    expect(
      offenders,
      why('host-extensions.md §exec-class tools', 'no RunEventType MUST denote arbitrary command execution'),
    ).toEqual([]);
  });

  it('positive control: a vendor / x-host exec identifier is allowed (host-extension namespace)', () => {
    expect(isExecClass('vendor.acme.exec')).toBe(false);
    expect(isExecClass('x-host-acme-exec')).toBe(false);
    expect(isExecClass('private.host.shell')).toBe(false);
    // And the denylist actually fires on a protocol-owned id:
    expect(isExecClass('core.exec')).toBe(true);
    expect(isExecClass('openwop.shell')).toBe(true);
    // Negative control: a protocol-owned id with a benign final segment is not flagged.
    expect(isExecClass('core.workflowChain.event')).toBe(false);
  });
});
@@ -29,6 +29,7 @@
29
29
  import { describe, it, expect } from 'vitest';
30
30
  import { driver } from '../lib/driver.js';
31
31
  import { experimentalGate } from '../lib/behavior-gate.js';
32
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
32
33
 
33
34
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
34
35
 
@@ -57,7 +58,7 @@ async function readDiscovery(): Promise<DiscoveryDoc | null> {
57
58
  describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC 0042 §A)', () => {
58
59
  it('multiAgent.executionModel.tier (when present) MUST be one of {stable, experimental}', async (ctx) => {
59
60
  const d = await readDiscovery();
60
- const em = d?.capabilities?.multiAgent?.executionModel;
61
+ const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
61
62
  if (em === undefined) {
62
63
  ctx.skip();
63
64
  return;
@@ -77,7 +78,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
77
78
 
78
79
  it('when tier === "experimental", experimentalUntil MUST be present + valid date', async (ctx) => {
79
80
  const d = await readDiscovery();
80
- const em = d?.capabilities?.multiAgent?.executionModel;
81
+ const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
81
82
  if (em === undefined || em.tier !== 'experimental') {
82
83
  ctx.skip();
83
84
  return;
@@ -112,7 +113,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
112
113
 
113
114
  it('experimentalUntil MUST be ≤ 365 days in the future (sunset bound)', async (ctx) => {
114
115
  const d = await readDiscovery();
115
- const em = d?.capabilities?.multiAgent?.executionModel;
116
+ const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
116
117
  if (em === undefined || em.tier !== 'experimental') {
117
118
  ctx.skip();
118
119
  return;
@@ -135,7 +136,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
135
136
 
136
137
  it('sunset detection: experimentalUntil in the past is non-conformant', async (ctx) => {
137
138
  const d = await readDiscovery();
138
- const em = d?.capabilities?.multiAgent?.executionModel;
139
+ const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
139
140
  if (em === undefined || em.tier !== 'experimental') {
140
141
  ctx.skip();
141
142
  return;
@@ -0,0 +1,35 @@
1
+ /**
2
+ * feedback-capability-shape — RFC 0056 §A. The `capabilities.feedback`
3
+ * advertisement block is either absent or a well-formed object.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage
6
+ * lives in the sibling `feedback-*.test.ts` scenarios, gated on
7
+ * `capabilities.feedback.supported`.
8
+ *
9
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §A
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readFeedbackCap } from '../lib/feedback.js';
15
+
16
/**
 * Advertisement-shape check for the feedback capability block: when the
 * block is advertised, `supported` must be a boolean and any listed
 * targets / signals must come from the closed vocabularies below.
 */
describe('feedback-capability-shape: advertisement (RFC 0056 §A)', () => {
  it('capabilities.feedback is absent or a well-formed object', async () => {
    const cap = await readFeedbackCap();
    // A null result means the block is not advertised, which is valid.
    if (cap === null) return;

    expect(
      typeof cap.supported,
      driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.supported MUST be a boolean when present'),
    ).toBe('boolean');

    // Non-array values are tolerated here; only array members are validated.
    const targets = Array.isArray(cap.targets) ? cap.targets : [];
    targets.forEach((target) => {
      expect(['run', 'event', 'node']).toContain(target);
    });

    const signals = Array.isArray(cap.signals) ? cap.signals : [];
    signals.forEach((signal) => {
      expect(['rating', 'correction', 'label', 'flag']).toContain(signal);
    });
  });
});
@@ -0,0 +1,35 @@
1
+ /**
2
+ * feedback-correction-redaction — RFC 0056 §E + SECURITY/invariants.yaml
3
+ * `annotation-content-redaction`. `signal.correction` and `note` are
4
+ * untrusted user content; secret-shaped material MUST be redacted under
5
+ * SR-1 before persistence/listing/export.
6
+ *
7
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
8
+ * @see SECURITY/invariants.yaml — annotation-content-redaction
9
+ */
10
+
11
+ import { describe, it, expect } from 'vitest';
12
+ import { driver } from '../lib/driver.js';
13
+ import { readFeedbackCap, seedRun } from '../lib/feedback.js';
14
+
15
/** Secret-shaped marker posted as user content; it must never come back from the list endpoint. */
const CANARY = 'sk-canary-rfc0056-do-not-leak-abc123';

/**
 * Posts an annotation whose `correction` and `note` both carry CANARY, then
 * reads the run's annotation list and asserts the marker is absent from the
 * serialized response. Soft-skips when feedback is not advertised, no run
 * could be seeded, or the annotation endpoints answer 501/404.
 */
describe('feedback-correction-redaction (RFC 0056 §E)', () => {
  it('secret-shaped material in correction/note is redacted in the annotation list', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;
    const runId = await seedRun('feedback-redact');
    if (!runId) return;
    const post = await driver.post(`/v1/runs/${runId}/annotations`, {
      signal: { kind: 'correction', correction: `please use ${CANARY}` },
      note: CANARY,
    });
    if (post.status === 501 || post.status === 404) return;
    expect(post.status).toBe(201);
    const list = await driver.get(`/v1/runs/${runId}/annotations`);
    // Same soft-skip gate as the POST above for an unimplemented endpoint.
    // NOTE(review): assumes driver.get responses expose `status` like driver.post — confirm.
    if (list.status === 501 || list.status === 404) return;
    // Without this, an error response (no body, or an error body that simply
    // lacks the canary) would make the redaction assertion pass vacuously.
    expect(
      list.status,
      'annotation list must be readable (HTTP 200) for the redaction check to be meaningful',
    ).toBe(200);
    expect(
      JSON.stringify(list.json ?? {}).includes(CANARY),
      driver.describe('RFC 0056 §E', 'secret-shaped material MUST be redacted before persistence/listing (SR-1)'),
    ).toBe(false);
  });
});
@@ -0,0 +1,37 @@
1
+ /**
2
+ * feedback-cross-tenant-isolation — RFC 0056 §E + SECURITY/invariants.yaml
3
+ * `annotation-cross-tenant-isolation`. A run's annotation list MUST contain
4
+ * only that run's annotations (mirrors CTI-1).
5
+ *
6
+ * The run-scoped check runs against any feedback host. The full cross-tenant
7
+ * proof (tenant B cannot read tenant A's run) needs a multi-tenant auth seam
8
+ * not yet standardized for this surface — that half soft-skips, mirroring
9
+ * `kv-cross-tenant-isolation`'s seam gate.
10
+ *
11
+ * @see RFCS/0056-run-feedback-and-annotation-event.md §E
12
+ * @see SECURITY/invariants.yaml — annotation-cross-tenant-isolation
13
+ */
14
+
15
+ import { describe, it, expect } from 'vitest';
16
+ import { driver } from '../lib/driver.js';
17
+ import { readFeedbackCap, seedRun } from '../lib/feedback.js';
18
+
19
/**
 * Run-scoped half of the isolation check: after posting one annotation to a
 * freshly seeded run, every entry returned by that run's annotation list
 * must carry the same runId in `target.runId`. Soft-skips when feedback is
 * not advertised, no run could be seeded, or the annotation endpoints
 * answer 501/404.
 */
describe('feedback-cross-tenant-isolation (RFC 0056 §E)', () => {
  it('a run\'s annotation list contains only that run\'s annotations', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;
    const runId = await seedRun('feedback-cti');
    if (!runId) return;
    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'label', label: 'cti-probe' } });
    if (post.status === 501 || post.status === 404) return;
    expect(post.status).toBe(201);
    const list = await driver.get(`/v1/runs/${runId}/annotations`);
    // Same soft-skip gate as the POST above for an unimplemented endpoint.
    // NOTE(review): assumes driver.get responses expose `status` like driver.post — confirm.
    if (list.status === 501 || list.status === 404) return;
    // A failed read would otherwise fall through to an empty list and the
    // loop below would assert nothing.
    expect(
      list.status,
      'annotation list must be readable (HTTP 200) for the isolation check to be meaningful',
    ).toBe(200);
    const ann = (list.json as { annotations?: Array<{ target?: { runId?: string } }> } | undefined)?.annotations ?? [];
    for (const a of ann) {
      expect(
        a.target?.runId,
        driver.describe('RFC 0056 §E', 'an annotation list MUST contain only this run\'s annotations (CTI-1)'),
      ).toBe(runId);
    }
  });
});