@openwop/openwop-conformance 1.6.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +57 -0
  4. package/api/openapi.yaml +250 -0
  5. package/coverage.md +14 -0
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures.md +19 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +10 -0
  10. package/schemas/agent-inventory-response.schema.json +90 -0
  11. package/schemas/ai-envelope.schema.json +28 -0
  12. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  13. package/schemas/capabilities.schema.json +171 -4
  14. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  15. package/schemas/envelopes/media.audio.schema.json +38 -0
  16. package/schemas/envelopes/media.file.schema.json +37 -0
  17. package/schemas/envelopes/media.image.schema.json +33 -0
  18. package/schemas/heartbeat-evaluated.schema.json +14 -0
  19. package/schemas/heartbeat-state-changed.schema.json +14 -0
  20. package/schemas/node-pack-manifest.schema.json +16 -1
  21. package/schemas/run-event-payloads.schema.json +96 -5
  22. package/schemas/run-event.schema.json +4 -0
  23. package/schemas/workflow-definition.schema.json +5 -0
  24. package/schemas/workspace-file-create.schema.json +20 -0
  25. package/schemas/workspace-file.schema.json +39 -0
  26. package/src/lib/agentLoop.ts +44 -0
  27. package/src/lib/agentRuntime.ts +45 -0
  28. package/src/lib/artifactTypes.ts +96 -0
  29. package/src/lib/cardPacks.ts +52 -0
  30. package/src/lib/discovery-capabilities.ts +50 -0
  31. package/src/lib/distillation.ts +38 -0
  32. package/src/lib/feedback.ts +3 -3
  33. package/src/lib/heartbeat.ts +31 -0
  34. package/src/lib/memoryAttribution.ts +48 -0
  35. package/src/lib/subRunAttestation.ts +35 -0
  36. package/src/lib/toolHooks.ts +33 -0
  37. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  38. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  39. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  40. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  41. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  42. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  43. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  44. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  45. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  46. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  47. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  48. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  49. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  50. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  51. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  52. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  53. package/src/scenarios/auth-mtls.test.ts +2 -1
  54. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  55. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  56. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  57. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  58. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  59. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  60. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  61. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  62. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  63. package/src/scenarios/commitment-fired.test.ts +83 -0
  64. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  65. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  66. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  67. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  68. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  69. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  70. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  71. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  72. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  73. package/src/scenarios/distillation-shape.test.ts +41 -0
  74. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  75. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  76. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  77. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  78. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  79. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  80. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  81. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  82. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  83. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  84. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  85. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  86. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  87. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  88. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  89. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  90. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  91. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  92. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  93. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  94. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  95. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  96. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  97. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  98. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  99. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  100. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  101. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  102. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  103. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  104. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  105. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  106. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  107. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  108. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  109. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  110. package/src/scenarios/pause-resume.test.ts +3 -3
  111. package/src/scenarios/production-backpressure.test.ts +2 -2
  112. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  113. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  114. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  115. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  116. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  117. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  118. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  119. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  120. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  121. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  122. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  123. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  124. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  125. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  126. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  127. package/src/scenarios/provider-usage.test.ts +2 -1
  128. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  129. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  130. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  131. package/src/scenarios/replayDeterminism.test.ts +3 -1
  132. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  133. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  134. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  135. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  136. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  137. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  138. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  139. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  140. package/src/scenarios/spec-corpus-validity.test.ts +1 -1
  141. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  142. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  143. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  144. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  145. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  146. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  147. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  148. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  149. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  150. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  151. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  152. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  153. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  154. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  155. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  156. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  157. package/src/scenarios/workspace-behavior.test.ts +134 -0
  158. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  159. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
@@ -0,0 +1,133 @@
1
+ /**
2
+ * run-execution-bounds-shape — RFC 0058 advertisement-shape + breach-contract
3
+ * verification for the two run-scoped execution bounds.
4
+ *
5
+ * Status: ACTIVE. RFC 0058 (run execution bounds) is `Active`. The
6
+ * `capabilities.limits.{maxRunDurationMs,maxLoopIterations}` fields and the
7
+ * `run-duration` / `loop-iterations` kinds on `cap.breached` have landed in
8
+ * `schemas/capabilities.schema.json` + `schemas/run-event-payloads.schema.json`.
9
+ *
10
+ * Always runs (shape-only): when the host advertises either limit, its value
11
+ * MUST be well-formed. Behavior is capability- AND fixture-gated. The
12
+ * `run-duration` (wall-clock timeout) block is now enforced + green against the
13
+ * in-memory reference host. The `loop-iterations` block stays soft-skipped until
14
+ * an execution-loop host advertises `multiAgent.executionModel` (RFC 0061),
15
+ * mirroring the RFC 0052 scheduling pattern.
16
+ *
17
+ * What this scenario asserts:
18
+ * 1. `capabilities.limits.maxRunDurationMs`, when present, is an integer ≥ 1000.
19
+ * 2. `capabilities.limits.maxLoopIterations`, when present, is an integer ≥ 1.
20
+ * 3. (gated) A run with `configurable.runTimeoutMs` below its real duration
21
+ * reaches terminal `failed` with `error.code = "run_timeout"` and emits
22
+ * `cap.breached { kind: "run-duration" }` whose `observed > limit`.
23
+ *
24
+ * @see RFCS/0058-run-execution-bounds.md
25
+ * @see spec/v1/run-options.md §Reserved keys (runTimeoutMs / maxLoopIterations)
26
+ * @see spec/v1/capabilities.md §"Engine-enforced limits and the cap.breached event"
27
+ * @see schemas/run-event-payloads.schema.json §capBreached
28
+ */
29
+
30
+ import { describe, it, expect } from 'vitest';
31
+ import { driver } from '../lib/driver.js';
32
+ import { pollUntilTerminal } from '../lib/polling.js';
33
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
34
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
35
+
36
+ interface DiscoveryLimits {
37
+ maxRunDurationMs?: number;
38
+ maxLoopIterations?: number;
39
+ }
40
+
41
+ interface DiscoveryDoc {
42
+ capabilities?: { limits?: DiscoveryLimits };
43
+ }
44
+
45
+ interface RunEvent {
46
+ readonly type: string;
47
+ readonly sequence: number;
48
+ readonly payload?: unknown;
49
+ }
50
+
51
+ const TIMEOUT_FIXTURE = 'conformance-run-duration-breach';
52
+
53
+ async function readLimits(): Promise<DiscoveryLimits | null> {
54
+ const res = await driver.get('/.well-known/openwop');
55
+ const body = res.json as DiscoveryDoc | undefined;
56
+ return capabilityFamily(body, 'limits') ?? null;
57
+ }
58
+
59
+ describe('run-execution-bounds-shape: advertisement shape (RFC 0058)', () => {
60
+ it('maxRunDurationMs is an integer >= 1000 when present', async () => {
61
+ const limits = await readLimits();
62
+ if (limits?.maxRunDurationMs === undefined) return; // not advertised
63
+ expect(
64
+ Number.isInteger(limits.maxRunDurationMs) && limits.maxRunDurationMs >= 1000,
65
+ driver.describe(
66
+ 'capabilities.schema.json §limits.maxRunDurationMs',
67
+ `capabilities.limits.maxRunDurationMs MUST be an integer >= 1000, got: ${limits.maxRunDurationMs}`,
68
+ ),
69
+ ).toBe(true);
70
+ });
71
+
72
+ it('maxLoopIterations is an integer >= 1 when present', async () => {
73
+ const limits = await readLimits();
74
+ if (limits?.maxLoopIterations === undefined) return; // not advertised
75
+ expect(
76
+ Number.isInteger(limits.maxLoopIterations) && limits.maxLoopIterations >= 1,
77
+ driver.describe(
78
+ 'capabilities.schema.json §limits.maxLoopIterations',
79
+ `capabilities.limits.maxLoopIterations MUST be an integer >= 1, got: ${limits.maxLoopIterations}`,
80
+ ),
81
+ ).toBe(true);
82
+ });
83
+ });
84
+
85
+ // Behavior: fixture-gated. The block below is skipped unless the host advertises
86
+ // the `conformance-run-duration-breach` fixture (i.e. it enforces run-duration timeouts).
87
+ const SKIP_TIMEOUT = !isFixtureAdvertised(TIMEOUT_FIXTURE);
88
+
89
+ describe.skipIf(SKIP_TIMEOUT)('run-execution-bounds: run-duration breach (RFC 0058)', () => {
90
+ it('a run with runTimeoutMs below its real duration fails with run_timeout + cap.breached{run-duration}', async () => {
91
+ const create = await driver.post('/v1/runs', {
92
+ workflowId: TIMEOUT_FIXTURE,
93
+ configurable: { runTimeoutMs: 1000 },
94
+ });
95
+ expect(create.status, driver.describe(
96
+ 'rest-endpoints.md POST /v1/runs',
97
+ 'run creation MUST accept a runTimeoutMs override',
98
+ )).toBe(201);
99
+ const runId = (create.json as { runId: string }).runId;
100
+
101
+ const terminal = await pollUntilTerminal(runId);
102
+ expect(terminal.status, driver.describe(
103
+ 'run-options.md §runTimeoutMs',
104
+ 'a run exceeding its runTimeoutMs MUST reach terminal `failed`',
105
+ )).toBe('failed');
106
+ expect(terminal.error?.code, driver.describe(
107
+ 'rest-endpoints.md §run_timeout',
108
+ 'RunSnapshot.error.code MUST equal "run_timeout" on wall-clock timeout',
109
+ )).toBe('run_timeout');
110
+
111
+ const eventsRes = await driver.get(
112
+ `/v1/runs/${encodeURIComponent(runId)}/events/poll?lastSequence=0&timeout=1`,
113
+ );
114
+ const events = (eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? [];
115
+ const breach = events.find((e) => e.type === 'cap.breached');
116
+ expect(breach, driver.describe(
117
+ 'capabilities.md §Engine-enforced limits',
118
+ 'a cap.breached event MUST be emitted on run-duration breach',
119
+ )).toBeDefined();
120
+ const payload = breach!.payload as { kind?: string; limit?: number; observed?: number } | undefined;
121
+ expect(payload?.kind, driver.describe(
122
+ 'run-event-payloads.schema.json §capBreached.kind',
123
+ 'cap.breached payload MUST carry kind="run-duration"',
124
+ )).toBe('run-duration');
125
+ expect(
126
+ typeof payload?.observed === 'number' && typeof payload?.limit === 'number' && payload!.observed > payload!.limit,
127
+ driver.describe(
128
+ 'run-event-payloads.schema.json §capBreached.observed',
129
+ 'observed (elapsedMs) MUST be strictly greater than limit (resolved timeout)',
130
+ ),
131
+ ).toBe(true);
132
+ });
133
+ });
@@ -15,6 +15,7 @@
15
15
 
16
16
  import { describe, it, expect } from 'vitest';
17
17
  import { driver } from '../lib/driver.js';
18
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
18
19
 
19
20
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
20
21
 
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; memoryLimitBytes?: n
26
27
  try {
27
28
  const r = await driver.get('/.well-known/openwop');
28
29
  if (r.status !== 200) return null;
29
- const sb = (r.json as D).capabilities?.sandbox;
30
+ const sb = capabilityFamily((r.json as D), 'sandbox');
30
31
  if (!sb || sb.supported !== true) return null;
31
32
  return {
32
33
  supported: true,
@@ -37,6 +37,7 @@
37
37
 
38
38
  import { describe, it, expect } from 'vitest';
39
39
  import { driver } from '../lib/driver.js';
40
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
40
41
 
41
42
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
42
43
 
@@ -66,7 +67,7 @@ async function isSandboxAdvertised(): Promise<boolean> {
66
67
  try {
67
68
  const res = await driver.get('/.well-known/openwop');
68
69
  if (res.status !== 200) return false;
69
- return (res.json as DiscoveryDoc).capabilities?.sandbox?.supported === true;
70
+ return capabilityFamily((res.json as DiscoveryDoc), 'sandbox')?.supported === true;
70
71
  } catch {
71
72
  return false;
72
73
  }
@@ -26,6 +26,7 @@
26
26
 
27
27
  import { describe, it, expect } from 'vitest';
28
28
  import { driver } from '../lib/driver.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
29
30
 
30
31
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
31
32
 
@@ -45,7 +46,7 @@ async function readSandboxCaps(): Promise<SandboxCaps | null> {
45
46
  try {
46
47
  const res = await driver.get('/.well-known/openwop');
47
48
  if (res.status !== 200) return null;
48
- return (res.json as DiscoveryDoc).capabilities?.sandbox ?? null;
49
+ return capabilityFamily((res.json as DiscoveryDoc), 'sandbox') ?? null;
49
50
  } catch {
50
51
  return null;
51
52
  }
@@ -15,6 +15,7 @@
15
15
 
16
16
  import { describe, it, expect } from 'vitest';
17
17
  import { driver } from '../lib/driver.js';
18
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
18
19
 
19
20
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
20
21
 
@@ -26,7 +27,7 @@ async function readSandbox(): Promise<{ supported: boolean; wallClockLimitMs?: n
26
27
  try {
27
28
  const r = await driver.get('/.well-known/openwop');
28
29
  if (r.status !== 200) return null;
29
- const sb = (r.json as D).capabilities?.sandbox;
30
+ const sb = capabilityFamily((r.json as D), 'sandbox');
30
31
  if (!sb || sb.supported !== true) return null;
31
32
  return {
32
33
  supported: true,
@@ -20,6 +20,7 @@
20
20
 
21
21
  import { describe, it, expect } from 'vitest';
22
22
  import { driver } from '../lib/driver.js';
23
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
23
24
 
24
25
  interface DiscoveryScheduling {
25
26
  supported?: boolean;
@@ -39,7 +40,7 @@ const ISO_DURATION = /^P(?:\d+Y)?(?:\d+M)?(?:\d+W)?(?:\d+D)?(?:T(?:\d+H)?(?:\d+M
39
40
  async function readScheduling(): Promise<DiscoveryScheduling | null> {
40
41
  const res = await driver.get('/.well-known/openwop');
41
42
  const body = res.json as DiscoveryDoc | undefined;
42
- return body?.capabilities?.scheduling ?? null;
43
+ return capabilityFamily(body, 'scheduling') ?? null;
43
44
  }
44
45
 
45
46
  describe('scheduling-capability-shape: advertisement shape (RFC 0052 §A)', () => {
@@ -26,6 +26,7 @@
26
26
 
27
27
  import { describe, it, expect } from 'vitest';
28
28
  import { driver } from '../lib/driver.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
29
30
 
30
31
  interface DiscoveryDoc {
31
32
  capabilities?: { scheduling?: { supported?: boolean; cron?: boolean } };
@@ -33,7 +34,7 @@ interface DiscoveryDoc {
33
34
 
34
35
  async function readScheduling(): Promise<{ supported?: boolean; cron?: boolean } | null> {
35
36
  const res = await driver.get('/.well-known/openwop');
36
- return (res.json as DiscoveryDoc | undefined)?.capabilities?.scheduling ?? null;
37
+ return capabilityFamily((res.json as DiscoveryDoc | undefined), 'scheduling') ?? null;
37
38
  }
38
39
 
39
40
  describe('scheduling-cron-fires-once: once-per-tick + missed-tick (RFC 0052 §B)', () => {
@@ -55,6 +55,7 @@ import { describe, it, expect } from 'vitest';
55
55
  import { driver } from '../lib/driver.js';
56
56
  import { pollUntilTerminal } from '../lib/polling.js';
57
57
  import { isFixtureAdvertised } from '../lib/fixtures.js';
58
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
58
59
 
59
60
  const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
60
61
  const BYOK_WORKFLOW_ID = 'openwop-smoke-byok-roundtrip';
@@ -99,8 +100,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
99
100
  return;
100
101
  }
101
102
  const d = await readDiscovery();
102
- const secretsOk = d?.capabilities?.secrets?.supported === true;
103
- const seamOk = d?.capabilities?.observability?.testSeams?.otelScrape === true;
103
+ const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
104
+ const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
104
105
  if (!secretsOk || !seamOk) {
105
106
  ctx.skip();
106
107
  return;
@@ -168,8 +169,8 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
168
169
  return;
169
170
  }
170
171
  const d = await readDiscovery();
171
- const secretsOk = d?.capabilities?.secrets?.supported === true;
172
- const seamOk = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
172
+ const secretsOk = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported === true;
173
+ const seamOk = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
173
174
  if (!secretsOk || !seamOk) {
174
175
  ctx.skip();
175
176
  return;
@@ -209,11 +210,11 @@ describe.skipIf(HTTP_SKIP || FIXTURE_SKIP)(
209
210
  () => {
210
211
  it('when secrets.supported is true, observability.testSeams advertisements MUST be boolean if present', async (ctx) => {
211
212
  const d = await readDiscovery();
212
- if (d?.capabilities?.secrets?.supported !== true) {
213
+ if (capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported !== true) {
213
214
  ctx.skip();
214
215
  return;
215
216
  }
216
- const seams = d?.capabilities?.observability?.testSeams;
217
+ const seams = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams;
217
218
  if (seams === undefined) {
218
219
  ctx.skip(); // host honest about not exposing the seams — Drift #17 path
219
220
  return;
@@ -1019,7 +1019,7 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
1019
1019
  // `run.annotated` (RFC 0056) is a live SSE notification carrying an
1020
1020
  // Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
1021
1021
  // enum (annotations are a side-resource, excluded from fork/replay).
1022
- const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated']);
1022
+ const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated', 'heartbeat.evaluated', 'heartbeat.stateChanged']);
1023
1023
 
1024
1024
  expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);
1025
1025
 
@@ -0,0 +1,33 @@
1
+ /**
2
+ * subrun-approval-fail-closed — RFC 0063 §C. A parent that terminates or whose
3
+ * approval interrupt expires WITHOUT an `accept`/`edit-accept` MUST NOT merge the
4
+ * child outputs. Absence of an approval is denial — backs the proposed
5
+ * protocol-tier SECURITY invariant `subrun-merge-approval-fail-closed` (lands
6
+ * with this test promoted to load-bearing at reference-host implementation).
7
+ *
8
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
9
+ * seam; soft-skips when either is absent.
10
+ *
11
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
12
+ * @see SECURITY/invariants.yaml — subrun-merge-approval-fail-closed (lands at impl)
13
+ */
14
+
15
+ import { describe, it, expect } from 'vitest';
16
+ import { driver } from '../lib/driver.js';
17
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
18
+
19
+ describe('subrun-approval-fail-closed (RFC 0063 §C)', () => {
20
+ it('no accept/edit-accept (terminated or expired) MUST NOT merge', async () => {
21
+ if ((await readSubRunAttestationCap()) !== true) return;
22
+ // approvalAction omitted models a run that terminated without a response.
23
+ const res = await invokeSubRunAttest({
24
+ childOutputs: { artifact: 'unverified' },
25
+ outputAttestation: { requireApproval: true },
26
+ });
27
+ if (res === null) return; // seam absent — soft-skip
28
+ expect(
29
+ res.merged,
30
+ driver.describe('RFC 0063 §C', 'an unresolved approval MUST fail closed — outputs MUST NOT be merged'),
31
+ ).toBe(false);
32
+ });
33
+ });
@@ -0,0 +1,35 @@
1
+ /**
2
+ * subrun-approval-gate — RFC 0063 §C. When `requireApproval: true`, the host
3
+ * suspends before merge; `accept` merges the child outputs, `reject` does not.
4
+ *
5
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
6
+ * seam; soft-skips when either is absent.
7
+ *
8
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
9
+ * @see spec/v1/interrupt.md — `approval` kind + resume actions (RFC 0051, reused)
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
15
+
16
+ describe('subrun-approval-gate (RFC 0063 §C)', () => {
17
+ it('accept merges the child outputs; reject does not', async () => {
18
+ if ((await readSubRunAttestationCap()) !== true) return;
19
+ const base = { childOutputs: { artifact: 'x' }, outputAttestation: { requireApproval: true } };
20
+
21
+ const accepted = await invokeSubRunAttest({ ...base, approvalAction: 'accept' });
22
+ if (accepted === null) return; // seam absent — soft-skip
23
+ expect(
24
+ accepted.merged,
25
+ driver.describe('RFC 0063 §C', 'an `accept` approval MUST merge the child outputs'),
26
+ ).toBe(true);
27
+
28
+ const rejected = await invokeSubRunAttest({ ...base, approvalAction: 'reject' });
29
+ if (rejected === null) return;
30
+ expect(
31
+ rejected.merged,
32
+ driver.describe('RFC 0063 §C', 'a `reject` approval MUST NOT merge the child outputs'),
33
+ ).toBe(false);
34
+ });
35
+ });
@@ -0,0 +1,30 @@
1
+ /**
2
+ * subrun-attestation-shape — RFC 0063 §A. The `capabilities.agents.subRunAttestation`
3
+ * advertisement flag is either absent or a boolean.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
6
+ * in the sibling subrun-*.test.ts scenarios, gated on the flag + the host
7
+ * sub-run attestation seam.
8
+ *
9
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §A
10
+ * @see spec/v1/node-packs.md §"`outputAttestation` — verify-before-merge"
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readSubRunAttestationCap } from '../lib/subRunAttestation.js';
16
+
17
+ describe('subrun-attestation-shape: advertisement (RFC 0063 §A)', () => {
18
+ it('capabilities.agents.subRunAttestation is absent or a boolean', async () => {
19
+ const cap = await readSubRunAttestationCap();
20
+ // null = unadvertised (no agents block OR flag omitted) — valid.
21
+ if (cap === null) return;
22
+ expect(
23
+ typeof cap,
24
+ driver.describe(
25
+ 'capabilities.schema.json §agents.subRunAttestation',
26
+ 'agents.subRunAttestation MUST be a boolean when present',
27
+ ),
28
+ ).toBe('boolean');
29
+ });
30
+ });
@@ -0,0 +1,43 @@
1
+ /**
2
+ * subrun-checksum-stable — RFC 0063 §B. A child's output checksum is byte-stable
3
+ * for identical outputs and host-independent (the RFC 8785 JCS + SHA-256 recipe
4
+ * pinned in replay.md), and is surfaced as the `attestation` object on the
5
+ * existing `core.workflowChain.event { phase: 'output.harvested' }`.
6
+ *
7
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
8
+ * seam; soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §B
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
16
+
17
+ describe('subrun-checksum-stable (RFC 0063 §B)', () => {
18
+ it('identical child outputs produce an identical sha256 attestation checksum', async () => {
19
+ if ((await readSubRunAttestationCap()) !== true) return;
20
+ const childOutputs = { report: 'done', score: 0.9, tags: ['a', 'b'] };
21
+ const a = await invokeSubRunAttest({ childOutputs, outputAttestation: { checksum: true } });
22
+ if (a === null) return; // seam absent — soft-skip
23
+ // Key-reordered but value-identical: JCS canonicalization MUST yield the same hash.
24
+ const b = await invokeSubRunAttest({
25
+ childOutputs: { tags: ['a', 'b'], score: 0.9, report: 'done' },
26
+ outputAttestation: { checksum: true },
27
+ });
28
+ if (b === null) return;
29
+ const att = a.attestation ?? {};
30
+ expect(
31
+ typeof att.checksum === 'string' && (att.checksum as string).length > 0,
32
+ driver.describe('RFC 0063 §B', 'output.harvested MUST carry a non-empty attestation.checksum when checksum:true'),
33
+ ).toBe(true);
34
+ expect(
35
+ att.algorithm,
36
+ driver.describe('RFC 0063 §B', 'attestation.algorithm MUST be "sha256" (the v1 recipe)'),
37
+ ).toBe('sha256');
38
+ expect(
39
+ (b.attestation ?? {}).checksum,
40
+ driver.describe('RFC 0063 §B', 'JCS canonicalization MUST make the checksum invariant to key order — same content, same hash'),
41
+ ).toBe(att.checksum);
42
+ });
43
+ });
@@ -0,0 +1,39 @@
1
+ /**
2
+ * tool-hooks-authorization-fail-closed — RFC 0064 §C. A principal lacking a
3
+ * tool's required scope (or whose authorization cannot be evaluated) gets
4
+ * `agent.toolReturned { status: 'forbidden' }` and the tool is never invoked —
5
+ * the per-tool application of RFC 0049's `authorization-fail-closed` invariant.
6
+ *
7
+ * Gated on `capabilities.toolHooks.perToolAuthorization` + the host tool-hooks
8
+ * seam; soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §C
11
+ * @see SECURITY/invariants.yaml — authorization-fail-closed (RFC 0049, reused)
12
+ */
13
+
14
+ import { describe, it, expect } from 'vitest';
15
+ import { driver } from '../lib/driver.js';
16
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
17
+
18
+ describe('tool-hooks-authorization-fail-closed (RFC 0064 §C)', () => {
19
+ it('a principal lacking a tool scope is denied and the tool is not invoked', async () => {
20
+ const cap = await readToolHooksCap();
21
+ if (cap?.perToolAuthorization !== true) return;
22
+ // A principal with no scopes against a tool requiring one MUST be denied.
23
+ const res = await invokeToolHook({
24
+ principal: 'conformance-unprivileged',
25
+ toolName: 'db.delete',
26
+ requiredScopes: ['db:write'],
27
+ args: {},
28
+ });
29
+ if (res === null) return; // seam absent — soft-skip
30
+ expect(
31
+ (res.toolReturned ?? {}).status,
32
+ driver.describe('RFC 0064 §C', 'a missing/unevaluable tool scope MUST fail closed → status:"forbidden"'),
33
+ ).toBe('forbidden');
34
+ expect(
35
+ (res.toolReturned ?? {}).durationMs,
36
+ driver.describe('RFC 0064 §C', 'a forbidden call never starts, so durationMs MUST be absent'),
37
+ ).toBeUndefined();
38
+ });
39
+ });
@@ -0,0 +1,40 @@
1
+ /**
2
+ * tool-hooks-content-free — RFC 0064 §B. When `prePostEvents`, a tool call's
3
+ * `agent.toolCalled` carries `argsHash` (the content-free, SIEM-safe
4
+ * alternative to raw `inputs`) + `agent.toolReturned` carries `status` +
5
+ * `durationMs`.
6
+ *
7
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
8
+ * soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
16
+
17
+ describe('tool-hooks-content-free (RFC 0064 §B)', () => {
18
+ it('toolCalled carries argsHash; toolReturned carries status + durationMs', async () => {
19
+ const cap = await readToolHooksCap();
20
+ if (cap?.prePostEvents !== true) return;
21
+ const res = await invokeToolHook({ principal: 'core.system', toolName: 'web.search', args: { q: 'openwop' } });
22
+ if (res === null) return; // seam absent — soft-skip
23
+ const called = res.toolCalled ?? {};
24
+ const returned = res.toolReturned ?? {};
25
+ expect(
26
+ typeof called.argsHash === 'string' && (called.argsHash as string).length > 0,
27
+ driver.describe('RFC 0064 §B', 'agent.toolCalled MUST carry a non-empty argsHash when prePostEvents'),
28
+ ).toBe(true);
29
+ expect(
30
+ ['ok', 'error', 'forbidden', 'rate_limited'].includes(returned.status as string),
31
+ driver.describe('RFC 0064 §B', 'agent.toolReturned MUST carry a tool-hooks status'),
32
+ ).toBe(true);
33
+ if (returned.status === 'ok') {
34
+ expect(
35
+ typeof returned.durationMs === 'number' && (returned.durationMs as number) >= 0,
36
+ driver.describe('RFC 0064 §B', 'a completed tool call MUST record a non-negative durationMs'),
37
+ ).toBe(true);
38
+ }
39
+ });
40
+ });
@@ -0,0 +1,32 @@
1
+ /**
2
+ * tool-hooks-rate-limit — RFC 0064 §D. Exhausting a `(principal, tool)` token
3
+ * bucket → `agent.toolReturned { status: 'rate_limited' }` and the tool is not
4
+ * invoked, surfacing the existing `rate_limited` (429) error.
5
+ *
6
+ * Gated on `capabilities.toolHooks.perToolRateLimit` + the host tool-hooks
7
+ * seam; soft-skips when either is absent.
8
+ *
9
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §D
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
15
+
16
+ describe('tool-hooks-rate-limit (RFC 0064 §D)', () => {
17
+ it('an exhausted (principal, tool) bucket yields status:"rate_limited"', async () => {
18
+ const cap = await readToolHooksCap();
19
+ if (cap?.perToolRateLimit !== true) return;
20
+ const res = await invokeToolHook({
21
+ principal: 'core.system',
22
+ toolName: 'web.search',
23
+ args: { q: 'x' },
24
+ simulateRateLimitExhausted: true,
25
+ });
26
+ if (res === null) return; // seam absent — soft-skip
27
+ expect(
28
+ (res.toolReturned ?? {}).status,
29
+ driver.describe('RFC 0064 §D', 'an exhausted token bucket MUST yield status:"rate_limited" without invoking the tool'),
30
+ ).toBe('rate_limited');
31
+ });
32
+ });
@@ -0,0 +1,34 @@
1
+ /**
2
+ * tool-hooks-secret-redaction — RFC 0064 §B/§E. A tool arg containing a
3
+ * resolved secret is redacted (SR-1) before hashing; the raw value never
4
+ * appears in `argsHash` or anywhere in the emitted `agent.toolCalled` /
5
+ * `agent.toolReturned` pair.
6
+ *
7
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
8
+ * soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B (argsHash SR-1 redaction), §E (credentials)
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
16
+
17
+ const CANARY = 'sk-canary-rfc0064-do-not-leak-xyz789';
18
+
19
+ describe('tool-hooks-secret-redaction (RFC 0064 §E)', () => {
20
+ it('a secret-shaped tool arg never appears in the emitted events', async () => {
21
+ const cap = await readToolHooksCap();
22
+ if (cap?.prePostEvents !== true) return;
23
+ const res = await invokeToolHook({
24
+ principal: 'core.system',
25
+ toolName: 'web.search',
26
+ args: { apiKey: CANARY, q: 'openwop' },
27
+ });
28
+ if (res === null) return; // seam absent — soft-skip
29
+ expect(
30
+ JSON.stringify(res).includes(CANARY),
31
+ driver.describe('RFC 0064 §B', 'a resolved secret MUST be redacted before hashing; the raw value MUST NOT appear in argsHash or any emitted field (SR-1)'),
32
+ ).toBe(false);
33
+ });
34
+ });
@@ -0,0 +1,34 @@
1
+ /**
2
+ * tool-hooks-shape — RFC 0064 §A. The `capabilities.toolHooks` advertisement
3
+ * block is either absent or a well-formed object.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
6
+ * in the sibling tool-hooks-*.test.ts scenarios, gated on the sub-flags + the
7
+ * host tool-hooks seam.
8
+ *
9
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §A
10
+ * @see spec/v1/host-capabilities.md §host.toolHooks
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap } from '../lib/toolHooks.js';
16
+
17
+ describe('tool-hooks-shape: advertisement (RFC 0064 §A)', () => {
18
+ it('capabilities.toolHooks is absent or a well-formed object', async () => {
19
+ const cap = await readToolHooksCap();
20
+ if (cap === null) return; // not advertised — valid
21
+ expect(
22
+ typeof cap.supported,
23
+ driver.describe('capabilities.schema.json §toolHooks', 'toolHooks.supported MUST be a boolean when the block is present'),
24
+ ).toBe('boolean');
25
+ for (const k of ['prePostEvents', 'perToolAuthorization', 'perToolRateLimit'] as const) {
26
+ if (cap[k] !== undefined) {
27
+ expect(
28
+ typeof cap[k],
29
+ driver.describe('capabilities.schema.json §toolHooks', `toolHooks.${k} MUST be a boolean when present`),
30
+ ).toBe('boolean');
31
+ }
32
+ }
33
+ });
34
+ });
@@ -26,6 +26,7 @@
26
26
 
27
27
  import { describe, it, expect } from 'vitest';
28
28
  import { driver } from '../lib/driver.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
29
30
 
30
31
  const MISBEHAVING_PACK_NAME = 'vendor.openwop.misbehaving-abi';
31
32
  const WELL_BEHAVED_PACK_NAME = 'vendor.openwop.rust-hello';
@@ -34,9 +35,7 @@ describe('wasm-pack-abi-version-rejection: host advertises supported ABI version
34
35
  it('abiVersions[] contains positive integers; loader rejects unsupported versions', async () => {
35
36
  const disco = await driver.get('/.well-known/openwop');
36
37
  const wasm =
37
- (disco.json as {
38
- capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean; abiVersions?: unknown } } };
39
- }).capabilities?.nodePackRuntimes?.wasm;
38
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
40
39
 
41
40
  if (!wasm?.supported) return;
42
41
 
@@ -62,13 +61,7 @@ describe('wasm-pack-abi-version-rejection: positive path via misbehaving pack',
62
61
  it('misbehaving-abi pack (declares ABI 999) MUST NOT appear in loadedPacks[]', async () => {
63
62
  const disco = await driver.get('/.well-known/openwop');
64
63
  const wasm =
65
- (disco.json as {
66
- capabilities?: {
67
- nodePackRuntimes?: {
68
- wasm?: { supported?: boolean; loadedPacks?: unknown };
69
- };
70
- };
71
- }).capabilities?.nodePackRuntimes?.wasm;
64
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
72
65
 
73
66
  if (!wasm?.supported) return;
74
67