@openwop/openwop-conformance 1.6.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +74 -1
  4. package/api/openapi.yaml +316 -0
  5. package/coverage.md +16 -0
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures.md +19 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +12 -0
  10. package/schemas/agent-inventory-response.schema.json +90 -0
  11. package/schemas/ai-envelope.schema.json +28 -0
  12. package/schemas/annotation-create.schema.json +37 -0
  13. package/schemas/annotation.schema.json +56 -0
  14. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  15. package/schemas/capabilities.schema.json +195 -4
  16. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  17. package/schemas/envelopes/media.audio.schema.json +38 -0
  18. package/schemas/envelopes/media.file.schema.json +37 -0
  19. package/schemas/envelopes/media.image.schema.json +33 -0
  20. package/schemas/heartbeat-evaluated.schema.json +14 -0
  21. package/schemas/heartbeat-state-changed.schema.json +14 -0
  22. package/schemas/node-pack-manifest.schema.json +16 -1
  23. package/schemas/run-event-payloads.schema.json +96 -5
  24. package/schemas/run-event.schema.json +4 -0
  25. package/schemas/workflow-definition.schema.json +5 -0
  26. package/schemas/workspace-file-create.schema.json +20 -0
  27. package/schemas/workspace-file.schema.json +39 -0
  28. package/src/lib/agentLoop.ts +44 -0
  29. package/src/lib/agentRuntime.ts +45 -0
  30. package/src/lib/artifactTypes.ts +96 -0
  31. package/src/lib/cardPacks.ts +52 -0
  32. package/src/lib/discovery-capabilities.ts +50 -0
  33. package/src/lib/distillation.ts +38 -0
  34. package/src/lib/feedback.ts +31 -0
  35. package/src/lib/heartbeat.ts +31 -0
  36. package/src/lib/memoryAttribution.ts +48 -0
  37. package/src/lib/subRunAttestation.ts +35 -0
  38. package/src/lib/toolHooks.ts +33 -0
  39. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  40. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  41. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  42. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  43. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  44. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  45. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  46. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  47. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  48. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  49. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  50. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  51. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  52. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  53. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  54. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  55. package/src/scenarios/auth-mtls.test.ts +2 -1
  56. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  57. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  58. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  59. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  60. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  61. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  62. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  63. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  64. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  65. package/src/scenarios/commitment-fired.test.ts +83 -0
  66. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  67. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  68. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  69. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  70. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  71. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  72. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  73. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  74. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  75. package/src/scenarios/distillation-shape.test.ts +41 -0
  76. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  77. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  78. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  79. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  80. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  81. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  82. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  83. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  84. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  85. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  86. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  87. package/src/scenarios/feedback-capability-shape.test.ts +35 -0
  88. package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
  89. package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
  90. package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
  91. package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
  92. package/src/scenarios/feedback-record-and-list.test.ts +32 -0
  93. package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
  94. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  95. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  96. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  97. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  98. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  99. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  100. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  101. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  102. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  103. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  104. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  105. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  106. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  107. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  108. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  109. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  110. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  111. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  112. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  113. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  114. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  115. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  116. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  117. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  118. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  119. package/src/scenarios/pause-resume.test.ts +3 -3
  120. package/src/scenarios/production-backpressure.test.ts +2 -2
  121. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  122. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  123. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  124. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  125. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  126. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  127. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  128. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  129. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  130. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  131. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  132. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  133. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  134. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  135. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  136. package/src/scenarios/provider-usage.test.ts +2 -1
  137. package/src/scenarios/redaction.test.ts +4 -1
  138. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  139. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  140. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  141. package/src/scenarios/replayDeterminism.test.ts +3 -1
  142. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  143. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  144. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  145. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  146. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  147. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  148. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  149. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  150. package/src/scenarios/spec-corpus-validity.test.ts +4 -1
  151. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  152. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  153. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  154. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  155. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  156. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  157. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  158. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  159. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  160. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  161. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  162. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  163. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  164. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  165. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  166. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  167. package/src/scenarios/workspace-behavior.test.ts +134 -0
  168. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  169. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
@@ -1016,7 +1016,10 @@ describe('spec-corpus: AsyncAPI 3.1 spec is structurally valid', () => {
1016
1016
  const messageNames = extractAsyncApiMessageNames(raw);
1017
1017
  const runEventSchema = readJson(join(SCHEMAS_DIR, 'run-event.schema.json'));
1018
1018
  const runEventTypes = new Set(findRunEventTypeEnum(runEventSchema));
1019
- const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any']);
1019
+ // `run.annotated` (RFC 0056) is a live SSE notification carrying an
1020
+ // Annotation — NOT a RunEventDoc and deliberately NOT in the RunEventType
1021
+ // enum (annotations are a side-resource, excluded from fork/replay).
1022
+ const syntheticMessageNames = new Set(['state.snapshot', 'ai.message.chunk', 'any', 'run.annotated', 'heartbeat.evaluated', 'heartbeat.stateChanged']);
1020
1023
 
1021
1024
  expect(messageNames.length, 'AsyncAPI MUST declare named SSE messages').toBeGreaterThan(0);
1022
1025
 
@@ -0,0 +1,33 @@
1
+ /**
2
+ * subrun-approval-fail-closed — RFC 0063 §C. A parent that terminates or whose
3
+ * approval interrupt expires WITHOUT an `accept`/`edit-accept` MUST NOT merge the
4
+ * child outputs. Absence of an approval is denial — backs the proposed
5
+ * protocol-tier SECURITY invariant `subrun-merge-approval-fail-closed` (lands
6
+ * with this test promoted to load-bearing at reference-host implementation).
7
+ *
8
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
9
+ * seam; soft-skips when either is absent.
10
+ *
11
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
12
+ * @see SECURITY/invariants.yaml — subrun-merge-approval-fail-closed (lands at impl)
13
+ */
14
+
15
+ import { describe, it, expect } from 'vitest';
16
+ import { driver } from '../lib/driver.js';
17
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
18
+
19
+ describe('subrun-approval-fail-closed (RFC 0063 §C)', () => {
20
+ it('no accept/edit-accept (terminated or expired) MUST NOT merge', async () => {
21
+ if ((await readSubRunAttestationCap()) !== true) return;
22
+ // approvalAction omitted models a run that terminated without a response.
23
+ const res = await invokeSubRunAttest({
24
+ childOutputs: { artifact: 'unverified' },
25
+ outputAttestation: { requireApproval: true },
26
+ });
27
+ if (res === null) return; // seam absent — soft-skip
28
+ expect(
29
+ res.merged,
30
+ driver.describe('RFC 0063 §C', 'an unresolved approval MUST fail closed — outputs MUST NOT be merged'),
31
+ ).toBe(false);
32
+ });
33
+ });
@@ -0,0 +1,35 @@
1
+ /**
2
+ * subrun-approval-gate — RFC 0063 §C. When `requireApproval: true`, the host
3
+ * suspends before merge; `accept` merges the child outputs, `reject` does not.
4
+ *
5
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
6
+ * seam; soft-skips when either is absent.
7
+ *
8
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §C
9
+ * @see spec/v1/interrupt.md — `approval` kind + resume actions (RFC 0051, reused)
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
15
+
16
+ describe('subrun-approval-gate (RFC 0063 §C)', () => {
17
+ it('accept merges the child outputs; reject does not', async () => {
18
+ if ((await readSubRunAttestationCap()) !== true) return;
19
+ const base = { childOutputs: { artifact: 'x' }, outputAttestation: { requireApproval: true } };
20
+
21
+ const accepted = await invokeSubRunAttest({ ...base, approvalAction: 'accept' });
22
+ if (accepted === null) return; // seam absent — soft-skip
23
+ expect(
24
+ accepted.merged,
25
+ driver.describe('RFC 0063 §C', 'an `accept` approval MUST merge the child outputs'),
26
+ ).toBe(true);
27
+
28
+ const rejected = await invokeSubRunAttest({ ...base, approvalAction: 'reject' });
29
+ if (rejected === null) return;
30
+ expect(
31
+ rejected.merged,
32
+ driver.describe('RFC 0063 §C', 'a `reject` approval MUST NOT merge the child outputs'),
33
+ ).toBe(false);
34
+ });
35
+ });
@@ -0,0 +1,30 @@
1
+ /**
2
+ * subrun-attestation-shape — RFC 0063 §A. The `capabilities.agents.subRunAttestation`
3
+ * advertisement flag is either absent or a boolean.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
6
+ * in the sibling subrun-*.test.ts scenarios, gated on the flag + the host
7
+ * sub-run attestation seam.
8
+ *
9
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §A
10
+ * @see spec/v1/node-packs.md §"`outputAttestation` — verify-before-merge"
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readSubRunAttestationCap } from '../lib/subRunAttestation.js';
16
+
17
+ describe('subrun-attestation-shape: advertisement (RFC 0063 §A)', () => {
18
+ it('capabilities.agents.subRunAttestation is absent or a boolean', async () => {
19
+ const cap = await readSubRunAttestationCap();
20
+ // null = unadvertised (no agents block OR flag omitted) — valid.
21
+ if (cap === null) return;
22
+ expect(
23
+ typeof cap,
24
+ driver.describe(
25
+ 'capabilities.schema.json §agents.subRunAttestation',
26
+ 'agents.subRunAttestation MUST be a boolean when present',
27
+ ),
28
+ ).toBe('boolean');
29
+ });
30
+ });
@@ -0,0 +1,43 @@
1
+ /**
2
+ * subrun-checksum-stable — RFC 0063 §B. A child's output checksum is byte-stable
3
+ * for identical outputs and host-independent (the RFC 8785 JCS + SHA-256 recipe
4
+ * pinned in replay.md), and is surfaced as the `attestation` object on the
5
+ * existing `core.workflowChain.event { phase: 'output.harvested' }`.
6
+ *
7
+ * Gated on `capabilities.agents.subRunAttestation` + the host sub-run attestation
8
+ * seam; soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0063-subrun-output-attestation-and-merge-gating.md §B
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readSubRunAttestationCap, invokeSubRunAttest } from '../lib/subRunAttestation.js';
16
+
17
+ describe('subrun-checksum-stable (RFC 0063 §B)', () => {
18
+ it('identical child outputs produce an identical sha256 attestation checksum', async () => {
19
+ if ((await readSubRunAttestationCap()) !== true) return;
20
+ const childOutputs = { report: 'done', score: 0.9, tags: ['a', 'b'] };
21
+ const a = await invokeSubRunAttest({ childOutputs, outputAttestation: { checksum: true } });
22
+ if (a === null) return; // seam absent — soft-skip
23
+ // Key-reordered but value-identical: JCS canonicalization MUST yield the same hash.
24
+ const b = await invokeSubRunAttest({
25
+ childOutputs: { tags: ['a', 'b'], score: 0.9, report: 'done' },
26
+ outputAttestation: { checksum: true },
27
+ });
28
+ if (b === null) return;
29
+ const att = a.attestation ?? {};
30
+ expect(
31
+ typeof att.checksum === 'string' && (att.checksum as string).length > 0,
32
+ driver.describe('RFC 0063 §B', 'output.harvested MUST carry a non-empty attestation.checksum when checksum:true'),
33
+ ).toBe(true);
34
+ expect(
35
+ att.algorithm,
36
+ driver.describe('RFC 0063 §B', 'attestation.algorithm MUST be "sha256" (the v1 recipe)'),
37
+ ).toBe('sha256');
38
+ expect(
39
+ (b.attestation ?? {}).checksum,
40
+ driver.describe('RFC 0063 §B', 'JCS canonicalization MUST make the checksum invariant to key order — same content, same hash'),
41
+ ).toBe(att.checksum);
42
+ });
43
+ });
@@ -0,0 +1,39 @@
1
+ /**
2
+ * tool-hooks-authorization-fail-closed — RFC 0064 §C. A principal lacking a
3
+ * tool's required scope (or whose authorization cannot be evaluated) gets
4
+ * `agent.toolReturned { status: 'forbidden' }` and the tool is never invoked —
5
+ * the per-tool application of RFC 0049's `authorization-fail-closed` invariant.
6
+ *
7
+ * Gated on `capabilities.toolHooks.perToolAuthorization` + the host tool-hooks
8
+ * seam; soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §C
11
+ * @see SECURITY/invariants.yaml — authorization-fail-closed (RFC 0049, reused)
12
+ */
13
+
14
+ import { describe, it, expect } from 'vitest';
15
+ import { driver } from '../lib/driver.js';
16
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
17
+
18
+ describe('tool-hooks-authorization-fail-closed (RFC 0064 §C)', () => {
19
+ it('a principal lacking a tool scope is denied and the tool is not invoked', async () => {
20
+ const cap = await readToolHooksCap();
21
+ if (cap?.perToolAuthorization !== true) return;
22
+ // A principal with no scopes against a tool requiring one MUST be denied.
23
+ const res = await invokeToolHook({
24
+ principal: 'conformance-unprivileged',
25
+ toolName: 'db.delete',
26
+ requiredScopes: ['db:write'],
27
+ args: {},
28
+ });
29
+ if (res === null) return; // seam absent — soft-skip
30
+ expect(
31
+ (res.toolReturned ?? {}).status,
32
+ driver.describe('RFC 0064 §C', 'a missing/unevaluable tool scope MUST fail closed → status:"forbidden"'),
33
+ ).toBe('forbidden');
34
+ expect(
35
+ (res.toolReturned ?? {}).durationMs,
36
+ driver.describe('RFC 0064 §C', 'a forbidden call never starts, so durationMs MUST be absent'),
37
+ ).toBeUndefined();
38
+ });
39
+ });
@@ -0,0 +1,40 @@
1
+ /**
2
+ * tool-hooks-content-free — RFC 0064 §B. When `prePostEvents`, a tool call's
3
+ * `agent.toolCalled` carries `argsHash` (the content-free, SIEM-safe
4
+ * alternative to raw `inputs`) + `agent.toolReturned` carries `status` +
5
+ * `durationMs`.
6
+ *
7
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
8
+ * soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
16
+
17
+ describe('tool-hooks-content-free (RFC 0064 §B)', () => {
18
+ it('toolCalled carries argsHash; toolReturned carries status + durationMs', async () => {
19
+ const cap = await readToolHooksCap();
20
+ if (cap?.prePostEvents !== true) return;
21
+ const res = await invokeToolHook({ principal: 'core.system', toolName: 'web.search', args: { q: 'openwop' } });
22
+ if (res === null) return; // seam absent — soft-skip
23
+ const called = res.toolCalled ?? {};
24
+ const returned = res.toolReturned ?? {};
25
+ expect(
26
+ typeof called.argsHash === 'string' && (called.argsHash as string).length > 0,
27
+ driver.describe('RFC 0064 §B', 'agent.toolCalled MUST carry a non-empty argsHash when prePostEvents'),
28
+ ).toBe(true);
29
+ expect(
30
+ ['ok', 'error', 'forbidden', 'rate_limited'].includes(returned.status as string),
31
+ driver.describe('RFC 0064 §B', 'agent.toolReturned MUST carry a tool-hooks status'),
32
+ ).toBe(true);
33
+ if (returned.status === 'ok') {
34
+ expect(
35
+ typeof returned.durationMs === 'number' && (returned.durationMs as number) >= 0,
36
+ driver.describe('RFC 0064 §B', 'a completed tool call MUST record a non-negative durationMs'),
37
+ ).toBe(true);
38
+ }
39
+ });
40
+ });
@@ -0,0 +1,32 @@
1
+ /**
2
+ * tool-hooks-rate-limit — RFC 0064 §D. Exhausting a `(principal, tool)` token
3
+ * bucket → `agent.toolReturned { status: 'rate_limited' }` and the tool is not
4
+ * invoked, surfacing the existing `rate_limited` (429) error.
5
+ *
6
+ * Gated on `capabilities.toolHooks.perToolRateLimit` + the host tool-hooks
7
+ * seam; soft-skips when either is absent.
8
+ *
9
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §D
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
15
+
16
+ describe('tool-hooks-rate-limit (RFC 0064 §D)', () => {
17
+ it('an exhausted (principal, tool) bucket yields status:"rate_limited"', async () => {
18
+ const cap = await readToolHooksCap();
19
+ if (cap?.perToolRateLimit !== true) return;
20
+ const res = await invokeToolHook({
21
+ principal: 'core.system',
22
+ toolName: 'web.search',
23
+ args: { q: 'x' },
24
+ simulateRateLimitExhausted: true,
25
+ });
26
+ if (res === null) return; // seam absent — soft-skip
27
+ expect(
28
+ (res.toolReturned ?? {}).status,
29
+ driver.describe('RFC 0064 §D', 'an exhausted token bucket MUST yield status:"rate_limited" without invoking the tool'),
30
+ ).toBe('rate_limited');
31
+ });
32
+ });
@@ -0,0 +1,34 @@
1
+ /**
2
+ * tool-hooks-secret-redaction — RFC 0064 §B/§E. A tool arg containing a
3
+ * resolved secret is redacted (SR-1) before hashing; the raw value never
4
+ * appears in `argsHash` or anywhere in the emitted `agent.toolCalled` /
5
+ * `agent.toolReturned` pair.
6
+ *
7
+ * Gated on `capabilities.toolHooks.prePostEvents` + the host tool-hooks seam;
8
+ * soft-skips when either is absent.
9
+ *
10
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B (argsHash SR-1 redaction), §E (credentials)
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap, invokeToolHook } from '../lib/toolHooks.js';
16
+
17
+ const CANARY = 'sk-canary-rfc0064-do-not-leak-xyz789';
18
+
19
+ describe('tool-hooks-secret-redaction (RFC 0064 §E)', () => {
20
+ it('a secret-shaped tool arg never appears in the emitted events', async () => {
21
+ const cap = await readToolHooksCap();
22
+ if (cap?.prePostEvents !== true) return;
23
+ const res = await invokeToolHook({
24
+ principal: 'core.system',
25
+ toolName: 'web.search',
26
+ args: { apiKey: CANARY, q: 'openwop' },
27
+ });
28
+ if (res === null) return; // seam absent — soft-skip
29
+ expect(
30
+ JSON.stringify(res).includes(CANARY),
31
+ driver.describe('RFC 0064 §B', 'a resolved secret MUST be redacted before hashing; the raw value MUST NOT appear in argsHash or any emitted field (SR-1)'),
32
+ ).toBe(false);
33
+ });
34
+ });
@@ -0,0 +1,34 @@
1
+ /**
2
+ * tool-hooks-shape — RFC 0064 §A. The `capabilities.toolHooks` advertisement
3
+ * block is either absent or a well-formed object.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
6
+ * in the sibling tool-hooks-*.test.ts scenarios, gated on the sub-flags + the
7
+ * host tool-hooks seam.
8
+ *
9
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §A
10
+ * @see spec/v1/host-capabilities.md §host.toolHooks
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readToolHooksCap } from '../lib/toolHooks.js';
16
+
17
+ describe('tool-hooks-shape: advertisement (RFC 0064 §A)', () => {
18
+ it('capabilities.toolHooks is absent or a well-formed object', async () => {
19
+ const cap = await readToolHooksCap();
20
+ if (cap === null) return; // not advertised — valid
21
+ expect(
22
+ typeof cap.supported,
23
+ driver.describe('capabilities.schema.json §toolHooks', 'toolHooks.supported MUST be a boolean when the block is present'),
24
+ ).toBe('boolean');
25
+ for (const k of ['prePostEvents', 'perToolAuthorization', 'perToolRateLimit'] as const) {
26
+ if (cap[k] !== undefined) {
27
+ expect(
28
+ typeof cap[k],
29
+ driver.describe('capabilities.schema.json §toolHooks', `toolHooks.${k} MUST be a boolean when present`),
30
+ ).toBe('boolean');
31
+ }
32
+ }
33
+ });
34
+ });
@@ -26,6 +26,7 @@
26
26
 
27
27
  import { describe, it, expect } from 'vitest';
28
28
  import { driver } from '../lib/driver.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
29
30
 
30
31
  const MISBEHAVING_PACK_NAME = 'vendor.openwop.misbehaving-abi';
31
32
  const WELL_BEHAVED_PACK_NAME = 'vendor.openwop.rust-hello';
@@ -34,9 +35,7 @@ describe('wasm-pack-abi-version-rejection: host advertises supported ABI version
34
35
  it('abiVersions[] contains positive integers; loader rejects unsupported versions', async () => {
35
36
  const disco = await driver.get('/.well-known/openwop');
36
37
  const wasm =
37
- (disco.json as {
38
- capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean; abiVersions?: unknown } } };
39
- }).capabilities?.nodePackRuntimes?.wasm;
38
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
40
39
 
41
40
  if (!wasm?.supported) return;
42
41
 
@@ -62,13 +61,7 @@ describe('wasm-pack-abi-version-rejection: positive path via misbehaving pack',
62
61
  it('misbehaving-abi pack (declares ABI 999) MUST NOT appear in loadedPacks[]', async () => {
63
62
  const disco = await driver.get('/.well-known/openwop');
64
63
  const wasm =
65
- (disco.json as {
66
- capabilities?: {
67
- nodePackRuntimes?: {
68
- wasm?: { supported?: boolean; loadedPacks?: unknown };
69
- };
70
- };
71
- }).capabilities?.nodePackRuntimes?.wasm;
64
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
72
65
 
73
66
  if (!wasm?.supported) return;
74
67
 
@@ -16,14 +16,14 @@ import { describe, it, expect } from 'vitest';
16
16
  import { driver } from '../lib/driver.js';
17
17
  import { pollUntilTerminal } from '../lib/polling.js';
18
18
  import { isFixtureAdvertised } from '../lib/fixtures.js';
19
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
19
20
 
20
21
  const FIXTURE = 'conformance-wasm-pack-roundtrip';
21
22
 
22
23
  async function isWasmSupported(): Promise<boolean> {
23
24
  const disco = await driver.get('/.well-known/openwop');
24
25
  return Boolean(
25
- (disco.json as { capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean } } } })
26
- .capabilities?.nodePackRuntimes?.wasm?.supported,
26
+ capabilityFamily<{ wasm?: { supported?: boolean } }>(disco.json, 'nodePackRuntimes')?.wasm?.supported,
27
27
  );
28
28
  }
29
29
 
@@ -20,14 +20,14 @@ import { describe, it, expect } from 'vitest';
20
20
  import { driver } from '../lib/driver.js';
21
21
  import { pollUntilTerminal } from '../lib/polling.js';
22
22
  import { isFixtureAdvertised } from '../lib/fixtures.js';
23
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
23
24
 
24
25
  const FIXTURE = 'conformance-wasm-pack-roundtrip';
25
26
 
26
27
  async function isWasmSupported(): Promise<boolean> {
27
28
  const disco = await driver.get('/.well-known/openwop');
28
29
  return Boolean(
29
- (disco.json as { capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean } } } })
30
- .capabilities?.nodePackRuntimes?.wasm?.supported,
30
+ capabilityFamily<{ wasm?: { supported?: boolean } }>(disco.json, 'nodePackRuntimes')?.wasm?.supported,
31
31
  );
32
32
  }
33
33
 
@@ -15,6 +15,7 @@
15
15
  import { describe, it, expect } from 'vitest';
16
16
  import { driver } from '../lib/driver.js';
17
17
  import { isFixtureAdvertised } from '../lib/fixtures.js';
18
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
18
19
 
19
20
  const FIXTURE = 'conformance-wasm-pack-roundtrip';
20
21
 
@@ -28,8 +29,7 @@ interface WasmCaps {
28
29
  async function getWasmCaps(): Promise<WasmCaps | null> {
29
30
  const disco = await driver.get('/.well-known/openwop');
30
31
  const caps =
31
- (disco.json as { capabilities?: { nodePackRuntimes?: { wasm?: WasmCaps } } })
32
- .capabilities?.nodePackRuntimes?.wasm ?? null;
32
+ capabilityFamily<{ wasm?: WasmCaps }>(disco.json, 'nodePackRuntimes')?.wasm ?? null;
33
33
  return caps;
34
34
  }
35
35
 
@@ -26,6 +26,7 @@ import { describe, it, expect } from 'vitest';
26
26
  import { driver } from '../lib/driver.js';
27
27
  import { pollUntilTerminal } from '../lib/polling.js';
28
28
  import { isFixtureAdvertised } from '../lib/fixtures.js';
29
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
29
30
 
30
31
  const CAP_BREACH_FIXTURE = 'conformance-wasm-pack-memory-cap-breach';
31
32
 
@@ -33,9 +34,7 @@ describe('wasm-pack-memory-cap: host advertises maxMemoryBytes', () => {
33
34
  it('capabilities.nodePackRuntimes.wasm.maxMemoryBytes is a plausible number', async () => {
34
35
  const disco = await driver.get('/.well-known/openwop');
35
36
  const wasm =
36
- (disco.json as {
37
- capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean; maxMemoryBytes?: unknown } } };
38
- }).capabilities?.nodePackRuntimes?.wasm;
37
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
39
38
 
40
39
  if (!wasm?.supported) return;
41
40
 
@@ -64,9 +63,7 @@ describe('wasm-pack-memory-cap: positive path via misbehaving pack', () => {
64
63
  }
65
64
  const disco = await driver.get('/.well-known/openwop');
66
65
  const wasm =
67
- (disco.json as {
68
- capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean } } };
69
- }).capabilities?.nodePackRuntimes?.wasm;
66
+ capabilityFamily<{ wasm?: Record<string, unknown> }>(disco.json, 'nodePackRuntimes')?.wasm;
70
67
  if (!wasm?.supported) return;
71
68
 
72
69
  const create = await driver.post('/v1/runs', {
@@ -14,14 +14,14 @@ import { describe, it, expect } from 'vitest';
14
14
  import { driver } from '../lib/driver.js';
15
15
  import { pollUntilTerminal } from '../lib/polling.js';
16
16
  import { isFixtureAdvertised } from '../lib/fixtures.js';
17
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
17
18
 
18
19
  const FIXTURE = 'conformance-wasm-pack-roundtrip';
19
20
 
20
21
  async function isWasmSupported(): Promise<boolean> {
21
22
  const disco = await driver.get('/.well-known/openwop');
22
23
  return Boolean(
23
- (disco.json as { capabilities?: { nodePackRuntimes?: { wasm?: { supported?: boolean } } } })
24
- .capabilities?.nodePackRuntimes?.wasm?.supported,
24
+ capabilityFamily<{ wasm?: { supported?: boolean } }>(disco.json, 'nodePackRuntimes')?.wasm?.supported,
25
25
  );
26
26
  }
27
27
 
@@ -0,0 +1,142 @@
1
+ /**
2
+ * workflow-primary-output-annotation — RFC 0065 schema shape conformance.
3
+ *
4
+ * Server-free schema assertions that the optional `outputRole` field on
5
+ * `WorkflowNode` is exactly that — optional, additive, and a closed enum:
6
+ * 1. A WorkflowDefinition with one node declaring `outputRole: "primary"`
7
+ * and another declaring `outputRole: "secondary"` validates.
8
+ * 2. A WorkflowDefinition with the field absent (legacy shape) still
9
+ * validates — preserves the additive promise.
10
+ * 3. An unknown `outputRole` value is rejected by the closed enum.
11
+ * 4. The field set to a non-string is rejected.
12
+ *
13
+ * Always runs (pure on-disk Ajv2020 validation; no host involvement —
14
+ * the field has no engine-observable effect by design).
15
+ *
16
+ * @see RFCS/0065-workflow-node-primary-output-annotation.md
17
+ * @see schemas/workflow-definition.schema.json ($defs.WorkflowNode.outputRole)
18
+ */
19
+
20
+ import { describe, it, expect } from 'vitest';
21
+ import Ajv2020 from 'ajv/dist/2020.js';
22
+ import addFormats from 'ajv-formats';
23
+ import { readFileSync } from 'node:fs';
24
+ import { join } from 'node:path';
25
+ import { SCHEMAS_DIR } from '../lib/paths.js';
26
+
27
/**
 * Build an Ajv (draft 2020-12) validator for `workflow-definition.schema.json`.
 *
 * The workflow-definition schema points at sibling schema files via `$ref`,
 * so those files are loaded from `SCHEMAS_DIR` and registered with Ajv before
 * compilation.
 *
 * @returns the compiled validate function; after a failed call its `.errors`
 *          lists every violation (`allErrors: true`).
 */
function compileWorkflowDefinition(): ReturnType<Ajv2020['compile']> {
  /** Read one schema file from SCHEMAS_DIR and parse it as JSON. */
  const readSchema = (fileName: string): Record<string, unknown> =>
    JSON.parse(readFileSync(join(SCHEMAS_DIR, fileName), 'utf8')) as Record<string, unknown>;

  const validator = new Ajv2020({ strict: false, allErrors: true });
  addFormats(validator);

  // Cross-file `$ref` targets — same pattern as `fixtures-valid.test.ts`.
  // If they are not registered, Ajv throws `missingRef` while compiling
  // `workflow-definition.schema.json`, which references agent-ref +
  // prompt-ref by URL.
  const agentRef = readSchema('agent-ref.schema.json');
  const promptRef = readSchema('prompt-ref.schema.json');
  const promptKind = readSchema('prompt-kind.schema.json');

  // prompt-ref and prompt-kind are registered under both the bare and the
  // `./`-prefixed key; agent-ref only under the bare key.
  const registrations: Array<[Record<string, unknown>, string]> = [
    [agentRef, 'agent-ref.schema.json'],
    [promptRef, 'prompt-ref.schema.json'],
    [promptRef, './prompt-ref.schema.json'],
    [promptKind, 'prompt-kind.schema.json'],
    [promptKind, './prompt-kind.schema.json'],
  ];
  for (const [refSchema, key] of registrations) {
    validator.addSchema(refSchema, key);
  }

  return validator.compile(readSchema('workflow-definition.schema.json'));
}
53
+
54
/**
 * Produce a WorkflowDefinition carrying only the minimal-required fields.
 *
 * Everything except `nodes` is a fixed placeholder; each test supplies its
 * own node list so it can vary the per-node shape under test.
 *
 * @param nodes node objects to embed as-is (the array is not copied).
 * @returns a fresh definition object on every call.
 */
function baseDefinition(nodes: Array<Record<string, unknown>>): Record<string, unknown> {
  const identity = { id: 'wf-test', name: 'Test', version: '1.0.0' };
  const graph = { nodes, edges: [], triggers: [], variables: [] };
  const housekeeping = {
    metadata: { createdAt: '2026-05-25T00:00:00Z' },
    settings: {},
  };
  // Spread order keeps the same key order as a single flat literal would.
  return { ...identity, ...graph, ...housekeeping };
}
69
+
70
/**
 * Produce a placeholder workflow node for schema tests.
 *
 * @param id     used for both the node's `id` and its `name`.
 * @param extras additional properties merged over the defaults; a key that
 *               matches a default replaces it.
 * @returns a fresh node object on every call.
 */
function baseNode(id: string, extras: Record<string, unknown> = {}): Record<string, unknown> {
  const defaults: Record<string, unknown> = {
    id,
    typeId: 'core.test.noop',
    name: id,
    position: { x: 0, y: 0 },
    config: {},
    inputs: {},
  };
  // Extras are spread last so per-test overrides win over the defaults.
  return { ...defaults, ...extras };
}
81
+
82
/**
 * Schema-shape suite for the optional `outputRole` field on workflow nodes
 * (RFC 0065). Every case validates an in-memory WorkflowDefinition against
 * the compiled `workflow-definition.schema.json`; no host is contacted.
 */
describe('workflow-primary-output-annotation: outputRole shape (RFC 0065)', () => {
  const validate = compileWorkflowDefinition();

  /** Assert that `def` validates, printing Ajv's error list if it does not. */
  const expectAccepted = (def: Record<string, unknown>): void => {
    const ok = validate(def);
    expect(ok, JSON.stringify(validate.errors, null, 2)).toBe(true);
  };

  /**
   * Assert that `def` is rejected AND that at least one reported error points
   * at an `outputRole` value. Checking only `ok === false` would also pass if
   * the scaffold violated some unrelated part of the schema, so the cause of
   * the rejection is pinned to the field under test.
   */
  const expectRejectedAtOutputRole = (def: Record<string, unknown>): void => {
    const ok = validate(def);
    expect(ok).toBe(false);
    expect(validate.errors).toBeTruthy();
    const errors = validate.errors ?? [];
    expect(
      errors.some((err) => err.instancePath.endsWith('/outputRole')),
      JSON.stringify(errors, null, 2),
    ).toBe(true);
  };

  it('accepts a workflow with one node declaring outputRole="primary"', () => {
    expectAccepted(
      baseDefinition([
        baseNode('a', { outputRole: 'primary' }),
        baseNode('b'),
      ]),
    );
  });

  it('accepts primary AND secondary annotations on different nodes', () => {
    expectAccepted(
      baseDefinition([
        baseNode('a', { outputRole: 'primary' }),
        baseNode('b', { outputRole: 'secondary' }),
        baseNode('c'),
      ]),
    );
  });

  it('accepts a workflow with the field absent (additive promise)', () => {
    expectAccepted(
      baseDefinition([
        baseNode('a'),
        baseNode('b'),
      ]),
    );
  });

  it('rejects an unknown outputRole enum value', () => {
    expectRejectedAtOutputRole(
      baseDefinition([
        baseNode('a', { outputRole: 'tertiary' }),
      ]),
    );
  });

  it('rejects outputRole set to a non-string', () => {
    expectRejectedAtOutputRole(
      baseDefinition([
        baseNode('a', { outputRole: 1 }),
      ]),
    );
  });

  it('permits multiple nodes declaring outputRole="primary" (tooling decides)', () => {
    // The schema doesn't reject multiple primaries — tooling MAY pick
    // any (lexicographic node id is the RFC's recommended tiebreaker).
    // This test pins that the schema layer doesn't enforce uniqueness,
    // matching the RFC's "schema permits N primaries" promise.
    expectAccepted(
      baseDefinition([
        baseNode('a', { outputRole: 'primary' }),
        baseNode('b', { outputRole: 'primary' }),
      ]),
    );
  });
});