@openwop/openwop-conformance 1.6.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +57 -0
  4. package/api/openapi.yaml +250 -0
  5. package/coverage.md +14 -0
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures.md +19 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +10 -0
  10. package/schemas/agent-inventory-response.schema.json +90 -0
  11. package/schemas/ai-envelope.schema.json +28 -0
  12. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  13. package/schemas/capabilities.schema.json +171 -4
  14. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  15. package/schemas/envelopes/media.audio.schema.json +38 -0
  16. package/schemas/envelopes/media.file.schema.json +37 -0
  17. package/schemas/envelopes/media.image.schema.json +33 -0
  18. package/schemas/heartbeat-evaluated.schema.json +14 -0
  19. package/schemas/heartbeat-state-changed.schema.json +14 -0
  20. package/schemas/node-pack-manifest.schema.json +16 -1
  21. package/schemas/run-event-payloads.schema.json +96 -5
  22. package/schemas/run-event.schema.json +4 -0
  23. package/schemas/workflow-definition.schema.json +5 -0
  24. package/schemas/workspace-file-create.schema.json +20 -0
  25. package/schemas/workspace-file.schema.json +39 -0
  26. package/src/lib/agentLoop.ts +44 -0
  27. package/src/lib/agentRuntime.ts +45 -0
  28. package/src/lib/artifactTypes.ts +96 -0
  29. package/src/lib/cardPacks.ts +52 -0
  30. package/src/lib/discovery-capabilities.ts +50 -0
  31. package/src/lib/distillation.ts +38 -0
  32. package/src/lib/feedback.ts +3 -3
  33. package/src/lib/heartbeat.ts +31 -0
  34. package/src/lib/memoryAttribution.ts +48 -0
  35. package/src/lib/subRunAttestation.ts +35 -0
  36. package/src/lib/toolHooks.ts +33 -0
  37. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  38. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  39. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  40. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  41. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  42. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  43. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  44. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  45. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  46. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  47. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  48. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  49. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  50. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  51. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  52. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  53. package/src/scenarios/auth-mtls.test.ts +2 -1
  54. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  55. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  56. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  57. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  58. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  59. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  60. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  61. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  62. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  63. package/src/scenarios/commitment-fired.test.ts +83 -0
  64. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  65. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  66. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  67. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  68. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  69. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  70. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  71. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  72. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  73. package/src/scenarios/distillation-shape.test.ts +41 -0
  74. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  75. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  76. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  77. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  78. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  79. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  80. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  81. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  82. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  83. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  84. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  85. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  86. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  87. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  88. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  89. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  90. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  91. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  92. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  93. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  94. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  95. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  96. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  97. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  98. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  99. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  100. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  101. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  102. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  103. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  104. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  105. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  106. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  107. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  108. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  109. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  110. package/src/scenarios/pause-resume.test.ts +3 -3
  111. package/src/scenarios/production-backpressure.test.ts +2 -2
  112. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  113. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  114. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  115. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  116. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  117. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  118. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  119. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  120. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  121. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  122. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  123. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  124. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  125. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  126. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  127. package/src/scenarios/provider-usage.test.ts +2 -1
  128. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  129. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  130. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  131. package/src/scenarios/replayDeterminism.test.ts +3 -1
  132. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  133. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  134. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  135. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  136. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  137. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  138. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  139. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  140. package/src/scenarios/spec-corpus-validity.test.ts +1 -1
  141. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  142. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  143. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  144. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  145. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  146. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  147. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  148. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  149. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  150. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  151. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  152. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  153. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  154. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  155. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  156. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  157. package/src/scenarios/workspace-behavior.test.ts +134 -0
  158. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  159. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Shared helper for the RFC 0060 host.heartbeat conformance scenarios.
3
+ * Lives in lib/ (not a *.test.ts) so scenarios import it via `../lib/heartbeat.js`.
4
+ */
5
+ import { driver } from './driver.js';
6
+
7
/** Minimal view of the discovery document: only the `capabilities` wrapper is read here. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Fetches `/.well-known/openwop` and extracts `capabilities.heartbeat`.
 *
 * @returns The heartbeat capability object, or `null` when `capabilities` or
 *   `capabilities.heartbeat` is missing, falsy, or not an object.
 */
export async function readHeartbeatCap(): Promise<Record<string, unknown> | null> {
  const discovery = await driver.get('/.well-known/openwop');
  const capabilities = (discovery.json as DiscoveryDoc | undefined)?.capabilities;
  if (!capabilities || typeof capabilities !== 'object') return null;

  const heartbeat = (capabilities as Record<string, unknown>)['heartbeat'];
  if (!heartbeat || typeof heartbeat !== 'object') return null;
  return heartbeat as Record<string, unknown>;
}
18
+
19
/**
 * Reports whether a heartbeat capability block advertises support.
 *
 * @param cap The value returned by the heartbeat capability reader (may be `null`).
 * @returns `true` only when `cap.supported` is strictly the boolean `true`.
 */
export function heartbeatSupported(cap: Record<string, unknown> | null): boolean {
  if (cap == null) return false; // unadvertised
  const flag = cap['supported'];
  return flag === true;
}
23
+
24
/**
 * Drives one heartbeat tick through the host-sample seam
 * (`POST /v1/host/sample/heartbeat/tick`, a host-extension surface exposed by
 * hosts wiring RFC 0060).
 *
 * @param body JSON payload forwarded to the seam unchanged.
 * @returns The response `status` and parsed `json`, or `null` (soft-skip) when
 *   the host answers 404 or 405, i.e. the seam is not exposed.
 */
export async function tickHeartbeat(body: Record<string, unknown>): Promise<{ status: number; json: unknown } | null> {
  const { status, json } = await driver.post('/v1/host/sample/heartbeat/tick', body);
  const seamAbsent = status === 404 || status === 405;
  return seamAbsent ? null : { status, json };
}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Shared helpers for the RFC 0057 memory write-attribution scenarios.
3
+ * Lives in lib/ (not a *.test.ts) so scenarios import it via `../lib/memoryAttribution.js`.
4
+ */
5
+ import { driver } from './driver.js';
6
+ import { isFixtureAdvertised } from './fixtures.js';
7
+
8
/** Minimal view of the discovery document: only the `capabilities` wrapper is read here. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Fetches `/.well-known/openwop` and extracts `capabilities.memory.attribution`.
 *
 * @returns The attribution capability object, or `null` when any segment of the
 *   path (`capabilities`, `memory`, `attribution`) is missing, falsy, or not an object.
 */
export async function readMemoryAttributionCap(): Promise<Record<string, unknown> | null> {
  const res = await driver.get('/.well-known/openwop');

  // Walk capabilities -> memory -> attribution, bailing out as soon as a level
  // is not an object we can index into.
  let node: unknown = (res.json as DiscoveryDoc | undefined)?.capabilities;
  for (const key of ['memory', 'attribution']) {
    if (!node || typeof node !== 'object') return null;
    node = (node as Record<string, unknown>)[key];
  }
  if (!node || typeof node !== 'object') return null;
  return node as Record<string, unknown>;
}
20
+
21
/**
 * Reports whether the host commits to emitting `memory.written`.
 *
 * @param cap The memory-attribution capability block (may be `null`).
 * @returns `true` only when both `supported` and `emitsWriteEvents` are strictly `true`.
 */
export function emitsWriteEvents(cap: Record<string, unknown> | null): boolean {
  if (cap == null) return false; // unadvertised
  const { supported, emitsWriteEvents: emits } = cap;
  return supported === true && emits === true;
}
25
+
26
/** Workflow fixture used to seed a basic run. */
const SEED_FIXTURE = 'conformance-noop';

/**
 * Seeds a basic run of the `conformance-noop` fixture for `tenantId` via
 * `POST /v1/runs` (per the original note, the host writes a run-summary on
 * completion).
 *
 * @param tenantId Tenant the run is created under.
 * @returns The created run's `runId`, or `null` (soft-skip) when the fixture is
 *   not advertised, the response status is neither 200 nor 201, or the body
 *   carries no `runId`.
 */
export async function seedRun(tenantId: string): Promise<string | null> {
  if (!isFixtureAdvertised(SEED_FIXTURE)) return null;

  const created = await driver.post('/v1/runs', { workflowId: SEED_FIXTURE, tenantId, inputs: {} });
  const accepted = created.status === 200 || created.status === 201;
  if (!accepted) return null;

  const payload = created.json as { runId?: string } | undefined;
  return payload?.runId ?? null;
}
36
+
37
/** Loose shape of a run event as read from the events endpoint. */
interface RunEventLike {
  type: string;
  runId?: string;
  payload?: Record<string, unknown>;
}

/**
 * Fetches `/v1/runs/{runId}/events` and keeps only the `memory.written` events.
 *
 * @param runId Run identifier; URI-encoded before being placed in the path.
 * @returns The matching events in response order; empty when the body has no `events`.
 */
export async function memoryWrittenEvents(runId: string): Promise<RunEventLike[]> {
  const path = `/v1/runs/${encodeURIComponent(runId)}/events`;
  const res = await driver.get(path);
  const body = res.json as { events?: RunEventLike[] } | undefined;
  const all = body?.events ?? [];
  const isMemoryWritten = (event: RunEventLike): boolean => event.type === 'memory.written';
  return all.filter(isMemoryWritten);
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Shared helper for the RFC 0063 sub-run output-attestation conformance
3
+ * scenarios. Lives in lib/ (not a *.test.ts) so scenarios import it via
4
+ * `../lib/subRunAttestation.js`.
5
+ */
6
+ import { driver } from './driver.js';
7
+ import { readCapabilityFamily } from './discovery-capabilities.js';
8
+
9
/**
 * Reads `agents.subRunAttestation` from discovery through `readCapabilityFamily`
 * (root-first per RFC 0073, per the original note).
 *
 * @returns `null` when no `agents` block is advertised or the flag is absent
 *   (treated as no support); otherwise `true` only when the flag is strictly `true`.
 */
export async function readSubRunAttestationCap(): Promise<boolean | null> {
  const agents = await readCapabilityFamily<Record<string, unknown>>('agents');
  if (!agents || typeof agents !== 'object') return null;

  const advertised = agents['subRunAttestation'];
  if (advertised === undefined) return null;
  return advertised === true;
}
17
+
18
/** Response shape of the sub-run attest seam; every field is optional. */
interface AttestResult {
  attestation?: { checksum?: unknown; algorithm?: unknown };
  harvestedEvent?: Record<string, unknown>;
  merged?: unknown;
  mergedValues?: Record<string, unknown>;
}

/**
 * Drives one sub-run harvest-then-merge through the host-sample seam
 * `POST /v1/host/sample/subrun/attest` (host-extension surface specified in
 * host-sample-test-seams.md §"Open seams"). Per that spec the seam returns the
 * `attestation` the host would surface on
 * `core.workflowChain.event { phase: 'output.harvested' }`, plus whether the
 * merge proceeded.
 *
 * @param body JSON payload forwarded to the seam unchanged.
 * @returns The parsed response (an empty object when the body is absent), or
 *   `null` (soft-skip) when the host answers 404 or 405.
 */
export async function invokeSubRunAttest(body: Record<string, unknown>): Promise<AttestResult | null> {
  const res = await driver.post('/v1/host/sample/subrun/attest', body);
  switch (res.status) {
    case 404:
    case 405:
      return null; // seam absent — soft-skip
    default:
      return (res.json as AttestResult | undefined) ?? {};
  }
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Shared helper for the RFC 0064 host.toolHooks conformance scenarios.
3
+ * Lives in lib/ (not a *.test.ts) so scenarios import it via `../lib/toolHooks.js`.
4
+ */
5
+ import { driver } from './driver.js';
6
+
7
/** Minimal view of the discovery document: only the `capabilities` wrapper is read here. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Fetches `/.well-known/openwop` and extracts `capabilities.toolHooks`.
 *
 * @returns The toolHooks capability object, or `null` when `capabilities` or
 *   `capabilities.toolHooks` is missing, falsy, or not an object.
 */
export async function readToolHooksCap(): Promise<Record<string, unknown> | null> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as DiscoveryDoc | undefined;
  const capabilities = doc?.capabilities;

  const toolHooks =
    capabilities && typeof capabilities === 'object'
      ? (capabilities as Record<string, unknown>)['toolHooks']
      : undefined;

  if (toolHooks && typeof toolHooks === 'object') {
    return toolHooks as Record<string, unknown>;
  }
  return null;
}
18
+
19
/** Response shape of the tool-hook invoke seam; both payloads are optional. */
interface ToolHookResult {
  toolCalled?: Record<string, unknown>;
  toolReturned?: Record<string, unknown>;
}

/**
 * Drives one gated tool invocation through the host-sample seam
 * `POST /v1/host/sample/toolhooks/invoke` (host-extension surface specified in
 * host-sample-test-seams.md §"Open seams"). Per that spec the seam returns the
 * `agent.toolCalled` / `agent.toolReturned` payload pair the host would emit.
 *
 * @param body JSON payload forwarded to the seam unchanged.
 * @returns The parsed response (an empty object when the body is absent), or
 *   `null` (soft-skip) when the host answers 404 or 405.
 */
export async function invokeToolHook(body: Record<string, unknown>): Promise<ToolHookResult | null> {
  const res = await driver.post('/v1/host/sample/toolhooks/invoke', body);
  const seamAbsent = res.status === 404 || res.status === 405;
  if (seamAbsent) return null; // soft-skip
  const payload = res.json as ToolHookResult | undefined;
  return payload ?? {};
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * agent-loop-iteration-monotonic — RFC 0061 §B. Across a multi-turn loop,
3
+ * `runOrchestrator.decided.iteration` increments 1, 2, 3 … exactly once per turn
4
+ * (1-based, monotonic) — the observable counter `maxLoopIterations` bounds.
5
+ *
6
+ * Gated on `executionModel.version >= 5` + the host agent-loop seam; soft-skips
7
+ * when either is absent.
8
+ *
9
+ * @see RFCS/0061-agent-loop-lifecycle.md §B
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readExecutionModelCap, isVersion5, invokeAgentLoop } from '../lib/agentLoop.js';
15
+
16
describe('agent-loop-iteration-monotonic (RFC 0061 §B)', () => {
  it('iteration increments by exactly 1 per orchestrator turn, 1-based', async () => {
    // Gate: only hosts that pass the version-5 execution-model check are exercised.
    const executionModel = await readExecutionModelCap();
    if (!isVersion5(executionModel)) return;

    const loop = await invokeAgentLoop({ turns: 3 });
    if (loop === null) return; // seam absent — soft-skip

    const decisions = loop.decisions ?? [];
    const emittedAny = decisions.length >= 1;
    expect(
      emittedAny,
      driver.describe('RFC 0061 §B', 'a multi-turn loop MUST emit one runOrchestrator.decided per turn'),
    ).toBe(true);

    // Compare the observed iteration sequence against 1..N as serialized JSON.
    const observed = decisions.map((decision) => decision.iteration);
    const wanted = decisions.map((_, index) => index + 1);
    expect(
      JSON.stringify(observed),
      driver.describe('RFC 0061 §B', 'iteration MUST be 1-based + monotonic, incrementing by exactly 1 per turn'),
    ).toBe(JSON.stringify(wanted));
  });
});
@@ -0,0 +1,28 @@
1
+ /**
2
+ * agent-loop-stateful-resume — RFC 0061 §D. A loop suspended on a clarify/escalate
3
+ * HITL interrupt resumes at the SAME iteration — the counter does not reset or
4
+ * skip — with the snapshot lineage intact.
5
+ *
6
+ * Gated on `executionModel.statefulResume: true` + the host agent-loop seam;
7
+ * soft-skips when either is absent.
8
+ *
9
+ * @see RFCS/0061-agent-loop-lifecycle.md §D
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+ import { readExecutionModelCap, invokeAgentLoop } from '../lib/agentLoop.js';
15
+
16
describe('agent-loop-stateful-resume (RFC 0061 §D)', () => {
  it('a mid-loop suspend resumes at the same iteration, counter intact', async () => {
    // Gate: the host must advertise executionModel.statefulResume === true.
    const executionModel = await readExecutionModelCap();
    const advertisesStatefulResume = executionModel?.statefulResume === true;
    if (!advertisesStatefulResume) return;

    // Suspend at turn 2, then resume: the resumed iteration MUST be 2, not 1 or 3.
    const suspendTurn = 2;
    const outcome = await invokeAgentLoop({ turns: 4, suspendAtTurn: suspendTurn, resume: true });
    if (outcome === null) return; // seam absent — soft-skip

    expect(
      outcome.resumedIteration,
      driver.describe('RFC 0061 §D', 'a stateful resume MUST continue at the suspend iteration — the counter does not reset or skip'),
    ).toBe(suspendTurn);
  });
});
@@ -0,0 +1,41 @@
1
+ /**
2
+ * agent-loop-version5-shape — RFC 0061 §A/§B. The `executionModel.statefulResume`
3
+ * + `transcriptWindow` advertisement fields are well-formed when present, and a
4
+ * host advertising `version >= 5` carries a sane version ceiling.
5
+ *
6
+ * Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage lives
7
+ * in the sibling agent-loop-*.test.ts scenarios, gated on `version >= 5` + the
8
+ * host agent-loop seam.
9
+ *
10
+ * @see RFCS/0061-agent-loop-lifecycle.md §A
11
+ * @see spec/v1/multi-agent-execution.md §"Stateful agent-loop lifecycle"
12
+ */
13
+
14
+ import { describe, it, expect } from 'vitest';
15
+ import { driver } from '../lib/driver.js';
16
+ import { readExecutionModelCap } from '../lib/agentLoop.js';
17
+
18
describe('agent-loop-version5-shape: advertisement (RFC 0061 §A)', () => {
  it('executionModel.statefulResume/transcriptWindow are well-formed when present', async () => {
    const em = await readExecutionModelCap();
    if (em === null) return; // no execution model — valid

    if (em.statefulResume !== undefined) {
      expect(
        typeof em.statefulResume,
        driver.describe('capabilities.schema.json §multiAgent.executionModel', 'statefulResume MUST be a boolean when present'),
      ).toBe('boolean');
    }

    if (em.transcriptWindow !== undefined) {
      // The requirement is a positive INTEGER: Number.isInteger rejects
      // non-numbers, NaN, ±Infinity and fractional values such as 1.5, which a
      // bare `typeof === 'number' && >= 1` check would let through.
      expect(
        Number.isInteger(em.transcriptWindow) && (em.transcriptWindow as number) >= 1,
        driver.describe('capabilities.schema.json §multiAgent.executionModel', 'transcriptWindow MUST be a positive integer when present'),
      ).toBe(true);
    }

    if (typeof em.version === 'number') {
      // Versions are rungs on the 1–5 ladder, so a fractional value (e.g. 4.5)
      // is malformed even though it lies inside the numeric range.
      const version = em.version as number;
      expect(
        Number.isInteger(version) && version >= 1 && version <= 5,
        driver.describe('capabilities.schema.json §multiAgent.executionModel', 'version MUST be within the 1–5 ladder'),
      ).toBe(true);
    }
  });
});
@@ -0,0 +1,33 @@
1
+ /**
2
+ * agent-loop-workspace-snapshot — RFC 0061 §C. A workspace PUT during turn i is
3
+ * invisible to turn i's snapshot and visible to turn i+1 — per-iteration
4
+ * snapshot immutability (writes land next turn, never retroactively).
5
+ *
6
+ * Gated on `executionModel.version >= 5` AND `host.workspace.supported` + the
7
+ * host agent-loop seam; soft-skips when any is absent.
8
+ *
9
+ * @see RFCS/0061-agent-loop-lifecycle.md §C
10
+ * @see RFCS/0059-agent-workspace.md §D — the workspace read snapshot
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest';
14
+ import { driver } from '../lib/driver.js';
15
+ import { readExecutionModelCap, isVersion5, hasWorkspace, invokeAgentLoop } from '../lib/agentLoop.js';
16
+
17
describe('agent-loop-workspace-snapshot (RFC 0061 §C)', () => {
  it('a turn-i workspace write is invisible to turn i, visible to turn i+1', async () => {
    // Gates, checked in order: version-5 execution model, then workspace support.
    const executionModel = await readExecutionModelCap();
    if (!isVersion5(executionModel)) return;
    const workspaceAvailable = await hasWorkspace();
    if (!workspaceAvailable) return; // workspace optional — soft-skip

    const loop = await invokeAgentLoop({ turns: 2, workspaceWriteAtTurn: 1 });
    if (loop === null) return; // seam absent — soft-skip

    const visibility = loop.workspaceVisible ?? {};
    expect(
      visibility.atWriteTurn,
      driver.describe('RFC 0061 §C', 'a workspace write during turn i MUST be invisible to turn i\'s snapshot'),
    ).toBe(false);
    expect(
      visibility.atNextTurn,
      driver.describe('RFC 0061 §C', 'a workspace write during turn i MUST be visible to turn i+1'),
    ).toBe(true);
  });
});
@@ -0,0 +1,85 @@
1
+ /**
2
+ * agent-manifest-runtime — RFC 0070. When a host advertises
3
+ * `capabilities.agents.manifestRuntime.supported`, it has loaded pack `agents[]`
4
+ * (RFC 0003) into an AgentRegistry and can dispatch a manifest agent on the
5
+ * existing core.dispatch/orchestrator loop, enforcing toolAllowlist (RFC 0002
6
+ * §A14) and confidence escalation (§F).
7
+ *
8
+ * The inventory leg is exercised against the NORMATIVE `GET /v1/agents` surface
9
+ * (RFC 0072 §A), so it runs black-box against any conformant host. The dispatch
10
+ * leg uses the sample-extension seam and soft-skips on hosts that don't expose
11
+ * it (full black-box dispatch is the sequenced executor-integration tier). Both
12
+ * legs gate on `capabilities.agents.manifestRuntime.supported`.
13
+ *
14
+ * @see RFCS/0070-agent-manifest-runtime.md, RFCS/0072-agent-inventory-and-dispatch.md
15
+ */
16
+
17
+ import { describe, it, expect } from 'vitest';
18
+ import { driver } from '../lib/driver.js';
19
+ import { readManifestRuntimeCap, listManifestAgents, dispatchAgent } from '../lib/agentRuntime.js';
20
+
21
describe('agent-manifest-runtime (RFC 0070)', () => {
  it('lists installed manifest agents and dispatches one with attributed events', async () => {
    const cap = await readManifestRuntimeCap();
    if (cap?.supported !== true) return; // unadvertised — soft-skip

    // RFC 0074 §B — installScope governs how GET /v1/agents is scoped.
    // Only an ABSENT field (undefined/null) defaults to 'host'. A present but
    // malformed value (non-string, or an unknown string) is kept as-is so the
    // assertion below actually rejects it instead of silently treating it as 'host'.
    const installScope: unknown = cap.installScope ?? 'host';
    expect(
      installScope === 'host' || installScope === 'tenant',
      driver.describe('RFC 0074 §B', "agents.manifestRuntime.installScope (when present) MUST be 'host' or 'tenant'"),
    ).toBe(true);

    const inv = await listManifestAgents();
    if (inv === null) return; // seam absent — soft-skip
    const agents = inv.agents ?? [];
    expect(
      Array.isArray(agents),
      driver.describe('RFC 0072 §A', 'GET /v1/agents MUST return an agents[] array'),
    ).toBe(true);

    if (installScope === 'host') {
      // Host-global inventory (RFC 0072 §A): a manifestRuntime host MUST surface ≥1 agent.
      expect(
        agents.length > 0,
        driver.describe('RFC 0070 §A', 'a host-scoped manifestRuntime host MUST surface ≥1 installed manifest agent'),
      ).toBe(true);
    } else if (agents.length === 0) {
      // RFC 0074 §A + Unresolved Q3 — tenant-scoped: GET /v1/agents is the
      // authenticated principal's workspace set, which MAY be empty (the workspace
      // approved no agent packs) while manifestRuntime is advertised host-wide.
      // Empty is conformant; the cross-tenant no-disclosure 404 is covered by the
      // owner-triple isolation harness (RFC 0048/0059), not re-probed here. Nothing
      // to dispatch.
      return;
    }

    const agentId = agents[0]?.agentId;
    if (typeof agentId !== 'string') return;

    // Opaque-payload dispatch (validateHandoff:false) so the assertion is
    // independent of the chosen agent's handoff schema.
    const res = await dispatchAgent(agentId, { task: {}, validateHandoff: false, availableTools: [] });
    if (res === null) return; // seam absent — soft-skip
    expect(
      res.status === 'completed' || res.status === 'escalated',
      driver.describe('RFC 0070', 'dispatch MUST resolve to a terminal status (completed | escalated)'),
    ).toBe(true);

    const events = res.events ?? [];
    const types = events.map((e) => e.type);
    expect(
      types.includes('agent.reasoned') && types.includes('agent.decided'),
      driver.describe('RFC 0002 §A', 'dispatch MUST emit attributed agent.reasoned + agent.decided events'),
    ).toBe(true);

    // The requirement is scoped to agent.* events, so only those are checked:
    // a non-agent event without an agentId must not fail a conformant host.
    // The previous assertion guarantees this set is non-empty.
    const agentEvents = events.filter((e) => typeof e.type === 'string' && e.type.startsWith('agent.'));
    expect(
      agentEvents.every((e) => e.agentId === agentId),
      driver.describe('RFC 0002 §A', 'every emitted agent.* event MUST carry the dispatched agentId'),
    ).toBe(true);
  });

  // NOTE: RFC 0002 §F confidence escalation is NOT asserted here. Forcing a
  // sub-threshold decision black-box would require a non-normative test hook
  // (the reference host's `simulateConfidence`); a conformant host need not
  // expose one, so asserting it here would wrongly fail conformant peers.
  // Escalation is covered against the reference host in
  // apps/workflow-engine/backend/typescript/test/{agents,agent-dispatch-route}.test.ts.
});
@@ -23,15 +23,10 @@ import Ajv2020 from 'ajv/dist/2020.js';
23
23
  import { readFileSync } from 'node:fs';
24
24
  import { join } from 'node:path';
25
25
  import { driver } from '../lib/driver.js';
26
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
26
27
  import { SCHEMAS_DIR } from '../lib/paths.js';
27
28
 
28
- interface DiscoveryDoc {
29
- capabilities?: {
30
- aiProviders?: { supported?: unknown };
31
- supportedEnvelopes?: unknown;
32
- };
33
- supportedEnvelopes?: unknown;
34
- }
29
+ type DiscoveryDoc = Record<string, unknown>;
35
30
 
36
31
  const UNIVERSAL_KINDS = [
37
32
  'clarification.request',
@@ -47,19 +42,19 @@ async function readDiscovery(): Promise<DiscoveryDoc | null> {
47
42
  }
48
43
 
49
44
  function aiProvidersSupported(d: DiscoveryDoc | null): boolean {
50
- if (!d?.capabilities?.aiProviders?.supported) return false;
45
+ // Root-first per RFC 0073; `capabilities.aiProviders` is the deprecated wrapper shape.
46
+ const ap = capabilityFamily<{ supported?: unknown }>(d ?? undefined, 'aiProviders');
47
+ if (!ap?.supported) return false;
51
48
  // aiProviders.supported can be `true` or an array per the capabilities schema.
52
- const v = d.capabilities.aiProviders.supported;
49
+ const v = ap.supported;
53
50
  return v === true || (Array.isArray(v) && v.length > 0);
54
51
  }
55
52
 
56
53
  function supportedEnvelopes(d: DiscoveryDoc | null): string[] {
57
- // supportedEnvelopes is at the top level per the capabilities schema, but
58
- // some hosts nest it under capabilities tolerate both.
59
- const top = d?.supportedEnvelopes;
60
- const nested = d?.capabilities?.supportedEnvelopes;
61
- const raw = Array.isArray(top) ? top : Array.isArray(nested) ? nested : [];
62
- return raw.filter((s): s is string => typeof s === 'string');
54
+ // supportedEnvelopes is a document-root field per capabilities.md §"Document-root
55
+ // layout" (RFC 0073); a `capabilities.*` wrapper is the deprecated fallback.
56
+ const raw = capabilityFamily<unknown>(d ?? undefined, 'supportedEnvelopes');
57
+ return Array.isArray(raw) ? raw.filter((s): s is string => typeof s === 'string') : [];
63
58
  }
64
59
 
65
60
  // HTTP-driven blocks soft-skip when no base URL is configured (gate's
@@ -76,9 +71,10 @@ describe.skipIf(HTTP_SKIP)('ai-envelope-shape: advertisement contract (RFC 0021
76
71
  for (const k of env) {
77
72
  expect(typeof k, driver.describe('capabilities.md §supportedEnvelopes', 'each entry MUST be a string')).toBe('string');
78
73
  }
79
- // Re-affirm shape: array if present.
80
- if (d.capabilities?.supportedEnvelopes !== undefined) {
81
- expect(Array.isArray(d.capabilities.supportedEnvelopes), 'supportedEnvelopes MUST be an array').toBe(true);
74
+ // Re-affirm shape: array if present (root-first per RFC 0073).
75
+ const advertised = capabilityFamily<unknown>(d, 'supportedEnvelopes');
76
+ if (advertised !== undefined) {
77
+ expect(Array.isArray(advertised), 'supportedEnvelopes MUST be an array').toBe(true);
82
78
  }
83
79
  });
84
80
  });
@@ -33,7 +33,7 @@ interface DiscoveryDoc {
33
33
  async function readLimits(): Promise<Record<string, number> | null> {
34
34
  const res = await driver.get('/.well-known/openwop');
35
35
  const body = res.json as DiscoveryDoc | undefined;
36
- const limits = body?.limits ?? body?.capabilities?.limits ?? null;
36
+ const limits = body?.limits ?? capabilityFamily(body, 'limits') ?? null;
37
37
  return limits && typeof limits === 'object' ? (limits as Record<string, number>) : null;
38
38
  }
39
39
 
@@ -166,6 +166,7 @@ describe('aiEnvelope.capBreached: behavioral cap enforcement (FINAL v1.1)', () =
166
166
  // node.failed per capabilities.md §"Engine-enforced limits". Tests
167
167
  // soft-skip on HTTP 404 when the seam isn't exposed.
168
168
  import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
169
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
169
170
 
170
171
  describe('aiEnvelope.capBreached: engine projection via event-log seam (capabilities.md §"cap.breached")', () => {
171
172
  it('breached outcome projects to cap.breached { kind: "envelopes" } event with causationId chain', async () => {
@@ -54,7 +54,7 @@ describe('aiEnvelope.schemaDrift: advertisement shape (FINAL v1.1)', () => {
54
54
  if (!(await isEnvelopeContractsAdvertised())) return; // not opted in — skip
55
55
  const res = await driver.get('/.well-known/openwop');
56
56
  const body = res.json as { schemaVersions?: Record<string, number>; capabilities?: { schemaVersions?: Record<string, number> } } | undefined;
57
- const versions = body?.schemaVersions ?? body?.capabilities?.schemaVersions ?? {};
57
+ const versions = body?.schemaVersions ?? capabilityFamily(body, 'schemaVersions') ?? {};
58
58
  expect(
59
59
  Object.keys(versions).length > 0,
60
60
  driver.describe(
@@ -189,6 +189,7 @@ describe('aiEnvelope.schemaDrift: behavioral strictness gate (FINAL v1.1)', () =
189
189
  // E.2 OTel scrape seam.
190
190
  import { queryTestSpans, isOtelSeamAvailable } from '../lib/otel-scrape.js';
191
191
  import { resetTestSeam } from '../lib/event-log-query.js';
192
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
192
193
 
193
194
  describe('aiEnvelope.schemaDrift: OTel drift attribute projection (E.2)', () => {
194
195
  it('below-floor + strictness:warn → OTel span MUST carry envelope_schema_version_drift attribute', async () => {
@@ -166,6 +166,7 @@ describe('aiEnvelope.universalKinds: behavioral accept via /v1/host/sample/envel
166
166
 
167
167
  // E.1 engine-projection via the test-only event-log seam.
168
168
  import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
169
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
169
170
 
170
171
  describe('aiEnvelope.universalKinds: engine projection via event-log seam', () => {
171
172
  it('clarification.request MUST be lifted to interrupt.requested { kind: "clarification" } per interrupt.md', async () => {
@@ -254,7 +255,7 @@ describe('aiEnvelope.universalKinds: schema.response counter-policy advertisemen
254
255
  // advertising a policy field use a documented value.
255
256
  const res = await driver.get('/.well-known/openwop');
256
257
  const body = res.json as { capabilities?: { aiEnvelope?: { schemaResponseCounterPolicy?: string } } } | undefined;
257
- const policy = body?.capabilities?.aiEnvelope?.schemaResponseCounterPolicy;
258
+ const policy = capabilityFamily<{ schemaResponseCounterPolicy?: string }>(body, 'aiEnvelope')?.schemaResponseCounterPolicy;
258
259
  if (policy === undefined) return; // no policy advertised — host MAY omit
259
260
  expect(
260
261
  ['counted', 'exempt'].includes(policy),
@@ -22,14 +22,12 @@
22
22
 
23
23
  import { describe, it, expect } from 'vitest';
24
24
  import { driver } from '../lib/driver.js';
25
-
26
- interface DiscoveryDoc {
27
- capabilities?: { authorization?: { supported?: boolean } };
28
- }
25
+ import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
29
26
 
30
27
  async function authorizationSupported(): Promise<boolean> {
31
- const res = await driver.get('/.well-known/openwop');
32
- return (res.json as DiscoveryDoc | undefined)?.capabilities?.authorization?.supported === true;
28
+ // Root-first per RFC 0073 (`capabilities.authorization` is the deprecated wrapper shape).
29
+ const authz = await readCapabilityFamily<{ supported?: unknown }>('authorization');
30
+ return authz?.supported === true;
33
31
  }
34
32
 
35
33
  describe('approval-gate-flow: role-gated, audited approval (RFC 0051 §A)', () => {
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Bounded artifact-schema compilation (RFC 0071, `Active`).
3
+ *
4
+ * Always-on, server-free assertion for the SECURITY invariant
5
+ * `artifact-schema-compile-bounded`. Artifact-type packs ship third-party
6
+ * JSON Schemas that the engine compiles (Ajv) at install + validation time;
7
+ * an unbounded compile is a denial-of-service vector (schema bombs:
8
+ * pathological `$ref` recursion, keyword-count explosion, oversized payloads,
9
+ * catastrophic-backtracking `pattern`s). This scenario asserts two things
10
+ * that must hold for every release regardless of which host runs it:
11
+ *
12
+ * PART 1 — contract present. `artifact-type-packs.md` carries the normative
13
+ * bounded-compilation MUST (serialized-size, `$ref`-depth, keyword-count
14
+ * bounds + wall-clock timeout), and `host-capabilities.md` §host.artifactTypes
15
+ * references it. Guards against the requirement being silently dropped.
16
+ *
17
+ * PART 2 — defense is well-defined + implementable. A reference bounding
18
+ * predicate built from representative finite limits rejects three schema
19
+ * bombs and admits a benign artifact schema. The specific numeric limits are
20
+ * host-configurable per the spec (advertised, not protocol-mandated); the
21
+ * point is that *some* finite bound exists and catches the bombs while
22
+ * passing legitimate schemas.
23
+ *
24
+ * The behavioral end-to-end form (a host rejects an over-bounds pack at
25
+ * registry `PUT` with `pack_validation_failed`) is capability-gated on
26
+ * `host.artifactTypes.supported` and is `host-pending` until a reference host
27
+ * lands; this server-free scenario is the always-on floor.
28
+ *
29
+ * @see spec/v1/artifact-type-packs.md §"Bounded schema compilation (normative)"
30
+ * @see SECURITY/threat-model-node-packs.md §"Distributed artifact schemas"
31
+ * @see RFCS/0071-artifact-type-and-chat-card-packs.md
32
+ */
33
+
34
+ import { describe, it, expect } from 'vitest';
35
+ import { readFileSync } from 'node:fs';
36
+ import { join } from 'node:path';
37
+ import { V1_DIR } from '../lib/paths.js';
38
+
39
/**
 * Builds the failure message attached to an assertion.
 *
 * @param specRef - Document (and optional section) the requirement comes from.
 * @param requirement - The normative statement being asserted.
 * @returns `specRef` and `requirement` joined by a spaced em dash.
 */
const why = (specRef: string, requirement: string): string => {
  const separator = ' — ';
  return `${specRef}${separator}${requirement}`;
};
40
+
41
// Contract-presence suite: checks that the spec corpus still carries the
// bounded-compilation requirement and that host-capabilities.md points at it.
describe('artifact-schema-compile-bounded: contract present in the corpus (RFC 0071, server-free)', () => {
  // Both documents are read once, while the suite is being collected. When
  // V1_DIR is falsy nothing is read and the text is the empty string.
  const loadSpec = (fileName: string): string => (V1_DIR ? readFileSync(join(V1_DIR, fileName), 'utf8') : '');
  const artifactDoc = loadSpec('artifact-type-packs.md');
  const hostCaps = loadSpec('host-capabilities.md');

  // The cases below are skipped only when V1_DIR is exactly null.
  const corpusMissing = V1_DIR === null;

  it.skipIf(corpusMissing)('artifact-type-packs.md declares the bounded-compilation MUST', () => {
    const hasSection = /Bounded schema compilation/i.test(artifactDoc);
    expect(
      hasSection,
      why('artifact-type-packs.md', 'a "Bounded schema compilation" section MUST exist'),
    ).toBe(true);

    const statesBoundAndReject = /MUST bound/i.test(artifactDoc) && /MUST reject/i.test(artifactDoc);
    expect(
      statesBoundAndReject,
      why('artifact-type-packs.md §"Bounded schema compilation"', 'host MUST bound + MUST reject over-limit schemas'),
    ).toBe(true);

    // The three structural axes + the timeout MUST all be named.
    const axes = [/byte size/i, /\$ref/i, /keyword/i, /timeout/i];
    axes.forEach((axis) => {
      const named = axis.test(artifactDoc);
      expect(named, why('artifact-type-packs.md', `bound axis ${axis} MUST be named`)).toBe(true);
    });
  });

  it.skipIf(corpusMissing)('host-capabilities.md §host.artifactTypes references the bound', () => {
    const referencesInvariant = /artifact-schema-compile-bounded/.test(hostCaps);
    expect(
      referencesInvariant,
      why('host-capabilities.md §host.artifactTypes', 'MUST reference the bounded-compilation invariant'),
    ).toBe(true);
  });
});
67
+
68
+ describe('artifact-schema-compile-bounded: a finite bound catches schema bombs (RFC 0071, server-free)', () => {
69
+ // Representative, host-configurable limits (the spec leaves the exact values
70
+ // to host advertisement; these stand in for "some finite bound").
71
+ const LIMITS = { maxBytes: 64 * 1024, maxRefDepth: 16, maxKeywords: 2000 };
72
+
73
/**
 * Largest number of `$ref`-bearing objects stacked along any single path
 * from `node` down to a leaf.
 *
 * Every object (arrays included) that has a `$ref` key adds one to the depth
 * inherited from its ancestors; the result is the maximum over all paths.
 * Non-object input contributes nothing and yields `seen` unchanged.
 *
 * The walk uses an explicit stack rather than recursion, so a deeply nested
 * value — exactly the kind of input a bound check has to survive — cannot
 * overflow the call stack.
 *
 * @param node - Schema (sub)tree to inspect; any JSON-like value.
 * @param seen - `$ref` depth already accumulated by the ancestors of `node`.
 * @returns The greatest accumulated `$ref` depth reached anywhere under `node`.
 * @throws RangeError when the value graph contains a cycle.
 */
function refDepth(node: unknown, seen = 0): number {
  let deepest = seen;
  // Objects on the path from the root to the node currently being expanded.
  const onPath = new Set<object>();
  // A `leave` frame is popped once the whole subtree of its value is done.
  const pending: Array<{ value: unknown; depth: number; leave: boolean }> = [
    { value: node, depth: seen, leave: false },
  ];

  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const { value, depth, leave } = frame;
    if (value === null || typeof value !== 'object') continue;
    if (leave) {
      onPath.delete(value);
      continue;
    }
    if (onPath.has(value)) throw new RangeError('refDepth: cyclic schema graph');
    onPath.add(value);

    const here = '$ref' in value ? depth + 1 : depth;
    if (here > deepest) deepest = here;

    pending.push({ value, depth, leave: true });
    for (const child of Object.values(value)) {
      // Primitives cannot carry `$ref`, so they never need a frame.
      if (child !== null && typeof child === 'object') {
        pending.push({ value: child, depth: here, leave: false });
      }
    }
  }
  return deepest;
}
81
/**
 * Total number of own enumerable keys across `node` and every object nested
 * inside it.
 *
 * Arrays are objects here, so each array element index counts as a key, and
 * names under `properties` count just like schema keywords. A sub-object
 * reachable through several paths is counted once per path.
 *
 * The walk uses an explicit stack rather than recursion, so a deeply nested
 * value cannot overflow the call stack.
 *
 * @param node - Schema (sub)tree to measure; any JSON-like value.
 * @returns The summed key count; `0` for `null` and non-object input.
 * @throws RangeError when the value graph contains a cycle.
 */
function keywordCount(node: unknown): number {
  let total = 0;
  // Objects on the path from the root to the node currently being expanded.
  const onPath = new Set<object>();
  // A `leave` frame is popped once the whole subtree of its value is done.
  const pending: Array<{ value: unknown; leave: boolean }> = [{ value: node, leave: false }];

  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const { value, leave } = frame;
    if (value === null || typeof value !== 'object') continue;
    if (leave) {
      onPath.delete(value);
      continue;
    }
    if (onPath.has(value)) throw new RangeError('keywordCount: cyclic schema graph');
    onPath.add(value);

    total += Object.keys(value).length;

    pending.push({ value, leave: true });
    for (const child of Object.values(value)) {
      // Primitives have no keys to add, so they never need a frame.
      if (child !== null && typeof child === 'object') {
        pending.push({ value: child, leave: false });
      }
    }
  }
  return total;
}
88
/**
 * Reference bound predicate — the shape a conformant host applies at PUT/install.
 *
 * Checks run in a fixed order and stop at the first violation: serialized
 * UTF-8 byte size, then `$ref` depth, then keyword count, each compared
 * against the matching `LIMITS` entry.
 *
 * @param schema - Candidate artifact schema.
 * @returns `true` when any bound is exceeded, `false` when all are respected.
 */
function exceedsBounds(schema: unknown): boolean {
  const serialized = JSON.stringify(schema);
  const tooLarge = Buffer.byteLength(serialized, 'utf8') > LIMITS.maxBytes;
  return tooLarge || refDepth(schema) > LIMITS.maxRefDepth || keywordCount(schema) > LIMITS.maxKeywords;
}
96
+
97
it('admits a benign artifact schema', () => {
  // A small, ordinary object schema: well inside every limit.
  const properties = {
    name: { type: 'string' },
    dims: { type: 'array', items: { type: 'number' } },
  };
  const benign = {
    $schema: 'https://json-schema.org/draft/2020-12/schema',
    $id: 'https://h.example/schemas/artifacts/vendor.acme.cad.model.schema.json',
    type: 'object',
    additionalProperties: false,
    required: ['name'],
    properties,
  };
  const rejected = exceedsBounds(benign);
  expect(rejected, why('artifact-type-packs.md', 'a legitimate artifact schema MUST NOT be rejected')).toBe(false);
});

it('rejects a $ref-depth bomb', () => {
  // Wrap a leaf in $ref-bearing objects, four layers more than maxRefDepth allows.
  const layers = Array.from({ length: LIMITS.maxRefDepth + 4 });
  const bomb = layers.reduce<Record<string, unknown>>(
    (inner) => ({ $ref: '#/x', properties: { nested: inner } }),
    { type: 'string' },
  );
  const rejected = exceedsBounds({ type: 'object', properties: { deep: bomb } });
  expect(rejected, why('threat-model-node-packs.md', 'a $ref-depth bomb MUST be rejected')).toBe(true);
});

it('rejects a keyword-count bomb', () => {
  // One trivial property per index, 100 more than maxKeywords.
  const props: Record<string, unknown> = Object.fromEntries(
    Array.from({ length: LIMITS.maxKeywords + 100 }, (_, i) => [`p${i}`, { type: 'string' }]),
  );
  const rejected = exceedsBounds({ type: 'object', properties: props });
  expect(rejected, why('threat-model-node-packs.md', 'a keyword-count bomb MUST be rejected')).toBe(true);
});

it('rejects an oversized schema', () => {
  // The description alone is one byte over maxBytes.
  const padding = ''.padEnd(LIMITS.maxBytes + 1, 'x');
  const huge = { type: 'object', description: padding };
  const rejected = exceedsBounds(huge);
  expect(rejected, why('threat-model-node-packs.md', 'an over-size schema MUST be rejected')).toBe(true);
});
126
+ });