@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +241 -0
  3. package/api/asyncapi.yaml +481 -0
  4. package/api/openapi.yaml +830 -0
  5. package/api/redocly.yaml +8 -0
  6. package/coverage.md +80 -0
  7. package/dist/cli.js +161 -0
  8. package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
  9. package/fixtures/conformance-agent-identity.json +27 -0
  10. package/fixtures/conformance-agent-low-confidence.json +29 -0
  11. package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
  12. package/fixtures/conformance-agent-memory-redaction.json +32 -0
  13. package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
  14. package/fixtures/conformance-agent-memory-ttl.json +31 -0
  15. package/fixtures/conformance-agent-pack-export.json +26 -0
  16. package/fixtures/conformance-agent-pack-install.json +26 -0
  17. package/fixtures/conformance-agent-pack-provenance.json +31 -0
  18. package/fixtures/conformance-agent-reasoning.json +29 -0
  19. package/fixtures/conformance-approval.json +27 -0
  20. package/fixtures/conformance-cancellable.json +33 -0
  21. package/fixtures/conformance-cap-breach.json +27 -0
  22. package/fixtures/conformance-capability-missing.json +23 -0
  23. package/fixtures/conformance-channel-ttl.json +60 -0
  24. package/fixtures/conformance-clarification.json +30 -0
  25. package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
  26. package/fixtures/conformance-conversation-lifecycle.json +32 -0
  27. package/fixtures/conformance-conversation-replay.json +33 -0
  28. package/fixtures/conformance-conversation-vs-clarification.json +26 -0
  29. package/fixtures/conformance-delay.json +33 -0
  30. package/fixtures/conformance-dispatch-loop.json +38 -0
  31. package/fixtures/conformance-failure.json +23 -0
  32. package/fixtures/conformance-idempotent.json +30 -0
  33. package/fixtures/conformance-identity.json +32 -0
  34. package/fixtures/conformance-interrupt-auth-required.json +28 -0
  35. package/fixtures/conformance-interrupt-external-event.json +33 -0
  36. package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
  37. package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
  38. package/fixtures/conformance-interrupt-quorum.json +30 -0
  39. package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
  40. package/fixtures/conformance-message-reducer.json +31 -0
  41. package/fixtures/conformance-multi-node.json +21 -0
  42. package/fixtures/conformance-noop.json +23 -0
  43. package/fixtures/conformance-orchestrator-dispatch.json +47 -0
  44. package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
  45. package/fixtures/conformance-orchestrator-terminate.json +44 -0
  46. package/fixtures/conformance-stream-text.json +26 -0
  47. package/fixtures/conformance-subworkflow-child.json +21 -0
  48. package/fixtures/conformance-subworkflow-parent.json +49 -0
  49. package/fixtures/conformance-version-fold.json +23 -0
  50. package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
  51. package/fixtures/pack-manifests/pack-private-example.json +26 -0
  52. package/fixtures.md +404 -0
  53. package/package.json +48 -0
  54. package/schemas/README.md +75 -0
  55. package/schemas/agent-manifest.schema.json +107 -0
  56. package/schemas/agent-ref.schema.json +53 -0
  57. package/schemas/capabilities.schema.json +287 -0
  58. package/schemas/channel-written-payload.schema.json +55 -0
  59. package/schemas/conversation-event.schema.json +120 -0
  60. package/schemas/conversation-turn.schema.json +72 -0
  61. package/schemas/debug-bundle.schema.json +196 -0
  62. package/schemas/dispatch-config.schema.json +46 -0
  63. package/schemas/error-envelope.schema.json +25 -0
  64. package/schemas/memory-entry.schema.json +36 -0
  65. package/schemas/memory-list-options.schema.json +21 -0
  66. package/schemas/node-pack-manifest.schema.json +235 -0
  67. package/schemas/orchestrator-decision.schema.json +60 -0
  68. package/schemas/run-event-payloads.schema.json +663 -0
  69. package/schemas/run-event.schema.json +116 -0
  70. package/schemas/run-options.schema.json +81 -0
  71. package/schemas/run-orchestrator-decided-event.schema.json +20 -0
  72. package/schemas/run-snapshot.schema.json +121 -0
  73. package/schemas/suspend-request.schema.json +182 -0
  74. package/schemas/workflow-definition.schema.json +430 -0
  75. package/src/cli.ts +187 -0
  76. package/src/lib/a2a-fake-peer.ts +233 -0
  77. package/src/lib/canaries.ts +186 -0
  78. package/src/lib/driver.ts +96 -0
  79. package/src/lib/env.ts +49 -0
  80. package/src/lib/fixtures.ts +93 -0
  81. package/src/lib/mcp-fake-server.ts +185 -0
  82. package/src/lib/multi-agent-capabilities.ts +155 -0
  83. package/src/lib/multiProcess.ts +141 -0
  84. package/src/lib/otel-collector.ts +312 -0
  85. package/src/lib/paths.ts +198 -0
  86. package/src/lib/polling.ts +81 -0
  87. package/src/lib/profiles.ts +258 -0
  88. package/src/lib/sse.ts +172 -0
  89. package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
  90. package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
  91. package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
  92. package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
  93. package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
  94. package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
  95. package/src/scenarios/agentMessageReducer.test.ts +57 -0
  96. package/src/scenarios/agentMetadata.test.ts +56 -0
  97. package/src/scenarios/agentPackExport.test.ts +45 -0
  98. package/src/scenarios/agentPackInstall.test.ts +50 -0
  99. package/src/scenarios/agentPackProvenance.test.ts +53 -0
  100. package/src/scenarios/agentReasoningEvents.test.ts +72 -0
  101. package/src/scenarios/append-ordering.test.ts +91 -0
  102. package/src/scenarios/approval-payload.test.ts +120 -0
  103. package/src/scenarios/audit-log-integrity.test.ts +106 -0
  104. package/src/scenarios/auth.test.ts +55 -0
  105. package/src/scenarios/byok-roundtrip.test.ts +166 -0
  106. package/src/scenarios/cancellation.test.ts +68 -0
  107. package/src/scenarios/cap-breach.test.ts +149 -0
  108. package/src/scenarios/channel-ttl.test.ts +70 -0
  109. package/src/scenarios/configurable-schema.test.ts +76 -0
  110. package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
  111. package/src/scenarios/conversationLifecycle.test.ts +64 -0
  112. package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
  113. package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
  114. package/src/scenarios/cost-attribution.test.ts +207 -0
  115. package/src/scenarios/debugBundle.test.ts +222 -0
  116. package/src/scenarios/discovery.test.ts +147 -0
  117. package/src/scenarios/dispatchLoop.test.ts +52 -0
  118. package/src/scenarios/errors.test.ts +144 -0
  119. package/src/scenarios/eventOrdering.test.ts +144 -0
  120. package/src/scenarios/failure-path.test.ts +46 -0
  121. package/src/scenarios/fixtures-gating.test.ts +137 -0
  122. package/src/scenarios/fixtures-valid.test.ts +140 -0
  123. package/src/scenarios/highConcurrency.test.ts +263 -0
  124. package/src/scenarios/idempotency.test.ts +83 -0
  125. package/src/scenarios/idempotencyRetry.test.ts +130 -0
  126. package/src/scenarios/identity-passthrough.test.ts +54 -0
  127. package/src/scenarios/interrupt-approval.test.ts +97 -0
  128. package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
  129. package/src/scenarios/interrupt-clarification.test.ts +45 -0
  130. package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
  131. package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
  132. package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
  133. package/src/scenarios/interruptRace.test.ts +176 -0
  134. package/src/scenarios/maliciousManifest.test.ts +154 -0
  135. package/src/scenarios/mcp-discoverability.test.ts +129 -0
  136. package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
  137. package/src/scenarios/multi-node-ordering.test.ts +60 -0
  138. package/src/scenarios/multi-region-idempotency.test.ts +52 -0
  139. package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
  140. package/src/scenarios/orchestratorDispatch.test.ts +66 -0
  141. package/src/scenarios/orchestratorTermination.test.ts +54 -0
  142. package/src/scenarios/otel-emission.test.ts +113 -0
  143. package/src/scenarios/otel-trace-propagation.test.ts +90 -0
  144. package/src/scenarios/pack-registry-publish.test.ts +93 -0
  145. package/src/scenarios/pack-registry.test.ts +328 -0
  146. package/src/scenarios/pause-resume.test.ts +109 -0
  147. package/src/scenarios/policies.test.ts +162 -0
  148. package/src/scenarios/profileDerivation.test.ts +335 -0
  149. package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
  150. package/src/scenarios/rate-limit-envelope.test.ts +97 -0
  151. package/src/scenarios/redaction.test.ts +254 -0
  152. package/src/scenarios/redactionAdversarial.test.ts +162 -0
  153. package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
  154. package/src/scenarios/replay-fork.test.ts +216 -0
  155. package/src/scenarios/replayDeterminism.test.ts +171 -0
  156. package/src/scenarios/route-coverage.test.ts +129 -0
  157. package/src/scenarios/runs-lifecycle.test.ts +65 -0
  158. package/src/scenarios/runtime-capabilities.test.ts +118 -0
  159. package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
  160. package/src/scenarios/staleClaim.test.ts +223 -0
  161. package/src/scenarios/stream-modes-buffer.test.ts +148 -0
  162. package/src/scenarios/stream-modes-mixed.test.ts +149 -0
  163. package/src/scenarios/stream-modes.test.ts +139 -0
  164. package/src/scenarios/streamReconnect.test.ts +162 -0
  165. package/src/scenarios/subworkflow.test.ts +126 -0
  166. package/src/scenarios/version-negotiation.test.ts +157 -0
  167. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
  168. package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
  169. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
  170. package/src/scenarios/wasm-pack-load.test.ts +75 -0
  171. package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
  172. package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
  173. package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
  174. package/src/setup.ts +173 -0
  175. package/vitest.config.ts +17 -0
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Compatibility-profile derivation for openwop v1.x.
3
+ *
4
+ * Profiles are a named set of capability requirements. A host's profile
5
+ * set is derived from the `/.well-known/openwop` discovery payload — never
6
+ * declared as a separate wire field. See `spec/v1/profiles.md` for the
7
+ * normative predicate definitions.
8
+ *
9
+ * This module is the single canonical implementation of profile membership.
10
+ * Conformance scenarios use it to gate profile-specific assertions; SDKs
11
+ * MAY re-export the derivation helper to give clients a way to ask
12
+ * "does this host satisfy `openwop-secrets`?" without re-implementing the
13
+ * predicates.
14
+ *
15
+ * **Derivation is deterministic and pure.** Same payload, same profile
16
+ * set. No time-of-day, host-specific state, or hidden inputs.
17
+ */
18
+
19
/**
 * The closed v1.x profile catalog, listed in canonical order.
 *
 * Extending this list requires an RFC per `RFCS/0001-rfc-process.md`.
 */
export const PROFILE_NAMES = [
  'openwop-core',
  'openwop-interrupts',
  'openwop-stream-sse',
  'openwop-stream-poll',
  'openwop-secrets',
  'openwop-provider-policy',
  'openwop-node-packs',
  'openwop-replay-fork',
  'openwop-fixtures',
] as const;

/** Union of the string literals in `PROFILE_NAMES`. */
export type ProfileName = (typeof PROFILE_NAMES)[number];
36
+
37
/**
 * Deliberately loose shape of the discovery payload.
 *
 * Every leaf is `unknown` and every object level carries an index
 * signature, so the profile predicates must narrow each value at runtime
 * before using it. Schema-level validation is the job of the conformance
 * suite's `discovery.test.ts`, not of this type.
 */
export interface DiscoveryPayload {
  protocolVersion?: unknown;
  supportedEnvelopes?: unknown;
  schemaVersions?: unknown;
  /** Numeric limits inspected by the `openwop-core` predicate. */
  limits?: {
    clarificationRounds?: unknown;
    schemaRounds?: unknown;
    envelopesPerTurn?: unknown;
    [key: string]: unknown;
  };
  supportedTransports?: unknown;
  /** Inspected by the `openwop-secrets` predicate. */
  secrets?: {
    supported?: unknown;
    scopes?: unknown;
    [key: string]: unknown;
  };
  /** `policies.modes` is inspected by the `openwop-provider-policy` predicate. */
  aiProviders?: {
    supported?: unknown;
    byok?: unknown;
    policies?: {
      modes?: unknown;
      [key: string]: unknown;
    };
    [key: string]: unknown;
  };
  /** Inspected by the `openwop-replay-fork` predicate. */
  replay?: {
    supported?: unknown;
    modes?: unknown;
    [key: string]: unknown;
  };
  /** Inspected by the `openwop-fixtures` predicate. */
  fixtures?: unknown;
  [key: string]: unknown;
}
75
+
76
/**
 * Type guard: `value` is an array in which every element is a string.
 * An empty array qualifies.
 */
function isStringArray(value: unknown): value is readonly string[] {
  if (!Array.isArray(value)) return false;
  const hasNonString = value.some((entry) => typeof entry !== 'string');
  return !hasNonString;
}
79
+
80
/**
 * Type guard: `value` is a JavaScript number that is an integer and is
 * greater than or equal to zero. `NaN`, infinities, fractions, and numeric
 * strings are rejected.
 */
function isNonNegativeInteger(value: unknown): value is number {
  if (typeof value !== 'number') return false;
  return value >= 0 && Number.isInteger(value);
}
83
+
84
/**
 * `openwop-core` predicate. Every other profile predicate in this module
 * requires it, so a host that fails here is not openwop-compatible.
 *
 * Passes when all of the following hold:
 * - `protocolVersion` is a string starting with `'1.'`
 * - `supportedEnvelopes` is an array of strings
 * - `schemaVersions` is a non-null object
 * - `limits` is a non-null object whose `clarificationRounds`,
 *   `schemaRounds`, and `envelopesPerTurn` are non-negative integers
 *
 * @see spec/v1/profiles.md §`openwop-core`
 */
export function isCore(c: DiscoveryPayload): boolean {
  const { protocolVersion, supportedEnvelopes, schemaVersions, limits } = c;

  if (typeof protocolVersion !== 'string' || !protocolVersion.startsWith('1.')) return false;

  if (!Array.isArray(supportedEnvelopes)) return false;
  if (supportedEnvelopes.some((entry) => typeof entry !== 'string')) return false;

  if (schemaVersions === null || typeof schemaVersions !== 'object') return false;
  if (limits === null || typeof limits !== 'object') return false;

  return (
    isNonNegativeInteger(limits.clarificationRounds) &&
    isNonNegativeInteger(limits.schemaRounds) &&
    isNonNegativeInteger(limits.envelopesPerTurn)
  );
}
102
+
103
/**
 * `openwop-interrupts` predicate: the host satisfies `openwop-core` and
 * lists `'clarification.request'` among its `supportedEnvelopes`.
 *
 * @see spec/v1/profiles.md §`openwop-interrupts`
 */
export function isInterrupts(c: DiscoveryPayload): boolean {
  const envelopes = c.supportedEnvelopes;
  return isCore(c) && isStringArray(envelopes) && envelopes.includes('clarification.request');
}
113
+
114
/**
 * `openwop-stream-sse` predicate, evaluated from the discovery payload only
 * (runtime SSE behavior is verified by `stream-modes*.test.ts`).
 *
 * Requires `openwop-core`. A missing (`null`/`undefined`)
 * `supportedTransports` satisfies the predicate; otherwise it must be a
 * string array containing `'rest'`.
 *
 * NOTE(review): the check is currently identical to `isStreamPoll` — both
 * look for `'rest'`. Confirm against spec/v1/profiles.md that this is intended.
 *
 * @see spec/v1/profiles.md §`openwop-stream-sse`
 */
export function isStreamSse(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  const transports = c.supportedTransports;
  if (transports === undefined || transports === null) return true;
  return isStringArray(transports) && transports.includes('rest');
}
126
+
127
/**
 * `openwop-stream-poll` predicate, evaluated from the discovery payload only
 * (runtime polling behavior is verified by `stream-modes.test.ts`).
 *
 * Requires `openwop-core`. A missing (`null`/`undefined`)
 * `supportedTransports` satisfies the predicate; otherwise it must be a
 * string array containing `'rest'`.
 *
 * @see spec/v1/profiles.md §`openwop-stream-poll`
 */
export function isStreamPoll(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  const transports = c.supportedTransports;
  if (transports === undefined || transports === null) return true;
  return isStringArray(transports) && transports.includes('rest');
}
139
+
140
/**
 * `openwop-secrets` predicate: requires `openwop-core`, a `secrets` object
 * with `supported` strictly equal to `true`, and a string-array `scopes`
 * that contains `'user'`.
 *
 * @see spec/v1/profiles.md §`openwop-secrets`
 */
export function isSecrets(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  const secrets = c.secrets;
  if (secrets == null || typeof secrets !== 'object') return false;
  const scopes = secrets.scopes;
  return secrets.supported === true && isStringArray(scopes) && scopes.includes('user');
}
152
+
153
/**
 * `openwop-provider-policy` predicate: requires `openwop-core`, an
 * `aiProviders.policies` object, and a string-array `policies.modes` that
 * contains `'optional'`.
 *
 * @see spec/v1/profiles.md §`openwop-provider-policy`
 */
export function isProviderPolicy(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;

  const providers = c.aiProviders;
  if (providers == null || typeof providers !== 'object') return false;

  const policies = providers.policies;
  if (policies == null || typeof policies !== 'object') return false;

  const modes = policies.modes;
  // An empty list can never contain 'optional', so no separate length check is needed.
  return isStringArray(modes) && modes.includes('optional');
}
167
+
168
/**
 * Discovery-only predicate for `openwop-node-packs`.
 *
 * The discovery payload cannot reveal whether `GET /v1/packs` returns a
 * list-shaped body, so this check reduces to `openwop-core` membership.
 * Runtime registry behavior is verified by `pack-registry*.test.ts`.
 *
 * @see spec/v1/profiles.md §`openwop-node-packs`
 */
export function isNodePacksDiscovery(c: DiscoveryPayload): boolean {
  const satisfiesCore = isCore(c);
  return satisfiesCore;
}
178
+
179
/**
 * `openwop-replay-fork` predicate: requires `openwop-core`, a `replay`
 * object with `supported` strictly equal to `true`, and a non-empty
 * string-array `replay.modes`. Runtime determinism / branch behavior is
 * verified by `replayDeterminism.test.ts` and `replay-fork.test.ts`.
 *
 * @see spec/v1/profiles.md §`openwop-replay-fork`
 * @see spec/v1/replay.md
 */
export function isReplayFork(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  const replay = c.replay;
  if (replay == null || typeof replay !== 'object') return false;
  const modes = replay.modes;
  return replay.supported === true && isStringArray(modes) && modes.length > 0;
}
195
+
196
/**
 * `openwop-fixtures` predicate (RFC 0003): requires `openwop-core` and a
 * `fixtures` value that is a non-empty array containing only non-empty
 * strings. This is the all-up "any-advertised" check; per-fixture skip
 * decisions are made by the suite via `lib/fixtures.ts`.
 *
 * @see spec/v1/profiles.md §`openwop-fixtures`
 * @see spec/v1/capabilities.md §`fixtures`
 * @see RFCS/0003-fixture-gating.md
 */
export function isFixtures(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  const advertised = c.fixtures;
  if (!Array.isArray(advertised) || advertised.length === 0) return false;
  const hasInvalidId = advertised.some((id) => typeof id !== 'string' || id === '');
  return !hasInvalidId;
}
213
+
214
/**
 * Derive the full profile list from a discovery payload.
 *
 * Each predicate is evaluated in `PROFILE_NAMES` order and the names of the
 * satisfied ones are returned in that same order, so the output is stable
 * across calls and across implementations. A fresh array is returned on
 * every call.
 */
export function deriveProfiles(c: DiscoveryPayload): readonly ProfileName[] {
  const catalog: ReadonlyArray<readonly [ProfileName, (payload: DiscoveryPayload) => boolean]> = [
    ['openwop-core', isCore],
    ['openwop-interrupts', isInterrupts],
    ['openwop-stream-sse', isStreamSse],
    ['openwop-stream-poll', isStreamPoll],
    ['openwop-secrets', isSecrets],
    ['openwop-provider-policy', isProviderPolicy],
    ['openwop-node-packs', isNodePacksDiscovery],
    ['openwop-replay-fork', isReplayFork],
    ['openwop-fixtures', isFixtures],
  ];
  return catalog.filter(([, holds]) => holds(c)).map(([name]) => name);
}
233
+
234
/**
 * One-shot membership check: does the discovery payload `c` satisfy the
 * named `profile`?
 *
 * @param c - Discovery payload to evaluate.
 * @param profile - A name from the closed `PROFILE_NAMES` catalog.
 * @returns `true` when the matching predicate holds. A name outside the
 *   catalog (only reachable from untyped callers) yields `false` rather
 *   than `undefined`, so the result is always a boolean.
 */
export function hasProfile(c: DiscoveryPayload, profile: ProfileName): boolean {
  switch (profile) {
    case 'openwop-core':
      return isCore(c);
    case 'openwop-interrupts':
      return isInterrupts(c);
    case 'openwop-stream-sse':
      return isStreamSse(c);
    case 'openwop-stream-poll':
      return isStreamPoll(c);
    case 'openwop-secrets':
      return isSecrets(c);
    case 'openwop-provider-policy':
      return isProviderPolicy(c);
    case 'openwop-node-packs':
      return isNodePacksDiscovery(c);
    case 'openwop-replay-fork':
      return isReplayFork(c);
    case 'openwop-fixtures':
      return isFixtures(c);
    default: {
      // Compile-time exhaustiveness check: adding a catalog entry without a
      // case above makes this assignment a type error.
      const unhandled: never = profile;
      void unhandled;
      // Runtime guard for plain-JS callers passing an unknown name.
      return false;
    }
  }
}
package/src/lib/sse.ts ADDED
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Minimal SSE client for the conformance suite.
3
+ *
4
+ * Why hand-rolled rather than `eventsource` npm package: keeping the
5
+ * conformance suite zero-dependency on third-party SSE libs makes it
6
+ * easier to audit and to port to other ecosystems. Native fetch +
7
+ * ReadableStream parsing is enough for our scope.
8
+ *
9
+ * Scope:
10
+ * - parses the `event:` / `data:` / `id:` lines per the WHATWG HTML Living Standard ("Server-sent events")
11
+ * - fires a callback for each parsed event
12
+ * - resolves the connection promise when the server closes the stream
13
+ * - bounded by an absolute timeout (no infinite hangs in CI)
14
+ *
15
+ * NOT supported (not needed for the v1 stream-mode scenarios):
16
+ * - automatic reconnect with Last-Event-ID
17
+ * - retry intervals from `retry:` lines
18
+ * - keep-alive comment handling beyond ignoring lines that start with ':'
19
+ */
20
+
21
+ import { loadEnv } from './env.js';
22
+
23
/** One parsed server-sent event. */
export interface SseEvent {
  /** Event type; `'message'` when the event carried no `event:` line. */
  readonly event: string;
  /** The raw `data:` line values, joined with `\n`. */
  readonly data: string;
  /** Value of the last `id:` line in the event, or `null` if there was none. */
  readonly id: string | null;
}
28
+
29
/** Optional settings accepted by `subscribe`. */
export interface SseSubscribeOptions {
  /**
   * Absolute timeout in milliseconds. The connection is aborted once it
   * elapses, regardless of state. Defaults to 30 seconds.
   */
  readonly timeoutMs?: number;
  /** When set, sent as the `Last-Event-ID` request header for resumption. */
  readonly lastEventId?: string;
  /** Caller-owned abort signal; useful for cancellation in long tests. */
  readonly signal?: AbortSignal;
}
37
+
38
/** Outcome of a `subscribe` call. */
export interface SseSubscribeResult {
  /** Every event collected before the stream ended, in arrival order. */
  readonly events: readonly SseEvent[];
  /** HTTP status code of the response. */
  readonly status: number;
  /** What ended the stream: the server, the absolute timeout, or the caller's signal. */
  readonly closedBy: 'server' | 'timeout' | 'caller';
}
43
+
44
/**
 * Subscribe to an SSE endpoint, collect every event until the server
 * closes the connection (or the timeout / caller abort fires), and return
 * the full event list. Use when the test expects a bounded stream.
 *
 * Parsing notes:
 * - Lines are split on `\n`; a trailing `\r` is stripped, so CRLF streams work.
 * - Lines starting with `:` are comments / keep-alives and are ignored.
 * - A blank line dispatches the pending event; an event with no `data:`
 *   lines is discarded.
 * - When the server closes the stream, a final line that was not terminated
 *   by a newline is still parsed, and any pending event is dispatched
 *   (some servers drop the trailing `\n\n` on close).
 *
 * @param pathWithQuery - Path (plus optional query string) appended to the
 *   configured base URL.
 * @param opts - Timeout, `Last-Event-ID`, and caller abort signal.
 * @returns Collected events, the HTTP status, and what closed the stream.
 *   A non-2xx response or a response without a body yields an empty event
 *   list with `closedBy: 'server'`.
 * @throws Whatever `fetch` rejects with before a response arrives (including
 *   an abort during connection), and any non-abort error raised while
 *   reading the body.
 */
export async function subscribe(
  pathWithQuery: string,
  opts: SseSubscribeOptions = {},
): Promise<SseSubscribeResult> {
  const env = loadEnv();
  const url = `${env.baseUrl}${pathWithQuery}`;
  const timeoutMs = opts.timeoutMs ?? 30_000;

  const headers: Record<string, string> = {
    Accept: 'text/event-stream',
    Authorization: `Bearer ${env.apiKey}`,
    'Cache-Control': 'no-cache',
  };
  if (opts.lastEventId) {
    headers['Last-Event-ID'] = opts.lastEventId;
  }

  // A single internal controller is driven by both the absolute timeout and
  // the caller's signal, so fetch only ever observes one abort source.
  const internalAbort = new AbortController();
  const timeoutHandle = setTimeout(() => internalAbort.abort(), timeoutMs);
  const externalSignal = opts.signal;
  const onExternalAbort = (): void => internalAbort.abort();
  if (externalSignal) {
    if (externalSignal.aborted) internalAbort.abort();
    else externalSignal.addEventListener('abort', onExternalAbort, { once: true });
  }

  // Clears the timer and detaches the listener so a long-lived caller signal
  // does not accumulate one dead listener per subscribe() call.
  const disarm = (): void => {
    clearTimeout(timeoutHandle);
    externalSignal?.removeEventListener('abort', onExternalAbort);
  };

  let res: Response;
  try {
    res = await fetch(url, { method: 'GET', headers, signal: internalAbort.signal });
  } catch (err) {
    disarm();
    throw err;
  }

  if (!res.ok || res.body === null) {
    disarm();
    // Discard the unread body (best-effort) so the connection is released
    // instead of lingering until garbage collection.
    await res.body?.cancel().catch(() => undefined);
    return { events: [], status: res.status, closedBy: 'server' };
  }

  const events: SseEvent[] = [];
  const reader = res.body.getReader();
  const decoder = new TextDecoder('utf-8');

  let buffer = '';
  let pendingEvent = 'message';
  let pendingData: string[] = [];
  let pendingId: string | null = null;
  let closedBy: SseSubscribeResult['closedBy'] = 'server';

  // Dispatches the pending event (if it has any data) and resets parser state.
  const flushEvent = (): void => {
    if (pendingData.length > 0) {
      events.push({
        event: pendingEvent,
        data: pendingData.join('\n'),
        id: pendingId,
      });
    }
    pendingEvent = 'message';
    pendingData = [];
    pendingId = null;
  };

  // Interprets one complete line (line terminator already removed).
  const handleLine = (line: string): void => {
    if (line === '') {
      flushEvent();
      return;
    }
    if (line.startsWith(':')) {
      // Comment / keep-alive — ignore.
      return;
    }
    const colon = line.indexOf(':');
    const field = colon === -1 ? line : line.slice(0, colon);
    const valueRaw = colon === -1 ? '' : line.slice(colon + 1);
    // Exactly one leading space after the colon is not part of the value.
    const fieldValue = valueRaw.startsWith(' ') ? valueRaw.slice(1) : valueRaw;

    switch (field) {
      case 'event':
        pendingEvent = fieldValue;
        break;
      case 'data':
        pendingData.push(fieldValue);
        break;
      case 'id':
        pendingId = fieldValue;
        break;
      default:
        // Unknown field (including `retry`) — ignored.
        break;
    }
  };

  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      let nlIdx: number;
      while ((nlIdx = buffer.indexOf('\n')) !== -1) {
        const rawLine = buffer.slice(0, nlIdx).replace(/\r$/, '');
        buffer = buffer.slice(nlIdx + 1);
        handleLine(rawLine);
      }
    }

    // Clean end of stream: flush the decoder and parse a final line that the
    // server did not terminate with a newline. This is skipped on abort,
    // where the residue may be a line truncated mid-transfer.
    buffer += decoder.decode();
    const tail = buffer.replace(/\r$/, '');
    buffer = '';
    if (tail !== '') handleLine(tail);
  } catch (err) {
    if ((err as { name?: unknown } | null)?.name === 'AbortError') {
      closedBy = externalSignal?.aborted ? 'caller' : 'timeout';
    } else {
      throw err;
    }
  } finally {
    disarm();
    try {
      reader.releaseLock();
    } catch {
      // best-effort
    }
  }

  // Dispatch a pending event that wasn't terminated by a blank line (some
  // servers drop the trailing \n\n on close).
  flushEvent();

  return { events, status: res.status, closedBy };
}
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Track 6: A2A task-roundtrip + state-projection conformance.
3
+ *
4
+ * Exercises the four documented drift points from
5
+ * `spec/v1/a2a-integration.md` §"State projection":
6
+ *
7
+ * #1. openwop `paused` → A2A `WORKING` (forward, lossy via metadata)
8
+ * #2. openwop `waiting-approval` / `waiting-input` → A2A `INPUT_REQUIRED` (lossy)
9
+ * #3. A2A `AUTH_REQUIRED` → openwop `waiting-input` (no native auth kind)
10
+ * #4. A2A `REJECTED` → openwop `failed` with `reason: 'rejected_by_remote'`
11
+ *
12
+ * Two layers:
13
+ *
14
+ * - **Direct fake-peer probe** (always when peer started): walks the
15
+ * fake peer through SUBMITTED → WORKING → COMPLETED
16
+ * and asserts the AgentCard + task lifecycle wire shape.
17
+ * - **Host-mediated reverse-projection** (gated on fixture
18
+ * advertisement): when the host advertises
19
+ * `conformance-a2a-task-roundtrip`, run it against the fake peer
20
+ * forced into AUTH_REQUIRED / REJECTED to verify the host applies
21
+ * the documented projections.
22
+ *
23
+ * Operator contract: `OPENWOP_A2A_FAKE_PEER=true` on suite side; configure
24
+ * the host to use the printed AgentCard URL.
25
+ *
26
+ * @see spec/v1/a2a-integration.md §"State projection"
27
+ */
28
+
29
+ import { describe, it, expect } from 'vitest';
30
+ import { driver } from '../lib/driver.js';
31
+ import { getA2AFakePeer } from '../lib/a2a-fake-peer.js';
32
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
33
+ import { pollUntilTerminal, pollUntilStatus } from '../lib/polling.js';
34
+
35
/** Workflow ID of the host-side fixture that the drift-point subtests are gated on. */
const ROUNDTRIP_FIXTURE = 'conformance-a2a-task-roundtrip';

/**
 * Direct fake-peer probe: talks to the fake A2A peer without involving the
 * host. Logs a warning and returns early when the peer was not started.
 */
describe('a2a-task-roundtrip: AgentCard + task lifecycle', () => {
  it('AgentCard exposes protocolVersion + skills; task SUBMITTED → COMPLETED', async () => {
    const peer = getA2AFakePeer();
    if (!peer) {
      // eslint-disable-next-line no-console
      console.warn('[a2a-task-roundtrip] peer not started; set OPENWOP_A2A_FAKE_PEER=true');
      return;
    }
    peer.reset();

    // AgentCard fetch.
    const card = await fetch(`${peer.endpoint()}/agent.json`);
    expect(card.status).toBe(200);
    const cardJson = (await card.json()) as { protocolVersion?: string; skills?: unknown[] };
    expect(typeof cardJson.protocolVersion).toBe('string');
    expect(Array.isArray(cardJson.skills)).toBe(true);

    // Create a task.
    const create = await fetch(`${peer.endpoint()}/tasks`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ skill: 'echo', input: { text: 'hello' } }),
    });
    expect(create.status).toBe(200);
    const { taskId } = (await create.json()) as { taskId: string; state: string };
    // Fail here with a clear message rather than later on a `/tasks/undefined` lookup.
    expect(typeof taskId).toBe('string');

    // Advance through states.
    peer.advanceTask(taskId, 'WORKING');
    peer.advanceTask(taskId, 'COMPLETED');

    // Read the task back; check the status first so a failed lookup is
    // reported as such instead of as a state mismatch.
    const get = await fetch(`${peer.endpoint()}/tasks/${taskId}`);
    expect(get.status).toBe(200);
    const finalTask = (await get.json()) as { state: string };
    expect(finalTask.state).toBe('COMPLETED');
  });
});
72
+
73
/**
 * Host-mediated check for drift point #3: the fake peer is forced into
 * AUTH_REQUIRED and the host run is expected to reach `waiting-input`.
 * Skipped (with a warning) when the fake peer is not started or the host
 * does not advertise the roundtrip fixture.
 */
describe('a2a-task-roundtrip: drift point #3 — AUTH_REQUIRED projects to waiting-input', () => {
  it('host consuming an A2A peer that returns AUTH_REQUIRED projects to waiting-input with metadata.subkind=auth', async () => {
    const peer = getA2AFakePeer();
    if (!peer) {
      // eslint-disable-next-line no-console
      console.warn('[a2a-task-roundtrip] peer not started; skipping drift-point #3 subtest');
      return;
    }
    if (!isFixtureAdvertised(ROUNDTRIP_FIXTURE)) {
      // eslint-disable-next-line no-console
      console.warn(
        `[a2a-task-roundtrip] fixture ${ROUNDTRIP_FIXTURE} not advertised; skipping drift-point #3 subtest`,
      );
      return;
    }
    peer.reset();
    peer.setNextState('AUTH_REQUIRED');

    const create = await driver.post('/v1/runs', {
      workflowId: ROUNDTRIP_FIXTURE,
      inputs: { driftScenario: 'auth-required' },
    });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    try {
      // Host should project AUTH_REQUIRED into `waiting-input` per
      // a2a-integration.md §"State projection (reverse)".
      const snapshot = await pollUntilStatus(runId, 'waiting-input', { timeoutMs: 15_000 });
      expect(snapshot.status, driver.describe(
        'a2a-integration.md §"State projection" drift point #3',
        "A2A AUTH_REQUIRED MUST project to openwop 'waiting-input' (no native auth-required kind in v1)",
      )).toBe('waiting-input');
    } finally {
      // Cleanup runs even when polling or the assertion throws, so a failing
      // test does not leak a suspended run on the host.
      await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
        reason: 'conformance-cleanup',
      });
    }
  });
});
112
+
113
/**
 * Host-mediated check for drift point #4: the fake peer is forced into
 * REJECTED and the host run is expected to terminate as `failed`.
 * Skipped (with a warning) when the fake peer is not started or the host
 * does not advertise the roundtrip fixture.
 */
describe('a2a-task-roundtrip: drift point #4 — REJECTED projects to failed', () => {
  it('host consuming an A2A peer that returns REJECTED projects to failed with rejected_by_remote', async () => {
    const peer = getA2AFakePeer();
    if (!peer) {
      // eslint-disable-next-line no-console
      console.warn('[a2a-task-roundtrip] peer not started; skipping drift-point #4 subtest');
      return;
    }
    if (!isFixtureAdvertised(ROUNDTRIP_FIXTURE)) {
      // Warn like the drift-point #3 subtest does, so a silent pass is not
      // mistaken for real coverage.
      // eslint-disable-next-line no-console
      console.warn(
        `[a2a-task-roundtrip] fixture ${ROUNDTRIP_FIXTURE} not advertised; skipping drift-point #4 subtest`,
      );
      return;
    }
    peer.reset();
    peer.setNextState('REJECTED');

    const create = await driver.post('/v1/runs', {
      workflowId: ROUNDTRIP_FIXTURE,
      inputs: { driftScenario: 'rejected' },
    });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;

    const terminal = await pollUntilTerminal(runId, { timeoutMs: 15_000 });
    expect(terminal.status, driver.describe(
      'a2a-integration.md §"State projection" drift point #4',
      'A2A REJECTED MUST project to openwop terminal status `failed`',
    )).toBe('failed');

    // Reason carrier: host MAY surface 'rejected_by_remote' in the run
    // snapshot, the final node payload, or the run-level error envelope.
    // We accept any of those: stringify the snapshot and search.
    // NOTE(review): if the snapshot echoes run inputs, the substring
    // 'rejected' would also match `driftScenario: 'rejected'` and make this
    // check vacuous — confirm against the snapshot shape.
    const haystack = JSON.stringify(terminal).toLowerCase();
    expect(haystack.includes('rejected'), driver.describe(
      'a2a-integration.md §"State projection" drift point #4',
      "host SHOULD surface 'rejected_by_remote' (or equivalent) so observers can attribute the failure to the remote A2A peer",
    )).toBe(true);
  });
});
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Multi-Agent Shift Phase 1 — confidence-escalation contract (CP-1).
3
+ *
4
+ * Verifies: when an `agent.decided` event carries `confidence < threshold`,
5
+ * the host MUST emit `node.suspended { reason: 'low-confidence' }` and
6
+ * transition the run to `'waiting-approval'`. Resume value carries the
7
+ * operator-ratified decision; a follow-up `agent.decided` (or
8
+ * `runOrchestrator.decided`) follows after resume.
9
+ *
10
+ * Capability-gated: skips when host doesn't advertise
11
+ * `capabilities.agents.supported: true`. Fixture-gated: requires
12
+ * `conformance-agent-low-confidence` with mock confidence below the
13
+ * default 0.7 threshold.
14
+ *
15
+ * @see spec/v1/interrupt.md §`low-confidence`
16
+ * @see spec/v1/run-options.md §`escalationThreshold`
17
+ */
18
+
19
+ import { describe, it, expect } from 'vitest';
20
+ import { driver } from '../lib/driver.js';
21
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
22
+ import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
23
+
const FIXTURE = 'conformance-agent-low-confidence';
// Evaluated once at module load: the suite is skipped unless the host
// advertises agent support AND the low-confidence fixture.
const SKIP = !isAgentSupported() || !isFixtureAdvertised(FIXTURE);

/**
 * CP-1: a low-confidence `agent.decided` must suspend the node with
 * `reason: 'low-confidence'` and move the run to `waiting-approval`.
 *
 * The test creates a run, polls its snapshot until it suspends (or settles
 * in `failed` / `completed`), then inspects the event log for the matching
 * `node.suspended` event and the shape of its payload.
 */
describe.skipIf(SKIP)('agentConfidenceEscalation: confidence < threshold → low-confidence suspend', () => {
  it('low-confidence agent.decided suspends with reason=low-confidence and run reaches waiting-approval', async () => {
    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;
    const runPath = `/v1/runs/${encodeURIComponent(runId)}`;

    try {
      // Wait for the run to suspend (not terminal): up to 40 polls, 100 ms apart.
      let snap: { status: string } | undefined;
      let lastStatus: string | undefined;
      for (let i = 0; i < 40; i++) {
        const res = await driver.get(runPath);
        const body = res.json as { status: string };
        lastStatus = body.status;
        if (body.status === 'waiting-approval' || body.status === 'failed' || body.status === 'completed') {
          snap = body;
          break;
        }
        await new Promise((r) => setTimeout(r, 100));
      }
      // `snap` stays undefined when polling ran out; report the last status
      // seen so a timeout is distinguishable from a wrong settled status.
      expect(
        snap?.status,
        `CP-1: run MUST reach 'waiting-approval' (last observed status: ${String(lastStatus)})`,
      ).toBe('waiting-approval');

      const events = await driver.get(`${runPath}/events`);
      const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
        .events ?? [];

      const lowConfSuspend = list.find(
        (e) => e.type === 'node.suspended' && e.payload?.reason === 'low-confidence',
      );
      expect(lowConfSuspend, 'CP-1: low-confidence agent.decided MUST emit node.suspended { reason: low-confidence }').toBeDefined();

      // The suspend payload must identify the agent and report an observed
      // confidence strictly below the threshold that triggered escalation.
      const payload = lowConfSuspend!.payload as Record<string, unknown>;
      expect(typeof payload.agentId).toBe('string');
      expect(typeof payload.threshold).toBe('number');
      expect(typeof payload.observed).toBe('number');
      expect(payload.observed).toBeLessThan(payload.threshold as number);
    } finally {
      // Cleanup so we don't leak a suspended run. Best-effort: a failing
      // cancel must not mask an assertion failure raised above.
      try {
        await driver.post(`${runPath}/cancel`, { reason: 'conformance-cleanup' });
      } catch {
        // Ignored on purpose — the run may already be terminal or gone.
      }
    }
  });
});
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Multi-Agent Shift Phase 3 — CTI-1 cross-tenant isolation invariant.
3
+ *
4
+ * Verifies the CTI-1 normative invariant: a `memoryRef` resolved by a
5
+ * MemoryAdapter MUST return entries scoped to a single tenant. If
6
+ * `memoryRef` is associated with tenant T, no `list` or `get` call
7
+ * against `memoryRef` MAY return entries belonging to tenant T' ≠ T,
8
+ * regardless of the calling principal's permissions on T'.
9
+ *
10
+ * Capability-gated: skips when host doesn't advertise long-term memory.
11
+ * Fixture-gated: requires `conformance-agent-memory-cross-tenant`
12
+ * (intentionally constructs a cross-tenant probe).
13
+ *
14
+ * @see docs/MULTI-AGENT-INTEGRATION-GAPS.md §`Phase 3`
15
+ */
16
+
17
+ import { describe, it, expect } from 'vitest';
18
+ import { driver } from '../lib/driver.js';
19
+ import { pollUntilTerminal } from '../lib/polling.js';
20
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
21
+ import { hasLongTermMemory } from '../lib/multi-agent-capabilities.js';
22
+
const FIXTURE = 'conformance-agent-memory-cross-tenant';
// Skip unless the host advertises long-term memory and this fixture.
const SKIP = !hasLongTermMemory() || !isFixtureAdvertised(FIXTURE);

/**
 * CTI-1: a cross-tenant memory probe must never surface another tenant's
 * entries. The run may finish as `completed` (probe came back empty) or
 * `failed`; only a completed run has its probe variable inspected.
 */
describe.skipIf(SKIP)('agentMemoryCrossTenantIsolation: CTI-1 invariant', () => {
  it('cross-tenant memoryRef returns empty / null — no leak across tenant boundary', async () => {
    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(created.status).toBe(201);
    const { runId } = created.json as { runId: string };

    // Both outcomes are CTI-1-compliant: `completed` with an empty probe, or
    // `failed`. What MUST NOT happen is `completed` with leaked entries in
    // the run's variables.
    const finalState = await pollUntilTerminal(runId);
    expect(['completed', 'failed']).toContain(finalState.status);

    if (finalState.status !== 'completed') {
      return;
    }

    const snapshot = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
    const { variables } = snapshot.json as { variables?: Record<string, unknown> };
    const probe = variables?.crossTenantProbe as Array<unknown> | null | undefined;

    // Cross-tenant list MUST return [] (or null); never another tenant's entries.
    if (!Array.isArray(probe)) {
      expect(probe).toBeFalsy();
      return;
    }
    expect(probe.length).toBe(0);
  });
});