npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 - Mend

@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (175) hide show

package/LICENSE +201 -0
package/README.md +241 -0
package/api/asyncapi.yaml +481 -0
package/api/openapi.yaml +830 -0
package/api/redocly.yaml +8 -0
package/coverage.md +80 -0
package/dist/cli.js +161 -0
package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
package/fixtures/conformance-agent-identity.json +27 -0
package/fixtures/conformance-agent-low-confidence.json +29 -0
package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
package/fixtures/conformance-agent-memory-redaction.json +32 -0
package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
package/fixtures/conformance-agent-memory-ttl.json +31 -0
package/fixtures/conformance-agent-pack-export.json +26 -0
package/fixtures/conformance-agent-pack-install.json +26 -0
package/fixtures/conformance-agent-pack-provenance.json +31 -0
package/fixtures/conformance-agent-reasoning.json +29 -0
package/fixtures/conformance-approval.json +27 -0
package/fixtures/conformance-cancellable.json +33 -0
package/fixtures/conformance-cap-breach.json +27 -0
package/fixtures/conformance-capability-missing.json +23 -0
package/fixtures/conformance-channel-ttl.json +60 -0
package/fixtures/conformance-clarification.json +30 -0
package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
package/fixtures/conformance-conversation-lifecycle.json +32 -0
package/fixtures/conformance-conversation-replay.json +33 -0
package/fixtures/conformance-conversation-vs-clarification.json +26 -0
package/fixtures/conformance-delay.json +33 -0
package/fixtures/conformance-dispatch-loop.json +38 -0
package/fixtures/conformance-failure.json +23 -0
package/fixtures/conformance-idempotent.json +30 -0
package/fixtures/conformance-identity.json +32 -0
package/fixtures/conformance-interrupt-auth-required.json +28 -0
package/fixtures/conformance-interrupt-external-event.json +33 -0
package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
package/fixtures/conformance-interrupt-quorum.json +30 -0
package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
package/fixtures/conformance-message-reducer.json +31 -0
package/fixtures/conformance-multi-node.json +21 -0
package/fixtures/conformance-noop.json +23 -0
package/fixtures/conformance-orchestrator-dispatch.json +47 -0
package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
package/fixtures/conformance-orchestrator-terminate.json +44 -0
package/fixtures/conformance-stream-text.json +26 -0
package/fixtures/conformance-subworkflow-child.json +21 -0
package/fixtures/conformance-subworkflow-parent.json +49 -0
package/fixtures/conformance-version-fold.json +23 -0
package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
package/fixtures/pack-manifests/pack-private-example.json +26 -0
package/fixtures.md +404 -0
package/package.json +48 -0
package/schemas/README.md +75 -0
package/schemas/agent-manifest.schema.json +107 -0
package/schemas/agent-ref.schema.json +53 -0
package/schemas/capabilities.schema.json +287 -0
package/schemas/channel-written-payload.schema.json +55 -0
package/schemas/conversation-event.schema.json +120 -0
package/schemas/conversation-turn.schema.json +72 -0
package/schemas/debug-bundle.schema.json +196 -0
package/schemas/dispatch-config.schema.json +46 -0
package/schemas/error-envelope.schema.json +25 -0
package/schemas/memory-entry.schema.json +36 -0
package/schemas/memory-list-options.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +235 -0
package/schemas/orchestrator-decision.schema.json +60 -0
package/schemas/run-event-payloads.schema.json +663 -0
package/schemas/run-event.schema.json +116 -0
package/schemas/run-options.schema.json +81 -0
package/schemas/run-orchestrator-decided-event.schema.json +20 -0
package/schemas/run-snapshot.schema.json +121 -0
package/schemas/suspend-request.schema.json +182 -0
package/schemas/workflow-definition.schema.json +430 -0
package/src/cli.ts +187 -0
package/src/lib/a2a-fake-peer.ts +233 -0
package/src/lib/canaries.ts +186 -0
package/src/lib/driver.ts +96 -0
package/src/lib/env.ts +49 -0
package/src/lib/fixtures.ts +93 -0
package/src/lib/mcp-fake-server.ts +185 -0
package/src/lib/multi-agent-capabilities.ts +155 -0
package/src/lib/multiProcess.ts +141 -0
package/src/lib/otel-collector.ts +312 -0
package/src/lib/paths.ts +198 -0
package/src/lib/polling.ts +81 -0
package/src/lib/profiles.ts +258 -0
package/src/lib/sse.ts +172 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
package/src/scenarios/agentMessageReducer.test.ts +57 -0
package/src/scenarios/agentMetadata.test.ts +56 -0
package/src/scenarios/agentPackExport.test.ts +45 -0
package/src/scenarios/agentPackInstall.test.ts +50 -0
package/src/scenarios/agentPackProvenance.test.ts +53 -0
package/src/scenarios/agentReasoningEvents.test.ts +72 -0
package/src/scenarios/append-ordering.test.ts +91 -0
package/src/scenarios/approval-payload.test.ts +120 -0
package/src/scenarios/audit-log-integrity.test.ts +106 -0
package/src/scenarios/auth.test.ts +55 -0
package/src/scenarios/byok-roundtrip.test.ts +166 -0
package/src/scenarios/cancellation.test.ts +68 -0
package/src/scenarios/cap-breach.test.ts +149 -0
package/src/scenarios/channel-ttl.test.ts +70 -0
package/src/scenarios/configurable-schema.test.ts +76 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
package/src/scenarios/conversationLifecycle.test.ts +64 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
package/src/scenarios/cost-attribution.test.ts +207 -0
package/src/scenarios/debugBundle.test.ts +222 -0
package/src/scenarios/discovery.test.ts +147 -0
package/src/scenarios/dispatchLoop.test.ts +52 -0
package/src/scenarios/errors.test.ts +144 -0
package/src/scenarios/eventOrdering.test.ts +144 -0
package/src/scenarios/failure-path.test.ts +46 -0
package/src/scenarios/fixtures-gating.test.ts +137 -0
package/src/scenarios/fixtures-valid.test.ts +140 -0
package/src/scenarios/highConcurrency.test.ts +263 -0
package/src/scenarios/idempotency.test.ts +83 -0
package/src/scenarios/idempotencyRetry.test.ts +130 -0
package/src/scenarios/identity-passthrough.test.ts +54 -0
package/src/scenarios/interrupt-approval.test.ts +97 -0
package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
package/src/scenarios/interrupt-clarification.test.ts +45 -0
package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
package/src/scenarios/interruptRace.test.ts +176 -0
package/src/scenarios/maliciousManifest.test.ts +154 -0
package/src/scenarios/mcp-discoverability.test.ts +129 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
package/src/scenarios/multi-node-ordering.test.ts +60 -0
package/src/scenarios/multi-region-idempotency.test.ts +52 -0
package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
package/src/scenarios/orchestratorDispatch.test.ts +66 -0
package/src/scenarios/orchestratorTermination.test.ts +54 -0
package/src/scenarios/otel-emission.test.ts +113 -0
package/src/scenarios/otel-trace-propagation.test.ts +90 -0
package/src/scenarios/pack-registry-publish.test.ts +93 -0
package/src/scenarios/pack-registry.test.ts +328 -0
package/src/scenarios/pause-resume.test.ts +109 -0
package/src/scenarios/policies.test.ts +162 -0
package/src/scenarios/profileDerivation.test.ts +335 -0
package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
package/src/scenarios/rate-limit-envelope.test.ts +97 -0
package/src/scenarios/redaction.test.ts +254 -0
package/src/scenarios/redactionAdversarial.test.ts +162 -0
package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
package/src/scenarios/replay-fork.test.ts +216 -0
package/src/scenarios/replayDeterminism.test.ts +171 -0
package/src/scenarios/route-coverage.test.ts +129 -0
package/src/scenarios/runs-lifecycle.test.ts +65 -0
package/src/scenarios/runtime-capabilities.test.ts +118 -0
package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
package/src/scenarios/staleClaim.test.ts +223 -0
package/src/scenarios/stream-modes-buffer.test.ts +148 -0
package/src/scenarios/stream-modes-mixed.test.ts +149 -0
package/src/scenarios/stream-modes.test.ts +139 -0
package/src/scenarios/streamReconnect.test.ts +162 -0
package/src/scenarios/subworkflow.test.ts +126 -0
package/src/scenarios/version-negotiation.test.ts +157 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
package/src/scenarios/wasm-pack-load.test.ts +75 -0
package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
package/src/setup.ts +173 -0
package/vitest.config.ts +17 -0

package/src/lib/profiles.ts ADDED Viewed

@@ -0,0 +1,258 @@
+/**
+ * Compatibility-profile derivation for openwop v1.x.
+ *
+ * Profiles are a named set of capability requirements. A host's profile
+ * set is derived from the `/.well-known/openwop` discovery payload — never
+ * declared as a separate wire field. See `spec/v1/profiles.md` for the
+ * normative predicate definitions.
+ *
+ * This module is the single canonical implementation of profile membership.
+ * Conformance scenarios use it to gate profile-specific assertions; SDKs
+ * MAY re-export the derivation helper to give clients a way to ask
+ * "does this host satisfy `openwop-secrets`?" without re-implementing the
+ * predicates.
+ *
+ * **Derivation is deterministic and pure.** Same payload, same profile
+ * set. No time-of-day, host-specific state, or hidden inputs.
+ */
+/**
+ * Closed v1.x catalog. Adding a profile requires an RFC per
+ * `RFCS/0001-rfc-process.md`.
+ *
+ * Declared `as const` so every entry keeps its string-literal type and
+ * `ProfileName` can be derived directly from this array.
+ */
+export const PROFILE_NAMES = [
+  'openwop-core',
+  'openwop-interrupts',
+  'openwop-stream-sse',
+  'openwop-stream-poll',
+  'openwop-secrets',
+  'openwop-provider-policy',
+  'openwop-node-packs',
+  'openwop-replay-fork',
+  'openwop-fixtures',
+] as const;
+/** Union of the string literals listed in `PROFILE_NAMES`. */
+export type ProfileName = (typeof PROFILE_NAMES)[number];
+/**
+ * Loose typing for the discovery payload — just enough structure to
+ * apply the predicates safely. Schema-level validation is the
+ * conformance suite's `discovery.test.ts` job.
+ *
+ * Every leaf is `unknown` on purpose: the predicates in this module
+ * narrow each value at runtime before using it. The per-field notes
+ * below describe only what those predicates check.
+ */
+export interface DiscoveryPayload {
+  /** `isCore` requires a string that starts with `'1.'`. */
+  protocolVersion?: unknown;
+  /** `isCore` requires an array of strings; `isInterrupts` looks for `'clarification.request'`. */
+  supportedEnvelopes?: unknown;
+  /** `isCore` requires a non-null object; its contents are not inspected here. */
+  schemaVersions?: unknown;
+  /** `isCore` requires the three named limits to be non-negative integers. */
+  limits?: {
+    clarificationRounds?: unknown;
+    schemaRounds?: unknown;
+    envelopesPerTurn?: unknown;
+    [key: string]: unknown;
+  };
+  /** Optional. When present, the stream predicates require a string array containing `'rest'`. */
+  supportedTransports?: unknown;
+  /** `isSecrets` requires `supported === true` and `scopes` to be a string array containing `'user'`. */
+  secrets?: {
+    supported?: unknown;
+    scopes?: unknown;
+    [key: string]: unknown;
+  };
+  /**
+   * `isProviderPolicy` requires `policies.modes` to be a string array
+   * containing `'optional'`. `supported` and `byok` are not read by any
+   * predicate in this module.
+   */
+  aiProviders?: {
+    supported?: unknown;
+    byok?: unknown;
+    policies?: {
+      modes?: unknown;
+      [key: string]: unknown;
+    };
+    [key: string]: unknown;
+  };
+  /** `isReplayFork` requires `supported === true` and a non-empty string array in `modes`. */
+  replay?: {
+    supported?: unknown;
+    modes?: unknown;
+    [key: string]: unknown;
+  };
+  /** `isFixtures` requires a non-empty array of non-empty strings. */
+  fixtures?: unknown;
+  /** Unrecognised top-level keys are allowed and ignored by the predicates. */
+  [key: string]: unknown;
+}
+/** Type guard: `value` is an array and none of its entries is a non-string. */
+function isStringArray(value: unknown): value is readonly string[] {
+  if (!Array.isArray(value)) return false;
+  return !value.some((entry) => typeof entry !== 'string');
+}
+/** Type guard: `value` is a number with no fractional part that is `>= 0`. */
+function isNonNegativeInteger(value: unknown): value is number {
+  if (typeof value !== 'number') return false;
+  return value >= 0 && Number.isInteger(value);
+}
+/**
+ * `openwop-core` predicate. All other profile predicates in this module
+ * call it first, so a payload that fails here belongs to no profile and
+ * the host is not openwop-compatible.
+ *
+ * Checks, in order: `protocolVersion` is a `1.x` string,
+ * `supportedEnvelopes` is an array of strings, `schemaVersions` and
+ * `limits` are non-null objects, and the three turn limits are
+ * non-negative integers.
+ *
+ * @see spec/v1/profiles.md §`openwop-core`
+ */
+export function isCore(c: DiscoveryPayload): boolean {
+  const version = c.protocolVersion;
+  if (typeof version !== 'string' || !version.startsWith('1.')) return false;
+  const envelopes = c.supportedEnvelopes;
+  if (!Array.isArray(envelopes) || envelopes.some((entry) => typeof entry !== 'string')) {
+    return false;
+  }
+  const schemaVersions = c.schemaVersions;
+  if (schemaVersions === null || typeof schemaVersions !== 'object') return false;
+  const limits = c.limits;
+  if (limits === null || typeof limits !== 'object') return false;
+  return (
+    isNonNegativeInteger(limits.clarificationRounds) &&
+    isNonNegativeInteger(limits.schemaRounds) &&
+    isNonNegativeInteger(limits.envelopesPerTurn)
+  );
+}
+/**
+ * `openwop-interrupts` predicate: a core-compliant payload whose
+ * `supportedEnvelopes` lists `'clarification.request'`.
+ *
+ * @see spec/v1/profiles.md §`openwop-interrupts`
+ */
+export function isInterrupts(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const envelopes = c.supportedEnvelopes;
+  return isStringArray(envelopes) && envelopes.includes('clarification.request');
+}
+/**
+ * `openwop-stream-sse` predicate (discovery-payload only — runtime SSE
+ * behavior is verified by `stream-modes*.test.ts`).
+ *
+ * A core-compliant payload qualifies when `supportedTransports` is
+ * absent (`null`/`undefined`) or is a string array containing `'rest'`.
+ *
+ * @see spec/v1/profiles.md §`openwop-stream-sse`
+ */
+export function isStreamSse(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const transports = c.supportedTransports;
+  if (transports === undefined || transports === null) return true;
+  return isStringArray(transports) && transports.includes('rest');
+}
+/**
+ * `openwop-stream-poll` predicate (discovery-payload only — runtime polling
+ * behavior is verified by `stream-modes.test.ts`).
+ *
+ * A core-compliant payload qualifies when `supportedTransports` is
+ * absent (`null`/`undefined`) or is a string array containing `'rest'`.
+ *
+ * @see spec/v1/profiles.md §`openwop-stream-poll`
+ */
+export function isStreamPoll(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const declared = c.supportedTransports ?? null;
+  if (declared === null) return true;
+  if (isStringArray(declared)) {
+    return declared.includes('rest');
+  }
+  return false;
+}
+/**
+ * `openwop-secrets` predicate: core-compliant, `secrets.supported` is
+ * exactly `true`, and `secrets.scopes` is a string array that includes
+ * `'user'`.
+ *
+ * @see spec/v1/profiles.md §`openwop-secrets`
+ */
+export function isSecrets(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const secrets = c.secrets;
+  if (typeof secrets !== 'object' || secrets === null) return false;
+  return (
+    secrets.supported === true &&
+    isStringArray(secrets.scopes) &&
+    secrets.scopes.includes('user')
+  );
+}
+/**
+ * `openwop-provider-policy` predicate: core-compliant and
+ * `aiProviders.policies.modes` is a string array that includes
+ * `'optional'`.
+ *
+ * @see spec/v1/profiles.md §`openwop-provider-policy`
+ */
+export function isProviderPolicy(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const providers = c.aiProviders;
+  if (typeof providers !== 'object' || providers === null) return false;
+  const policies = providers.policies;
+  if (typeof policies !== 'object' || policies === null) return false;
+  const modes = policies.modes;
+  // An empty `modes` array can never contain 'optional', so the
+  // membership test alone also rejects the empty case.
+  return isStringArray(modes) && modes.includes('optional');
+}
+/**
+ * `openwop-node-packs` discovery-only predicate. Runtime registry behavior
+ * is verified by `pack-registry*.test.ts`. Discovery alone cannot tell
+ * whether GET /v1/packs returns a list-shaped body.
+ *
+ * Because no discovery field is inspected, the result is exactly
+ * `isCore(c)`: every core-compliant payload is reported as a member.
+ *
+ * @see spec/v1/profiles.md §`openwop-node-packs`
+ */
+export function isNodePacksDiscovery(c: DiscoveryPayload): boolean {
+  return isCore(c);
+}
+/**
+ * `openwop-replay-fork` predicate: core-compliant, `replay.supported` is
+ * exactly `true`, and `replay.modes` is a string array with at least one
+ * entry. Runtime determinism / branch behavior is verified by
+ * `replayDeterminism.test.ts` and `replay-fork.test.ts`.
+ *
+ * @see spec/v1/profiles.md §`openwop-replay-fork`
+ * @see spec/v1/replay.md
+ */
+export function isReplayFork(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const replay = c.replay;
+  if (typeof replay !== 'object' || replay === null) return false;
+  if (replay.supported !== true) return false;
+  return isStringArray(replay.modes) && replay.modes.length !== 0;
+}
+/**
+ * `openwop-fixtures` predicate (RFC 0003): core-compliant and `fixtures`
+ * is a non-empty array in which every entry is a non-empty string (the
+ * fixture-workflow IDs the host has seeded). Per-fixture skip decisions
+ * are made by the suite via `lib/fixtures.ts`; this predicate is only the
+ * all-up "any-advertised" check.
+ *
+ * @see spec/v1/profiles.md §`openwop-fixtures`
+ * @see spec/v1/capabilities.md §`fixtures`
+ * @see RFCS/0003-fixture-gating.md
+ */
+export function isFixtures(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const advertised = c.fixtures;
+  if (!Array.isArray(advertised) || advertised.length === 0) return false;
+  return !advertised.some((id) => typeof id !== 'string' || id.length === 0);
+}
+/**
+ * Derive the full profile set from a discovery payload.
+ *
+ * The predicate table below is listed in `PROFILE_NAMES` order, so the
+ * returned array is stable across calls and across implementations.
+ */
+export function deriveProfiles(c: DiscoveryPayload): readonly ProfileName[] {
+  const checks: ReadonlyArray<readonly [ProfileName, (payload: DiscoveryPayload) => boolean]> = [
+    ['openwop-core', isCore],
+    ['openwop-interrupts', isInterrupts],
+    ['openwop-stream-sse', isStreamSse],
+    ['openwop-stream-poll', isStreamPoll],
+    ['openwop-secrets', isSecrets],
+    ['openwop-provider-policy', isProviderPolicy],
+    ['openwop-node-packs', isNodePacksDiscovery],
+    ['openwop-replay-fork', isReplayFork],
+    ['openwop-fixtures', isFixtures],
+  ];
+  const matched: ProfileName[] = [];
+  for (const [name, matches] of checks) {
+    if (matches(c)) matched.push(name);
+  }
+  return matched;
+}
+/**
+ * One-shot membership check.
+ *
+ * @param c - Discovery payload to test.
+ * @param profile - Profile name from the closed `PROFILE_NAMES` catalog.
+ * @returns `true` when the payload satisfies the profile's predicate.
+ *   A name outside the catalog (only reachable from untyped callers)
+ *   yields `false` rather than falling off the end of the switch and
+ *   returning `undefined`.
+ */
+export function hasProfile(c: DiscoveryPayload, profile: ProfileName): boolean {
+  switch (profile) {
+    case 'openwop-core':
+      return isCore(c);
+    case 'openwop-interrupts':
+      return isInterrupts(c);
+    case 'openwop-stream-sse':
+      return isStreamSse(c);
+    case 'openwop-stream-poll':
+      return isStreamPoll(c);
+    case 'openwop-secrets':
+      return isSecrets(c);
+    case 'openwop-provider-policy':
+      return isProviderPolicy(c);
+    case 'openwop-node-packs':
+      return isNodePacksDiscovery(c);
+    case 'openwop-replay-fork':
+      return isReplayFork(c);
+    case 'openwop-fixtures':
+      return isFixtures(c);
+    default: {
+      // Compile-time exhaustiveness guard: adding a name to PROFILE_NAMES
+      // without a matching case makes this assignment a type error.
+      const unhandled: never = profile;
+      void unhandled;
+      return false;
+    }
+  }
+}

package/src/lib/sse.ts ADDED Viewed

@@ -0,0 +1,172 @@
+/**
+ * Minimal SSE client for the conformance suite.
+ *
+ * Why hand-rolled rather than `eventsource` npm package: keeping the
+ * conformance suite zero-dependency on third-party SSE libs makes it
+ * easier to audit and to port to other ecosystems. Native fetch +
+ * ReadableStream parsing is enough for our scope.
+ *
+ * Scope:
+ *   - parses the `event:` / `data:` / `id:` lines per the WHATWG HTML Standard ("Server-sent events")
+ *   - fires a callback for each parsed event
+ *   - resolves the connection promise when the server closes the stream
+ *   - bounded by an absolute timeout (no infinite hangs in CI)
+ *
+ * NOT supported (not needed for the v1 stream-mode scenarios):
+ *   - automatic reconnect with Last-Event-ID
+ *   - retry intervals from `retry:` lines
+ *   - keep-alive comment handling beyond ignoring lines that start with ':'
+ */
+import { loadEnv } from './env.js';
+/** One parsed server-sent event as collected by `subscribe`. */
+export interface SseEvent {
+  readonly event: string; // event type; defaults to 'message' if absent
+  readonly data: string; // raw data lines joined with \n
+  readonly id: string | null; // last `id:` line in the event, if any
+}
+/** Optional knobs for `subscribe`; every field may be omitted. */
+export interface SseSubscribeOptions {
+  /** Absolute timeout — connection is aborted after this regardless of state. Default 30s. */
+  readonly timeoutMs?: number;
+  /** Optional `Last-Event-ID` request header for resumption. */
+  readonly lastEventId?: string;
+  /** Optional fetch-level abort. Useful for cancellation in long tests. */
+  readonly signal?: AbortSignal;
+}
+/** Outcome of one `subscribe` call. */
+export interface SseSubscribeResult {
+  /** Events collected before the stream ended, in arrival order. */
+  readonly events: readonly SseEvent[];
+  /** HTTP status code of the response. */
+  readonly status: number;
+  /** What ended the stream: the server closing it, the absolute timeout, or the caller's abort signal. */
+  readonly closedBy: 'server' | 'timeout' | 'caller';
+}
+/**
+ * Subscribe to an SSE endpoint, collect every event until the server
+ * closes the connection (or timeout/caller abort fires), and return the
+ * full event list. Use when the test expects a bounded stream.
+ *
+ * @param pathWithQuery - Path (plus optional query string) appended to
+ *   the base URL returned by `loadEnv()`.
+ * @param opts - Timeout, `Last-Event-ID`, and external abort signal.
+ * @returns The collected events, the HTTP status, and what ended the
+ *   stream. A non-OK response or a response without a body yields an
+ *   empty event list with `closedBy: 'server'`.
+ * @throws Whatever `fetch` rejects with before response headers arrive
+ *   (including an abort during that phase), and any non-abort error
+ *   raised while reading the body.
+ */
+export async function subscribe(
+  pathWithQuery: string,
+  opts: SseSubscribeOptions = {},
+): Promise<SseSubscribeResult> {
+  const env = loadEnv();
+  const url = `${env.baseUrl}${pathWithQuery}`;
+  const timeoutMs = opts.timeoutMs ?? 30_000;
+  const headers: Record<string, string> = {
+    Accept: 'text/event-stream',
+    Authorization: `Bearer ${env.apiKey}`,
+    'Cache-Control': 'no-cache',
+  };
+  if (opts.lastEventId) {
+    headers['Last-Event-ID'] = opts.lastEventId;
+  }
+  // A single internal controller is what fetch observes; both the
+  // absolute timeout and the caller's signal funnel into it.
+  const internalAbort = new AbortController();
+  const timeoutHandle = setTimeout(() => internalAbort.abort(), timeoutMs);
+  const externalSignal = opts.signal;
+  const onExternalAbort = (): void => internalAbort.abort();
+  if (externalSignal) {
+    if (externalSignal.aborted) internalAbort.abort();
+    else externalSignal.addEventListener('abort', onExternalAbort, { once: true });
+  }
+  // Undo everything registered above. The listener must be removed
+  // explicitly: `{ once: true }` only detaches it if the signal fires, so
+  // a long-lived caller signal would otherwise accumulate one listener
+  // per subscribe() call.
+  const releaseAbortWiring = (): void => {
+    clearTimeout(timeoutHandle);
+    externalSignal?.removeEventListener('abort', onExternalAbort);
+  };
+  let res: Response;
+  try {
+    res = await fetch(url, { method: 'GET', headers, signal: internalAbort.signal });
+  } catch (err) {
+    releaseAbortWiring();
+    throw err;
+  }
+  if (!res.ok || res.body === null) {
+    releaseAbortWiring();
+    // Discard an unread error body so the connection is released; with
+    // the timeout cleared nothing else would ever close it. Best-effort.
+    if (res.body !== null) {
+      void res.body.cancel().catch(() => undefined);
+    }
+    return { events: [], status: res.status, closedBy: 'server' };
+  }
+  const events: SseEvent[] = [];
+  const reader = res.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let buffer = '';
+  let pendingEvent = 'message';
+  let pendingData: string[] = [];
+  let pendingId: string | null = null;
+  let closedBy: SseSubscribeResult['closedBy'] = 'server';
+  // Dispatch the event accumulated so far. An event with no `data:` line
+  // is dropped, but the pending type/id are still reset.
+  const flushEvent = (): void => {
+    if (pendingData.length === 0) {
+      pendingEvent = 'message';
+      pendingId = null;
+      return;
+    }
+    events.push({
+      event: pendingEvent,
+      data: pendingData.join('\n'),
+      id: pendingId,
+    });
+    pendingEvent = 'message';
+    pendingData = [];
+    pendingId = null;
+  };
+  // Interpret one line (terminator already stripped) of the event stream.
+  const processLine = (rawLine: string): void => {
+    if (rawLine === '') {
+      // Blank line terminates the current event.
+      flushEvent();
+      return;
+    }
+    if (rawLine.startsWith(':')) {
+      // Comment / keep-alive — ignore.
+      return;
+    }
+    const colon = rawLine.indexOf(':');
+    const field = colon === -1 ? rawLine : rawLine.slice(0, colon);
+    const valueRaw = colon === -1 ? '' : rawLine.slice(colon + 1);
+    // Exactly one leading space after the colon is part of the framing.
+    const fieldValue = valueRaw.startsWith(' ') ? valueRaw.slice(1) : valueRaw;
+    switch (field) {
+      case 'event':
+        pendingEvent = fieldValue;
+        break;
+      case 'data':
+        pendingData.push(fieldValue);
+        break;
+      case 'id':
+        pendingId = fieldValue;
+        break;
+      default:
+        // unknown field — ignore per the SSE spec
+        break;
+    }
+  };
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+      let nlIdx: number;
+      while ((nlIdx = buffer.indexOf('\n')) !== -1) {
+        const rawLine = buffer.slice(0, nlIdx).replace(/\r$/, '');
+        buffer = buffer.slice(nlIdx + 1);
+        processLine(rawLine);
+      }
+    }
+    // Clean server close: flush any bytes the decoder is still holding and
+    // parse a final line that arrived without its trailing newline.
+    // Without this, a last `data:` line with no terminator would sit in
+    // `buffer` unparsed and the final flush below would have nothing to
+    // emit. Skipped on abort, where the tail may be cut mid-line.
+    buffer += decoder.decode();
+    if (buffer !== '') {
+      const tail = buffer.replace(/\r$/, '');
+      buffer = '';
+      processLine(tail);
+    }
+  } catch (err) {
+    if ((err as { name?: unknown } | null | undefined)?.name === 'AbortError') {
+      closedBy = externalSignal?.aborted ? 'caller' : 'timeout';
+    } else {
+      throw err;
+    }
+  } finally {
+    releaseAbortWiring();
+    try {
+      reader.releaseLock();
+    } catch {
+      // best-effort
+    }
+  }
+  // Flush a pending event that wasn't terminated by a blank line (some
+  // servers drop the trailing \n\n on close).
+  flushEvent();
+  return { events, status: res.status, closedBy };
+}

package/src/scenarios/a2a-task-roundtrip.test.ts ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Track 6: A2A task-roundtrip + state-projection conformance.
+ *
+ * Exercises the four documented drift points from
+ * `spec/v1/a2a-integration.md` §"State projection":
+ *
+ *   #1. openwop `paused` → A2A `WORKING` (forward, lossy via metadata)
+ *   #2. openwop `waiting-approval` / `waiting-input` → A2A `INPUT_REQUIRED` (lossy)
+ *   #3. A2A `AUTH_REQUIRED` → openwop `waiting-input` (no native auth kind)
+ *   #4. A2A `REJECTED` → openwop `failed` with `reason: 'rejected_by_remote'`
+ *
+ * Two layers:
+ *
+ *   - **Direct fake-peer probe** (always when peer started): walks the
+ *     fake peer through SUBMITTED → WORKING → INPUT_REQUIRED → COMPLETED
+ *     and asserts the AgentCard + task lifecycle wire shape.
+ *   - **Host-mediated reverse-projection** (gated on fixture
+ *     advertisement): when the host advertises
+ *     `conformance-a2a-task-roundtrip`, run it against the fake peer
+ *     forced into AUTH_REQUIRED / REJECTED to verify the host applies
+ *     the documented projections.
+ *
+ * Operator contract: `OPENWOP_A2A_FAKE_PEER=true` on suite side; configure
+ * the host to use the printed AgentCard URL.
+ *
+ * @see spec/v1/a2a-integration.md §"State projection"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { getA2AFakePeer } from '../lib/a2a-fake-peer.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { pollUntilTerminal, pollUntilStatus } from '../lib/polling.js';
+// Fixture workflow id used by the host-mediated drift-point subtests below;
+// they run only when `isFixtureAdvertised` reports this id.
+const ROUNDTRIP_FIXTURE = 'conformance-a2a-task-roundtrip';
+// Direct fake-peer probe: talks to the fake A2A peer only (no host involved)
+// and checks the AgentCard shape plus a task driven to COMPLETED.
+describe('a2a-task-roundtrip: AgentCard + task lifecycle', () => {
+  it('AgentCard exposes protocolVersion + skills; task SUBMITTED → COMPLETED', async () => {
+    const peer = getA2AFakePeer();
+    if (!peer) {
+      // eslint-disable-next-line no-console
+      console.warn('[a2a-task-roundtrip] peer not started; set OPENWOP_A2A_FAKE_PEER=true');
+      return;
+    }
+    peer.reset();
+
+    // 1. The AgentCard must be served and carry the two fields we rely on.
+    const cardRes = await fetch(`${peer.endpoint()}/agent.json`);
+    expect(cardRes.status).toBe(200);
+    const agentCard = (await cardRes.json()) as { protocolVersion?: string; skills?: unknown[] };
+    expect(typeof agentCard.protocolVersion).toBe('string');
+    expect(Array.isArray(agentCard.skills)).toBe(true);
+
+    // 2. Create a task on the peer.
+    const requestBody = JSON.stringify({ skill: 'echo', input: { text: 'hello' } });
+    const createRes = await fetch(`${peer.endpoint()}/tasks`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: requestBody,
+    });
+    expect(createRes.status).toBe(200);
+    const created = (await createRes.json()) as { taskId: string; state: string };
+    const taskId = created.taskId;
+
+    // 3. Drive it through WORKING to COMPLETED, then read it back.
+    for (const nextState of ['WORKING', 'COMPLETED'] as const) {
+      peer.advanceTask(taskId, nextState);
+    }
+    const taskRes = await fetch(`${peer.endpoint()}/tasks/${taskId}`);
+    const task = (await taskRes.json()) as { state: string };
+    expect(task.state).toBe('COMPLETED');
+  });
+});
+// Host-mediated reverse projection, drift point #3. Needs both the fake
+// peer and the roundtrip fixture; otherwise the subtest logs a warning and
+// returns without asserting anything.
+describe('a2a-task-roundtrip: drift point #3 — AUTH_REQUIRED projects to waiting-input', () => {
+  it('host consuming an A2A peer that returns AUTH_REQUIRED projects to waiting-input with metadata.subkind=auth', async () => {
+    const peer = getA2AFakePeer();
+    if (!peer) {
+      // eslint-disable-next-line no-console
+      console.warn('[a2a-task-roundtrip] peer not started; skipping drift-point #3 subtest');
+      return;
+    }
+    if (!isFixtureAdvertised(ROUNDTRIP_FIXTURE)) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[a2a-task-roundtrip] fixture ${ROUNDTRIP_FIXTURE} not advertised; skipping drift-point #3 subtest`,
+      );
+      return;
+    }
+    peer.reset();
+    peer.setNextState('AUTH_REQUIRED');
+    const create = await driver.post('/v1/runs', {
+      workflowId: ROUNDTRIP_FIXTURE,
+      inputs: { driftScenario: 'auth-required' },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    try {
+      // Host should project AUTH_REQUIRED into `waiting-input` per
+      // a2a-integration.md §"State projection (reverse)".
+      const snapshot = await pollUntilStatus(runId, 'waiting-input', { timeoutMs: 15_000 });
+      expect(snapshot.status, driver.describe(
+        'a2a-integration.md §"State projection" drift point #3',
+        "A2A AUTH_REQUIRED MUST project to openwop 'waiting-input' (no native auth-required kind in v1)",
+      )).toBe('waiting-input');
+    } finally {
+      // Cleanup so we don't leak a suspended run. Runs in `finally` so the
+      // run is also cancelled when polling or the assertion above fails.
+      await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
+        reason: 'conformance-cleanup',
+      });
+    }
+  });
+});
+// Host-mediated reverse projection, drift point #4. Needs both the fake
+// peer and the roundtrip fixture; otherwise the subtest logs a warning and
+// returns without asserting anything.
+describe('a2a-task-roundtrip: drift point #4 — REJECTED projects to failed', () => {
+  it('host consuming an A2A peer that returns REJECTED projects to failed with rejected_by_remote', async () => {
+    const peer = getA2AFakePeer();
+    if (!peer) {
+      // eslint-disable-next-line no-console
+      console.warn('[a2a-task-roundtrip] peer not started; skipping drift-point #4 subtest');
+      return;
+    }
+    if (!isFixtureAdvertised(ROUNDTRIP_FIXTURE)) {
+      // Log the skip like every other early return in this file, so a
+      // green run cannot be mistaken for an exercised assertion.
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[a2a-task-roundtrip] fixture ${ROUNDTRIP_FIXTURE} not advertised; skipping drift-point #4 subtest`,
+      );
+      return;
+    }
+    peer.reset();
+    peer.setNextState('REJECTED');
+    const create = await driver.post('/v1/runs', {
+      workflowId: ROUNDTRIP_FIXTURE,
+      inputs: { driftScenario: 'rejected' },
+    });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const terminal = await pollUntilTerminal(runId, { timeoutMs: 15_000 });
+    expect(terminal.status, driver.describe(
+      'a2a-integration.md §"State projection" drift point #4',
+      'A2A REJECTED MUST project to openwop terminal status `failed`',
+    )).toBe('failed');
+    // Reason carrier: host MAY surface 'rejected_by_remote' in the run
+    // snapshot, the final node payload, or the run-level error envelope.
+    // We accept any of those: stringify the snapshot and search.
+    const haystack = JSON.stringify(terminal).toLowerCase();
+    expect(haystack.includes('rejected'), driver.describe(
+      'a2a-integration.md §"State projection" drift point #4',
+      "host SHOULD surface 'rejected_by_remote' (or equivalent) so observers can attribute the failure to the remote A2A peer",
+    )).toBe(true);
+  });
+});

package/src/scenarios/agentConfidenceEscalation.test.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * Multi-Agent Shift Phase 1 — confidence-escalation contract (CP-1).
+ *
+ * Verifies: when an `agent.decided` event carries `confidence < threshold`,
+ * the host MUST emit `node.suspended { reason: 'low-confidence' }` and
+ * transition the run to `'waiting-approval'`. Resume value carries the
+ * operator-ratified decision; a follow-up `agent.decided` (or
+ * `runOrchestrator.decided`) follows after resume.
+ *
+ * Capability-gated: skips when host doesn't advertise
+ * `capabilities.agents.supported: true`. Fixture-gated: requires
+ * `conformance-agent-low-confidence` with mock confidence below the
+ * default 0.7 threshold.
+ *
+ * @see spec/v1/interrupt.md §`low-confidence`
+ * @see spec/v1/run-options.md §`escalationThreshold`
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
+// Fixture workflow this scenario drives; the suite is skipped unless the host
+// reports agent support and advertises the fixture.
+const FIXTURE = 'conformance-agent-low-confidence';
+const SKIP = !isAgentSupported() || !isFixtureAdvertised(FIXTURE);
+describe.skipIf(SKIP)('agentConfidenceEscalation: confidence < threshold → low-confidence suspend', () => {
+  it('low-confidence agent.decided suspends with reason=low-confidence and run reaches waiting-approval', async () => {
+    const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
+    expect(create.status).toBe(201);
+    const runId = (create.json as { runId: string }).runId;
+    const runPath = `/v1/runs/${encodeURIComponent(runId)}`;
+    // Last snapshot observed in a state that ends the wait; stays undefined
+    // when the polling window (40 attempts, 100 ms apart) elapses first.
+    let snap: { status?: string } | undefined;
+    try {
+      // Wait for the run to suspend (not terminal).
+      for (let i = 0; i < 40; i++) {
+        const res = await driver.get(runPath);
+        // Tolerate a missing/empty body instead of throwing a TypeError mid-poll.
+        const body = res.json as { status?: string } | null | undefined;
+        const status = body?.status;
+        if (status === 'waiting-approval' || status === 'failed' || status === 'completed') {
+          snap = body ?? undefined;
+          break;
+        }
+        await new Promise((r) => setTimeout(r, 100));
+      }
+      expect(
+        snap?.status,
+        'CP-1: run MUST reach waiting-approval (no suspend or terminal state observed means the poll timed out)',
+      ).toBe('waiting-approval');
+      const events = await driver.get(`${runPath}/events`);
+      const list = (events.json as { events?: Array<{ type: string; payload?: Record<string, unknown> }> })
+        .events ?? [];
+      const lowConfSuspend = list.find(
+        (e) => e.type === 'node.suspended' && e.payload?.reason === 'low-confidence',
+      );
+      expect(lowConfSuspend, 'CP-1: low-confidence agent.decided MUST emit node.suspended { reason: low-confidence }').toBeDefined();
+      // The find predicate already required a payload; the fallback only
+      // satisfies the type checker without a non-null assertion.
+      const payload = lowConfSuspend?.payload ?? {};
+      expect(typeof payload.agentId).toBe('string');
+      expect(typeof payload.threshold).toBe('number');
+      expect(typeof payload.observed).toBe('number');
+      expect(payload.observed).toBeLessThan(payload.threshold as number);
+    } finally {
+      // Cleanup so we don't leak a suspended run. Runs already seen as
+      // completed/failed need no cancel.
+      if (snap?.status !== 'completed' && snap?.status !== 'failed') {
+        try {
+          await driver.post(`${runPath}/cancel`, { reason: 'conformance-cleanup' });
+        } catch (err: unknown) {
+          // Best-effort: a cleanup failure must not mask the assertion result.
+          // eslint-disable-next-line no-console
+          console.warn('[agentConfidenceEscalation] cleanup cancel failed', err);
+        }
+      }
+    }
+  });
+});

package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * Multi-Agent Shift Phase 3 — CTI-1 cross-tenant isolation invariant.
+ *
+ * Verifies the CTI-1 normative invariant: a `memoryRef` resolved by a
+ * MemoryAdapter MUST return entries scoped to a single tenant. If
+ * `memoryRef` is associated with tenant T, a `list` or `get` call
+ * against `memoryRef` MUST NOT return entries belonging to tenant T' ≠ T,
+ * regardless of the calling principal's permissions on T'.
+ *
+ * Capability-gated: skips when host doesn't advertise long-term memory.
+ * Fixture-gated: requires `conformance-agent-memory-cross-tenant`
+ * (intentionally constructs a cross-tenant probe).
+ *
+ * @see docs/MULTI-AGENT-INTEGRATION-GAPS.md §`Phase 3`
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import { hasLongTermMemory } from '../lib/multi-agent-capabilities.js';
+// Fixture workflow for the cross-tenant probe; the suite only runs when the
+// host reports long-term memory AND advertises this fixture.
+const FIXTURE = 'conformance-agent-memory-cross-tenant';
+const SKIP = !(hasLongTermMemory() && isFixtureAdvertised(FIXTURE));
+describe.skipIf(SKIP)('agentMemoryCrossTenantIsolation: CTI-1 invariant', () => {
+  it('cross-tenant memoryRef returns empty / null — no leak across tenant boundary', async () => {
+    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    const finalState = await pollUntilTerminal(runId);
+    // Two outcomes are CTI-1-compliant: the run completes because the probe
+    // came back empty, or it fails with a security-related error. The one
+    // forbidden outcome is `completed` with another tenant's entries visible
+    // in the run's variables.
+    expect(['completed', 'failed']).toContain(finalState.status);
+    if (finalState.status !== 'completed') {
+      return;
+    }
+    const runSnapshot = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
+    const { variables } = runSnapshot.json as { variables?: Record<string, unknown> };
+    const probeResult = variables?.crossTenantProbe as Array<unknown> | null | undefined;
+    // Cross-tenant list MUST return [] (or null); never another tenant's entries.
+    if (!Array.isArray(probeResult)) {
+      expect(probeResult).toBeFalsy();
+      return;
+    }
+    expect(probeResult.length).toBe(0);
+  });
+});