npm - @openwop/openwop-conformance - Versions diffs - 1.2.0 → 1.4.0 - Mend

@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

package/CHANGELOG.md +156 -1
package/README.md +3 -2
package/api/asyncapi.yaml +8 -0
package/api/openapi.yaml +371 -1
package/api/redocly.yaml +15 -0
package/coverage.md +26 -5
package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
package/fixtures/conformance-envelope-recovery-applied.json +39 -0
package/fixtures/conformance-envelope-refusal.json +38 -0
package/fixtures/conformance-envelope-retry-attempted.json +39 -0
package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
package/fixtures/conformance-envelope-truncated.json +39 -0
package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
package/fixtures/conformance-model-capability-insufficient.json +25 -0
package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
package/fixtures/conformance-multi-agent-handoff.json +49 -0
package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
package/fixtures/conformance-prompt-end-to-end.json +33 -0
package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
package/fixtures/openwop-smoke-cost-emit.json +37 -0
package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
package/fixtures.md +45 -0
package/package.json +1 -1
package/schemas/README.md +5 -0
package/schemas/agent-manifest.schema.json +16 -0
package/schemas/capabilities.schema.json +390 -0
package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
package/schemas/envelopes/clarification.request.schema.json +9 -0
package/schemas/envelopes/error.schema.json +4 -0
package/schemas/envelopes/schema.request.schema.json +4 -0
package/schemas/envelopes/schema.response.schema.json +1 -1
package/schemas/node-pack-manifest.schema.json +28 -0
package/schemas/orchestrator-decision.schema.json +12 -0
package/schemas/prompt-kind.schema.json +8 -0
package/schemas/prompt-pack-manifest.schema.json +80 -0
package/schemas/prompt-ref.schema.json +40 -0
package/schemas/prompt-template.schema.json +149 -0
package/schemas/registry-version-manifest.schema.json +5 -0
package/schemas/run-ancestry-response.schema.json +54 -0
package/schemas/run-event-payloads.schema.json +513 -11
package/schemas/run-event.schema.json +17 -1
package/schemas/run-snapshot.schema.json +3 -2
package/schemas/workflow-definition.schema.json +19 -1
package/src/lib/driver.ts +15 -0
package/src/lib/env.ts +51 -0
package/src/lib/event-log-query.ts +62 -0
package/src/lib/fixtures.ts +38 -1
package/src/lib/host-toggle.ts +54 -0
package/src/lib/llm-cache-key-recipe.ts +68 -0
package/src/lib/multi-agent-capabilities.ts +10 -0
package/src/lib/otel-scrape.ts +59 -0
package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
package/src/scenarios/blob-presign-expiry.test.ts +42 -9
package/src/scenarios/blob-roundtrip.test.ts +0 -0
package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
package/src/scenarios/cost-attribution.test.ts +124 -11
package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
package/src/scenarios/envelope-truncated.test.ts +136 -0
package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
package/src/scenarios/fixtures-gating.test.ts +139 -1
package/src/scenarios/fixtures-valid.test.ts +123 -15
package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
package/src/scenarios/model-capability-insufficient.test.ts +221 -0
package/src/scenarios/model-capability-substituted.test.ts +203 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
package/src/scenarios/multi-region-idempotency.test.ts +58 -0
package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
package/src/scenarios/pack-registry-publish.test.ts +231 -51
package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
package/src/scenarios/prompt-pack-install.test.ts +187 -0
package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
package/src/scenarios/prompt-template-shape.test.ts +359 -0
package/src/scenarios/provider-usage.test.ts +185 -0
package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
package/src/scenarios/spec-corpus-validity.test.ts +34 -6
package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
package/src/scenarios/table-cursor-pagination.test.ts +47 -9
package/src/scenarios/table-schema-enforcement.test.ts +46 -9
package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0

package/src/scenarios/envelope-retry-attempted.test.ts ADDED Viewed

@@ -0,0 +1,258 @@
+/**
+ * envelope-retry-attempted — RFC 0032 §B.1 runtime behavior.
+ *
+ * Capability-gated on `capabilities.envelopes.reliability.supported: true` AND
+ * `events[]` includes `envelope.retry.attempted` AND the host's test seam
+ * `POST /v1/host/sample/test/mock-ai/program`.
+ *
+ * Asserts:
+ *   1. When the mock LLM emits an invalid envelope on attempt 1 then a valid
+ *      one on attempt 2, exactly one `envelope.retry.attempted` event fires
+ *      before the second attempt.
+ *   2. `attempt: 2`, `reason: "schema-violation"` (or `truncation` /
+ *      `type-drift` / `type-mismatch` / `refusal` / `parse-error` / `unknown`
+ *      / `x-host-<host>-*`).
+ *   3. First attempt does NOT emit `envelope.retry.attempted` (per RFC 0032
+ *      §B.1 normative text — only retries past the first emit).
+ *   4. Eventual success is recorded normally (envelope acceptance + downstream
+ *      RunEventDoc).
+ *
+ * Live behavioral via the reference workflow-engine's
+ * `executor/envelopeReliability.ts` emission path + the
+ * `POST /v1/host/sample/test/mock-ai/program` seam. Fixture- + capability-
+ * gated; soft-skip cleanly when the host doesn't expose the seam or doesn't
+ * advertise `capabilities.envelopes.reliability.events[]` containing
+ * `envelope.retry.attempted`.
+ *
+ * @see RFCS/0032-envelope-reliability-events.md §B.1
+ * @see schemas/run-event-payloads.schema.json §envelopeRetryAttempted
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+// True when no host base URL is configured; both suites in this file are skipped in that case.
+const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];
+/**
+ * Fields of `capabilities.envelopes.reliability` that this file inspects.
+ * Each is typed `unknown` because the advertisement test checks the runtime
+ * type of every field before relying on it.
+ */
+interface ReliabilityAdvertisement {
+  supported?: unknown;
+  events?: unknown;
+  maxRetryAttempts?: unknown;
+}
+/** Subset of the `/.well-known/openwop` discovery document read by this file. */
+interface DiscoveryDoc {
+  capabilities?: {
+    envelopes?: {
+      reliability?: ReliabilityAdvertisement;
+    };
+  };
+}
+/**
+ * Fetches the host discovery document from `/.well-known/openwop`.
+ *
+ * @returns the response body typed as `DiscoveryDoc`, or `null` when the
+ *   request throws or the status is anything other than 200.
+ */
+async function readDiscovery(): Promise<DiscoveryDoc | null> {
+  try {
+    const { status, json } = await driver.get('/.well-known/openwop');
+    return status === 200 ? (json as DiscoveryDoc) : null;
+  } catch {
+    // Any transport/driver failure is treated the same as "no discovery document".
+    return null;
+  }
+}
+describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0032 §C)', () => {
+  // Static check of the discovery advertisement. The test passes without
+  // asserting anything when the discovery document cannot be read or when the
+  // host does not advertise `capabilities.envelopes.reliability` at all.
+  it('capabilities.envelopes.reliability (when present) conforms to RFC 0032 §C', async () => {
+    const d = await readDiscovery();
+    if (d === null) return;
+    const reliability = d.capabilities?.envelopes?.reliability;
+    if (reliability === undefined) return;
+    expect(typeof reliability.supported, 'reliability.supported MUST be boolean').toBe('boolean');
+    if (reliability.events !== undefined) {
+      expect(Array.isArray(reliability.events), 'reliability.events MUST be an array').toBe(true);
+      const RFC_0032_EVENTS = [
+        'envelope.retry.attempted',
+        'envelope.retry.exhausted',
+        'envelope.refusal',
+        'envelope.truncated',
+        'envelope.nlToFormat.engaged',
+        'envelope.recovery.applied',
+      ];
+      for (const e of reliability.events as unknown[]) {
+        // Require a genuine string first: a non-string entry such as
+        // `['envelope.refusal']` stringifies to a valid name and would
+        // otherwise slip through the membership check below.
+        expect(typeof e, `event "${String(e)}" MUST be a string`).toBe('string');
+        expect(RFC_0032_EVENTS, `event "${String(e)}" MUST be one of the six RFC 0032 names`).toContain(String(e));
+      }
+      // When supported: true, MUST include the two MUST-tier events (per
+      // RFC 0032 §C). Hosts that have wired end-to-end emission from
+      // dispatchStructured (per RFC 0032 §B + §C — the reference host's
+      // OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=true path) ALSO populate
+      // envelope.retry.attempted + envelope.truncated. Hosts running the
+      // legacy undifferentiated retry loop advertise `events: []` —
+      // soft-skip this stricter check rather than fail on the legacy
+      // posture (the MUST-tier events still appear via the seam).
+      if (reliability.supported === true && Array.isArray(reliability.events) && (reliability.events as unknown[]).length > 0) {
+        const evts = reliability.events as string[];
+        expect(
+          evts.includes('envelope.retry.exhausted'),
+          'RFC 0032 §C: hosts that advertise `supported: true` with non-empty `events[]` MUST include `envelope.retry.exhausted`',
+        ).toBe(true);
+        expect(
+          evts.includes('envelope.refusal'),
+          'RFC 0032 §C: hosts that advertise `supported: true` with non-empty `events[]` MUST include `envelope.refusal`',
+        ).toBe(true);
+      }
+    }
+    if (reliability.maxRetryAttempts !== undefined) {
+      const n = reliability.maxRetryAttempts;
+      // `Number.isInteger` rejects NaN, infinities and fractional values such
+      // as 2.5, which a plain range comparison would accept.
+      expect(
+        typeof n === 'number' && Number.isInteger(n) && n >= 1 && n <= 16,
+        'maxRetryAttempts MUST be integer in [1, 16]',
+      ).toBe(true);
+    }
+  });
+});
+// Live runtime behavior — drives the conformance fixture
+// `conformance-envelope-retry-attempted` against the sample's
+// conformance-only `mock` provider. Test pre-seeds a 2-entry program
+// via `POST /v1/host/sample/test/mock-ai/program`: attempt 1 returns
+// invalid JSON, attempt 2 returns a valid envelope. The host's
+// `dispatchStructured` retry loop emits exactly one
+// `envelope.retry.attempted` event between the two attempts (RFC 0032
+// §B.1). Fixture- + capability-gated: soft-skip when either is absent
+// OR when the host doesn't expose the mock-ai program seam.
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+// Workflow fixture started by the runtime tests.
+const FIXTURE = 'conformance-envelope-retry-attempted';
+// Node whose mock-AI program is seeded before each run.
+const NODE_ID = 'structured-call';
+// Spec-reserved `reason` values accepted by the reason test.
+const RFC_0032_REASONS = new Set<string>([
+  'schema-violation',
+  'truncation',
+  'type-drift',
+  'type-mismatch',
+  'refusal',
+  'parse-error',
+  'unknown',
+]);
+// Host extension form: `x-host-<host>-<key>`, lowercase alphanumerics and hyphens.
+const HOST_REASON_EXT_RE = /^x-host-[a-z0-9][a-z0-9-]*-[a-z0-9][a-z0-9-]*$/;
+/** One entry of the `events` array read from `/v1/runs/{runId}/events`, as typed by this file. */
+type RunEvent = {
+  sequence: number;
+  type: string;
+  nodeId?: string;
+  payload?: Record<string, unknown>;
+};
+/**
+ * Posts a scripted response program for `NODE_ID` to the host's mock-AI test seam.
+ *
+ * @param program - entries sent verbatim as the `program` field of the request body.
+ * @returns an object carrying only the HTTP status of the seam response.
+ */
+async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
+  const requestBody = { nodeId: NODE_ID, program };
+  const { status } = await driver.post('/v1/host/sample/test/mock-ai/program', requestBody);
+  return { status };
+}
+/**
+ * Creates a run of `FIXTURE`, awaits `pollUntilTerminal` (10 s timeout) and
+ * then reads the run's event log.
+ *
+ * @returns the run's events (an empty array when the response body has no
+ *   `events` field), or `null` when run creation does not answer 201 or the
+ *   events request does not answer 200.
+ */
+async function startRunAndRead(): Promise<RunEvent[] | null> {
+  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+  if (created.status !== 201) {
+    return null;
+  }
+  const { runId } = created.json as { runId: string };
+  await pollUntilTerminal(runId, { timeoutMs: 10_000 });
+  const eventsPath = `/v1/runs/${encodeURIComponent(runId)}/events`;
+  const listing = await driver.get(eventsPath);
+  if (listing.status !== 200) {
+    return null;
+  }
+  const body = listing.json as { events?: RunEvent[] } | undefined;
+  return (body?.events ?? []) as RunEvent[];
+}
+describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: runtime behavior (RFC 0032 §B.1)', () => {
+  /**
+   * Seeds the mock-AI program for the next run.
+   *
+   * Every test goes through this helper so that a seam which exists but
+   * rejects the program (any status other than 404 or 200) fails right here
+   * rather than surfacing later as a missing-event failure or a silent pass.
+   *
+   * @returns `false` when the host answers 404 (seam not exposed — the caller
+   *   soft-skips); `true` once the seam has accepted the program with 200.
+   */
+  async function seedOrSkip(program: Array<Record<string, unknown>>): Promise<boolean> {
+    const seed = await programMock(program);
+    if (seed.status === 404) return false; // host doesn't expose the seam
+    expect(seed.status).toBe(200);
+    return true;
+  }
+  it('when mock LLM emits invalid envelope on attempt 1 then valid on attempt 2, exactly one `envelope.retry.attempted` event fires before the second attempt', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    const seeded = await seedOrSkip([
+      { content: 'not valid json — provoke parse-error retry' },
+      { content: '{"valid":true}' },
+    ]);
+    if (!seeded) return;
+    const events = await startRunAndRead();
+    if (events === null) return;
+    const retries = events.filter((e) => e.type === 'envelope.retry.attempted');
+    expect(
+      retries.length,
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.1',
+        'exactly one envelope.retry.attempted event MUST fire between attempts 1 and 2',
+      ),
+    ).toBe(1);
+  });
+  it('event payload carries `attempt: 2` (1-indexed; first attempt does not emit)', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'invalid' }, { content: '{"valid":true}' }]))) return;
+    const events = await startRunAndRead();
+    if (events === null) return;
+    const retry = events.find((e) => e.type === 'envelope.retry.attempted');
+    expect(retry, 'envelope.retry.attempted MUST appear in the event log').toBeDefined();
+    expect(
+      retry!.payload?.attempt,
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.1',
+        'attempt field MUST be 2 (1-indexed; first attempt does not emit)',
+      ),
+    ).toBe(2);
+  });
+  it('`reason` is one of the spec-reserved closed-enum values OR matches the `x-host-<host>-<key>` extension pattern', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'invalid' }, { content: '{"valid":true}' }]))) return;
+    const events = await startRunAndRead();
+    if (events === null) return;
+    const retry = events.find((e) => e.type === 'envelope.retry.attempted');
+    expect(retry).toBeDefined();
+    const reason = retry!.payload?.reason;
+    expect(typeof reason).toBe('string');
+    expect(
+      RFC_0032_REASONS.has(reason as string) || HOST_REASON_EXT_RE.test(reason as string),
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.1',
+        'reason MUST be in the spec-reserved set OR match x-host-<host>-<key>',
+      ),
+    ).toBe(true);
+  });
+  it('eventual success records normally via envelope acceptance + downstream RunEventDoc', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'invalid' }, { content: '{"valid":true}' }]))) return;
+    const events = await startRunAndRead();
+    if (events === null) return;
+    const nodeCompleted = events.find((e) => e.type === 'node.completed' && e.nodeId === NODE_ID);
+    const runCompleted = events.find((e) => e.type === 'run.completed');
+    expect(
+      nodeCompleted,
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.1',
+        'eventual success MUST produce a node.completed for the dispatching node',
+      ),
+    ).toBeDefined();
+    expect(runCompleted).toBeDefined();
+  });
+  it('`previousError` (when populated) MUST NOT contain prompt or response substring excerpts — limit to validator output', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    // Random suffix keeps the canary unique per run so a match can only come
+    // from this test's own seeded response.
+    const PROMPT_CANARY = 'PROMPT-CANARY-RETRY-ATTEMPTED-DO-NOT-LEAK-' + Math.random().toString(36).slice(2, 10);
+    const RESPONSE_CANARY = 'RESPONSE-CANARY-' + PROMPT_CANARY;
+    const seeded = await seedOrSkip([
+      { content: `not valid json mentioning ${RESPONSE_CANARY}` },
+      { content: '{"valid":true}' },
+    ]);
+    if (!seeded) return;
+    const events = await startRunAndRead();
+    if (events === null) return;
+    const retry = events.find((e) => e.type === 'envelope.retry.attempted');
+    if (!retry) return;
+    const previousError = retry.payload?.previousError;
+    if (previousError === undefined || previousError === null) return; // field is optional
+    const serialized = typeof previousError === 'string' ? previousError : JSON.stringify(previousError);
+    expect(
+      serialized.includes(RESPONSE_CANARY),
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §G',
+        'previousError MUST NOT echo provider response substrings — validator output only',
+      ),
+    ).toBe(false);
+  });
+});

package/src/scenarios/envelope-retry-exhausted.test.ts ADDED Viewed

@@ -0,0 +1,168 @@
+/**
+ * envelope-retry-exhausted — RFC 0032 §B.2 runtime behavior (MUST tier).
+ *
+ * Capability- + fixture-gated. Drives the conformance `mock` provider
+ * via `POST /v1/host/sample/test/mock-ai/program` with a program that
+ * returns invalid JSON on EVERY attempt; the host's `dispatchStructured`
+ * retry loop exhausts its budget and emits `envelope.retry.exhausted`
+ * BEFORE the terminal failure.
+ *
+ * Asserts:
+ *   1. Exactly one `envelope.retry.exhausted` event with `totalAttempts`
+ *      matching the host's advertised `maxRetryAttempts`.
+ *   2. `finalReason` is one of the spec-reserved closed-enum values OR
+ *      matches `x-host-<host>-<key>` (RFC 0032 §B.2 + §B.1 share the
+ *      reason enum).
+ *   3. `RunSnapshot.error.code` is `envelope_invalid` per
+ *      RFC 0033 §C (schema-violation-exhaustion → existing RFC 0021 code).
+ *   4. `node.failed` event appears after `envelope.retry.exhausted`
+ *      (cause precedes effect per RFC 0032 §B.2 "emitted ... about to
+ *      surface a terminal envelope failure").
+ *
+ * @see RFCS/0032-envelope-reliability-events.md §B.2
+ * @see RFCS/0033-envelope-completion-contract.md §C + §F
+ * @see schemas/run-event-payloads.schema.json §envelopeRetryExhausted
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+// True when no host base URL is configured; the suite below is skipped in that case.
+const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];
+// Workflow fixture started by the runtime tests.
+const FIXTURE = 'conformance-envelope-retry-exhausted';
+// Node whose mock-AI program is seeded before each run.
+const NODE_ID = 'structured-call';
+// Spec-reserved reason values accepted for `finalReason`.
+const RFC_0032_REASONS = new Set<string>([
+  'schema-violation',
+  'truncation',
+  'type-drift',
+  'type-mismatch',
+  'refusal',
+  'parse-error',
+  'unknown',
+]);
+// Host extension form: `x-host-<host>-<key>`, lowercase alphanumerics and hyphens.
+const HOST_REASON_EXT_RE = /^x-host-[a-z0-9][a-z0-9-]*-[a-z0-9][a-z0-9-]*$/;
+/** One entry of the `events` array read from `/v1/runs/{runId}/events`, as typed by this file. */
+type RunEvent = {
+  sequence: number;
+  type: string;
+  nodeId?: string;
+  payload?: Record<string, unknown>;
+};
+/**
+ * Posts a scripted response program for `NODE_ID` to the host's mock-AI test seam.
+ *
+ * @param program - entries sent verbatim as the `program` field of the request body.
+ * @returns an object carrying only the HTTP status of the seam response.
+ */
+async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
+  const requestBody = { nodeId: NODE_ID, program };
+  const { status } = await driver.post('/v1/host/sample/test/mock-ai/program', requestBody);
+  return { status };
+}
+/**
+ * Creates a run of `FIXTURE`, awaits `pollUntilTerminal` (10 s timeout) and
+ * then reads the run's event log.
+ *
+ * @returns the run's events (an empty array when the response body has no
+ *   `events` field) together with whatever `pollUntilTerminal` resolved to,
+ *   or `null` when run creation does not answer 201 or the events request
+ *   does not answer 200.
+ */
+async function startRunAndRead(): Promise<{ events: RunEvent[]; terminal: unknown } | null> {
+  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+  if (created.status !== 201) {
+    return null;
+  }
+  const { runId } = created.json as { runId: string };
+  const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });
+  const eventsPath = `/v1/runs/${encodeURIComponent(runId)}/events`;
+  const listing = await driver.get(eventsPath);
+  if (listing.status !== 200) {
+    return null;
+  }
+  const body = listing.json as { events?: RunEvent[] } | undefined;
+  return { events: (body?.events ?? []) as RunEvent[], terminal };
+}
+describe.skipIf(HTTP_SKIP)('envelope-retry-exhausted: runtime behavior (RFC 0032 §B.2 MUST)', () => {
+  /**
+   * Seeds the mock-AI program for the next run.
+   *
+   * Every test goes through this helper so that a seam which exists but
+   * rejects the program (any status other than 404 or 200) fails right here
+   * rather than surfacing later as a missing-event failure.
+   *
+   * @returns `false` when the host answers 404 (seam not exposed — the caller
+   *   soft-skips); `true` once the seam has accepted the program with 200.
+   */
+  async function seedOrSkip(program: Array<Record<string, unknown>>): Promise<boolean> {
+    const seed = await programMock(program);
+    if (seed.status === 404) return false;
+    expect(seed.status).toBe(200);
+    return true;
+  }
+  /**
+   * Reads `capabilities.envelopes.reliability.maxRetryAttempts` from the
+   * discovery document.
+   *
+   * @returns the advertised value when it is a number; `undefined` when the
+   *   request throws, the status is not 200, or the field is absent or not a
+   *   number.
+   */
+  async function readAdvertisedMaxRetryAttempts(): Promise<number | undefined> {
+    try {
+      const res = await driver.get('/.well-known/openwop');
+      if (res.status !== 200) return undefined;
+      const doc = res.json as
+        | { capabilities?: { envelopes?: { reliability?: { maxRetryAttempts?: unknown } } } }
+        | undefined;
+      const advertised = doc?.capabilities?.envelopes?.reliability?.maxRetryAttempts;
+      return typeof advertised === 'number' ? advertised : undefined;
+    } catch {
+      return undefined;
+    }
+  }
+  it('host exhausts retries with all-invalid program → exactly one envelope.retry.exhausted event', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    // Seed four entries of invalid JSON so dispatchStructured hits every
+    // retry and then exhausts. The mock returns empty-stop after program
+    // exhaustion, but dispatchStructured short-circuits earlier via its own
+    // counter.
+    const seeded = await seedOrSkip([
+      { content: 'not json a' },
+      { content: 'not json b' },
+      { content: 'not json c' },
+      { content: 'not json d' },
+    ]);
+    if (!seeded) return;
+    const result = await startRunAndRead();
+    if (result === null) return;
+    const { events } = result;
+    const exhausted = events.filter((e) => e.type === 'envelope.retry.exhausted');
+    expect(
+      exhausted.length,
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.2',
+        'exactly one envelope.retry.exhausted event MUST fire on retry-budget exhaustion',
+      ),
+    ).toBe(1);
+  });
+  it('totalAttempts in payload matches the host advertised maxRetryAttempts', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'x' }, { content: 'y' }, { content: 'z' }, { content: 'w' }]))) return;
+    const result = await startRunAndRead();
+    if (result === null) return;
+    const exhausted = result.events.find((e) => e.type === 'envelope.retry.exhausted');
+    expect(exhausted).toBeDefined();
+    const total = exhausted!.payload?.totalAttempts;
+    expect(typeof total === 'number' && (total as number) >= 1).toBe(true);
+    // Compare against the advertisement only when the host publishes one;
+    // hosts that omit `maxRetryAttempts` keep the weaker ">= 1" check above.
+    const advertised = await readAdvertisedMaxRetryAttempts();
+    if (advertised !== undefined) {
+      expect(
+        total,
+        driver.describe(
+          'RFCS/0032-envelope-reliability-events.md §B.2',
+          'totalAttempts MUST match the advertised capabilities.envelopes.reliability.maxRetryAttempts',
+        ),
+      ).toBe(advertised);
+    }
+  });
+  it('finalReason is in the spec-reserved enum OR matches x-host-<host>-<key>', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'x' }, { content: 'y' }, { content: 'z' }, { content: 'w' }]))) return;
+    const result = await startRunAndRead();
+    if (result === null) return;
+    const exhausted = result.events.find((e) => e.type === 'envelope.retry.exhausted');
+    expect(exhausted).toBeDefined();
+    const reason = exhausted!.payload?.finalReason;
+    expect(typeof reason).toBe('string');
+    expect(
+      RFC_0032_REASONS.has(reason as string) || HOST_REASON_EXT_RE.test(reason as string),
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.2',
+        'finalReason MUST be in the spec-reserved set OR match x-host-<host>-<key>',
+      ),
+    ).toBe(true);
+  });
+  it('RunSnapshot.error.code is envelope_invalid for schema-violation exhaustion (RFC 0033 §C)', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'x' }, { content: 'y' }, { content: 'z' }, { content: 'w' }]))) return;
+    const result = await startRunAndRead();
+    if (result === null) return;
+    const code = (result.terminal as { error?: { code?: string } }).error?.code;
+    expect(
+      code,
+      driver.describe(
+        'RFCS/0033-envelope-completion-contract.md §C',
+        'schema-violation-exhaustion MUST surface as RunSnapshot.error.code = envelope_invalid',
+      ),
+    ).toBe('envelope_invalid');
+  });
+  it('envelope.retry.exhausted is emitted BEFORE node.failed (cause precedes effect)', async () => {
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    if (!(await seedOrSkip([{ content: 'x' }, { content: 'y' }, { content: 'z' }, { content: 'w' }]))) return;
+    const result = await startRunAndRead();
+    if (result === null) return;
+    // Ordering is judged by position in the returned `events` array.
+    const exhaustedIdx = result.events.findIndex((e) => e.type === 'envelope.retry.exhausted');
+    const failedIdx = result.events.findIndex((e) => e.type === 'node.failed');
+    expect(exhaustedIdx).toBeGreaterThanOrEqual(0);
+    expect(failedIdx).toBeGreaterThanOrEqual(0);
+    expect(
+      exhaustedIdx < failedIdx,
+      driver.describe(
+        'RFCS/0032-envelope-reliability-events.md §B.2',
+        'envelope.retry.exhausted MUST be emitted BEFORE node.failed (the event signals the host is about to surface the terminal failure)',
+      ),
+    ).toBe(true);
+  });
+});

package/src/scenarios/envelope-tier-one-subset-static.test.ts ADDED Viewed

@@ -0,0 +1,229 @@
+/**
+ * envelope-tier-one-subset-static — RFC 0030 §B static schema-walker.
+ *
+ * Capability-gated on `capabilities.envelopes.tierOneSubsetCompliance: "strict"`.
+ *
+ * For every kind in `capabilities.supportedEnvelopes` whose payload schema
+ * is reachable via the host's `/schemas/envelopes/<kind>.schema.json`
+ * canonical location OR via this repo's local `schemas/envelopes/` directory
+ * (for the four universal kinds), statically assert the Tier-1 cross-vendor
+ * intersection rules per `spec/v1/structured-output-subset.md`:
+ *
+ *   - Object root (`type: object`)
+ *   - `additionalProperties: false` on every object subschema
+ *   - Every property listed in `required` (OpenAI strict rule)
+ *   - No `oneOf` anywhere (Gemini silently drops)
+ *   - No `allOf` / `not` / `if/then/else` / `dependencies` / `prefixItems`
+ *   - No string format constraints (`minLength` / `maxLength` / `pattern` /
+ *     `format`)
+ *   - No number bounds (`minimum` / `maximum` / `multipleOf`)
+ *   - No array bounds (`minItems` / `maxItems` / `uniqueItems`)
+ *   - No `propertyNames`
+ *   - Max nesting depth 5
+ *   - Max total property count 100
+ *
+ * Hosts that advertise `warn` or `off` (or omit the field) soft-skip — the
+ * strict-mode test returns early without walking any schema, so it cannot fail.
+ *
+ * @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md §B
+ * @see spec/v1/structured-output-subset.md
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { driver } from '../lib/driver.js';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+const HTTP_SKIP = !process.env.OPENWOP_BASE_URL; // true when OPENWOP_BASE_URL is unset or empty; passed to describe.skipIf for the host-backed suite
+const UNIVERSAL_KINDS = ['clarification.request', 'schema.request', 'schema.response', 'error'] as const; // kinds iterated by the always-on load-bearing suite
+interface DiscoveryDoc { // the subset of the `/.well-known/openwop` response that this file reads
+  capabilities?: {
+    supportedEnvelopes?: unknown; // left `unknown`: not validated at this boundary
+    envelopes?: { tierOneSubsetCompliance?: unknown }; // the strict-mode test runs only when this equals "strict"
+  };
+}
+/**
+ * Fetch the host discovery document from `/.well-known/openwop`.
+ *
+ * @returns the response's `json` payload (cast, not validated) when the
+ *   status is 200; `null` for any other status or when anything in the
+ *   request/response handling throws.
+ */
+async function readDiscovery(): Promise<DiscoveryDoc | null> {
+  try {
+    const response = await driver.get('/.well-known/openwop');
+    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
+  } catch {
+    // Any failure is reported as "no discovery document" rather than rethrown.
+    return null;
+  }
+}
+/**
+ * Read `<SCHEMAS_DIR>/envelopes/<kind>.schema.json` from disk.
+ *
+ * @param kind envelope kind used verbatim as the file-name stem.
+ * @returns the parsed JSON (cast, not validated), or `null` when the file
+ *   does not exist. Read and parse errors propagate to the caller.
+ */
+function loadLocalSchema(kind: string): Record<string, unknown> | null {
+  const schemaFile = join(SCHEMAS_DIR, 'envelopes', `${kind}.schema.json`);
+  if (existsSync(schemaFile)) {
+    const rawText = readFileSync(schemaFile, 'utf8');
+    return JSON.parse(rawText) as Record<string, unknown>;
+  }
+  return null;
+}
+interface Violation { // one rule breach recorded by walkSchema
+  path: string; // location inside the schema, built from `#` plus `/properties/<name>`-style segments
+  rule: string; // identifier of the rule that was broken
+  detail?: string; // optional context, e.g. the offending keyword or a measured value such as `depth=6`
+}
+/**
+ * Walk a schema, collecting violations.
+ *
+ * `mode: "load-bearing"` — only flags rules that fail across MULTIPLE vendors
+ * (Gemini silently drops these, producing looser-than-declared schemas — a
+ * silent correctness bug). These are the bare-minimum constraints that
+ * RFC 0030 §B applies even to schemas that predate the RFC.
+ *
+ * `mode: "strict"` — flags every rule outside the OpenAI-strict ∩ Anthropic-
+ * strict ∩ Gemini intersection. Used only when the host advertises
+ * `tierOneSubsetCompliance: "strict"`.
+ *
+ * Subschemas that are not plain objects (the boolean schemas `true` / `false`,
+ * `null`, arrays) carry no keywords to inspect and are ignored. Probing them
+ * with the `in` operator would throw a TypeError and abort the whole walk.
+ *
+ * @param schema     the (sub)schema to inspect; non-object values are tolerated.
+ * @param path       location of `schema`, used as the `path` of each violation.
+ * @param depth      nesting level of `schema`; values above 5 are reported and
+ *                   not descended into.
+ * @param propCount  shared counter, incremented once per entry of every
+ *                   `properties` map visited.
+ * @param violations output array; violations are appended in visit order.
+ * @param mode       which rule set to apply (see above).
+ */
+function walkSchema(
+  schema: Record<string, unknown>,
+  path: string,
+  depth: number,
+  propCount: { n: number },
+  violations: Violation[],
+  mode: 'load-bearing' | 'strict',
+): void {
+  if (depth > 5) {
+    violations.push({ path, rule: 'max-nesting-depth-5', detail: `depth=${depth}` });
+    return;
+  }
+  // Runtime narrowing: callers pass values cast from untyped JSON.
+  const isSchemaObject = (value: unknown): value is Record<string, unknown> =>
+    value !== null && typeof value === 'object' && !Array.isArray(value);
+  if (!isSchemaObject(schema)) {
+    return;
+  }
+  // Load-bearing forbidden keywords — fail across multiple vendors.
+  // `oneOf` is the canonical case (Gemini silently drops); `propertyNames` is
+  // dropped by both OpenAI strict and Gemini; `prefixItems` by both Anthropic
+  // and OpenAI strict; `if/then/else` + `dependencies` + `not` + `allOf` by
+  // every Tier-1 vendor.
+  const LOAD_BEARING_KEYWORDS = ['oneOf', 'allOf', 'not', 'if', 'then', 'else', 'dependencies', 'prefixItems', 'propertyNames'] as const;
+  for (const kw of LOAD_BEARING_KEYWORDS) {
+    if (kw in schema) {
+      violations.push({ path, rule: `forbidden-keyword`, detail: kw });
+    }
+  }
+  // anyOf — recurse into branches (anyOf is permitted, but contents are walked)
+  if (Array.isArray(schema.anyOf)) {
+    for (let i = 0; i < schema.anyOf.length; i++) {
+      walkSchema(schema.anyOf[i] as Record<string, unknown>, `${path}/anyOf/${i}`, depth + 1, propCount, violations, mode);
+    }
+  }
+  // Type-specific constraints. `type` may be a single name or an array of names.
+  const type = schema.type;
+  const hasType = (name: string): boolean => type === name || (Array.isArray(type) && type.includes(name));
+  if (hasType('object')) {
+    // `additionalProperties: false` is OpenAI-strict + Anthropic-strict required, but
+    // the universal-kind schemas (which predate RFC 0030) deliberately use
+    // `additionalProperties: true` on open metadata bags (e.g., `clarification.request`
+    // `questions[].context` and `error.details`). Treat this as strict-only since
+    // Gemini accepts both modes and the open-bag pattern is a deliberate v1.1
+    // design choice — vendor-kind authors targeting OpenAI/Anthropic strict
+    // mode for portability can satisfy this rule in their own schemas.
+    if (mode === 'strict' && schema.additionalProperties !== false) {
+      violations.push({ path, rule: 'additionalProperties-must-be-false-on-object-strict-only' });
+    }
+    // Malformed `properties` / `required` are treated as absent instead of
+    // being enumerated or crashing on `.includes`.
+    const props = isSchemaObject(schema.properties) ? schema.properties : {};
+    const required: unknown[] = Array.isArray(schema.required) ? schema.required : [];
+    for (const propName of Object.keys(props)) {
+      propCount.n++;
+      if (mode === 'strict' && !required.includes(propName)) {
+        // OpenAI strict requires every property in required. Vendor-kind authors
+        // who want OpenAI-strict portability use the `["type","null"]` union
+        // pattern per RFC 0030 §D. Universal-kind schemas deliberately omit
+        // `reasoning` from required per RFC 0030 §A so they don't fail this rule
+        // under load-bearing mode; strict-mode advertisement is opt-in.
+        violations.push({ path: `${path}/properties/${propName}`, rule: 'property-not-in-required-strict-mode-only' });
+      }
+      walkSchema(props[propName] as Record<string, unknown>, `${path}/properties/${propName}`, depth + 1, propCount, violations, mode);
+    }
+  }
+  // String/number/array constraints — OpenAI-strict-only restrictions. Only
+  // flag in `strict` mode; under load-bearing mode these are permitted
+  // because Gemini 2.5+ and Anthropic accept them.
+  if (mode === 'strict') {
+    if (hasType('string')) {
+      for (const kw of ['minLength', 'maxLength', 'pattern', 'format']) {
+        if (kw in schema) {
+          violations.push({ path, rule: 'forbidden-string-constraint-strict-only', detail: kw });
+        }
+      }
+    }
+    if (hasType('number') || hasType('integer')) {
+      for (const kw of ['minimum', 'maximum', 'multipleOf']) {
+        if (kw in schema) {
+          violations.push({ path, rule: 'forbidden-number-constraint-strict-only', detail: kw });
+        }
+      }
+    }
+    if (hasType('array')) {
+      for (const kw of ['minItems', 'maxItems', 'uniqueItems']) {
+        if (kw in schema) {
+          violations.push({ path, rule: 'forbidden-array-constraint-strict-only', detail: kw });
+        }
+      }
+    }
+  }
+  // Single-schema `items` is walked; array-form (tuple) `items` is not.
+  if (hasType('array') && isSchemaObject(schema.items)) {
+    walkSchema(schema.items, `${path}/items`, depth + 1, propCount, violations, mode);
+  }
+  // $defs — walk to surface violations inside referenced shapes
+  const defs = isSchemaObject(schema.$defs) ? schema.$defs : {};
+  for (const defName of Object.keys(defs)) {
+    walkSchema(defs[defName] as Record<string, unknown>, `${path}/$defs/${defName}`, depth + 1, propCount, violations, mode);
+  }
+}
+describe.skipIf(HTTP_SKIP)('envelope-tier-one-subset-static (RFC 0030 §B)', () => {
+  // Walks every advertised kind that has a local schema file in strict mode
+  // and fails if any of them records a violation. Skipped entirely when
+  // HTTP_SKIP is true.
+  it('hosts advertising tierOneSubsetCompliance: "strict" have payload schemas that satisfy the Tier-1 intersection', async () => {
+    const d = await readDiscovery();
+    if (d === null) return; // host unreachable; soft-skip
+    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
+    if (compliance !== 'strict') return; // gated on "strict" only
+    // `supportedEnvelopes` is untyped host input: narrow it at runtime instead
+    // of casting. A non-array value would otherwise make `for…of` throw (plain
+    // object) or be iterated character by character (string); non-string
+    // entries cannot name a schema file and are dropped.
+    const rawAdvertised = d.capabilities?.supportedEnvelopes;
+    const advertised = Array.isArray(rawAdvertised)
+      ? rawAdvertised.filter((entry): entry is string => typeof entry === 'string')
+      : [];
+    if (advertised.length === 0) return;
+    const violationsByKind: Record<string, Violation[]> = {};
+    for (const kind of advertised) {
+      const local = loadLocalSchema(kind);
+      if (local === null) continue; // host-served only; skip for now
+      const violations: Violation[] = [];
+      const propCount = { n: 0 };
+      walkSchema(local, `#`, 0, propCount, violations, 'strict');
+      // The property budget is checked once per schema, after the full walk.
+      if (propCount.n > 100) {
+        violations.push({ path: '#', rule: 'max-property-count-100-exceeded', detail: `count=${propCount.n}` });
+      }
+      if (violations.length > 0) {
+        violationsByKind[kind] = violations;
+      }
+    }
+    expect(
+      violationsByKind,
+      `RFC 0030 §B: schemas violating the Tier-1 subset under strict-mode advertisement: ${JSON.stringify(violationsByKind, null, 2)}`,
+    ).toEqual({});
+  });
+});
+describe('envelope-tier-one-subset-static: universal-kind schemas satisfy load-bearing rules (always-on)', () => {
+  // This suite is not gated on a host: it runs for every universal kind.
+  // Only the rules that fail across MULTIPLE vendors are applied here (Gemini
+  // silently drops them, yielding looser-than-declared schemas — a silent
+  // correctness bug). The OpenAI-strict-only rules (minLength, maxLength,
+  // minItems, etc.) are reserved for host-advertised "strict" mode because
+  // Gemini 2.5+ and Anthropic accept them.
+  UNIVERSAL_KINDS.forEach((kind) => {
+    it(`${kind}.schema.json satisfies load-bearing Tier-1 rules (no oneOf/allOf/not/prefixItems/propertyNames anywhere)`, () => {
+      const loaded = loadLocalSchema(kind);
+      expect(loaded, `schemas/envelopes/${kind}.schema.json MUST exist`).not.toBeNull();
+      if (loaded === null) {
+        return;
+      }
+      const found: Violation[] = [];
+      // The property counter is required by the walker but not asserted on here.
+      const counter = { n: 0 };
+      walkSchema(loaded, '#', 0, counter, found, 'load-bearing');
+      const failureMessage = `${kind}.schema.json load-bearing Tier-1 violations (these fail across multiple vendors): ${JSON.stringify(found, null, 2)}`;
+      expect(found, failureMessage).toEqual([]);
+    });
+  });
+});