npm - @openwop/openwop-conformance - Versions diffs - 1.0.0 → 1.1.0 - Mend

@openwop/openwop-conformance 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +17 -0
package/README.md +31 -6
package/api/grpc/openwop.proto +251 -0
package/api/openapi.yaml +109 -3
package/coverage.md +48 -9
package/fixtures/conformance-configurable-schema.json +39 -0
package/fixtures/conformance-subworkflow-parent.json +1 -1
package/fixtures/conformance-wasm-pack-memory-cap-breach.json +23 -0
package/fixtures/openwop-smoke-byok-roundtrip.json +25 -0
package/fixtures.md +21 -0
package/package.json +3 -1
package/schemas/README.md +4 -0
package/schemas/audit-verify-result.schema.json +90 -0
package/schemas/capabilities.schema.json +293 -1
package/schemas/node-pack-manifest.schema.json +4 -4
package/schemas/pack-lockfile.schema.json +92 -0
package/schemas/registry-version-manifest.schema.json +145 -0
package/schemas/run-event-payloads.schema.json +2 -2
package/schemas/security-advisory.schema.json +109 -0
package/src/lib/a2a-fake-peer.ts +143 -56
package/src/lib/behavior-gate.ts +68 -0
package/src/lib/env.ts +10 -0
package/src/lib/grpc-framing.test.ts +96 -0
package/src/lib/grpc-framing.ts +76 -0
package/src/lib/oidc-issuer.test.ts +328 -0
package/src/lib/oidc-issuer.ts +241 -0
package/src/lib/otel-collector-grpc.test.ts +191 -0
package/src/lib/otel-collector.test.ts +303 -0
package/src/lib/otel-collector.ts +318 -14
package/src/lib/otlp-protobuf.test.ts +461 -0
package/src/lib/otlp-protobuf.ts +529 -0
package/src/scenarios/a2a-task-roundtrip.test.ts +147 -28
package/src/scenarios/agentConfidenceEscalation.test.ts +1 -0
package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +1 -0
package/src/scenarios/agentMemoryRedactionContract.test.ts +1 -0
package/src/scenarios/agentMemoryRoundTrip.test.ts +1 -0
package/src/scenarios/agentMemoryTtlExpiry.test.ts +1 -0
package/src/scenarios/agentMessageReducer.test.ts +1 -0
package/src/scenarios/agentMetadata.test.ts +1 -0
package/src/scenarios/agentPackExport.test.ts +1 -0
package/src/scenarios/agentPackInstall.test.ts +1 -0
package/src/scenarios/agentPackProvenance.test.ts +1 -0
package/src/scenarios/audit-log-integrity.test.ts +3 -6
package/src/scenarios/auth-api-key-rotation.test.ts +182 -0
package/src/scenarios/auth-mtls.test.ts +274 -0
package/src/scenarios/auth-oauth2-client-credentials.test.ts +259 -0
package/src/scenarios/auth-oidc-user-bearer.test.ts +361 -0
package/src/scenarios/bulk-cancel.test.ts +111 -0
package/src/scenarios/configurable-schema.test.ts +48 -0
package/src/scenarios/conversationCapabilityNegotiation.test.ts +1 -0
package/src/scenarios/conversationLifecycle.test.ts +1 -0
package/src/scenarios/conversationReplayDeterminism.test.ts +1 -0
package/src/scenarios/conversationVsLegacySuspend.test.ts +1 -0
package/src/scenarios/debug-bundle-truncation.test.ts +95 -0
package/src/scenarios/discovery.test.ts +183 -0
package/src/scenarios/http-client-ssrf.test.ts +71 -0
package/src/scenarios/idempotency.test.ts +6 -0
package/src/scenarios/idempotencyRetry.test.ts +3 -0
package/src/scenarios/mcp-tool-roundtrip.test.ts +198 -34
package/src/scenarios/mcp-toolcall-redaction.test.ts +66 -0
package/src/scenarios/metric-emission.test.ts +113 -0
package/src/scenarios/orchestratorConservativePath.test.ts +1 -0
package/src/scenarios/orchestratorDispatch.test.ts +1 -0
package/src/scenarios/orchestratorTermination.test.ts +1 -0
package/src/scenarios/otel-emission-grpc.test.ts +98 -0
package/src/scenarios/pause-resume.test.ts +119 -0
package/src/scenarios/production-backpressure.test.ts +342 -0
package/src/scenarios/production-retention-expiry.test.ts +164 -0
package/src/scenarios/registry-public.test.ts +131 -0
package/src/scenarios/replay-llm-cache-key.test.ts +35 -0
package/src/scenarios/replay-retention-expiry.test.ts +178 -0
package/src/scenarios/restart-during-run.test.ts +177 -0
package/src/scenarios/spec-corpus-validity.test.ts +54 -26
package/src/scenarios/staleClaim.test.ts +3 -0
package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +67 -10
package/src/scenarios/wasm-pack-memory-cap.test.ts +64 -9
package/src/scenarios/webhook-negative.test.ts +90 -0
package/src/scenarios/webhook-signed-delivery.test.ts +178 -0
package/src/setup.ts +25 -1
package/vitest.config.ts +5 -1

package/src/scenarios/auth-oidc-user-bearer.test.ts ADDED Viewed

@@ -0,0 +1,361 @@
+/**
+ * RFC 0010 §D: openwop-auth-oidc-user-bearer profile.
+ *
+ * Verifies that hosts claiming the OIDC user-bearer profile satisfy
+ * `spec/v1/auth-profiles.md` §`openwop-auth-oidc-user-bearer`:
+ *
+ *   1. `capabilities.auth.profiles[]` includes
+ *      `openwop-auth-oidc-user-bearer` and `oidc.supported`.
+ *   2. `oidc.issuers` is a non-empty array of URI strings; `audience`
+ *      is a non-empty string when advertised; `supportedScopeMapping`
+ *      (if present) is one of the canonical enum values;
+ *      `introspectionIntervalSeconds` (if present) is a non-negative
+ *      integer.
+ *   3. When `OPENWOP_TEST_OIDC_ISSUER_URL` is supplied, the scenario
+ *      binds the synthetic OIDC issuer harness at that URL and exercises
+ *      six host-side validation cases:
+ *        a. Valid sub/iss/aud/exp → 201 on POST /v1/runs.
+ *        b. Wrong `iss` → 401.
+ *        c. Wrong `aud` → 401.
+ *        d. Expired `exp` → 401.
+ *        e. Unknown `kid` (header references a key not in JWKS) → 401.
+ *        f. Insufficient scope (empty groups against a group-claim
+ *           mapping host) → 403.
+ *
+ * The host MUST be pre-configured to trust `OPENWOP_TEST_OIDC_ISSUER_URL`
+ * as one of its `oidc.issuers`. The scenario binds the harness's JWKS
+ * + discovery endpoints on that URL's port so the host's introspection
+ * fetches succeed against this hermetic in-suite issuer.
+ *
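+ * Cases (a) and (f) depend on the host's user-to-scope mapping policy
+ * for the harness's `sub`. A probe with a known-good token decides
+ * whether the token cases run at all: 201 or 403 means the host
+ * verified the signature and trusts the issuer, so the cases run and
+ * (a) accepts either status. Any other probe status (e.g. 401 — host
+ * trust not actually wired up), or a host that does not advertise the
+ * `conformance-noop` fixture, skips the token cases.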
+ *
+ * @see RFCS/0010-auth-profile-conformance.md §D
+ * @see spec/v1/auth-profiles.md §`openwop-auth-oidc-user-bearer`
+ * @see conformance/src/lib/oidc-issuer.ts — synthetic harness
+ */
+import { afterAll, beforeAll, describe, it, expect } from 'vitest';
+import { createServer, type Server } from 'node:http';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+import {
+  createSyntheticOIDCIssuer,
+  type SyntheticOIDCIssuer,
+} from '../lib/oidc-issuer.js';
+interface OIDCCaps { // fields this scenario reads from `capabilities.auth.oidc`; every field is optional
+  supported?: boolean; // must be exactly `true` for the profile to count as advertised
+  issuers?: string[]; // asserted to be a non-empty array of non-empty strings
+  audience?: string; // when present: asserted non-empty, and reused as the harness audience
+  supportedScopeMapping?: string; // when present: must be group-claim, scope-claim or host-acl
+  introspectionIntervalSeconds?: number; // when present: must be a non-negative integer
+}
+interface AuthCaps { // fields this scenario reads from `capabilities.auth` in the discovery document
+  profiles?: string[]; // advertised auth profile identifiers
+  oidc?: OIDCCaps;
+}
+const PROFILE = 'openwop-auth-oidc-user-bearer'; // profile id looked up in `auth.profiles`
+const FIXTURE = 'conformance-noop'; // workflow id posted to /v1/runs by the token cases
+/**
+ * Fetches `/.well-known/openwop` through the shared driver and returns
+ * its `capabilities.auth` section, or undefined when the document has
+ * no `capabilities` or no `auth` entry.
+ */
+async function readAuthCaps(): Promise<AuthCaps | undefined> {
+  const { json } = await driver.get('/.well-known/openwop');
+  const discovery = json as { capabilities?: { auth?: AuthCaps } };
+  return discovery.capabilities?.auth;
+}
+/**
+ * True only when `auth.profiles` is an array containing PROFILE and
+ * `auth.oidc.supported` is exactly `true`.
+ */
+function isProfileAdvertised(auth: AuthCaps | undefined): boolean {
+  const profiles = auth?.profiles;
+  if (!Array.isArray(profiles)) return false;
+  if (!profiles.includes(PROFILE)) return false;
+  return auth?.oidc?.supported === true;
+}
+describe('auth-oidc-user-bearer: capability shape', () => {
+  // Shape checks on `capabilities.auth` for hosts that claim the profile.
+  it('host claiming OIDC profile advertises required fields', async () => {
+    const auth = await readAuthCaps();
+    const claimed = isProfileAdvertised(auth);
+    // A falsy gate result ends the test without running any assertion.
+    if (!behaviorGate(PROFILE, claimed)) {
+      return;
+    }
+    const profileSection = 'auth-profiles.md §`openwop-auth-oidc-user-bearer`';
+    const oidc = auth?.oidc;
+    const listsProfile = auth?.profiles?.includes(PROFILE);
+    expect(listsProfile, driver.describe(
+      profileSection,
+      'capabilities.auth.profiles MUST include openwop-auth-oidc-user-bearer when the profile is claimed',
+    )).toBe(true);
+    expect(oidc?.supported, driver.describe(
+      profileSection,
+      'capabilities.auth.oidc.supported MUST be true when the profile is claimed',
+    )).toBe(true);
+    // Required field: issuers must be a populated array of non-empty strings.
+    const issuers = oidc?.issuers;
+    const hasIssuers = Array.isArray(issuers) && issuers.length > 0;
+    expect(
+      hasIssuers,
+      driver.describe(
+        'capabilities.schema.json auth.oidc.issuers',
+        'issuers MUST be a non-empty array when the profile is claimed',
+      ),
+    ).toBe(true);
+    for (const entry of issuers ?? []) {
+      const isNonEmptyString = typeof entry === 'string' && entry.length > 0;
+      expect(isNonEmptyString, 'each issuer entry MUST be a non-empty string').toBe(true);
+    }
+    // Optional fields: only validated when the host advertises them.
+    const audience = oidc?.audience;
+    if (audience !== undefined) {
+      const audienceOk = typeof audience === 'string' && audience.length > 0;
+      expect(audienceOk, 'audience MUST be a non-empty string when advertised').toBe(true);
+    }
+    const scopeMapping = oidc?.supportedScopeMapping;
+    if (scopeMapping !== undefined) {
+      const canonicalMappings = ['group-claim', 'scope-claim', 'host-acl'];
+      expect(
+        canonicalMappings.includes(scopeMapping),
+        driver.describe(
+          'capabilities.schema.json auth.oidc.supportedScopeMapping',
+          'supportedScopeMapping MUST be one of group-claim/scope-claim/host-acl',
+        ),
+      ).toBe(true);
+    }
+    const interval = oidc?.introspectionIntervalSeconds;
+    if (interval !== undefined) {
+      const intervalOk = Number.isInteger(interval) && interval >= 0;
+      expect(
+        intervalOk,
+        'introspectionIntervalSeconds MUST be a non-negative integer when advertised',
+      ).toBe(true);
+    }
+  });
+});
+describe('auth-oidc-user-bearer: harness-driven token validation', () => {
+  const SPEC_SECTION = 'auth-profiles.md §`openwop-auth-oidc-user-bearer`';
+  let server: Server | undefined;
+  let issuer: SyntheticOIDCIssuer | undefined;
+  let harnessUrl: string | undefined;
+  let harnessAudience: string | undefined;
+  let trustWired = false;
+  /**
+   * Returns the harness issuer when the token cases can run, i.e. the
+   * harness was created in `beforeAll` and the probe marked the host as
+   * trusting it. Otherwise logs a skip warning naming `caseName` and
+   * returns undefined, so every soft-skip is visible in the test output
+   * instead of silently passing.
+   */
+  function wiredIssuer(caseName: string): SyntheticOIDCIssuer | undefined {
+    if (issuer && trustWired) return issuer;
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[auth-oidc-user-bearer] harness not wired or host trust not configured; skipping ${caseName} case`,
+    );
+    return undefined;
+  }
+  /**
+   * POSTs a run of FIXTURE to /v1/runs, passing `authenticated: false`
+   * and an explicit `Authorization: Bearer <token>` header.
+   */
+  function postRunWithBearer(token: string) {
+    return driver.post(
+      '/v1/runs',
+      { workflowId: FIXTURE },
+      {
+        authenticated: false,
+        headers: { Authorization: `Bearer ${token}` },
+      },
+    );
+  }
+  beforeAll(async () => {
+    const auth = await readAuthCaps();
+    if (!isProfileAdvertised(auth)) return;
+    harnessUrl = process.env.OPENWOP_TEST_OIDC_ISSUER_URL;
+    if (!harnessUrl) return;
+    harnessAudience = auth?.oidc?.audience ?? 'openwop-conformance';
+    const harness = createSyntheticOIDCIssuer({
+      issuer: harnessUrl,
+      audience: harnessAudience,
+      algorithm: 'RS256',
+    });
+    issuer = harness;
+    // Bind the harness's JWKS + discovery endpoints so the host can
+    // fetch them when validating tokens.
+    // NOTE(review): a URL without an explicit port binds 80 regardless
+    // of scheme, and the listener is plain HTTP on 127.0.0.1 — an
+    // `https:` issuer URL presumably needs something in front of it.
+    const parsed = new URL(harnessUrl);
+    const port = parsed.port ? Number.parseInt(parsed.port, 10) : 80;
+    const httpServer = createServer((req, res) => {
+      // Route on the path only, so a query string appended by the
+      // host's HTTP client does not turn a valid fetch into a 404.
+      const pathname = (req.url ?? '').split('?', 1)[0];
+      if (pathname === '/.well-known/jwks.json') {
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(harness.jwksJson);
+      } else if (pathname === '/.well-known/openid-configuration') {
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(harness.discoveryJson);
+      } else {
+        res.writeHead(404);
+        res.end();
+      }
+    });
+    server = httpServer;
+    await new Promise<void>((resolve, reject) => {
+      httpServer.once('error', reject);
+      httpServer.listen(port, '127.0.0.1', () => resolve());
+    });
+    // Probe: does the host actually trust this harness? Mint a known-
+    // good token and see what the host returns.
+    if (isFixtureAdvertised(FIXTURE)) {
+      const probe = harness.mint({ sub: 'conformance-suite', groups: ['openwop:operators'] });
+      const probeRes = await postRunWithBearer(probe.token);
+      // Trust-wired status: host returns 201 (full success) or 403
+      // (token-valid-but-no-scope mapping). Both mean signature
+      // verification succeeded; the host trusts the issuer.
+      trustWired = probeRes.status === 201 || probeRes.status === 403;
+    }
+  });
+  afterAll(async () => {
+    const active = server;
+    if (!active) return;
+    await new Promise<void>((resolve) => active.close(() => resolve()));
+    server = undefined;
+  });
+  it('wrong iss → 401', async () => {
+    if (!wiredIssuer('wrong-iss')) return;
+    // A second synthetic issuer the host was never configured to trust.
+    const wrongIssIssuer = createSyntheticOIDCIssuer({
+      issuer: 'https://untrusted.example.invalid',
+      audience: harnessAudience ?? '',
+    });
+    const wrongIss = wrongIssIssuer.mint({ sub: 'attacker' });
+    const res = await postRunWithBearer(wrongIss.token);
+    expect(res.status, driver.describe(
+      SPEC_SECTION,
+      'token with non-trusted iss MUST return 401',
+    )).toBe(401);
+  });
+  it('wrong aud → 401', async () => {
+    const active = wiredIssuer('wrong-aud');
+    if (!active) return;
+    const wrongAud = active.mint({ aud: 'wrong-audience', sub: 'attacker' });
+    const res = await postRunWithBearer(wrongAud.token);
+    expect(res.status, driver.describe(
+      SPEC_SECTION,
+      'token with wrong aud MUST return 401',
+    )).toBe(401);
+  });
+  it('expired exp → 401', async () => {
+    const active = wiredIssuer('expired-exp');
+    if (!active) return;
+    const expired = active.mint(
+      { sub: 'conformance-suite' },
+      { expiresInSeconds: -3600 },
+    );
+    const res = await postRunWithBearer(expired.token);
+    expect(res.status, driver.describe(
+      SPEC_SECTION,
+      'expired token (exp < now) MUST return 401',
+    )).toBe(401);
+  });
+  it('unknown kid → 401', async () => {
+    const active = wiredIssuer('unknown-kid');
+    if (!active) return;
+    const unknownKid = active.mint(
+      { sub: 'conformance-suite' },
+      { keyId: 'openwop-conformance-key-NEVER-PUBLISHED' },
+    );
+    const res = await postRunWithBearer(unknownKid.token);
+    expect(res.status, driver.describe(
+      `${SPEC_SECTION} + threat-model-auth-profiles.md A3`,
+      'token referencing a kid not in JWKS MUST return 401',
+    )).toBe(401);
+  });
+  it('valid token → 201 or 403 (depending on host scope mapping)', async () => {
+    const active = wiredIssuer('valid-token');
+    if (!active) return;
+    if (!isFixtureAdvertised(FIXTURE)) return;
+    const valid = active.mint({
+      sub: 'conformance-suite',
+      groups: ['openwop:operators'],
+    });
+    const res = await postRunWithBearer(valid.token);
+    // 201: host trusts the token AND maps the sub to runs:create scope.
+    // 403: host trusts the token but the sub lacks the required scope.
+    // Both indicate the OIDC validation path succeeded; the scope
+    // decision is a separate host-side policy not normated by RFC 0010.
+    expect(
+      [201, 403].includes(res.status),
+      driver.describe(
+        SPEC_SECTION,
+        'host-trusted token MUST yield 201 (mapped scope) or 403 (unmapped sub), NOT 401',
+      ),
+    ).toBe(true);
+  });
+  it('scope-insufficient → 403 (when host uses group-claim mapping)', async () => {
+    const active = wiredIssuer('scope-insufficient');
+    if (!active) return;
+    const auth = await readAuthCaps();
+    if (auth?.oidc?.supportedScopeMapping !== 'group-claim') {
+      // eslint-disable-next-line no-console
+      console.warn(
+        '[auth-oidc-user-bearer] host scope mapping is not group-claim; skipping scope-insufficient case',
+      );
+      return;
+    }
+    const noGroups = active.mint({ sub: 'conformance-suite', groups: [] });
+    const res = await postRunWithBearer(noGroups.token);
+    expect(res.status, driver.describe(
+      SPEC_SECTION,
+      'token-valid-but-empty-groups against group-claim host MUST return 403 (forbidden), NOT 401',
+    )).toBe(403);
+  });
+});

package/src/scenarios/bulk-cancel.test.ts ADDED Viewed

@@ -0,0 +1,111 @@
+/**
+ * Bulk-cancel scenario (closes R1 from rest-endpoints.md §Open spec gaps).
+ *
+ * Verifies `POST /v1/runs:bulk-cancel` per
+ * `spec/v1/rest-endpoints.md` §"POST /v1/runs:bulk-cancel":
+ *
+ *   1. Per-id results array shape (`{runId, ok, status?, error?}`).
+ *   2. Mixed-outcome request: known + unknown + already-terminal runIds
+ *      MUST each surface their own outcome — partial failures do NOT
+ *      block sibling cancellations.
+ *   3. Empty `runIds` array → 400 validation_error.
+ *   4. Oversized array (>100 by spec) → 400 validation_error with
+ *      `details.maxRunIds`.
+ *   5. Idempotency: re-bulk-cancelling already-cancelled runs returns
+ *      `ok: true, status: 'cancelled'` (idempotent), NOT an error.
+ *
+ * Normative reference: spec/v1/rest-endpoints.md §"POST /v1/runs:bulk-cancel"
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+const CANCELLABLE = 'conformance-cancellable'; // workflow id the scenarios start with `inputs.delaySeconds: 30`
+const NOOP = 'conformance-noop'; // only consulted by the SKIP gate
+const SKIP = // evaluated once at module load: true when either fixture is not advertised
+  !isFixtureAdvertised(CANCELLABLE) || !isFixtureAdvertised(NOOP);
+interface BulkResult { // one entry of the bulk-cancel response's `results` array
+  runId: string;
+  ok: boolean;
+  status?: 'cancelled' | 'cancelling';
+  error?: { code?: string; message?: string };
+}
+describe.skipIf(SKIP)('bulk-cancel: POST /v1/runs:bulk-cancel', () => {
+  it('mixed-outcome request returns per-id results in order', async () => {
+    // Start one long-running cancellable run and pair it with an id the
+    // host has never seen; each entry must be resolved on its own.
+    const bulkCancelSection = 'rest-endpoints.md §"POST /v1/runs:bulk-cancel"';
+    const unknownRunId = 'run-does-not-exist-xxxxxxxx';
+    const created = await driver.post('/v1/runs', {
+      workflowId: CANCELLABLE,
+      inputs: { delaySeconds: 30 },
+    });
+    expect(created.status).toBe(201);
+    const { runId: inflightRunId } = created.json as { runId: string };
+    const res = await driver.post('/v1/runs:bulk-cancel', {
+      runIds: [inflightRunId, unknownRunId],
+      reason: 'conformance bulk-cancel test',
+    });
+    expect(res.status, driver.describe(
+      bulkCancelSection,
+      'top-level operation MUST return 200 when the request reached the host (per-id outcomes carry partial failure)',
+    )).toBe(200);
+    const { results } = res.json as { results: BulkResult[] };
+    expect(Array.isArray(results)).toBe(true);
+    expect(results.length, 'results MUST have one entry per request runId').toBe(2);
+    // First entry: the in-flight run, cancelled or on its way there.
+    const inflight = results[0]!;
+    expect(inflight.runId, 'results order MUST mirror the request order').toBe(inflightRunId);
+    expect(inflight.ok).toBe(true);
+    expect(['cancelling', 'cancelled']).toContain(inflight.status);
+    // Second entry: the unknown id, reported as a per-id failure.
+    const missing = results[1]!;
+    expect(missing.runId).toBe(unknownRunId);
+    expect(missing.ok, 'unknown runId entry MUST have ok=false').toBe(false);
+    expect(missing.error?.code, driver.describe(
+      bulkCancelSection,
+      'unknown runId outcomes carry `error.code === "not_found"`',
+    )).toBe('not_found');
+  });
+  it('empty runIds array returns 400 validation_error', async () => {
+    const res = await driver.post('/v1/runs:bulk-cancel', { runIds: [] });
+    expect(res.status).toBe(400);
+    const { error } = res.json as { error?: string };
+    expect(error).toBe('validation_error');
+  });
+  it('oversized runIds array returns 400 with details.maxRunIds', async () => {
+    // One entry past the recommended 100-entry cap.
+    const oversizedCount = 101;
+    const ids = [...Array(oversizedCount).keys()].map((i) => `run-overflow-${i}`);
+    const res = await driver.post('/v1/runs:bulk-cancel', { runIds: ids });
+    expect(res.status).toBe(400);
+    const body = res.json as { error?: string; details?: { maxRunIds?: number } };
+    expect(body.error).toBe('validation_error');
+    const reportedCap = body.details?.maxRunIds;
+    expect(typeof reportedCap, driver.describe(
+      'rest-endpoints.md §"POST /v1/runs:bulk-cancel"',
+      'over-cap request MUST carry details.maxRunIds disclosing the configured ceiling',
+    )).toBe('number');
+    expect(body.details!.maxRunIds!).toBeGreaterThanOrEqual(1);
+  });
+  it('re-bulk-cancel after first cancel is idempotent', async () => {
+    const created = await driver.post('/v1/runs', {
+      workflowId: CANCELLABLE,
+      inputs: { delaySeconds: 30 },
+    });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    const cancelRequest = { runIds: [runId] };
+    const first = await driver.post('/v1/runs:bulk-cancel', cancelRequest);
+    expect(first.status).toBe(200);
+    // Same request again: the repeat must still report success.
+    const second = await driver.post('/v1/runs:bulk-cancel', cancelRequest);
+    expect(second.status).toBe(200);
+    const { results } = second.json as { results: BulkResult[] };
+    expect(results[0]!.ok, driver.describe(
+      'rest-endpoints.md §"POST /v1/runs:bulk-cancel" §Idempotency',
+      're-cancelling an already-cancelling/cancelled run MUST be ok: true (idempotent)',
+    )).toBe(true);
+  });
+});

package/src/scenarios/configurable-schema.test.ts CHANGED Viewed

@@ -73,4 +73,52 @@ describe('configurable-schema: per-workflow schema enforced', () => {
     const body = create.json as { error?: string };
     expect(body.error).toBe('validation_error');
   });
+  it('configurable overlay matching configurableSchema is accepted', async () => {
+    const fixture = await pickFixture();
+    if (!fixture) return; // covered by skip warning above
+    const manifest = await driver.get(`/v1/workflows/${encodeURIComponent(fixture)}`);
+    const schema = (manifest.json as { configurableSchema?: Record<string, unknown> })
+      .configurableSchema;
+    if (!schema) return;
+    // Build a minimal overlay from the first usable `properties.*` entry,
+    // keeping this generic across fixtures: a property typed `integer`
+    // gets its `minimum` (or 1 when none is declared); a property typed
+    // `string` gets a fixed placeholder. Only the first such property is
+    // set; the overlay stays {} when no usable property is declared.
+    const props = (schema.properties ?? {}) as Record<string, Record<string, unknown>>;
+    const overlay: Record<string, unknown> = {};
+    for (const [key, p] of Object.entries(props)) {
+      if (p.type === 'integer') {
+        const min = typeof p.minimum === 'number' ? p.minimum : 1;
+        overlay[key] = min;
+        break;
+      }
+      if (p.type === 'string') {
+        overlay[key] = 'conformance-test';
+        break;
+      }
+    }
+    const create = await driver.post('/v1/runs', {
+      workflowId: fixture,
+      configurable: overlay,
+    });
+    expect(create.status, driver.describe(
+      'run-options.md §"Per-workflow configurableSchema"',
+      'configurable matching the declared schema MUST be accepted (201)',
+    )).toBe(201);
+    const body = create.json as { runId?: string };
+    expect(typeof body.runId).toBe('string');
+    // Clean up so subsequent scenario runs don't accumulate state.
+    if (body.runId) {
+      await driver.post(`/v1/runs/${encodeURIComponent(body.runId)}/cancel`, {
+        reason: 'conformance-cleanup',
+      });
+    }
+  });
 });

package/src/scenarios/conversationCapabilityNegotiation.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 /**
  * Multi-Agent Shift Phase 4 — capability-gate refusal contract.
+ * Normative reference: RFCS/0005-conversation.md
  *
  * Verifies that a host which does NOT advertise
  * `capabilities.conversationPrimitive: true` MUST refuse a workflow

package/src/scenarios/conversationLifecycle.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 /**
  * Multi-Agent Shift Phase 4 — conversation primitive lifecycle.
+ * Normative reference: RFCS/0005-conversation.md
  *
  * Verifies the open → exchange → close lifecycle:
  *   1. `conversation.opened` emitted on `core.conversationGate.open`.

package/src/scenarios/conversationReplayDeterminism.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 /**
  * Multi-Agent Shift Phase 4 — replay-fork of a conversation produces identical log.
+ * Normative reference: RFCS/0005-conversation.md
  *
  * Verifies that running `:fork` on a conversation-bearing run yields
  * a child run whose conversation log (folded via the `message` reducer)

package/src/scenarios/conversationVsLegacySuspend.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 /**
  * Multi-Agent Shift Phase 4 — `conversation.exchange` differs from `clarification.requested`.
+ * Normative reference: RFCS/0005-conversation.md
  *
  * Verifies that `core.conversationGate.exchange` produces
  * `conversation.exchanged` events in the run log — distinct from the

package/src/scenarios/debug-bundle-truncation.test.ts ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * Debug-bundle truncation contract (debug-bundle.md §"Bundle size limits").
+ *
+ * Verifies that when a bundle would exceed the host's size cap, the
+ * response surfaces `truncated: true` + a non-empty `truncatedReason`
+ * and the `events` array is a strict prefix of what the run produced.
+ *
+ * Driving truncation deterministically requires either a fixture that
+ * generates ≥ 8MB of events (impractical) or a host-implementation
+ * override. The SQLite reference host accepts a `?maxEvents=N` query
+ * parameter (host-implementation choice per the spec — "Hosts MAY
+ * raise the cap via implementation-defined configuration"). When
+ * neither the cap can be lowered nor a high-event fixture is available,
+ * this scenario soft-skips the assertion.
+ *
+ * @see spec/v1/debug-bundle.md §"Bundle size limits"
+ * @see spec/v1/production-profile.md §"Debug bundle behavior" (RFC 0009
+ *      — this scenario satisfies the truncation-metadata predicate when
+ *      the host advertises capabilities.production.debugBundle.truncationMetadata: true)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { pollUntilTerminal } from '../lib/polling.js';
+import { isFixtureAdvertised } from '../lib/fixtures.js';
+// `conformance-multi-node` produces enough events (run.started, three
+// node.started/completed pairs, run.completed = ~8 events) that
+// `?maxEvents=2` reliably forces truncation.
+const FIXTURE = 'conformance-multi-node';
+describe('debug-bundle-truncation: truncated: true contract', () => {
+  it('host that supports ?maxEvents=N (or otherwise caps) surfaces truncated + truncatedReason', async () => {
+    const fixtureAvailable = isFixtureAdvertised(FIXTURE);
+    if (!fixtureAvailable) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[debug-bundle-truncation] ${FIXTURE} not advertised; skipping (host doesn't seed a multi-event fixture)`,
+      );
+      return;
+    }
+    const sizeLimitsSection = 'debug-bundle.md §"Bundle size limits"';
+    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
+    expect(created.status).toBe(201);
+    const { runId } = created.json as { runId: string };
+    await pollUntilTerminal(runId, { timeoutMs: 15_000 });
+    const bundlePath = `/v1/runs/${encodeURIComponent(runId)}/debug-bundle`;
+    // Baseline: fetch the uncapped bundle to learn the run's total event count.
+    const full = await driver.get(bundlePath);
+    expect(full.status).toBe(200);
+    const fullBody = full.json as { events?: unknown[]; truncated?: boolean };
+    const fullEventCount = (fullBody.events ?? []).length;
+    expect(fullEventCount, 'multi-node fixture MUST emit ≥ 3 events').toBeGreaterThanOrEqual(3);
+    expect(fullBody.truncated ?? false, 'baseline bundle MUST NOT be truncated').toBe(false);
+    // Second fetch asks the host to cap the bundle at two events.
+    const capped = await driver.get(`${bundlePath}?maxEvents=2`);
+    const body = capped.json as {
+      truncated?: boolean;
+      truncatedReason?: string;
+      events?: unknown[];
+      metrics?: { eventCount?: number };
+    };
+    // A host that ignores `?maxEvents=` hands back the full bundle
+    // (truncated: false). Bail out with a warning then, which keeps the
+    // suite usable against hosts that force truncation some other way.
+    const honoursCap = body.truncated === true;
+    if (!honoursCap) {
+      // eslint-disable-next-line no-console
+      console.warn(
+        '[debug-bundle-truncation] host does not honor ?maxEvents=; skipping truncated-shape assertions',
+      );
+      return;
+    }
+    expect(typeof body.truncatedReason, driver.describe(
+      sizeLimitsSection,
+      'truncated: true MUST be accompanied by a non-empty truncatedReason string',
+    )).toBe('string');
+    const reason = body.truncatedReason ?? '';
+    expect(reason.length).toBeGreaterThan(0);
+    const cappedEvents = body.events ?? [];
+    expect(cappedEvents.length, driver.describe(
+      sizeLimitsSection,
+      'truncated events array MUST be a prefix (≤ maxEvents)',
+    )).toBeLessThanOrEqual(2);
+    expect(body.metrics?.eventCount, driver.describe(
+      sizeLimitsSection,
+      'metrics.eventCount MUST reflect the TOTAL event count, not the truncated length',
+    )).toBe(fullEventCount);
+  });
+});
+});