npm - @openwop/openwop-conformance - Versions diffs - 1.1.1 → 1.3.0 - Mend

@openwop/openwop-conformance 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/CHANGELOG.md +90 -0
package/README.md +2 -2
package/api/redocly.yaml +15 -0
package/coverage.md +27 -14
package/fixtures/conformance-agent-low-confidence.json +7 -4
package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
package/fixtures/conformance-agent-reasoning.json +23 -4
package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
package/fixtures/conformance-dispatch-input-mapping.json +49 -0
package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
package/fixtures/conformance-dispatch-output-mapping.json +49 -0
package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
package/fixtures.md +18 -2
package/package.json +1 -1
package/schemas/README.md +7 -0
package/schemas/agent-ref.schema.json +1 -1
package/schemas/ai-envelope.schema.json +106 -0
package/schemas/capabilities.schema.json +264 -0
package/schemas/core-conformance-mock-agent-config.schema.json +152 -0
package/schemas/dispatch-config.schema.json +26 -0
package/schemas/envelopes/clarification.request.schema.json +43 -0
package/schemas/envelopes/error.schema.json +26 -0
package/schemas/envelopes/schema.request.schema.json +22 -0
package/schemas/envelopes/schema.response.schema.json +22 -0
package/schemas/node-pack-manifest.schema.json +5 -0
package/schemas/pack-lockfile.schema.json +16 -0
package/schemas/run-event-payloads.schema.json +35 -1
package/schemas/run-event.schema.json +2 -0
package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
package/src/lib/driver.ts +15 -0
package/src/lib/env.ts +51 -0
package/src/lib/event-log-query.ts +62 -0
package/src/lib/fixtures.ts +38 -1
package/src/lib/host-toggle.ts +54 -0
package/src/lib/multi-agent-capabilities.ts +10 -0
package/src/lib/otel-scrape.ts +59 -0
package/src/lib/webhook-receiver.ts +137 -0
package/src/lib/workflow-chain-expansion.ts +213 -0
package/src/scenarios/agentPackCatalog.test.ts +216 -0
package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
package/src/scenarios/agentReasoningEvents.test.ts +58 -7
package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
package/src/scenarios/ai-envelope-shape.test.ts +362 -0
package/src/scenarios/aiEnvelope.capBreached.test.ts +261 -0
package/src/scenarios/aiEnvelope.contractRefusal.test.ts +268 -0
package/src/scenarios/aiEnvelope.correlationReplay.test.ts +284 -0
package/src/scenarios/aiEnvelope.redaction.test.ts +253 -0
package/src/scenarios/aiEnvelope.schemaDrift.test.ts +226 -0
package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +194 -0
package/src/scenarios/aiEnvelope.universalKinds.test.ts +267 -0
package/src/scenarios/append-ordering.test.ts +44 -0
package/src/scenarios/artifact-auth.test.ts +58 -0
package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
package/src/scenarios/blob-presign-expiry.test.ts +99 -0
package/src/scenarios/blob-roundtrip.test.ts +0 -0
package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
package/src/scenarios/cache-ttl-expiry.test.ts +73 -0
package/src/scenarios/dispatch-cross-worker-handoff.test.ts +129 -0
package/src/scenarios/dispatch-input-mapping.test.ts +163 -0
package/src/scenarios/dispatch-output-mapping.test.ts +155 -0
package/src/scenarios/fixtures-gating.test.ts +139 -1
package/src/scenarios/fs-path-traversal.test.ts +124 -0
package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
package/src/scenarios/kv-atomic-increment.test.ts +74 -0
package/src/scenarios/kv-cas.test.ts +75 -0
package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
package/src/scenarios/kv-ttl-expiry.test.ts +78 -0
package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
package/src/scenarios/pack-registry-publish.test.ts +231 -51
package/src/scenarios/pause-resume.test.ts +43 -0
package/src/scenarios/provider-usage.test.ts +185 -0
package/src/scenarios/queue-ack-nack-dlq.test.ts +121 -0
package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
package/src/scenarios/queue-publish-consume-roundtrip.test.ts +88 -0
package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
package/src/scenarios/search-bm25-roundtrip.test.ts +92 -0
package/src/scenarios/spec-corpus-validity.test.ts +17 -1
package/src/scenarios/sql-injection-rejection.test.ts +84 -0
package/src/scenarios/sql-transaction-atomicity.test.ts +95 -0
package/src/scenarios/stream-subscribe-from-beginning.test.ts +103 -0
package/src/scenarios/subworkflow-input-mapping.test.ts +170 -0
package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
package/src/scenarios/table-cursor-pagination.test.ts +85 -0
package/src/scenarios/table-schema-enforcement.test.ts +84 -0
package/src/scenarios/vector-knn-roundtrip.test.ts +88 -0
package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0

package/src/scenarios/provider-usage.test.ts ADDED Viewed

@@ -0,0 +1,185 @@
+/**
+ * RFC 0026 — `provider.usage` event conformance.
+ *
+ * Verifies the new optional event type added to `RunEventType` per RFC
+ * 0026. The event MUST fire after every LLM provider invocation,
+ * carrying per-call token counts + optional cost estimate. Three
+ * describe blocks:
+ *
+ *   1. Advertisement shape (`capabilities.providerUsage` block).
+ *   2. Schema round-trip (positive + negative fixtures).
+ *   3. Event presence + shape via the test-only emit seam +
+ *      event-log query seam (Thread E.1).
+ *
+ * Block 1 soft-skips when the capability isn't advertised and block 3
+ * soft-skips when the host doesn't expose the relevant seam; block 2
+ * validates local fixtures against the schema and has no skip path.
+ *
+ * @see RFCS/0026-provider-usage-event.md
+ * @see schemas/run-event-payloads.schema.json#/$defs/providerUsage
+ * @see SECURITY/invariants.yaml#provider-usage-no-credential-leak
+ */
+import { describe, it, expect } from 'vitest';
+import Ajv2020 from 'ajv/dist/2020.js';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { driver } from '../lib/driver.js';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
+/** Subset of the `/.well-known/openwop` discovery document read by this scenario. */
+interface DiscoveryDoc {
+  capabilities?: {
+    providerUsage?: { supported?: boolean; costEstimates?: boolean; currency?: string };
+  };
+}
+/**
+ * Fetches the discovery document and extracts `capabilities.providerUsage`.
+ *
+ * @returns the advertised block, or `null` when it is missing or not an object.
+ */
+async function readProviderUsageCap(): Promise<{ supported?: boolean; costEstimates?: boolean; currency?: string } | null> {
+  const discovery = (await driver.get('/.well-known/openwop')).json as DiscoveryDoc | undefined;
+  const advertised = discovery?.capabilities?.providerUsage;
+  if (!advertised || typeof advertised !== 'object') return null;
+  return advertised;
+}
+describe('provider-usage: capability advertisement (RFC 0026 §E)', () => {
+  // Shape-only verification: a host that omits the block passes trivially.
+  it('capabilities.providerUsage is either absent or a well-formed object', async () => {
+    const advertised = await readProviderUsageCap();
+    if (advertised === null) return; // nothing advertised — nothing to check
+    const { supported, costEstimates, currency } = advertised;
+    const supportedMsg = driver.describe('RFC 0026 §E', 'capabilities.providerUsage.supported MUST be a boolean when the block is present');
+    expect(typeof supported, supportedMsg).toBe('boolean');
+    // The two remaining fields are optional; only check them when set.
+    if (costEstimates !== undefined) {
+      const costMsg = driver.describe('RFC 0026 §E', 'capabilities.providerUsage.costEstimates MUST be a boolean when present');
+      expect(typeof costEstimates, costMsg).toBe('boolean');
+    }
+    if (currency !== undefined) {
+      const isIsoCode = /^[A-Z]{3}$/.test(currency);
+      const currencyMsg = driver.describe('RFC 0026 §E', 'capabilities.providerUsage.currency MUST be a 3-letter uppercase ISO 4217 code when present');
+      expect(isIsoCode, currencyMsg).toBe(true);
+    }
+  });
+});
+describe('provider-usage: schema round-trip (RFC 0026 §A)', () => {
+  const ajv = new Ajv2020({ strict: false, allErrors: true }); // non-strict mode; report every validation error
+  // Parse the full payloads document, then compile only its `$defs.providerUsage` sub-schema. NOTE(review): a `$ref` to a sibling `$defs` entry would not resolve from the sub-schema alone — confirm providerUsage is self-contained.
+  const payloadsDoc = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'run-event-payloads.schema.json'), 'utf8')) as Record<string, unknown>;
+  const providerUsageDef = (payloadsDoc.$defs as Record<string, unknown>).providerUsage as Record<string, unknown>;
+  const validate = ajv.compile(providerUsageDef); // one validator shared by every fixture below
+  it('positive fixture validates', () => {
+    const ok = validate({
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      inputTokens: 145,
+      outputTokens: 312,
+      totalTokens: 457,
+      costEstimateUsd: 0.005115,
+      currency: 'USD',
+      cacheHit: false,
+      nodeId: 'chat-respond',
+    });
+    expect(ok, `positive fixture MUST validate; errors: ${JSON.stringify(validate.errors)}`).toBe(true);
+  });
+  it('negative fixture (missing required field) MUST be rejected', () => {
+    const ok = validate({
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      inputTokens: 100,
+      // outputTokens missing — required per §A
+    });
+    expect(
+      ok,
+      driver.describe('RFC 0026 §A', 'payload missing required `outputTokens` MUST fail schema validation'),
+    ).toBe(false);
+  });
+  it('negative fixture (additionalProperties — credentialRef leak) MUST be rejected', () => {
+    const ok = validate({
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      inputTokens: 100,
+      outputTokens: 50,
+      credentialRef: 'secret:tenant:byok-anthropic:v1', // banned — additionalProperties:false
+    });
+    expect(
+      ok,
+      driver.describe('RFC 0026 §D', 'additionalProperties:false MUST reject credentialRef-shaped fields per provider-usage-no-credential-leak'),
+    ).toBe(false);
+  });
+  it('negative fixture (non-integer token count) MUST be rejected', () => {
+    const ok = validate({
+      provider: 'openai',
+      model: 'gpt-4o',
+      inputTokens: 100.5, // non-integer
+      outputTokens: 50,
+    });
+    expect(ok, 'inputTokens MUST be integer per §A').toBe(false);
+  });
+});
+describe('provider-usage: event presence via emit-seam + event-log query (RFC 0026 §B)', () => {
+  // Emits one synthetic `provider.usage` event through the test-only emit seam
+  // and reads it back through the event-log query seam. Soft-skips when either
+  // seam is missing (availability probe fails or the emit route answers 404).
+  it('emit-seam projects exactly one provider.usage event with required fields populated', async () => {
+    if (!(await isEventLogSeamAvailable())) return; // E.1 seam not exposed — soft-skip
+    const runId = `r-pu-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    const correlationId = `${runId}:node-1:turn-0:pu-1`;
+    const payload = {
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      inputTokens: 200,
+      outputTokens: 80,
+      totalTokens: 280,
+      nodeId: 'node-1',
+    };
+    const emit = await driver.post('/v1/host/sample/test/emit-provider-usage', { runId, payload, correlationId, nodeId: 'node-1' });
+    if (emit.status === 404) return; // emit seam not exposed
+    try {
+      expect(emit.status).toBe(200);
+      const events = await queryTestEvents(runId, { type: 'provider.usage' });
+      if (!events.ok) return;
+      expect(
+        events.events.length,
+        driver.describe('RFC 0026 §B', 'emit-seam MUST project exactly one provider.usage event'),
+      ).toBe(1);
+      const e = events.events[0]!;
+      expect(e.payload.provider).toBe('anthropic');
+      expect(e.payload.model).toBe('claude-3-5-sonnet-20240620');
+      expect(e.payload.inputTokens).toBe(200);
+      expect(e.payload.outputTokens).toBe(80);
+      // The emitted correlationId is expected back as the event's causationId.
+      expect(e.causationId).toBe(correlationId);
+      expect(e.nodeId).toBe('node-1');
+    } finally {
+      // Reset the seam even when an assertion above throws, so this test's
+      // cleanup is never skipped on failure.
+      await resetTestSeam();
+    }
+  });
+  it('emit-seam refuses payloads containing credentialRef-shaped content (provider-usage-no-credential-leak invariant)', async () => {
+    if (!(await isEventLogSeamAvailable())) return;
+    const runId = `r-pu-leak-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    // Inject a credentialRef-shaped field via a synthetic payload that
+    // contains 'secret:' in a string field. The seam's defense-in-depth
+    // check MUST refuse — even though the production emitter's schema
+    // validation would also catch this via additionalProperties:false.
+    const res = await driver.post('/v1/host/sample/test/emit-provider-usage', {
+      runId,
+      payload: {
+        provider: 'anthropic',
+        model: 'claude-3-5-sonnet-20240620',
+        inputTokens: 100,
+        outputTokens: 50,
+        nodeId: 'secret:tenant:byok-anthropic:v1', // banned content
+      },
+    });
+    if (res.status === 404) return; // emit seam not exposed
+    try {
+      expect(
+        res.status,
+        driver.describe('SECURITY/invariants.yaml provider-usage-no-credential-leak', 'payload with credentialRef-shaped content MUST be refused'),
+      ).toBe(400);
+      const body = res.json as { error?: { code?: string } };
+      expect(body.error?.code).toBe('provider_usage_credential_leak');
+    } finally {
+      // Same cleanup guarantee as above: reset regardless of assertion outcome.
+      await resetTestSeam();
+    }
+  });
+});

package/src/scenarios/queue-ack-nack-dlq.test.ts ADDED Viewed

@@ -0,0 +1,121 @@
+/**
+ * queue-ack-nack-dlq — RFC 0017 advertisement-shape verification + behavioral assertions.
+ *
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0017 promoted to `Active`
+ * 2026-05-17. The matching `capabilities.queueBus` block has landed in
+ * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
+ * shape against any host that boots the conformance suite, and drives the
+ * behavioral assertions through the `POST /v1/host/sample/test/surface` test
+ * seam, soft-skipping when the host answers 404 for it.
+ *
+ * Summary: nack returns for redelivery; deadLetter routes to the configured DLQ.
+ *
+ * @see RFCS/0017-*.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Loosely typed view of the `/.well-known/openwop` discovery document. */
+interface DiscoveryDoc {
+  capabilities?: Record<string, unknown>;
+}
+/**
+ * Reads `capabilities.queueBus` from the host's discovery document.
+ *
+ * @returns the advertised block, or `null` when it is absent or not an object.
+ */
+async function readCap(): Promise<Record<string, unknown> | null> {
+  const discovery = (await driver.get('/.well-known/openwop')).json as DiscoveryDoc | undefined;
+  const capabilities = discovery?.capabilities as Record<string, unknown> | undefined;
+  if (!capabilities || typeof capabilities !== 'object') return null;
+  const queueBus = capabilities["queueBus"];
+  if (!queueBus || typeof queueBus !== 'object') return null;
+  return queueBus as Record<string, unknown>;
+}
+describe('queue-ack-nack-dlq: advertisement shape (RFC 0017)', () => {
+  it('capabilities.queueBus is either absent or a well-formed object', async () => {
+    const queueBus = await readCap();
+    if (queueBus === null) return; // host doesn't advertise — skip
+    const message = driver.describe(
+      'capabilities.schema.json §queueBus',
+      'capabilities.queueBus.supported MUST be a boolean when present',
+    );
+    expect(typeof queueBus.supported, message).toBe('boolean');
+  });
+  it('deadLetterSupported is a boolean when set', async () => {
+    const queueBus = await readCap();
+    if (queueBus === null || queueBus.supported !== true) return;
+    // Optional sub-field: only verified when the host sets it.
+    const flag: unknown = queueBus["deadLetterSupported"];
+    if (flag === undefined) return;
+    const message = driver.describe(
+      'RFC 0017 §A',
+      'queueBus.deadLetterSupported MUST be boolean when present',
+    );
+    expect(typeof flag, message).toBe('boolean');
+  });
+});
+/** Sends one `queueBus` operation to the host's test-surface seam as `tenant-a`. */
+async function call(op: string, args: Record<string, unknown>) {
+  const request = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
+  return driver.post('/v1/host/sample/test/surface', request);
+}
+describe('queue-ack-nack-dlq: behavioral (RFC 0017 §B point 2 — nack + DLQ)', () => {
+  it('nack(requeue=true) → message is redelivered on next consume with deliveryCount incremented', async () => {
+    const probe = await call('consume', { subject: '__probe__' }); // probe call: detects whether the test seam exists
+    if (probe.status === 404) return; // seam not exposed — soft-skip
+    const subject = `q-nack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // fresh subject per run
+    await call('publish', { subject, payload: { v: 'redeliver-me' } });
+    const first = await call('consume', { subject });
+    const firstBody = first.json as { deliveryToken?: string; payload?: unknown; deliveryCount?: number };
+    expect(firstBody.deliveryCount).toBe(1); // first delivery
+    const nackRes = await call('nack', { deliveryToken: firstBody.deliveryToken, requeue: true });
+    expect((nackRes.json as { requeued?: boolean }).requeued).toBe(true);
+    const second = await call('consume', { subject }); // should see the requeued message again
+    const secondBody = second.json as { found?: boolean; payload?: unknown; deliveryCount?: number };
+    expect(
+      secondBody.found,
+      driver.describe('RFC 0017 §B point 2', 'nack(requeue=true) MUST make the message available to next consume'),
+    ).toBe(true);
+    expect(secondBody.payload).toEqual(firstBody.payload); // same message, not a different one
+    expect(
+      secondBody.deliveryCount,
+      driver.describe('RFC 0017 §B point 2', 'redelivered message MUST have incremented deliveryCount'),
+    ).toBe(2);
+  });
+  it('deadLetter → message appears on the <subject>.dlq subject; original subject is empty', async () => {
+    const probe = await call('consume', { subject: '__probe__' }); // probe call: detects whether the test seam exists
+    if (probe.status === 404) return; // seam not exposed — soft-skip
+    const subject = `q-dlq-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // fresh subject per run
+    await call('publish', { subject, payload: { v: 'poison' } });
+    const consumed = await call('consume', { subject });
+    const deliveryToken = (consumed.json as { deliveryToken?: string }).deliveryToken; // token identifies the delivery to dead-letter
+    const dlqRes = await call('deadLetter', { deliveryToken, reason: 'unparseable_payload' });
+    expect((dlqRes.json as { deadLettered?: boolean }).deadLettered).toBe(true);
+    const dlqSubject = (dlqRes.json as { dlqSubject?: string }).dlqSubject;
+    expect(dlqSubject).toBe(`${subject}.dlq`);
+    // Original subject MUST be empty now
+    const originalEmpty = await call('consume', { subject });
+    expect((originalEmpty.json as { found?: boolean }).found).toBe(false);
+    // DLQ MUST carry the message + the deadLetterReason
+    const dlqMsg = await call('consume', { subject: `${subject}.dlq` });
+    const dlqBody = dlqMsg.json as { found?: boolean; payload?: { original?: unknown; deadLetterReason?: string } };
+    expect(
+      dlqBody.found,
+      driver.describe('RFC 0017 §B point 2', 'deadLetter MUST route the message to the <subject>.dlq subject'),
+    ).toBe(true);
+    expect(dlqBody.payload?.deadLetterReason).toBe('unparseable_payload'); // reason passed to deadLetter above
+    expect(dlqBody.payload?.original).toEqual({ v: 'poison' }); // originally published payload
+  });
+});

package/src/scenarios/queue-cross-tenant-isolation.test.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * queue-cross-tenant-isolation — RFC 0017 §C + SECURITY/invariants.yaml
+ * `queue-cross-tenant-isolation`.
+ *
+ * Status: ACTIVE (advertisement + behavioral). Asserts that messages
+ * published under tenant A on topic T MUST NOT be consumed under tenant B
+ * on the same topic.
+ *
+ * @see RFCS/0017-host-queue-bus-capability.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Loosely typed view of the `/.well-known/openwop` discovery document. */
+interface DiscoveryDoc {
+  capabilities?: Record<string, unknown>;
+}
+/**
+ * Looks up the `queueBus` capability block in the host's discovery document.
+ *
+ * @returns the block when it is an object; `null` when missing or malformed.
+ */
+async function readCap(): Promise<Record<string, unknown> | null> {
+  const response = await driver.get('/.well-known/openwop');
+  const doc = response.json as DiscoveryDoc | undefined;
+  const capabilities = doc?.capabilities as Record<string, unknown> | undefined;
+  const isObject = (value: unknown): value is Record<string, unknown> => Boolean(value) && typeof value === 'object';
+  if (!isObject(capabilities)) return null;
+  const queueBus = capabilities["queueBus"];
+  return isObject(queueBus) ? queueBus : null;
+}
+/** Sends one `queueBus` operation to the host's test-surface seam on behalf of `tenantId`. */
+async function call(tenantId: string, op: string, args: Record<string, unknown>) {
+  const request = { tenantId, surface: 'queueBus', op, args };
+  return driver.post('/v1/host/sample/test/surface', request);
+}
+describe('queue-cross-tenant-isolation: advertisement shape (RFC 0017)', () => {
+  // Hosts that do not advertise `queueBus` pass without any assertion.
+  it('capabilities.queueBus is either absent or a well-formed object', async () => {
+    const queueBus = await readCap();
+    if (queueBus !== null) {
+      const message = driver.describe(
+        'capabilities.schema.json §queueBus',
+        'capabilities.queueBus.supported MUST be a boolean when present',
+      );
+      expect(typeof queueBus.supported, message).toBe('boolean');
+    }
+  });
+});
+describe('queue-cross-tenant-isolation: behavioral (RFC 0017 §C)', () => {
+  it('publish under tenant A → consume under tenant B returns not-found', async () => {
+    const cap = await readCap();
+    if (!cap || cap.supported !== true) return; // capability not advertised as supported — soft-skip
+    const topic = `xtenant.${Date.now()}.${Math.random().toString(36).slice(2, 6)}`; // fresh topic per run
+    const pubRes = await call('tenant-a', 'publish', { topic, payload: { hello: 'A' } }); // NOTE(review): the sibling queueBus scenarios address queues via `subject`, not `topic` — confirm the seam accepts `topic`
+    if (pubRes.status === 404) return; // test seam not exposed — soft-skip
+    expect(pubRes.status, 'publish MUST succeed').toBe(200);
+    const consRes = await call('tenant-b', 'consume', { topic, timeoutMs: 100 }); // same topic, different tenant
+    expect(consRes.status).toBe(200);
+    const body = consRes.json as { found?: boolean };
+    expect(
+      body.found,
+      driver.describe(
+        'SECURITY/invariants.yaml queue-cross-tenant-isolation',
+        'tenant B MUST NOT consume tenant A messages on the same topic',
+      ),
+    ).toBe(false);
+  });
+});

package/src/scenarios/queue-publish-consume-roundtrip.test.ts ADDED Viewed

@@ -0,0 +1,88 @@
+/**
+ * queue-publish-consume-roundtrip — RFC 0017 advertisement-shape verification + behavioral assertions.
+ *
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0017 promoted to `Active`
+ * 2026-05-17. The matching `capabilities.queueBus` block has landed in
+ * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
+ * shape against any host that boots the conformance suite, and drives the
+ * behavioral assertions through the `POST /v1/host/sample/test/surface` test
+ * seam, soft-skipping when the host answers 404 for it.
+ *
+ * Summary: publish + consume + ack roundtrip.
+ *
+ * @see RFCS/0017-*.md
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+/** Loosely typed view of the `/.well-known/openwop` discovery document. */
+interface DiscoveryDoc {
+  capabilities?: Record<string, unknown>;
+}
+/**
+ * Retrieves the advertised `queueBus` capability block, if any.
+ *
+ * @returns the block as a record, or `null` when it is absent or not an object.
+ */
+async function readCap(): Promise<Record<string, unknown> | null> {
+  const { json } = await driver.get('/.well-known/openwop');
+  const capabilities = (json as DiscoveryDoc | undefined)?.capabilities as Record<string, unknown> | undefined;
+  let queueBus: unknown;
+  if (capabilities && typeof capabilities === 'object') {
+    queueBus = capabilities["queueBus"];
+  }
+  if (queueBus && typeof queueBus === 'object') {
+    return queueBus as Record<string, unknown>;
+  }
+  return null;
+}
+describe('queue-publish-consume-roundtrip: advertisement shape (RFC 0017)', () => {
+  // Shape-only check; nothing is asserted when the host omits the block.
+  it('capabilities.queueBus is either absent or a well-formed object', async () => {
+    const queueBus = await readCap();
+    if (queueBus === null) return; // host doesn't advertise — skip
+    const supportedType = typeof queueBus.supported;
+    const message = driver.describe(
+      'capabilities.schema.json §queueBus',
+      'capabilities.queueBus.supported MUST be a boolean when present',
+    );
+    expect(supportedType, message).toBe('boolean');
+  });
+});
+/** Posts a single `queueBus` operation to the test-surface seam, always as `tenant-a`. */
+async function call(op: string, args: Record<string, unknown>) {
+  const seamPath = '/v1/host/sample/test/surface';
+  const request = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
+  return driver.post(seamPath, request);
+}
+describe('queue-publish-consume-roundtrip: behavioral (RFC 0017 §B point 2)', () => {
+  it('publish → consume returns the same payload + subject', async () => {
+    const probe = await call('consume', { subject: '__probe__' }); // probe call: detects whether the test seam exists
+    if (probe.status === 404) return; // seam not exposed
+    const subject = `q-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // fresh subject per run
+    const payload = { event: 'order.created', orderId: 42 };
+    const pub = await call('publish', { subject, payload });
+    expect(pub.status).toBe(200);
+    const got = await call('consume', { subject });
+    expect(got.status).toBe(200);
+    const body = got.json as { found?: boolean; subject?: string; payload?: unknown; deliveryToken?: string };
+    expect(body.found, 'consume MUST find the just-published message').toBe(true);
+    expect(body.subject).toBe(subject);
+    expect(
+      body.payload,
+      driver.describe('RFC 0017 §B point 2', 'consume MUST return the exact published payload'),
+    ).toEqual(payload);
+    expect(typeof body.deliveryToken, 'consume MUST return a deliveryToken for ack/nack').toBe('string');
+  });
+  it('ack removes the message; subsequent consume on empty queue returns found:false', async () => {
+    const probe = await call('consume', { subject: '__probe__' }); // probe call: detects whether the test seam exists
+    if (probe.status === 404) return; // seam not exposed — soft-skip
+    const subject = `q-ack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // fresh subject per run
+    await call('publish', { subject, payload: { v: 1 } });
+    const got = await call('consume', { subject });
+    const deliveryToken = (got.json as { deliveryToken?: string }).deliveryToken; // token identifies the delivery to ack
+    const ackRes = await call('ack', { deliveryToken });
+    expect(ackRes.status).toBe(200);
+    expect((ackRes.json as { acked?: boolean }).acked).toBe(true);
+    const empty = await call('consume', { subject }); // nothing should remain after the ack
+    const emptyBody = empty.json as { found?: boolean };
+    expect(
+      emptyBody.found,
+      driver.describe('RFC 0017 §B point 2', 'consume after ack MUST surface as found:false'),
+    ).toBe(false);
+  });
+});

package/src/scenarios/replay-llm-cache-key.test.ts CHANGED Viewed

@@ -1,35 +1,176 @@
 /**
- * Cross-host LLM cache-key parity (replay.md §"LLM cache-key recipe").
+ * LLM cache-key recipe — `replay.md §"LLM cache-key recipe"` §A + §B.
  *
- * Verifies that two OpenWOP-compliant hosts replaying the same LLM
- * provider request compute the same cache key. The recipe is normative
- * (replay.md §B): canonical JSON of `(provider, model, messages, tools,
- * temperature, topP, topK, responseFormat)` → SHA-256 → lowercase hex.
+ * Verifies that an OpenWOP host computes the LLM cache key per the
+ * normative recipe: SHA-256 over RFC 8785 JCS-canonicalized JSON of
+ * the closed set of recipe fields (`provider, model, messages, tools,
+ * temperature, topP, topK, responseFormat`).
  *
- * Status: PLACEHOLDER. As of 2026-05-11, neither reference host
- * (`examples/hosts/in-memory/`, `examples/hosts/sqlite/`) implements
- * LLM-calling nodes — both execute only `core.noop` / `core.delay` /
- * `core.approvalGate` fixtures. This scenario lands as `it.todo()` so
- * the contract surface is tracked; assertions land when the first
- * reference host ships an LLM-call node.
+ * The single-host assertions drive the env-gated test seam at
+ * `POST /v1/host/sample/test/llm-cache-key` and recompute the expected
+ * key locally per the recipe, asserting equality. Non-recipe fields
+ * (`max_tokens`, `stop`, `stream`, `seed`, etc.) MUST NOT influence
+ * the key — per §A.
  *
- * What the live scenario WILL exercise (when implemented):
- *   1. Boot host A against `OPENWOP_BASE_URL`.
- *   2. Boot host B against `OPENWOP_BASE_URL_B`.
- *   3. Submit the same workflow + inputs (an LLM-calling fixture).
- *   4. Read each host's emitted `node.completed.payload.cacheKey` (or
- *      equivalent debug-bundle surface).
- *   5. Assert the two hex strings are equal.
+ * The cross-host assertion (two hosts compute the same key) is
+ * soft-skipped unless `OPENWOP_BASE_URL_B` points at a second host;
+ * that URL is operator-supplied, so the check is inert by default.
  *
  * @see spec/v1/replay.md §"LLM cache-key recipe"
  */
-import { describe, it } from 'vitest';
+import { describe, it, expect } from 'vitest';
+import { createHash } from 'node:crypto';
+import { driver } from '../lib/driver.js';
-describe('replay-llm-cache-key: cross-host determinism (placeholder)', () => {
-  it.todo(
-    'two hosts replaying the same LLM provider request compute the same cache key (replay.md §D)',
-  );
-  it.todo('LLM cache key is computed via SHA-256 of canonical JSON per replay.md §B');
-  it.todo('cache key omits non-recipe fields (max_tokens, stop, stream, seed, etc.) per replay.md §A');
+/** True for values that JSON cannot represent and `JSON.stringify` drops
+ *  from objects (or turns into `null` inside arrays). */
+function isJsonOmitted(value: unknown): boolean {
+  return value === undefined || typeof value === 'function' || typeof value === 'symbol';
+}
+/** Mirror of the reference impl's `canonicalize` so the conformance
+ *  scenario can recompute the expected cache key locally and assert
+ *  equality with what the host returns. RFC 8785 JCS-style:
+ *  sorted-keys, no whitespace, preserve array order.
+ *
+ *  Values JSON cannot carry follow `JSON.stringify` semantics so the
+ *  output is always valid JSON: object members whose value is
+ *  `undefined`, a function, or a symbol are omitted, and such values
+ *  inside arrays (including holes) become `null`. For inputs that are
+ *  already plain JSON data the output is unchanged.
+ *
+ *  @param value JSON-like value to serialize.
+ *  @returns The canonical JSON text.
+ *  @throws TypeError when the top-level value itself has no JSON form. */
+function canonicalize(value: unknown): string {
+  if (value === null) return 'null';
+  if (Array.isArray(value)) {
+    // Array.from (unlike map + join) visits holes, so they serialize as null too.
+    const items = Array.from(value, (v) => (isJsonOmitted(v) ? 'null' : canonicalize(v)));
+    return '[' + items.join(',') + ']';
+  }
+  if (typeof value === 'object') {
+    const obj = value as Record<string, unknown>;
+    // Default sort() orders by UTF-16 code units, independent of locale.
+    const keys = Object.keys(obj)
+      .filter((k) => !isJsonOmitted(obj[k]))
+      .sort();
+    return '{' + keys.map((k) => `${JSON.stringify(k)}:${canonicalize(obj[k])}`).join(',') + '}';
+  }
+  // Primitives: JSON.stringify yields undefined (not a string) for values
+  // with no JSON form, so reject those explicitly instead of leaking it.
+  const text: string | undefined = JSON.stringify(value);
+  if (text === undefined) {
+    throw new TypeError(`canonicalize: value of type ${typeof value} has no JSON representation`);
+  }
+  return text;
+}
+/** Reduce a raw request to the fields that feed the cache key.
+ *  `provider`, `model` and `messages` are always copied. `tools` is
+ *  included (as a name-sorted copy) only when it is a non-empty array;
+ *  `temperature` / `topP` / `topK` only when they are numbers;
+ *  `responseFormat` only when it is a truthy object. Everything else on
+ *  `raw` is dropped, and `raw` itself is not mutated.
+ *  NOTE(review): `localeCompare` ordering can depend on the runtime's
+ *  locale data — confirm this matches the ordering the recipe mandates. */
+function projectRecipe(raw: Record<string, unknown>): Record<string, unknown> {
+  const { provider, model, messages, tools, responseFormat } = raw;
+  const recipe: Record<string, unknown> = { provider, model, messages };
+  if (Array.isArray(tools) && tools.length > 0) {
+    // Sort a copy so the caller's array keeps its original order.
+    const byName = (tools as Array<{ name: string }>).slice();
+    byName.sort((left, right) => left.name.localeCompare(right.name));
+    recipe.tools = byName;
+  }
+  for (const knob of ['temperature', 'topP', 'topK'] as const) {
+    const setting = raw[knob];
+    if (typeof setting === 'number') recipe[knob] = setting;
+  }
+  if (responseFormat && typeof responseFormat === 'object') {
+    recipe.responseFormat = responseFormat;
+  }
+  return recipe;
+}
+/** Locally recompute the cache key for `input`: project it down to the
+ *  recipe fields, canonicalize that to JSON text, and return the
+ *  SHA-256 digest of the UTF-8 bytes as hex. */
+function expectedCacheKey(input: Record<string, unknown>): string {
+  const canonicalJson = canonicalize(projectRecipe(input));
+  const hasher = createHash('sha256');
+  hasher.update(canonicalJson, 'utf8');
+  return hasher.digest('hex');
+}
+/** POST `input` to the cache-key test seam and return the HTTP status
+ *  together with the `cacheKey` from the response body, when present.
+ *
+ *  The body is read defensively: callers soft-skip on `status === 404`,
+ *  so a response without a parsed JSON body must not throw here before
+ *  they get the chance to inspect the status.
+ *  NOTE(review): how `driver.post` represents a missing/non-JSON body is
+ *  not visible from this file — the guard covers `null` and `undefined`.
+ *
+ *  @param input Request body forwarded to the seam unchanged.
+ *  @returns `{ status, cacheKey }`, with `cacheKey` omitted when the
+ *           response carried none. */
+async function callSeam(input: Record<string, unknown>): Promise<{ status: number; cacheKey?: string }> {
+  const res = await driver.post('/v1/host/sample/test/llm-cache-key', input);
+  const body = res.json as { cacheKey?: string } | null | undefined;
+  const cacheKey = body?.cacheKey;
+  return cacheKey !== undefined ? { status: res.status, cacheKey } : { status: res.status };
+}
+// Recipe checks against a single host: the key returned by the seam must equal
+// the locally recomputed one and must have the SHA-256 hex shape. Both cases
+// return early (soft-skip) when the seam answers 404.
+describe('replay-llm-cache-key: SHA-256-over-JCS recipe (replay.md §B)', () => {
+  it('host cache key MUST equal locally-recomputed SHA-256 over canonical JSON', async () => {
+    const input = {
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      messages: [
+        { role: 'system' as const, content: 'You are a helpful assistant.' },
+        { role: 'user' as const, content: 'What is 2+2?' },
+      ],
+      temperature: 0.7,
+    };
+    const result = await callSeam(input);
+    if (result.status === 404) return; // seam not exposed
+    expect(result.status).toBe(200);
+    expect(
+      result.cacheKey,
+      driver.describe('replay.md §B', 'host cache key MUST be lowercase-hex SHA-256 of the canonical recipe JSON'),
+    ).toBe(expectedCacheKey(input));
+  });
+  it('cache key MUST be 64 lowercase-hex characters (SHA-256 output shape)', async () => {
+    const result = await callSeam({
+      provider: 'openai',
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    if (result.status === 404) return; // seam not exposed
+    // Assert the status first so a host error is reported as such rather than
+    // as a regex mismatch on a missing cacheKey.
+    expect(result.status).toBe(200);
+    expect(result.cacheKey).toMatch(/^[0-9a-f]{64}$/);
+  });
+});
+// Field-sensitivity checks: fields outside the recipe must not change the key,
+// while a recipe field (temperature) must. Both cases soft-skip on a 404 seam.
+describe('replay-llm-cache-key: non-recipe fields are EXCLUDED (replay.md §A)', () => {
+  it('max_tokens / stop / stream / seed / metadata / user MUST NOT influence the cache key', async () => {
+    const base = {
+      provider: 'openai',
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'unit test' }],
+      temperature: 0.5,
+    };
+    const baseResult = await callSeam(base);
+    if (baseResult.status === 404) return; // seam not exposed
+    // Require a real key up front: comparing two missing keys
+    // (undefined === undefined) would otherwise pass the invariance check.
+    expect(baseResult.status).toBe(200);
+    expect(typeof baseResult.cacheKey, 'seam MUST return a cacheKey string').toBe('string');
+    // All these non-recipe fields MUST NOT affect the cache key per §A.
+    const noisy = {
+      ...base,
+      max_tokens: 1000,
+      stop: ['STOP'],
+      stream: true,
+      seed: 42,
+      metadata: { traceId: 'abcd' },
+      user: 'unit-test-user',
+    };
+    const noisyResult = await callSeam(noisy);
+    expect(noisyResult.status).toBe(200);
+    expect(
+      noisyResult.cacheKey,
+      driver.describe(
+        'replay.md §A',
+        'cache key MUST be invariant under non-recipe field changes (max_tokens, stop, stream, seed, metadata, user)',
+      ),
+    ).toBe(baseResult.cacheKey);
+  });
+  it('changing a recipe field (temperature) MUST yield a different cache key', async () => {
+    const baseInput = {
+      provider: 'openai',
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'diversity-probe' }],
+      temperature: 0.0,
+    };
+    const hotInput = { ...baseInput, temperature: 1.0 };
+    const baseResult = await callSeam(baseInput);
+    if (baseResult.status === 404) return; // seam not exposed
+    expect(baseResult.status).toBe(200);
+    const hotResult = await callSeam(hotInput);
+    expect(hotResult.status).toBe(200);
+    // Two missing keys compare equal, so this still fails when the host
+    // returns no cacheKey at all.
+    expect(
+      baseResult.cacheKey === hotResult.cacheKey,
+      driver.describe('replay.md §A', 'changing a recipe field MUST yield a different cache key (no false collisions)'),
+    ).toBe(false);
+  });
+});
+// Cross-host parity: the same input sent to the primary host (via the driver)
+// and to a second host (via fetch against OPENWOP_BASE_URL_B) must produce the
+// same key. Soft-skips when the second host is not configured or when either
+// host answers 404 for the seam.
+describe('replay-llm-cache-key: cross-host parity (replay.md §D)', () => {
+  it('two hosts compute the same cache key for the same input (when OPENWOP_BASE_URL_B is configured)', async () => {
+    const otherBaseUrl = process.env.OPENWOP_BASE_URL_B;
+    if (!otherBaseUrl || otherBaseUrl.length === 0) return; // second host not configured — soft-skip
+    const input = {
+      provider: 'anthropic',
+      model: 'claude-3-5-sonnet-20240620',
+      messages: [
+        { role: 'system' as const, content: 'cross-host parity probe' },
+        { role: 'user' as const, content: 'compute the same key' },
+      ],
+      temperature: 0.5,
+    };
+    const a = await callSeam(input);
+    if (a.status === 404) return; // host A doesn't expose the seam
+    // Require a real key from host A: if both hosts omitted cacheKey, the
+    // parity assertion below would compare undefined with undefined and pass.
+    expect(a.status).toBe(200);
+    expect(typeof a.cacheKey, 'host A MUST return a cacheKey string').toBe('string');
+    const otherApiKey = process.env.OPENWOP_API_KEY_B ?? process.env.OPENWOP_API_KEY ?? '';
+    // Issue the second probe directly via fetch since the driver is bound to
+    // OPENWOP_BASE_URL. Authorization mirrors the suite's default.
+    const resB = await fetch(`${otherBaseUrl.replace(/\/$/, '')}/v1/host/sample/test/llm-cache-key`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${otherApiKey}` },
+      body: JSON.stringify(input),
+    });
+    if (resB.status === 404) return; // host B doesn't expose the seam
+    expect(resB.status).toBe(200);
+    const b = (await resB.json()) as { cacheKey?: string };
+    expect(
+      a.cacheKey,
+      driver.describe('replay.md §D', 'two compliant hosts MUST compute byte-identical cache keys for the same recipe input'),
+    ).toBe(b.cacheKey);
+  });
 });