@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -101,16 +101,32 @@
101
101
  "lease.lost",
102
102
  "lease.handed-off",
103
103
  "replay.diverged",
104
+ "replay.divergedAtRefusal",
104
105
  "agent.reasoned",
106
+ "agent.reasoning.delta",
107
+ "provider.usage",
108
+ "prompt.composed",
109
+ "agent.promptResolved",
110
+ "model.capability.substituted",
111
+ "model.capability.insufficient",
112
+ "envelope.retry.attempted",
113
+ "envelope.retry.exhausted",
114
+ "envelope.refusal",
115
+ "envelope.truncated",
116
+ "envelope.nlToFormat.engaged",
117
+ "envelope.recovery.applied",
105
118
  "agent.toolCalled",
106
119
  "agent.toolReturned",
107
120
  "agent.handoff",
108
121
  "agent.decided",
109
122
  "runOrchestrator.decided",
123
+ "node.dispatched",
110
124
  "conversation.opened",
111
125
  "conversation.exchanged",
112
126
  "conversation.closed",
113
- "memory.compacted"
127
+ "memory.compacted",
128
+ "core.workflowChain.event",
129
+ "core.workflowChain.confidence-escalated"
114
130
  ]
115
131
  }
116
132
  }
@@ -25,15 +25,16 @@
25
25
  "paused",
26
26
  "waiting-approval",
27
27
  "waiting-input",
28
+ "waiting-external",
28
29
  "completed",
29
30
  "failed",
30
31
  "cancelled"
31
32
  ],
32
- "description": "Current run state. Forward-compat: future statuses MAY be added; readers SHOULD treat unknown values as terminal-unknown rather than throw."
33
+ "description": "Current run state. `waiting-external` MUST be used when the suspended interrupt's `kind` is `external-event` per `interrupt-profiles.md §openwop-interrupt-external-event` — distinguishes external-event waits from HITL waits at the wire level. Forward-compat: future statuses MAY be added; readers SHOULD treat unknown values as terminal-unknown rather than throw."
33
34
  },
34
35
  "currentNodeId": {
35
36
  "type": "string",
36
- "description": "Set when the run is suspended at a specific node (`waiting-approval` / `waiting-input`) — identifies which node holds the interrupt."
37
+ "description": "Set when the run is suspended at a specific node (`waiting-approval` / `waiting-input` / `waiting-external`) — identifies which node holds the interrupt."
37
38
  },
38
39
  "startedAt": { "type": "string", "format": "date-time" },
39
40
  "completedAt": { "type": "string", "format": "date-time" },
@@ -77,6 +77,24 @@
77
77
  "description": "Optional JSON Schema 2020-12 declaring which RunOptions.configurable keys this workflow accepts. When present, hosts MUST validate POST /v1/runs `configurable` payloads against this schema and reject mismatches with `validation_error`. Hosts MUST surface this schema on GET /v1/workflows/{workflowId} so clients can pre-flight-validate. See run-options.md §'Per-workflow configurableSchema'. Additive in v1.1.",
78
78
  "type": "object"
79
79
  },
80
+ "defaults": {
81
+ "type": "object",
82
+ "additionalProperties": false,
83
+ "description": "RFC 0029 §B. Workflow-author-controlled per-kind fallback values that apply at resolution chain layer 3 (`workflow-defaults`) per `spec/v1/prompts.md` §\"Resolution chain (normative)\". Applied when neither the node (layer 1) nor the node's bound agent (layer 2) specifies a value for the kind. Future RFCs MAY add sibling defaults (e.g., `defaults.temperature`, `defaults.modelClass`) without colliding.",
84
+ "properties": {
85
+ "promptRefs": {
86
+ "type": "object",
87
+ "additionalProperties": false,
88
+ "description": "Per-kind PromptRef fallbacks for layer 3 of the resolution chain.",
89
+ "properties": {
90
+ "system": { "$ref": "./prompt-ref.schema.json" },
91
+ "user": { "$ref": "./prompt-ref.schema.json" },
92
+ "few-shot": { "$ref": "./prompt-ref.schema.json" },
93
+ "schema-hint": { "$ref": "./prompt-ref.schema.json" }
94
+ }
95
+ }
96
+ }
97
+ },
80
98
  "metadata": { "$ref": "#/$defs/WorkflowMetadata" },
81
99
  "settings": { "$ref": "#/$defs/WorkflowSettings" },
82
100
  "acceptsInheritedArtifacts": {
@@ -111,7 +129,7 @@
111
129
  },
112
130
  "config": {
113
131
  "type": "object",
114
- "description": "Node configuration (pre-execution constants)."
132
+ "description": "Node configuration (pre-execution constants). The shape is per-typeId — node-pack manifests declare each typeId's `configSchema` for install-time validation. By convention, the keys `systemPromptRef`, `userPromptRef`, and `additionalPromptRefs` MAY hold PromptRef values per `spec/v1/prompts.md` §\"PromptRef\" (RFC 0027). Hosts advertising `capabilities.prompts.supported: true` MUST resolve these keys; hosts without the capability MAY treat them as opaque strings. When both an inline body (e.g., `config.systemPrompt`) and a `*PromptRef` are present, the ref wins and the host MUST emit a `log.appended` warning with `code: \"prompt_ref_supersedes_inline\"` per RFC 0027 §C."
115
133
  },
116
134
  "inputs": {
117
135
  "type": "object",
package/src/lib/driver.ts CHANGED
@@ -78,6 +78,21 @@ class OpenWOPDriver {
78
78
  return this.request('POST', path, { ...init, body });
79
79
  }
80
80
 
81
+ /** PUT helper. The body is JSON-stringified by default; pass a string
82
+ * Content-Type header for raw-body PUTs (e.g. tarball uploads).
83
+ * Production hosts that accept tarball PUTs on /v1/packs/* expect
84
+ * `Content-Type: application/octet-stream`; callers MUST set the
85
+ * header explicitly when uploading non-JSON. */
86
+ put(path: string, body: unknown, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
87
+ return this.request('PUT', path, { ...init, body });
88
+ }
89
+
90
+ /** DELETE alias for the canonical name. Keeps the call-site shorter
91
+ * for scenarios that delete via `driver.del(...)`. */
92
+ del(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
93
+ return this.request('DELETE', path, init);
94
+ }
95
+
81
96
  delete(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
82
97
  return this.request('DELETE', path, init);
83
98
  }
package/src/lib/env.ts CHANGED
@@ -25,6 +25,28 @@
25
25
  * hosts go strict-mode green without falsifying capability claims.
26
26
  * Example for SQLite:
27
27
  * OPENWOP_OPTED_OUT_PROFILES=openwop-production,openwop-auth-mtls
28
+ *
29
+ * OPENWOP_OPTED_OUT_FIXTURES — comma-separated fixture ids (or
30
+ * trailing-`*` globs) the host operator has DELIBERATELY chosen
31
+ * not to honor. Applied in `lib/fixtures.ts` by filtering matching
32
+ * entries out of the cached advertised-fixture set, so any
33
+ * scenario gated via `isFixtureAdvertised(...)` skips cleanly.
34
+ * Use when a host auto-loads every `conformance-*.json` on disk
35
+ * (so the fixture id IS in the discovery doc) but the host doesn't
36
+ * implement the gated feature. Symmetric to `OPENWOP_OPTED_OUT_
37
+ * PROFILES` for the fixture-id axis. Example for SQLite:
38
+ * OPENWOP_OPTED_OUT_FIXTURES=conformance-dispatch-*,conformance-subworkflow-input-mapping*
39
+ *
40
+ * OPENWOP_OPTED_OUT_SCENARIOS — comma-separated scenario ids that
41
+ * individual tests consult to skip themselves where neither
42
+ * profile-opt-out nor fixture-opt-out is fine-grained enough
43
+ * (e.g., OTel trace-inheritance across `core.subWorkflow` —
44
+ * `conformance-subworkflow-parent` is correctly advertised because
45
+ * non-OTel subworkflow scenarios pass, but the host doesn't
46
+ * propagate traceparent across the dispatch boundary). Use
47
+ * `isScenarioOptedOut(scenarioId)` from `env.ts` in the test's
48
+ * skip predicate. Reserved for cases where the suite-wide
49
+ * skip mechanisms can't carry the granularity.
28
50
  */
29
51
 
30
52
  export interface ConformanceEnv {
@@ -84,3 +106,32 @@ export function loadEnv(): ConformanceEnv {
84
106
  };
85
107
  return cached;
86
108
  }
109
+
110
/**
 * Reports whether `scenarioId` is listed in the comma-separated
 * `OPENWOP_OPTED_OUT_SCENARIOS` environment variable.
 *
 * Meant for a test's skip predicate (e.g. `describe.skipIf`) when the
 * profile-level and fixture-level opt-outs are too coarse. The helper
 * logs nothing; callers format their own skip message.
 *
 * `process.env` is consulted on every call — nothing is memoized — so
 * unit tests may change the variable between cases. Entries are
 * whitespace-trimmed and compared for exact equality (no glob support).
 *
 * @param scenarioId Scenario id to look up.
 * @returns `true` when some trimmed entry equals `scenarioId`.
 */
export function isScenarioOptedOut(scenarioId: string): boolean {
  const csv = (process.env.OPENWOP_OPTED_OUT_SCENARIOS ?? '').trim();
  if (csv === '') return false;
  return csv.split(',').some((candidate) => candidate.trim() === scenarioId);
}
131
+
132
/**
 * Test-only escape hatch: discards the value memoized by `loadEnv()`.
 *
 * Call this after mutating any environment variable that `loadEnv()`
 * consumes, so the next `loadEnv()` call re-reads `process.env`.
 */
export function __resetEnvCacheForTests(): void {
  // Clearing the module-level cache is all that is needed here.
  cached = null;
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Driver helpers for the test-only event-log query seam
3
+ * (`GET /v1/host/sample/test/runs/:runId/events`).
4
+ *
5
+ * Used by aiEnvelope engine-projection scenarios that verify the
6
+ * spec-prescribed events the host MUST emit on each envelope outcome
7
+ * (per RFC 0021 §A point 1-7 + interrupt.md + capabilities.md
8
+ * §"cap.breached"). All operations soft-skip on HTTP 404 — hosts
9
+ * without the seam keep the existing advertisement-shape coverage.
10
+ *
11
+ * Reset semantics: callers SHOULD `resetTestSeam()` in their test's
12
+ * `afterEach` (or scope each test to a unique runId) to keep state
13
+ * from leaking across scenarios.
14
+ */
15
+
16
+ import { driver } from './driver.js';
17
+
18
+ export interface TestEvent {
19
+ readonly eventId: string;
20
+ readonly runId: string;
21
+ readonly type: string;
22
+ readonly payload: Record<string, unknown>;
23
+ readonly timestamp: string;
24
+ readonly sequence: number;
25
+ readonly causationId?: string;
26
+ readonly nodeId?: string;
27
+ readonly contentTrust?: 'trusted' | 'untrusted';
28
+ }
29
+
30
+ export type QueryOutcome =
31
+ | { ok: true; events: TestEvent[] }
32
+ | { ok: false; reason: 'seam_unavailable' }
33
+ | { ok: false; reason: 'http_error'; status: number };
34
+
35
/**
 * Query the test-only event log for a run, with optional filters.
 *
 * Issues `GET /v1/host/sample/test/runs/:runId/events`, forwarding each
 * non-empty filter field as a query parameter of the same name.
 *
 * @param runId  Run whose events are requested; URL-encoded into the path.
 * @param filter Optional filters. Absent or empty-string fields are not sent.
 * @returns `{ ok: true, events }` on HTTP 200 (an empty list when the body
 *   carries no `events` member), `{ ok: false, reason: 'seam_unavailable' }`
 *   on HTTP 404, and `{ ok: false, reason: 'http_error', status }` for any
 *   other status.
 */
export async function queryTestEvents(
  runId: string,
  filter: { type?: string; correlationId?: string; causationId?: string; nodeId?: string } = {},
): Promise<QueryOutcome> {
  const qs = new URLSearchParams();
  if (filter.type) qs.set('type', filter.type);
  if (filter.correlationId) qs.set('correlationId', filter.correlationId);
  if (filter.causationId) qs.set('causationId', filter.causationId);
  if (filter.nodeId) qs.set('nodeId', filter.nodeId);

  // Serialize once; an empty string means "no filters", so no `?` suffix.
  const query = qs.toString();
  const url = `/v1/host/sample/test/runs/${encodeURIComponent(runId)}/events${query ? '?' + query : ''}`;

  const res = await driver.get(url);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // A 200 with an empty / non-object body must not crash the caller:
  // treat it the same as a body without an `events` member.
  const body = res.json as { events?: TestEvent[] } | null | undefined;
  return { ok: true, events: body?.events ?? [] };
}
52
+
53
/**
 * Reset the test-only event log + capability overlay (suite teardown).
 *
 * Posts an empty JSON object to the reset endpoint. The response is not
 * inspected, so any status the host answers with is ignored.
 */
export async function resetTestSeam(): Promise<void> {
  const resetPath = '/v1/host/sample/test/reset';
  await driver.post(resetPath, {});
}
57
+
58
/**
 * Probe whether the event-log seam is exposed, for early soft-skips.
 *
 * Queries the event log for the placeholder run id `__probe__` and
 * reports whether that query came back `ok` (i.e. HTTP 200).
 */
export async function isEventLogSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestEvents('__probe__');
  return ok;
}
@@ -26,6 +26,16 @@
26
26
  * This module is sync. The async fetch lives in `setup.ts` which calls
27
27
  * `setAdvertisedFixtures(...)` from a top-level `await`.
28
28
  *
29
+ * Honest opt-out (symmetric to `OPENWOP_OPTED_OUT_PROFILES`):
30
+ * `OPENWOP_OPTED_OUT_FIXTURES` (CSV, supports trailing `*` glob)
31
+ * subtracts matching fixture-ids from the cached set even when the
32
+ * host advertises them. Operators use this when the host happens to
33
+ * carry a fixture file (e.g., it auto-loads every `conformance-*.json`
34
+ * on disk) but does NOT implement the underlying feature — so the
35
+ * gated scenario should skip instead of running and failing. The
36
+ * subtraction happens at cache-population time, so the predicate
37
+ * remains a single sync set lookup at scenario-evaluation time.
38
+ *
29
39
  * @see spec/v1/capabilities.md §`fixtures`
30
40
  * @see spec/v1/profiles.md §`openwop-fixtures`
31
41
  * @see RFCS/0003-fixture-gating.md
@@ -35,19 +45,46 @@ import type { DiscoveryPayload } from './profiles.js';
35
45
 
36
46
  let _advertisedFixtures: ReadonlySet<string> | null = null;
37
47
 
48
/**
 * Builds a matcher from the `OPENWOP_OPTED_OUT_FIXTURES` environment
 * variable.
 *
 * The variable is a comma-separated list. Each non-empty, trimmed entry
 * is either an exact fixture id or a prefix glob written with a trailing
 * `*`. When the variable is unset or blank the returned matcher always
 * answers `false`.
 *
 * @returns A predicate answering "is this fixture id opted out?".
 */
function loadOptedOutPredicate(): (id: string) => boolean {
  const csv = (process.env.OPENWOP_OPTED_OUT_FIXTURES ?? '').trim();
  if (csv === '') return () => false;

  const entries = csv
    .split(',')
    .map((part) => part.trim())
    .filter((part) => part !== '');

  // Split the entries into exact ids and glob prefixes (trailing `*` removed).
  const exactIds = new Set(entries.filter((entry) => !entry.endsWith('*')));
  const globPrefixes = entries
    .filter((entry) => entry.endsWith('*'))
    .map((entry) => entry.slice(0, -1));

  return (id) => {
    if (exactIds.has(id)) return true;
    for (const prefix of globPrefixes) {
      if (id.startsWith(prefix)) return true;
    }
    return false;
  };
}
68
+
38
69
  /**
39
70
  * Populate the cache from a discovery-doc payload. The function is
40
71
  * tolerant of malformed inputs — anything other than a string array
41
72
  * collapses to "no fixtures advertised" rather than throwing, so the
42
73
  * suite remains resilient against host bugs in the discovery surface.
74
+ *
75
+ * Applies `OPENWOP_OPTED_OUT_FIXTURES` at this step: opted-out ids are
76
+ * filtered out of the cache before storage so downstream lookups can
77
+ * stay a single sync set-membership test.
43
78
  */
44
79
  export function setAdvertisedFixtures(c: DiscoveryPayload | null | undefined): void {
45
80
  if (c == null || !Array.isArray(c.fixtures)) {
46
81
  _advertisedFixtures = new Set();
47
82
  return;
48
83
  }
84
+ const isOptedOut = loadOptedOutPredicate();
49
85
  const ids = c.fixtures.filter(
50
- (entry): entry is string => typeof entry === 'string' && entry.length > 0,
86
+ (entry): entry is string =>
87
+ typeof entry === 'string' && entry.length > 0 && !isOptedOut(entry),
51
88
  );
52
89
  _advertisedFixtures = new Set(ids);
53
90
  }
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Capability-toggle harness primitive — driver helper for the
3
+ * env-gated test-seam endpoint at
4
+ * `POST /v1/host/sample/test/capability-toggle`.
5
+ *
6
+ * Lets refusal-case scenarios (RFC 0022 §C HVMAP-1a-refusal,
7
+ * HVMAP-2-refusal, etc.) flip a capability flag off temporarily,
8
+ * exercise the host's refusal path, then restore the default.
9
+ *
10
+ * All operations soft-skip on HTTP 404 — hosts that don't expose the
11
+ * seam keep the existing advertisement-shape coverage intact.
12
+ *
13
+ * Reset semantics: callers MUST `resetHostCapabilities()` in their
14
+ * test's `afterEach` (or equivalent) to keep state from leaking
15
+ * across scenarios.
16
+ */
17
+
18
+ import { driver } from './driver.js';
19
+
20
+ export type ToggleOutcome =
21
+ | { ok: true; overlay: Record<string, boolean> }
22
+ | { ok: false; reason: 'seam_unavailable' }
23
+ | { ok: false; reason: 'http_error'; status: number };
24
+
25
/**
 * Set a capability flag's overlay value via
 * `POST /v1/host/sample/test/capability-toggle`.
 *
 * @param name  Capability flag to overlay.
 * @param value New overlay value; `null` removes the overlay entry
 *   (restoring the host's hard-coded default).
 * @returns `{ ok: true, overlay }` on HTTP 200 (an empty overlay when the
 *   body carries none), `{ ok: false, reason: 'seam_unavailable' }` on
 *   HTTP 404, and `{ ok: false, reason: 'http_error', status }` otherwise.
 */
export async function setHostCapability(
  name: string,
  value: boolean | null,
): Promise<ToggleOutcome> {
  const res = await driver.post('/v1/host/sample/test/capability-toggle', { name, value });
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // Guard against a 200 with an empty / non-object body so the caller gets
  // a structured outcome instead of a TypeError.
  const body = res.json as { overlay?: Record<string, boolean> } | null | undefined;
  return { ok: true, overlay: body?.overlay ?? {} };
}
37
+
38
/**
 * Clear ALL capability overlay entries on the host by posting
 * `{ reset: true }` to `POST /v1/host/sample/test/capability-toggle`.
 *
 * @returns `{ ok: true, overlay }` on HTTP 200 (an empty overlay when the
 *   body carries none), `{ ok: false, reason: 'seam_unavailable' }` on
 *   HTTP 404, and `{ ok: false, reason: 'http_error', status }` otherwise.
 */
export async function resetHostCapabilities(): Promise<ToggleOutcome> {
  const res = await driver.post('/v1/host/sample/test/capability-toggle', { reset: true });
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // Guard against a 200 with an empty / non-object body so teardown code
  // gets a structured outcome instead of a TypeError.
  const body = res.json as { overlay?: Record<string, boolean> } | null | undefined;
  return { ok: true, overlay: body?.overlay ?? {} };
}
46
+
47
/**
 * Probe whether the host exposes the capability-toggle seam at all, so a
 * scenario can soft-skip early when it cannot drive the toggle from
 * outside (the refusal contract itself stays spec-normative).
 *
 * Sends an overlay-removal request (`value: null`) for the placeholder
 * flag `__probe__` and reports whether the host answered with HTTP 200.
 */
export async function isToggleAvailable(): Promise<boolean> {
  const { ok } = await setHostCapability('__probe__', null);
  return ok;
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Shared helpers for the LLM cache-key recipe per `spec/v1/replay.md`
3
+ * §"LLM cache-key recipe" §A + §B.
4
+ *
5
+ * Used by:
6
+ * - `conformance/src/scenarios/replay-llm-cache-key.test.ts` — single-host
7
+ * recipe assertions + non-recipe-field invariance + (gated)
8
+ * cross-host parity via OPENWOP_BASE_URL_B.
9
+ * - `conformance/src/scenarios/replay-llm-cache-key-portable.test.ts` —
10
+ * RFC 0041 §E SECURITY-invariant probe (intra-host reproducibility +
11
+ * non-recipe-field invariance + Phase 4 advertisement alignment).
12
+ *
13
+ * `canonicalize` mirrors RFC 8785 JCS-style output (sorted keys, no
14
+ * whitespace, preserved array order). Hosts that have a real JCS library
15
+ * available SHOULD prefer it; this helper is for the conformance side,
16
+ * not the host side. Keep in sync with `spec/v1/replay.md` §B.
17
+ */
18
+
19
+ import { createHash } from 'node:crypto';
20
+ import { driver } from './driver.js';
21
+
22
/** True when `JSON.stringify` would produce text for `value` (it yields
 * `undefined` for `undefined`, functions and symbols). */
function isJsonSerializable(value: unknown): boolean {
  const kind = typeof value;
  return kind !== 'undefined' && kind !== 'function' && kind !== 'symbol';
}

/**
 * RFC 8785 JCS-style canonicalization (subset suitable for the recipe
 * fields): object keys sorted recursively, no whitespace, array order
 * preserved, strings JSON-encoded verbatim (no NFC normalization).
 *
 * Values JSON cannot represent (`undefined`, functions, symbols) follow
 * `JSON.stringify` conventions so the output is always well-formed:
 * object members holding them are omitted, and array slots (including
 * holes in sparse arrays) or a top-level value become `null`.
 *
 * @param value Any JSON-like value.
 * @returns The canonical JSON text.
 * @throws TypeError for `bigint` input (propagated from `JSON.stringify`).
 */
export function canonicalize(value: unknown): string {
  if (value === null || !isJsonSerializable(value)) return 'null';

  if (Array.isArray(value)) {
    // Array.from visits holes as `undefined`, so sparse arrays still
    // serialize every slot instead of leaving an empty `,,` gap.
    return '[' + Array.from(value as unknown[], (item) => canonicalize(item)).join(',') + ']';
  }

  if (typeof value === 'object') {
    const obj = value as Record<string, unknown>;
    const members = Object.keys(obj)
      .filter((key) => isJsonSerializable(obj[key]))
      .sort() // default sort compares UTF-16 code units
      .map((key) => `${JSON.stringify(key)}:${canonicalize(obj[key])}`);
    return '{' + members.join(',') + '}';
  }

  // boolean | number | string (bigint throws inside JSON.stringify).
  return JSON.stringify(value);
}
38
+
39
/**
 * Project a raw recipe-input object onto the closed field set used by the
 * cache-key recipe.
 *
 * `provider`, `model` and `messages` are always copied. `tools` is copied
 * only when it is a non-empty array, as a new array sorted by `name` (the
 * input array is left untouched). `temperature`, `topP` and `topK` are
 * copied only when they are numbers, and `responseFormat` only when it is
 * a non-null object. Absent optionals are omitted rather than emitted as
 * null/default placeholders; any other input key is dropped.
 *
 * NOTE(review): the tool sort uses `localeCompare`, which depends on the
 * runtime's locale — confirm against the spec whether a code-unit
 * ordering is what hosts are expected to use.
 */
export function projectRecipe(raw: Record<string, unknown>): Record<string, unknown> {
  const projected: Record<string, unknown> = {
    provider: raw.provider,
    model: raw.model,
    messages: raw.messages,
  };

  const { tools, responseFormat } = raw;
  if (Array.isArray(tools) && tools.length > 0) {
    const sortedTools = Array.from(tools as Array<{ name: string }>);
    sortedTools.sort((left, right) => left.name.localeCompare(right.name));
    projected.tools = sortedTools;
  }

  for (const key of ['temperature', 'topP', 'topK'] as const) {
    const candidate = raw[key];
    if (typeof candidate === 'number') projected[key] = candidate;
  }

  if (responseFormat && typeof responseFormat === 'object') {
    projected.responseFormat = responseFormat;
  }
  return projected;
}
53
+
54
/**
 * Compute the canonical LLM cache key for a recipe input: the input is
 * projected with `projectRecipe`, serialized with `canonicalize`, and the
 * UTF-8 bytes of that text are hashed with SHA-256.
 *
 * @param input Raw recipe-input object.
 * @returns The digest as lowercase hexadecimal.
 */
export function expectedCacheKey(input: Record<string, unknown>): string {
  const canonicalText = canonicalize(projectRecipe(input));
  const hasher = createHash('sha256');
  hasher.update(canonicalText, 'utf8');
  return hasher.digest('hex');
}
59
+
60
/**
 * Drive the host's `POST /v1/host/sample/test/llm-cache-key` test seam.
 *
 * @param input Recipe input forwarded to the seam as the request body.
 * @returns The HTTP status, plus the host's `cacheKey` when the response
 *   body carries one. When the body has no `cacheKey` — including a 404
 *   or any response without a JSON object body — only `status` is
 *   returned so the caller can soft-skip.
 */
export async function callCacheKeySeam(input: Record<string, unknown>): Promise<{ status: number; cacheKey?: string }> {
  const res = await driver.post('/v1/host/sample/test/llm-cache-key', input);

  // The body may be absent (e.g. a bare 404); optional chaining keeps the
  // documented "status alone" contract instead of throwing a TypeError.
  const body = res.json as { cacheKey?: string } | null | undefined;
  const cacheKey = body?.cacheKey;
  return cacheKey !== undefined ? { status: res.status, cacheKey } : { status: res.status };
}
@@ -37,6 +37,9 @@ interface AgentCaps {
37
37
  | {
38
38
  verbosity: 'summary' | 'full' | 'off' | undefined;
39
39
  tokenLimit: number | undefined;
40
+ /** RFC 0024. When true, host may emit `agent.reasoning.delta`
41
+ * events in addition to the closing `agent.reasoned`. */
42
+ streaming: boolean;
40
43
  }
41
44
  | undefined;
42
45
  }
@@ -84,6 +87,7 @@ export function setMultiAgentCapabilities(c: DiscoveryPayload | null | undefined
84
87
  typeof (reasoningRaw as Record<string, unknown>).tokenLimit === 'number'
85
88
  ? ((reasoningRaw as Record<string, unknown>).tokenLimit as number)
86
89
  : undefined,
90
+ streaming: asBoolean((reasoningRaw as Record<string, unknown>).streaming),
87
91
  }
88
92
  : undefined;
89
93
  _agentCaps = {
@@ -113,6 +117,12 @@ export function getReasoningVerbosity(): 'summary' | 'full' | 'off' | undefined
113
117
  return _agentCaps?.reasoning?.verbosity;
114
118
  }
115
119
 
120
/**
 * RFC 0024 — reports whether the cached agent capabilities flag
 * reasoning streaming, i.e. the host emits incremental
 * `agent.reasoning.delta` events while a reasoning block is still open.
 *
 * Answers `false` when no capabilities are cached or the reasoning
 * section is absent.
 */
export function isReasoningStreamingSupported(): boolean {
  const reasoning = _agentCaps?.reasoning;
  return reasoning?.streaming === true;
}
125
+
116
126
  /** Phase 2 — host supports the named modelClass. */
117
127
  export function hasModelClass(modelClass: string): boolean {
118
128
  return _agentCaps?.modelClasses.has(modelClass) === true;
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Driver helpers for the OTel + debug-bundle test seams (E.2 + E.3).
3
+ *
4
+ * Used by aiEnvelope + cost-attribution scenarios that need to verify
5
+ * span-attribute redaction (no BYOK canary in OTel attributes) and
6
+ * debug-bundle export shape.
7
+ */
8
+
9
+ import { driver } from './driver.js';
10
+
11
+ export interface TestSpan {
12
+ readonly spanId: string;
13
+ readonly name: string;
14
+ readonly attributes: Record<string, string | number | boolean>;
15
+ readonly envelopeId?: string;
16
+ readonly runId?: string;
17
+ readonly timestamp: string;
18
+ }
19
+
20
+ export interface DebugBundle {
21
+ readonly runId: string;
22
+ readonly events: unknown[];
23
+ readonly spans: TestSpan[];
24
+ readonly exportedAt: string;
25
+ }
26
+
27
+ export type ScrapeOutcome<T> =
28
+ | { ok: true; data: T }
29
+ | { ok: false; reason: 'seam_unavailable' }
30
+ | { ok: false; reason: 'http_error'; status: number };
31
+
32
/**
 * Query the test-only OTel span store via
 * `GET /v1/host/sample/test/otel/spans`, forwarding each non-empty filter
 * field as a query parameter of the same name.
 *
 * @param filter Optional filters. Absent or empty-string fields are not sent.
 * @returns `{ ok: true, data }` on HTTP 200 (an empty list when the body
 *   carries no `spans` member), `{ ok: false, reason: 'seam_unavailable' }`
 *   on HTTP 404, and `{ ok: false, reason: 'http_error', status }` otherwise.
 */
export async function queryTestSpans(
  filter: { envelopeId?: string; runId?: string; name?: string } = {},
): Promise<ScrapeOutcome<TestSpan[]>> {
  const qs = new URLSearchParams();
  if (filter.envelopeId) qs.set('envelopeId', filter.envelopeId);
  if (filter.runId) qs.set('runId', filter.runId);
  if (filter.name) qs.set('name', filter.name);

  // Serialize once; an empty string means "no filters", so no `?` suffix.
  const query = qs.toString();
  const url = `/v1/host/sample/test/otel/spans${query ? '?' + query : ''}`;

  const res = await driver.get(url);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // A 200 with an empty / non-object body is treated like a body without
  // a `spans` member rather than crashing the caller.
  const body = res.json as { spans?: TestSpan[] } | null | undefined;
  return { ok: true, data: body?.spans ?? [] };
}
46
+
47
/**
 * Request a debug-bundle export for a run via
 * `POST /v1/host/sample/test/debug-bundle/export`.
 *
 * @param runId Run to export; sent as `{ runId }` in the request body.
 * @returns `{ ok: true, data }` with the bundle on HTTP 200,
 *   `{ ok: false, reason: 'seam_unavailable' }` on HTTP 404, and
 *   `{ ok: false, reason: 'http_error', status }` otherwise. A 200 whose
 *   body lacks a `bundle` is reported as `http_error` with a synthetic
 *   status of 500 (the host did not actually answer 500).
 */
export async function exportDebugBundle(runId: string): Promise<ScrapeOutcome<DebugBundle>> {
  const res = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // Optional chaining covers an empty / non-object body, which then falls
  // into the same "missing bundle" branch instead of throwing.
  const body = res.json as { bundle?: DebugBundle } | null | undefined;
  const bundle = body?.bundle;
  if (!bundle) return { ok: false, reason: 'http_error', status: 500 };
  return { ok: true, data: bundle };
}
55
+
56
/**
 * Probe whether the OTel span seam is exposed.
 *
 * Queries spans for the placeholder run id `__probe__` and reports
 * whether that query came back `ok` (i.e. HTTP 200).
 */
export async function isOtelSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestSpans({ runId: '__probe__' });
  return ok;
}