@openwop/openwop-conformance 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +156 -1
- package/README.md +3 -2
- package/api/asyncapi.yaml +8 -0
- package/api/openapi.yaml +371 -1
- package/api/redocly.yaml +15 -0
- package/coverage.md +26 -5
- package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
- package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
- package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
- package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
- package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
- package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
- package/fixtures/conformance-envelope-recovery-applied.json +39 -0
- package/fixtures/conformance-envelope-refusal.json +38 -0
- package/fixtures/conformance-envelope-retry-attempted.json +39 -0
- package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
- package/fixtures/conformance-envelope-truncated.json +39 -0
- package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
- package/fixtures/conformance-model-capability-insufficient.json +25 -0
- package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
- package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
- package/fixtures/conformance-multi-agent-handoff.json +49 -0
- package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
- package/fixtures/conformance-prompt-end-to-end.json +33 -0
- package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
- package/fixtures/openwop-smoke-cost-emit.json +37 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
- package/fixtures.md +45 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/agent-manifest.schema.json +16 -0
- package/schemas/capabilities.schema.json +390 -0
- package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
- package/schemas/envelopes/clarification.request.schema.json +9 -0
- package/schemas/envelopes/error.schema.json +4 -0
- package/schemas/envelopes/schema.request.schema.json +4 -0
- package/schemas/envelopes/schema.response.schema.json +1 -1
- package/schemas/node-pack-manifest.schema.json +28 -0
- package/schemas/orchestrator-decision.schema.json +12 -0
- package/schemas/prompt-kind.schema.json +8 -0
- package/schemas/prompt-pack-manifest.schema.json +80 -0
- package/schemas/prompt-ref.schema.json +40 -0
- package/schemas/prompt-template.schema.json +149 -0
- package/schemas/registry-version-manifest.schema.json +5 -0
- package/schemas/run-ancestry-response.schema.json +54 -0
- package/schemas/run-event-payloads.schema.json +513 -11
- package/schemas/run-event.schema.json +17 -1
- package/schemas/run-snapshot.schema.json +3 -2
- package/schemas/workflow-definition.schema.json +19 -1
- package/src/lib/driver.ts +15 -0
- package/src/lib/env.ts +51 -0
- package/src/lib/event-log-query.ts +62 -0
- package/src/lib/fixtures.ts +38 -1
- package/src/lib/host-toggle.ts +54 -0
- package/src/lib/llm-cache-key-recipe.ts +68 -0
- package/src/lib/multi-agent-capabilities.ts +10 -0
- package/src/lib/otel-scrape.ts +59 -0
- package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
- package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
- package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
- package/src/scenarios/blob-presign-expiry.test.ts +42 -9
- package/src/scenarios/blob-roundtrip.test.ts +0 -0
- package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
- package/src/scenarios/cost-attribution.test.ts +124 -11
- package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
- package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
- package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
- package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
- package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
- package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
- package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
- package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
- package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
- package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
- package/src/scenarios/envelope-truncated.test.ts +136 -0
- package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
- package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
- package/src/scenarios/fixtures-gating.test.ts +139 -1
- package/src/scenarios/fixtures-valid.test.ts +123 -15
- package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
- package/src/scenarios/model-capability-insufficient.test.ts +221 -0
- package/src/scenarios/model-capability-substituted.test.ts +203 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
- package/src/scenarios/multi-region-idempotency.test.ts +58 -0
- package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
- package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
- package/src/scenarios/pack-registry-publish.test.ts +231 -51
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
- package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
- package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
- package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
- package/src/scenarios/prompt-pack-install.test.ts +187 -0
- package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
- package/src/scenarios/prompt-template-shape.test.ts +359 -0
- package/src/scenarios/provider-usage.test.ts +185 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
- package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
- package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
- package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
- package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
- package/src/scenarios/spec-corpus-validity.test.ts +34 -6
- package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
- package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
- package/src/scenarios/table-cursor-pagination.test.ts +47 -9
- package/src/scenarios/table-schema-enforcement.test.ts +46 -9
- package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
- package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
|
@@ -101,16 +101,32 @@
|
|
|
101
101
|
"lease.lost",
|
|
102
102
|
"lease.handed-off",
|
|
103
103
|
"replay.diverged",
|
|
104
|
+
"replay.divergedAtRefusal",
|
|
104
105
|
"agent.reasoned",
|
|
106
|
+
"agent.reasoning.delta",
|
|
107
|
+
"provider.usage",
|
|
108
|
+
"prompt.composed",
|
|
109
|
+
"agent.promptResolved",
|
|
110
|
+
"model.capability.substituted",
|
|
111
|
+
"model.capability.insufficient",
|
|
112
|
+
"envelope.retry.attempted",
|
|
113
|
+
"envelope.retry.exhausted",
|
|
114
|
+
"envelope.refusal",
|
|
115
|
+
"envelope.truncated",
|
|
116
|
+
"envelope.nlToFormat.engaged",
|
|
117
|
+
"envelope.recovery.applied",
|
|
105
118
|
"agent.toolCalled",
|
|
106
119
|
"agent.toolReturned",
|
|
107
120
|
"agent.handoff",
|
|
108
121
|
"agent.decided",
|
|
109
122
|
"runOrchestrator.decided",
|
|
123
|
+
"node.dispatched",
|
|
110
124
|
"conversation.opened",
|
|
111
125
|
"conversation.exchanged",
|
|
112
126
|
"conversation.closed",
|
|
113
|
-
"memory.compacted"
|
|
127
|
+
"memory.compacted",
|
|
128
|
+
"core.workflowChain.event",
|
|
129
|
+
"core.workflowChain.confidence-escalated"
|
|
114
130
|
]
|
|
115
131
|
}
|
|
116
132
|
}
|
|
@@ -25,15 +25,16 @@
|
|
|
25
25
|
"paused",
|
|
26
26
|
"waiting-approval",
|
|
27
27
|
"waiting-input",
|
|
28
|
+
"waiting-external",
|
|
28
29
|
"completed",
|
|
29
30
|
"failed",
|
|
30
31
|
"cancelled"
|
|
31
32
|
],
|
|
32
|
-
"description": "Current run state. Forward-compat: future statuses MAY be added; readers SHOULD treat unknown values as terminal-unknown rather than throw."
|
|
33
|
+
"description": "Current run state. `waiting-external` MUST be used when the suspended interrupt's `kind` is `external-event` per `interrupt-profiles.md §openwop-interrupt-external-event` — distinguishes external-event waits from HITL waits at the wire level. Forward-compat: future statuses MAY be added; readers SHOULD treat unknown values as terminal-unknown rather than throw."
|
|
33
34
|
},
|
|
34
35
|
"currentNodeId": {
|
|
35
36
|
"type": "string",
|
|
36
|
-
"description": "Set when the run is suspended at a specific node (`waiting-approval` / `waiting-input`) — identifies which node holds the interrupt."
|
|
37
|
+
"description": "Set when the run is suspended at a specific node (`waiting-approval` / `waiting-input` / `waiting-external`) — identifies which node holds the interrupt."
|
|
37
38
|
},
|
|
38
39
|
"startedAt": { "type": "string", "format": "date-time" },
|
|
39
40
|
"completedAt": { "type": "string", "format": "date-time" },
|
|
@@ -77,6 +77,24 @@
|
|
|
77
77
|
"description": "Optional JSON Schema 2020-12 declaring which RunOptions.configurable keys this workflow accepts. When present, hosts MUST validate POST /v1/runs `configurable` payloads against this schema and reject mismatches with `validation_error`. Hosts MUST surface this schema on GET /v1/workflows/{workflowId} so clients can pre-flight-validate. See run-options.md §'Per-workflow configurableSchema'. Additive in v1.1.",
|
|
78
78
|
"type": "object"
|
|
79
79
|
},
|
|
80
|
+
"defaults": {
|
|
81
|
+
"type": "object",
|
|
82
|
+
"additionalProperties": false,
|
|
83
|
+
"description": "RFC 0029 §B. Workflow-author-controlled per-kind fallback values that apply at resolution chain layer 3 (`workflow-defaults`) per `spec/v1/prompts.md` §\"Resolution chain (normative)\". Applied when neither the node (layer 1) nor the node's bound agent (layer 2) specifies a value for the kind. Future RFCs MAY add sibling defaults (e.g., `defaults.temperature`, `defaults.modelClass`) without colliding.",
|
|
84
|
+
"properties": {
|
|
85
|
+
"promptRefs": {
|
|
86
|
+
"type": "object",
|
|
87
|
+
"additionalProperties": false,
|
|
88
|
+
"description": "Per-kind PromptRef fallbacks for layer 3 of the resolution chain.",
|
|
89
|
+
"properties": {
|
|
90
|
+
"system": { "$ref": "./prompt-ref.schema.json" },
|
|
91
|
+
"user": { "$ref": "./prompt-ref.schema.json" },
|
|
92
|
+
"few-shot": { "$ref": "./prompt-ref.schema.json" },
|
|
93
|
+
"schema-hint": { "$ref": "./prompt-ref.schema.json" }
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
},
|
|
80
98
|
"metadata": { "$ref": "#/$defs/WorkflowMetadata" },
|
|
81
99
|
"settings": { "$ref": "#/$defs/WorkflowSettings" },
|
|
82
100
|
"acceptsInheritedArtifacts": {
|
|
@@ -111,7 +129,7 @@
|
|
|
111
129
|
},
|
|
112
130
|
"config": {
|
|
113
131
|
"type": "object",
|
|
114
|
-
"description": "Node configuration (pre-execution constants)."
|
|
132
|
+
"description": "Node configuration (pre-execution constants). The shape is per-typeId — node-pack manifests declare each typeId's `configSchema` for install-time validation. By convention, the keys `systemPromptRef`, `userPromptRef`, and `additionalPromptRefs` MAY hold PromptRef values per `spec/v1/prompts.md` §\"PromptRef\" (RFC 0027). Hosts advertising `capabilities.prompts.supported: true` MUST resolve these keys; hosts without the capability MAY treat them as opaque strings. When both an inline body (e.g., `config.systemPrompt`) and a `*PromptRef` are present, the ref wins and the host MUST emit a `log.appended` warning with `code: \"prompt_ref_supersedes_inline\"` per RFC 0027 §C."
|
|
115
133
|
},
|
|
116
134
|
"inputs": {
|
|
117
135
|
"type": "object",
|
package/src/lib/driver.ts
CHANGED
|
@@ -78,6 +78,21 @@ class OpenWOPDriver {
|
|
|
78
78
|
return this.request('POST', path, { ...init, body });
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
/**
 * PUT helper.
 *
 * The body is JSON-stringified by default. For raw-body PUTs (e.g. tarball
 * uploads) pass a string body together with an explicit Content-Type
 * header: production hosts that accept tarball PUTs on /v1/packs/* expect
 * `Content-Type: application/octet-stream`, so callers MUST set the header
 * themselves when uploading non-JSON.
 *
 * @param path request path
 * @param body request payload, merged into `init` as `body`
 * @param init extra request options; a `body` key in `init` is overridden
 */
put(path: string, body: unknown, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
  const requestInit = { ...init, body };
  return this.request('PUT', path, requestInit);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Short alias for the canonical `delete(...)` method, for scenarios that
 * prefer the terser `driver.del(...)` call-site. Issues the same DELETE
 * request with the same options.
 */
del(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
  const method = 'DELETE';
  return this.request(method, path, init);
}
|
|
95
|
+
|
|
81
96
|
/** DELETE helper: forwards `path` and `init` to `request` unchanged. */
delete(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
  const method = 'DELETE';
  return this.request(method, path, init);
}
|
package/src/lib/env.ts
CHANGED
|
@@ -25,6 +25,28 @@
|
|
|
25
25
|
* hosts go strict-mode green without falsifying capability claims.
|
|
26
26
|
* Example for SQLite:
|
|
27
27
|
* OPENWOP_OPTED_OUT_PROFILES=openwop-production,openwop-auth-mtls
|
|
28
|
+
*
|
|
29
|
+
* OPENWOP_OPTED_OUT_FIXTURES — comma-separated fixture ids (or
|
|
30
|
+
* trailing-`*` globs) the host operator has DELIBERATELY chosen
|
|
31
|
+
* not to honor. Applied in `lib/fixtures.ts` by filtering matching
|
|
32
|
+
* entries out of the cached advertised-fixture set, so any
|
|
33
|
+
* scenario gated via `isFixtureAdvertised(...)` skips cleanly.
|
|
34
|
+
* Use when a host auto-loads every `conformance-*.json` on disk
|
|
35
|
+
* (so the fixture id IS in the discovery doc) but the host doesn't
|
|
36
|
+
* implement the gated feature. Symmetric to `OPENWOP_OPTED_OUT_
|
|
37
|
+
* PROFILES` for the fixture-id axis. Example for SQLite:
|
|
38
|
+
* OPENWOP_OPTED_OUT_FIXTURES=conformance-dispatch-*,conformance-subworkflow-input-mapping*
|
|
39
|
+
*
|
|
40
|
+
* OPENWOP_OPTED_OUT_SCENARIOS — comma-separated scenario ids that
|
|
41
|
+
* individual tests consult to skip themselves where neither
|
|
42
|
+
* profile-opt-out nor fixture-opt-out is fine-grained enough
|
|
43
|
+
* (e.g., OTel trace-inheritance across `core.subWorkflow` —
|
|
44
|
+
* `conformance-subworkflow-parent` is correctly advertised because
|
|
45
|
+
* non-OTel subworkflow scenarios pass, but the host doesn't
|
|
46
|
+
* propagate traceparent across the dispatch boundary). Use
|
|
47
|
+
* `isScenarioOptedOut(scenarioId)` from `env.ts` in the test's
|
|
48
|
+
* skip predicate. Reserved for cases where the suite-wide
|
|
49
|
+
* skip mechanisms can't carry the granularity.
|
|
28
50
|
*/
|
|
29
51
|
|
|
30
52
|
export interface ConformanceEnv {
|
|
@@ -84,3 +106,32 @@ export function loadEnv(): ConformanceEnv {
|
|
|
84
106
|
};
|
|
85
107
|
return cached;
|
|
86
108
|
}
|
|
109
|
+
|
|
110
|
+
/**
 * Tells whether `scenarioId` appears in the comma-separated
 * `OPENWOP_OPTED_OUT_SCENARIOS` environment variable.
 *
 * Intended for a test's `describe.skipIf` predicate when neither
 * profile-opt-out nor fixture-opt-out is granular enough. The helper itself
 * logs nothing; the caller is expected to report the skip reason in
 * whatever format it likes.
 *
 * `process.env` is read on every call (no memoization), so unit tests can
 * change the variable between cases without resetting any cache.
 *
 * @param scenarioId scenario id to look up; compared exactly against each
 *   whitespace-trimmed list entry
 * @returns true when an entry equals `scenarioId`; false when the variable
 *   is unset, blank, or has no matching entry
 */
export function isScenarioOptedOut(scenarioId: string): boolean {
  const csv = (process.env.OPENWOP_OPTED_OUT_SCENARIOS ?? '').trim();
  // A blank list never matches anything, including an empty scenario id.
  if (csv === '') {
    return false;
  }
  return csv.split(',').some((candidate) => candidate.trim() === scenarioId);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Test-only hook: drops the memoized `loadEnv()` result by resetting the
 * module-level `cached` value to null, so the next `loadEnv()` call reads
 * `process.env` afresh. Needed by any test that changes, mid-suite, an
 * environment variable that `loadEnv()` consumes.
 */
export function __resetEnvCacheForTests(): void {
  cached = null;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Driver helpers for the test-only event-log query seam
|
|
3
|
+
* (`GET /v1/host/sample/test/runs/:runId/events`).
|
|
4
|
+
*
|
|
5
|
+
* Used by aiEnvelope engine-projection scenarios that verify the
|
|
6
|
+
* spec-prescribed events the host MUST emit on each envelope outcome
|
|
7
|
+
* (per RFC 0021 §A point 1-7 + interrupt.md + capabilities.md
|
|
8
|
+
* §"cap.breached"). All operations soft-skip on HTTP 404 — hosts
|
|
9
|
+
* without the seam keep the existing advertisement-shape coverage.
|
|
10
|
+
*
|
|
11
|
+
* Reset semantics: callers SHOULD `resetTestSeam()` in their test's
|
|
12
|
+
* `afterEach` (or scope each test to a unique runId) to keep state
|
|
13
|
+
* from leaking across scenarios.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { driver } from './driver.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Shape of one entry returned by the test-only event-log seam.
 * All fields are read-only; the last three are optional.
 */
export interface TestEvent {
  // Fields present on every event.
  readonly eventId: string;
  readonly runId: string;
  readonly type: string;
  readonly payload: Record<string, unknown>;
  readonly timestamp: string;
  readonly sequence: number;

  // Fields an event may omit.
  readonly causationId?: string;
  readonly nodeId?: string;
  readonly contentTrust?: 'trusted' | 'untrusted';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Result of querying the test-only event log, discriminated on `ok`
 * (and, for failures, on `reason`):
 *  - success carries the returned events;
 *  - `seam_unavailable` means the host answered 404 for the seam route;
 *  - `http_error` carries any other non-200 status.
 */
export type QueryOutcome =
  | { ok: false; reason: 'http_error'; status: number }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: true; events: TestEvent[] };
|
|
34
|
+
|
|
35
|
+
/**
 * Query the test-only event log for a run, with optional filters.
 *
 * Only non-empty filter values are sent as query parameters. A 404 is
 * reported as `seam_unavailable` so callers can soft-skip; any other
 * non-200 status is reported as `http_error` with the status attached.
 *
 * @param runId run whose events are requested (URL-encoded into the path)
 * @param filter optional filters forwarded as query parameters
 * @returns the events on HTTP 200; a 200 response whose body is missing,
 *   not an object, or lacks an `events` array yields an empty list instead
 *   of throwing
 */
export async function queryTestEvents(
  runId: string,
  filter: { type?: string; correlationId?: string; causationId?: string; nodeId?: string } = {},
): Promise<QueryOutcome> {
  const qs = new URLSearchParams();
  for (const key of ['type', 'correlationId', 'causationId', 'nodeId'] as const) {
    const wanted = filter[key];
    if (wanted) qs.set(key, wanted);
  }
  const query = qs.toString();
  const url = `/v1/host/sample/test/runs/${encodeURIComponent(runId)}/events${query ? '?' + query : ''}`;

  const res = await driver.get(url);
  if (res.status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (res.status !== 200) return { ok: false, reason: 'http_error', status: res.status };

  // The body is untrusted host output: tolerate a null / non-object body and
  // a non-array `events` member rather than crashing on the property access.
  const body = res.json as { events?: unknown } | null | undefined;
  const rawEvents = body?.events;
  const events = Array.isArray(rawEvents) ? (rawEvents as TestEvent[]) : [];
  return { ok: true, events };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Suite-teardown helper: POSTs an empty JSON object to the seam's reset
 * route, which resets the test-only event log and capability overlay.
 * The response is awaited but not inspected.
 */
export async function resetTestSeam(): Promise<void> {
  const resetPath = '/v1/host/sample/test/reset';
  await driver.post(resetPath, {});
}
|
|
57
|
+
|
|
58
|
+
/**
 * Probe for the event-log seam by querying the placeholder run id
 * `__probe__`. Resolves to true only when that query succeeds, so callers
 * can soft-skip early when the seam is not exposed.
 */
export async function isEventLogSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestEvents('__probe__');
  return ok;
}
|
package/src/lib/fixtures.ts
CHANGED
|
@@ -26,6 +26,16 @@
|
|
|
26
26
|
* This module is sync. The async fetch lives in `setup.ts` which calls
|
|
27
27
|
* `setAdvertisedFixtures(...)` from a top-level `await`.
|
|
28
28
|
*
|
|
29
|
+
* Honest opt-out (symmetric to `OPENWOP_OPTED_OUT_PROFILES`):
|
|
30
|
+
* `OPENWOP_OPTED_OUT_FIXTURES` (CSV, supports trailing `*` glob)
|
|
31
|
+
* subtracts matching fixture-ids from the cached set even when the
|
|
32
|
+
* host advertises them. Operators use this when the host happens to
|
|
33
|
+
* carry a fixture file (e.g., it auto-loads every `conformance-*.json`
|
|
34
|
+
* on disk) but does NOT implement the underlying feature — so the
|
|
35
|
+
* gated scenario should skip instead of running and failing. The
|
|
36
|
+
* subtraction happens at cache-population time, so the predicate
|
|
37
|
+
* remains a single sync set lookup at scenario-evaluation time.
|
|
38
|
+
*
|
|
29
39
|
* @see spec/v1/capabilities.md §`fixtures`
|
|
30
40
|
* @see spec/v1/profiles.md §`openwop-fixtures`
|
|
31
41
|
* @see RFCS/0003-fixture-gating.md
|
|
@@ -35,19 +45,46 @@ import type { DiscoveryPayload } from './profiles.js';
|
|
|
35
45
|
|
|
36
46
|
let _advertisedFixtures: ReadonlySet<string> | null = null;
|
|
37
47
|
|
|
48
|
+
/**
 * Build an "is this fixture id opted out?" predicate from the
 * `OPENWOP_OPTED_OUT_FIXTURES` environment variable.
 *
 * The variable is a comma-separated list. Entries are whitespace-trimmed and
 * empty entries are ignored. An entry ending in `*` matches every id that
 * starts with the text before the `*`; any other entry matches only the
 * identical id. An unset or blank variable yields a predicate that always
 * answers false.
 *
 * @returns predicate over fixture ids
 */
function loadOptedOutPredicate(): (id: string) => boolean {
  const csv = (process.env.OPENWOP_OPTED_OUT_FIXTURES ?? '').trim();
  if (csv === '') {
    return () => false;
  }

  const exactIds = new Set<string>();
  const globPrefixes: string[] = [];
  for (const piece of csv.split(',')) {
    const token = piece.trim();
    if (token === '') continue;
    if (token.endsWith('*')) {
      // Keep only the prefix in front of the trailing `*`.
      globPrefixes.push(token.slice(0, -1));
    } else {
      exactIds.add(token);
    }
  }

  return (id) => {
    if (exactIds.has(id)) return true;
    for (const prefix of globPrefixes) {
      if (id.startsWith(prefix)) return true;
    }
    return false;
  };
}
|
|
68
|
+
|
|
38
69
|
/**
 * Fill the advertised-fixture cache from a discovery-doc payload.
 *
 * Malformed input never throws: a null/undefined payload, or one whose
 * `fixtures` member is not an array, collapses to an empty set ("no
 * fixtures advertised"), which keeps the suite resilient to host bugs in
 * the discovery surface. Within a valid array, only non-empty strings are
 * kept.
 *
 * `OPENWOP_OPTED_OUT_FIXTURES` is applied here, at cache-population time:
 * ids matching the opt-out predicate are dropped before the set is stored,
 * so later lookups remain a plain set-membership test.
 *
 * @param c discovery payload, or null/undefined when none is available
 */
export function setAdvertisedFixtures(c: DiscoveryPayload | null | undefined): void {
  const advertised = c?.fixtures;
  if (!Array.isArray(advertised)) {
    _advertisedFixtures = new Set();
    return;
  }

  const isOptedOut = loadOptedOutPredicate();
  const kept = new Set<string>();
  for (const entry of advertised) {
    if (typeof entry !== 'string' || entry.length === 0) continue;
    if (isOptedOut(entry)) continue;
    kept.add(entry);
  }
  _advertisedFixtures = kept;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capability-toggle harness primitive — driver helper for the
|
|
3
|
+
* env-gated test-seam endpoint at
|
|
4
|
+
* `POST /v1/host/sample/test/capability-toggle`.
|
|
5
|
+
*
|
|
6
|
+
* Lets refusal-case scenarios (RFC 0022 §C HVMAP-1a-refusal,
|
|
7
|
+
* HVMAP-2-refusal, etc.) flip a capability flag off temporarily,
|
|
8
|
+
* exercise the host's refusal path, then restore the default.
|
|
9
|
+
*
|
|
10
|
+
* All operations soft-skip on HTTP 404 — hosts that don't expose the
|
|
11
|
+
* seam keep the existing advertisement-shape coverage intact.
|
|
12
|
+
*
|
|
13
|
+
* Reset semantics: callers MUST `resetHostCapabilities()` in their
|
|
14
|
+
* test's `afterEach` (or equivalent) to keep state from leaking
|
|
15
|
+
* across scenarios.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { driver } from './driver.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Result of a capability-toggle call, discriminated on `ok` (and, for
 * failures, on `reason`):
 *  - success carries the overlay map (flag name → boolean) from the response;
 *  - `seam_unavailable` means the host answered 404 for the toggle route;
 *  - `http_error` carries any other non-200 status.
 */
export type ToggleOutcome =
  | { ok: false; reason: 'http_error'; status: number }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: true; overlay: Record<string, boolean> };
|
|
24
|
+
|
|
25
|
+
/**
 * Set a capability flag's overlay value on the host.
 *
 * @param name capability flag name
 * @param value overlay value; `null` removes the overlay entry (restoring
 *   the host's hard-coded default)
 * @returns the overlay reported by the host on HTTP 200, `seam_unavailable`
 *   on 404, `http_error` (with the status) otherwise
 */
export async function setHostCapability(
  name: string,
  value: boolean | null,
): Promise<ToggleOutcome> {
  const res = await driver.post('/v1/host/sample/test/capability-toggle', { name, value });
  return toToggleOutcome(res.status, res.json);
}

/**
 * Clear ALL capability overlay entries on the host.
 *
 * @returns the overlay reported by the host on HTTP 200, `seam_unavailable`
 *   on 404, `http_error` (with the status) otherwise
 */
export async function resetHostCapabilities(): Promise<ToggleOutcome> {
  const res = await driver.post('/v1/host/sample/test/capability-toggle', { reset: true });
  return toToggleOutcome(res.status, res.json);
}

/**
 * Map a capability-toggle response (status + parsed JSON body) to a
 * `ToggleOutcome`. Shared by both toggle calls so their handling cannot
 * drift apart. A missing / non-object body or absent `overlay` member
 * yields an empty overlay instead of throwing.
 */
function toToggleOutcome(status: number, json: unknown): ToggleOutcome {
  if (status === 404) return { ok: false, reason: 'seam_unavailable' };
  if (status !== 200) return { ok: false, reason: 'http_error', status };
  const body = json as { overlay?: Record<string, boolean> } | null | undefined;
  return { ok: true, overlay: body?.overlay ?? {} };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Probe for the capability-toggle seam by sending a no-op toggle for the
 * placeholder flag `__probe__` with a `null` value. Resolves to true only
 * when that call succeeds. Use it to soft-skip a scenario early when the
 * host lacks the toggle: the refusal contract stays spec-normative, the
 * test simply cannot drive it from outside.
 */
export async function isToggleAvailable(): Promise<boolean> {
  const { ok } = await setHostCapability('__probe__', null);
  return ok;
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the LLM cache-key recipe per `spec/v1/replay.md`
|
|
3
|
+
* §"LLM cache-key recipe" §A + §B.
|
|
4
|
+
*
|
|
5
|
+
* Used by:
|
|
6
|
+
* - `conformance/src/scenarios/replay-llm-cache-key.test.ts` — single-host
|
|
7
|
+
* recipe assertions + non-recipe-field invariance + (gated)
|
|
8
|
+
* cross-host parity via OPENWOP_BASE_URL_B.
|
|
9
|
+
* - `conformance/src/scenarios/replay-llm-cache-key-portable.test.ts` —
|
|
10
|
+
* RFC 0041 §E SECURITY-invariant probe (intra-host reproducibility +
|
|
11
|
+
* non-recipe-field invariance + Phase 4 advertisement alignment).
|
|
12
|
+
*
|
|
13
|
+
* `canonicalize` mirrors RFC 8785 JCS-style output (sorted keys, no
|
|
14
|
+
* whitespace, preserved array order). Hosts that have a real JCS library
|
|
15
|
+
* available SHOULD prefer it; this helper is for the conformance side,
|
|
16
|
+
* not the host side. Keep in sync with `spec/v1/replay.md` §B.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { createHash } from 'node:crypto';
|
|
20
|
+
import { driver } from './driver.js';
|
|
21
|
+
|
|
22
|
+
/**
 * RFC 8785 JCS-style canonicalization (subset suitable for the recipe
 * fields): object keys sorted recursively, no whitespace, array order
 * preserved, strings JSON-encoded verbatim (no NFC normalization).
 *
 * Values with no JSON representation (`undefined`, functions, symbols) are
 * handled the way `JSON.stringify` handles them, so the output is always
 * valid JSON text: such object members are omitted, and such array elements
 * (including holes in sparse arrays) are written as `null`. A top-level
 * value of that kind is also written as `null`, so the function always
 * returns a string.
 *
 * @param value JSON-like value to serialize
 * @returns canonical JSON text
 */
export function canonicalize(value: unknown): string {
  if (value === null || !hasJsonRepresentation(value)) return 'null';
  if (Array.isArray(value)) {
    // Array.from visits holes as `undefined`, unlike Array.prototype.map.
    return '[' + Array.from(value, (item) => canonicalize(item)).join(',') + ']';
  }
  if (typeof value === 'object') {
    const record = value as Record<string, unknown>;
    const members = Object.keys(record)
      .filter((key) => hasJsonRepresentation(record[key]))
      // Default sort compares UTF-16 code units, the ordering JCS prescribes.
      .sort()
      .map((key) => `${JSON.stringify(key)}:${canonicalize(record[key])}`);
    return '{' + members.join(',') + '}';
  }
  // Booleans, numbers and strings.
  return JSON.stringify(value);
}

/** True unless `v` is one of the kinds JSON cannot express at all. */
function hasJsonRepresentation(v: unknown): boolean {
  const kind = typeof v;
  return kind !== 'undefined' && kind !== 'function' && kind !== 'symbol';
}
|
|
38
|
+
|
|
39
|
+
/**
 * Project a raw recipe-input object onto the closed set of recipe fields.
 *
 * `provider`, `model` and `messages` are always copied. Optional fields are
 * copied only when present with the expected type; absent optionals are
 * omitted rather than emitted as null/default placeholders. A non-empty
 * `tools` array is copied and sorted by `name`.
 *
 * Tool names are ordered by plain code-unit comparison, not
 * `localeCompare`: locale-aware collation depends on the runtime's ICU data
 * and default locale, which would make the resulting cache key vary between
 * machines. The input array is not mutated.
 *
 * @param raw raw recipe input
 * @returns new object holding only the recipe fields
 */
export function projectRecipe(raw: Record<string, unknown>): Record<string, unknown> {
  const out: Record<string, unknown> = { provider: raw.provider, model: raw.model, messages: raw.messages };
  if (Array.isArray(raw.tools) && raw.tools.length > 0) {
    out.tools = [...(raw.tools as Array<{ name: string }>)].sort(compareToolNames);
  }
  if (typeof raw.temperature === 'number') out.temperature = raw.temperature;
  if (typeof raw.topP === 'number') out.topP = raw.topP;
  if (typeof raw.topK === 'number') out.topK = raw.topK;
  if (raw.responseFormat && typeof raw.responseFormat === 'object') out.responseFormat = raw.responseFormat;
  return out;
}

/** Locale-independent (UTF-16 code-unit) ordering of two tools by name. */
function compareToolNames(a: { name: string }, b: { name: string }): number {
  if (a.name < b.name) return -1;
  if (a.name > b.name) return 1;
  return 0;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Compute the expected LLM cache key for a recipe input: project the input
 * with `projectRecipe`, serialize it with `canonicalize`, then take the
 * SHA-256 of that text (as UTF-8) and return it as lowercase hex.
 *
 * @param input raw recipe input
 * @returns 64-character lowercase hex digest
 */
export function expectedCacheKey(input: Record<string, unknown>): string {
  const canonicalText = canonicalize(projectRecipe(input));
  const hasher = createHash('sha256');
  hasher.update(canonicalText, 'utf8');
  return hasher.digest('hex');
}
|
|
59
|
+
|
|
60
|
+
/**
 * Drive the host's `POST /v1/host/sample/test/llm-cache-key` test seam.
 *
 * @param input - Recipe input posted to the seam as the request body.
 * @returns The HTTP status, plus the host's emitted `cacheKey` when the
 *   response body carries one. A response without a `cacheKey` (for example a
 *   404 from a host that does not expose the seam, which callers soft-skip)
 *   yields the status alone.
 */
export async function callCacheKeySeam(input: Record<string, unknown>): Promise<{ status: number; cacheKey?: string }> {
  const res = await driver.post('/v1/host/sample/test/llm-cache-key', input);
  // Read the body defensively so a reply without a JSON object still reports
  // its status instead of throwing on property access.
  // NOTE(review): assumes the driver can surface a missing body as
  // null/undefined — confirm against the driver implementation.
  const body = res.json as { cacheKey?: string } | null | undefined;
  const cacheKey = body?.cacheKey;
  // Build the result without a `cacheKey` property at all when there is none.
  return cacheKey !== undefined ? { status: res.status, cacheKey } : { status: res.status };
}
|
|
@@ -37,6 +37,9 @@ interface AgentCaps {
|
|
|
37
37
|
| {
|
|
38
38
|
verbosity: 'summary' | 'full' | 'off' | undefined;
|
|
39
39
|
tokenLimit: number | undefined;
|
|
40
|
+
/** RFC 0024. When true, host may emit `agent.reasoning.delta`
|
|
41
|
+
* events in addition to the closing `agent.reasoned`. */
|
|
42
|
+
streaming: boolean;
|
|
40
43
|
}
|
|
41
44
|
| undefined;
|
|
42
45
|
}
|
|
@@ -84,6 +87,7 @@ export function setMultiAgentCapabilities(c: DiscoveryPayload | null | undefined
|
|
|
84
87
|
typeof (reasoningRaw as Record<string, unknown>).tokenLimit === 'number'
|
|
85
88
|
? ((reasoningRaw as Record<string, unknown>).tokenLimit as number)
|
|
86
89
|
: undefined,
|
|
90
|
+
streaming: asBoolean((reasoningRaw as Record<string, unknown>).streaming),
|
|
87
91
|
}
|
|
88
92
|
: undefined;
|
|
89
93
|
_agentCaps = {
|
|
@@ -113,6 +117,12 @@ export function getReasoningVerbosity(): 'summary' | 'full' | 'off' | undefined
|
|
|
113
117
|
return _agentCaps?.reasoning?.verbosity;
|
|
114
118
|
}
|
|
115
119
|
|
|
120
|
+
/**
 * RFC 0024 — whether the host emits incremental `agent.reasoning.delta`
 * events while a reasoning block is still open.
 *
 * @returns `true` only when the recorded capabilities carry
 *   `reasoning.streaming === true`; `false` in every other case, including
 *   when no capabilities or no reasoning section are recorded.
 */
export function isReasoningStreamingSupported(): boolean {
  const streamingFlag = _agentCaps?.reasoning?.streaming;
  return streamingFlag === true;
}
|
|
125
|
+
|
|
116
126
|
/** Phase 2 — host supports the named modelClass. */
|
|
117
127
|
export function hasModelClass(modelClass: string): boolean {
|
|
118
128
|
return _agentCaps?.modelClasses.has(modelClass) === true;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Driver helpers for the OTel + debug-bundle test seams (E.2 + E.3).
|
|
3
|
+
*
|
|
4
|
+
* Used by aiEnvelope + cost-attribution scenarios that need to verify
|
|
5
|
+
* span-attribute redaction (no BYOK canary in OTel attributes) and
|
|
6
|
+
* debug-bundle export shape.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { driver } from './driver.js';
|
|
10
|
+
|
|
11
|
+
/**
 * One span entry as read from the OTel spans test seam
 * (element type of the `spans` array in the seam's response).
 */
export interface TestSpan {
  readonly spanId: string;
  readonly name: string;
  /** Span attributes; values are limited to string, number or boolean. */
  readonly attributes: Record<string, string | number | boolean>;
  /** Optional; also accepted as a filter key by `queryTestSpans`. */
  readonly envelopeId?: string;
  /** Optional; also accepted as a filter key by `queryTestSpans`. */
  readonly runId?: string;
  /** NOTE(review): string-typed; exact timestamp format is not visible here — confirm. */
  readonly timestamp: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Debug bundle as read from the debug-bundle export test seam
 * (the `bundle` member of the seam's response).
 */
export interface DebugBundle {
  readonly runId: string;
  /** Event payloads; left untyped (`unknown`) by this helper. */
  readonly events: unknown[];
  readonly spans: TestSpan[];
  /** NOTE(review): string-typed; exact timestamp format is not visible here — confirm. */
  readonly exportedAt: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Result of calling a test seam, discriminated on `ok` and then on `reason`:
 * - `ok: true` carries the decoded payload in `data`;
 * - `seam_unavailable` means the seam is not exposed (reported for HTTP 404);
 * - `http_error` carries the offending `status`.
 */
export type ScrapeOutcome<T> =
  | { ok: true; data: T }
  | { ok: false; reason: 'seam_unavailable' }
  | { ok: false; reason: 'http_error'; status: number };
|
|
31
|
+
|
|
32
|
+
/**
 * Fetch spans from the `GET /v1/host/sample/test/otel/spans` test seam.
 *
 * @param filter - Optional filters; each non-empty value is sent as a query
 *   parameter of the same name. Empty strings are treated as absent.
 * @returns `ok` with the response's `spans` array (an empty array when the
 *   field is missing) on HTTP 200; `seam_unavailable` on 404; `http_error`
 *   with the status for any other status.
 */
export async function queryTestSpans(
  filter: { envelopeId?: string; runId?: string; name?: string } = {},
): Promise<ScrapeOutcome<TestSpan[]>> {
  const params = new URLSearchParams();
  for (const field of ['envelopeId', 'runId', 'name'] as const) {
    const wanted = filter[field];
    if (wanted) params.set(field, wanted);
  }

  // Only append `?` when at least one filter was set.
  const query = params.toString();
  const basePath = '/v1/host/sample/test/otel/spans';
  const res = await driver.get(query ? basePath + '?' + query : basePath);

  switch (res.status) {
    case 404:
      return { ok: false, reason: 'seam_unavailable' };
    case 200: {
      const payload = res.json as { spans?: TestSpan[] };
      return { ok: true, data: payload.spans ?? [] };
    }
    default:
      return { ok: false, reason: 'http_error', status: res.status };
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Request a debug bundle from the
 * `POST /v1/host/sample/test/debug-bundle/export` test seam.
 *
 * @param runId - Run identifier sent in the request body.
 * @returns `ok` with the response's `bundle` on HTTP 200; `seam_unavailable`
 *   on 404; `http_error` with the status for any other status. A 200 response
 *   without a `bundle` is reported as `http_error` with a synthetic status 500.
 */
export async function exportDebugBundle(runId: string): Promise<ScrapeOutcome<DebugBundle>> {
  const res = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId });

  if (res.status === 404) {
    return { ok: false, reason: 'seam_unavailable' };
  }
  if (res.status !== 200) {
    return { ok: false, reason: 'http_error', status: res.status };
  }

  const { bundle } = res.json as { bundle?: DebugBundle };
  if (bundle) {
    return { ok: true, data: bundle };
  }
  // 200 without a bundle: surfaced as an error; 500 is a stand-in status,
  // not what the host actually returned.
  return { ok: false, reason: 'http_error', status: 500 };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Probe the OTel spans seam by querying it with the placeholder run id
 * `__probe__`.
 *
 * @returns `true` when the probe query succeeds; `false` for any failure
 *   outcome, whether the seam is missing (404) or another non-200 status.
 */
export async function isOtelSeamAvailable(): Promise<boolean> {
  const { ok } = await queryTestSpans({ runId: '__probe__' });
  return ok;
}
|