@openwop/openwop-conformance 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +241 -0
- package/api/asyncapi.yaml +481 -0
- package/api/openapi.yaml +830 -0
- package/api/redocly.yaml +8 -0
- package/coverage.md +80 -0
- package/dist/cli.js +161 -0
- package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
- package/fixtures/conformance-agent-identity.json +27 -0
- package/fixtures/conformance-agent-low-confidence.json +29 -0
- package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
- package/fixtures/conformance-agent-memory-redaction.json +32 -0
- package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
- package/fixtures/conformance-agent-memory-ttl.json +31 -0
- package/fixtures/conformance-agent-pack-export.json +26 -0
- package/fixtures/conformance-agent-pack-install.json +26 -0
- package/fixtures/conformance-agent-pack-provenance.json +31 -0
- package/fixtures/conformance-agent-reasoning.json +29 -0
- package/fixtures/conformance-approval.json +27 -0
- package/fixtures/conformance-cancellable.json +33 -0
- package/fixtures/conformance-cap-breach.json +27 -0
- package/fixtures/conformance-capability-missing.json +23 -0
- package/fixtures/conformance-channel-ttl.json +60 -0
- package/fixtures/conformance-clarification.json +30 -0
- package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
- package/fixtures/conformance-conversation-lifecycle.json +32 -0
- package/fixtures/conformance-conversation-replay.json +33 -0
- package/fixtures/conformance-conversation-vs-clarification.json +26 -0
- package/fixtures/conformance-delay.json +33 -0
- package/fixtures/conformance-dispatch-loop.json +38 -0
- package/fixtures/conformance-failure.json +23 -0
- package/fixtures/conformance-idempotent.json +30 -0
- package/fixtures/conformance-identity.json +32 -0
- package/fixtures/conformance-interrupt-auth-required.json +28 -0
- package/fixtures/conformance-interrupt-external-event.json +33 -0
- package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
- package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
- package/fixtures/conformance-interrupt-quorum.json +30 -0
- package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
- package/fixtures/conformance-message-reducer.json +31 -0
- package/fixtures/conformance-multi-node.json +21 -0
- package/fixtures/conformance-noop.json +23 -0
- package/fixtures/conformance-orchestrator-dispatch.json +47 -0
- package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
- package/fixtures/conformance-orchestrator-terminate.json +44 -0
- package/fixtures/conformance-stream-text.json +26 -0
- package/fixtures/conformance-subworkflow-child.json +21 -0
- package/fixtures/conformance-subworkflow-parent.json +49 -0
- package/fixtures/conformance-version-fold.json +23 -0
- package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
- package/fixtures/pack-manifests/pack-private-example.json +26 -0
- package/fixtures.md +404 -0
- package/package.json +48 -0
- package/schemas/README.md +75 -0
- package/schemas/agent-manifest.schema.json +107 -0
- package/schemas/agent-ref.schema.json +53 -0
- package/schemas/capabilities.schema.json +287 -0
- package/schemas/channel-written-payload.schema.json +55 -0
- package/schemas/conversation-event.schema.json +120 -0
- package/schemas/conversation-turn.schema.json +72 -0
- package/schemas/debug-bundle.schema.json +196 -0
- package/schemas/dispatch-config.schema.json +46 -0
- package/schemas/error-envelope.schema.json +25 -0
- package/schemas/memory-entry.schema.json +36 -0
- package/schemas/memory-list-options.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +235 -0
- package/schemas/orchestrator-decision.schema.json +60 -0
- package/schemas/run-event-payloads.schema.json +663 -0
- package/schemas/run-event.schema.json +116 -0
- package/schemas/run-options.schema.json +81 -0
- package/schemas/run-orchestrator-decided-event.schema.json +20 -0
- package/schemas/run-snapshot.schema.json +121 -0
- package/schemas/suspend-request.schema.json +182 -0
- package/schemas/workflow-definition.schema.json +430 -0
- package/src/cli.ts +187 -0
- package/src/lib/a2a-fake-peer.ts +233 -0
- package/src/lib/canaries.ts +186 -0
- package/src/lib/driver.ts +96 -0
- package/src/lib/env.ts +49 -0
- package/src/lib/fixtures.ts +93 -0
- package/src/lib/mcp-fake-server.ts +185 -0
- package/src/lib/multi-agent-capabilities.ts +155 -0
- package/src/lib/multiProcess.ts +141 -0
- package/src/lib/otel-collector.ts +312 -0
- package/src/lib/paths.ts +198 -0
- package/src/lib/polling.ts +81 -0
- package/src/lib/profiles.ts +258 -0
- package/src/lib/sse.ts +172 -0
- package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
- package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
- package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
- package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
- package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
- package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
- package/src/scenarios/agentMessageReducer.test.ts +57 -0
- package/src/scenarios/agentMetadata.test.ts +56 -0
- package/src/scenarios/agentPackExport.test.ts +45 -0
- package/src/scenarios/agentPackInstall.test.ts +50 -0
- package/src/scenarios/agentPackProvenance.test.ts +53 -0
- package/src/scenarios/agentReasoningEvents.test.ts +72 -0
- package/src/scenarios/append-ordering.test.ts +91 -0
- package/src/scenarios/approval-payload.test.ts +120 -0
- package/src/scenarios/audit-log-integrity.test.ts +106 -0
- package/src/scenarios/auth.test.ts +55 -0
- package/src/scenarios/byok-roundtrip.test.ts +166 -0
- package/src/scenarios/cancellation.test.ts +68 -0
- package/src/scenarios/cap-breach.test.ts +149 -0
- package/src/scenarios/channel-ttl.test.ts +70 -0
- package/src/scenarios/configurable-schema.test.ts +76 -0
- package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
- package/src/scenarios/conversationLifecycle.test.ts +64 -0
- package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
- package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
- package/src/scenarios/cost-attribution.test.ts +207 -0
- package/src/scenarios/debugBundle.test.ts +222 -0
- package/src/scenarios/discovery.test.ts +147 -0
- package/src/scenarios/dispatchLoop.test.ts +52 -0
- package/src/scenarios/errors.test.ts +144 -0
- package/src/scenarios/eventOrdering.test.ts +144 -0
- package/src/scenarios/failure-path.test.ts +46 -0
- package/src/scenarios/fixtures-gating.test.ts +137 -0
- package/src/scenarios/fixtures-valid.test.ts +140 -0
- package/src/scenarios/highConcurrency.test.ts +263 -0
- package/src/scenarios/idempotency.test.ts +83 -0
- package/src/scenarios/idempotencyRetry.test.ts +130 -0
- package/src/scenarios/identity-passthrough.test.ts +54 -0
- package/src/scenarios/interrupt-approval.test.ts +97 -0
- package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
- package/src/scenarios/interrupt-clarification.test.ts +45 -0
- package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
- package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
- package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
- package/src/scenarios/interruptRace.test.ts +176 -0
- package/src/scenarios/maliciousManifest.test.ts +154 -0
- package/src/scenarios/mcp-discoverability.test.ts +129 -0
- package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
- package/src/scenarios/multi-node-ordering.test.ts +60 -0
- package/src/scenarios/multi-region-idempotency.test.ts +52 -0
- package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
- package/src/scenarios/orchestratorDispatch.test.ts +66 -0
- package/src/scenarios/orchestratorTermination.test.ts +54 -0
- package/src/scenarios/otel-emission.test.ts +113 -0
- package/src/scenarios/otel-trace-propagation.test.ts +90 -0
- package/src/scenarios/pack-registry-publish.test.ts +93 -0
- package/src/scenarios/pack-registry.test.ts +328 -0
- package/src/scenarios/pause-resume.test.ts +109 -0
- package/src/scenarios/policies.test.ts +162 -0
- package/src/scenarios/profileDerivation.test.ts +335 -0
- package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
- package/src/scenarios/rate-limit-envelope.test.ts +97 -0
- package/src/scenarios/redaction.test.ts +254 -0
- package/src/scenarios/redactionAdversarial.test.ts +162 -0
- package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
- package/src/scenarios/replay-fork.test.ts +216 -0
- package/src/scenarios/replayDeterminism.test.ts +171 -0
- package/src/scenarios/route-coverage.test.ts +129 -0
- package/src/scenarios/runs-lifecycle.test.ts +65 -0
- package/src/scenarios/runtime-capabilities.test.ts +118 -0
- package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
- package/src/scenarios/staleClaim.test.ts +223 -0
- package/src/scenarios/stream-modes-buffer.test.ts +148 -0
- package/src/scenarios/stream-modes-mixed.test.ts +149 -0
- package/src/scenarios/stream-modes.test.ts +139 -0
- package/src/scenarios/streamReconnect.test.ts +162 -0
- package/src/scenarios/subworkflow.test.ts +126 -0
- package/src/scenarios/version-negotiation.test.ts +157 -0
- package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
- package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
- package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
- package/src/scenarios/wasm-pack-load.test.ts +75 -0
- package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
- package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
- package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
- package/src/setup.ts +173 -0
- package/vitest.config.ts +17 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Track 6: in-process synthetic A2A peer for state-projection conformance.
|
|
3
|
+
*
|
|
4
|
+
* The A2A protocol (https://a2a-protocol.org/) defines an `AgentCard` for
|
|
5
|
+
* discovery plus a Task lifecycle whose `TaskState` enum drives most of
|
|
6
|
+
* the conformance burden. We expose just enough of the HTTP+JSON
|
|
7
|
+
* transport to let conformance scenarios drive the host through the
|
|
8
|
+
* four documented drift points from `spec/v1/a2a-integration.md`
|
|
9
|
+
* §"State projection".
|
|
10
|
+
*
|
|
11
|
+
* Endpoints (minimal):
|
|
12
|
+
* GET /agent.json — AgentCard
|
|
13
|
+
* POST /tasks — create a task; returns { taskId, state: 'SUBMITTED' }
|
|
14
|
+
* GET /tasks/{taskId} — poll task state + last message
|
|
15
|
+
* POST /tasks/{taskId}/messages — append a message (used by host to resume an INPUT_REQUIRED task)
|
|
16
|
+
*
|
|
17
|
+
* Test fixtures set the *next* state transition via `setNextState(...)`
|
|
18
|
+
* so a single scenario can walk the peer through SUBMITTED → WORKING →
|
|
19
|
+
* INPUT_REQUIRED → COMPLETED (or AUTH_REQUIRED, or REJECTED) without
|
|
20
|
+
* implementing a real agent.
|
|
21
|
+
*
|
|
22
|
+
* @see spec/v1/a2a-integration.md §"State projection"
|
|
23
|
+
* @see https://a2a-protocol.org/latest/specification/
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { createServer, type Server } from 'node:http';
|
|
27
|
+
import type { AddressInfo } from 'node:net';
|
|
28
|
+
|
|
29
|
+
/**
 * Task lifecycle states the fake peer can store and report.
 *
 * New tasks start as `SUBMITTED` unless a test has queued a different state;
 * a posted message moves a task to `COMPLETED` unless overridden.
 */
export type A2ATaskState =
  | 'UNSPECIFIED' | 'SUBMITTED' | 'WORKING'
  | 'INPUT_REQUIRED' | 'AUTH_REQUIRED'
  | 'COMPLETED' | 'FAILED' | 'CANCELED' | 'REJECTED';
|
|
39
|
+
|
|
40
|
+
/** In-memory record the fake peer keeps for each task it has created. */
interface A2ATask {
  /** Identifier handed back to the caller when the task is created. */
  taskId: string;
  /** Current lifecycle state; mutated in place as the task advances. */
  state: A2ATaskState;
  /** Messages attached to the task, in arrival order. */
  messages: { role: 'user' | 'agent'; content: unknown }[];
  /** Optional free-form metadata. */
  metadata?: Record<string, unknown>;
}
|
|
46
|
+
|
|
47
|
+
/** One HTTP request observed by the fake peer, recorded before routing. */
export interface A2APeerInvocation {
  /** HTTP method of the request. */
  readonly method: string;
  /** Raw request URL as received (path plus any query string). */
  readonly path: string;
  /** Parsed JSON body, the raw text when it was not valid JSON, or `null` when empty. */
  readonly body: unknown;
  /** `Date.now()` value taken when the request was recorded. */
  readonly timestamp: number;
}
|
|
53
|
+
|
|
54
|
+
/**
 * In-process synthetic A2A peer served over HTTP on 127.0.0.1.
 *
 * Keeps tasks and a log of every received request in memory so scenarios can
 * drive a host through specific task states and then inspect what the host
 * actually sent.
 */
export class A2AFakePeer {
  private _server: Server | null = null;
  private _boundPort = 0;
  private readonly _tasks = new Map<string, A2ATask>();
  private readonly _invocations: A2APeerInvocation[] = [];
  private _nextStateOverride: A2ATaskState | null = null;
  private _taskIdCounter = 0;

  /**
   * Start listening on 127.0.0.1.
   *
   * @param port Port to bind; `0` (the default) lets the OS pick a free one.
   * @returns Resolves once the server is listening; rejects on a server `error` event.
   */
  async start(port: number = 0): Promise<void> {
    return new Promise((resolve, reject) => {
      const server = createServer((req, res) => {
        // `_handle` answers its own failures; if even that fails, drop the
        // connection rather than leave an unhandled rejection behind.
        void this._handle(req, res).catch(() => res.destroy());
      });
      server.on('error', reject);
      server.listen(port, '127.0.0.1', () => {
        const addr = server.address() as AddressInfo;
        this._server = server;
        this._boundPort = addr.port;
        resolve();
      });
    });
  }

  /** Stop the server if it is running; a no-op when it was never started. */
  async stop(): Promise<void> {
    if (!this._server) return;
    const server = this._server;
    this._server = null;
    return new Promise((resolve, reject) => {
      server.close((err) => (err ? reject(err) : resolve()));
    });
  }

  /** Base URL of the peer, built from the port recorded by the last `start()`. */
  endpoint(): string {
    return `http://127.0.0.1:${this._boundPort}`;
  }

  /** Forget all tasks, recorded invocations, any queued state override, and the id counter. */
  reset(): void {
    this._tasks.clear();
    this._invocations.length = 0;
    this._nextStateOverride = null;
    this._taskIdCounter = 0;
  }

  /** Requests received so far, oldest first. Returns the live internal array. */
  invocations(): readonly A2APeerInvocation[] {
    return this._invocations;
  }

  /** Number of tasks currently stored. */
  taskCount(): number {
    return this._tasks.size;
  }

  /**
   * Queue a one-shot state override.
   *
   * The override is consumed by whichever comes first: the next
   * `POST /tasks` (it becomes the new task's initial state instead of
   * `SUBMITTED`) or the next `POST /tasks/{taskId}/messages` (it becomes the
   * task's state instead of `COMPLETED`). Pass `null` to clear it.
   */
  setNextState(state: A2ATaskState | null): void {
    this._nextStateOverride = state;
  }

  /**
   * Set the state of an existing task directly.
   *
   * @returns `false` when no task has that id, `true` otherwise.
   */
  advanceTask(taskId: string, state: A2ATaskState): boolean {
    const task = this._tasks.get(taskId);
    if (!task) return false;
    task.state = state;
    return true;
  }

  /** Look up a stored task by id; `undefined` when it does not exist. */
  getTask(taskId: string): Readonly<A2ATask> | undefined {
    return this._tasks.get(taskId);
  }

  /**
   * Handle one request: read the body, record the invocation, then route.
   *
   * Never rejects for request-level problems. A malformed percent-escape in
   * the URL is answered with 400 and any other failure with 500.
   */
  private async _handle(
    req: import('node:http').IncomingMessage,
    res: import('node:http').ServerResponse,
  ): Promise<void> {
    try {
      const url = req.url ?? '/';
      const body = await this._readBody(req);

      this._invocations.push({
        method: req.method ?? 'GET',
        path: url,
        body,
        timestamp: Date.now(),
      });

      this._route(req.method, url, body, res);
    } catch (err) {
      if (res.headersSent) {
        // Too late for an error status; just finish the response.
        res.end();
        return;
      }
      const malformed = err instanceof URIError;
      this._sendJson(res, malformed ? 400 : 500, {
        error: malformed ? 'bad_request' : 'internal_error',
      });
    }
  }

  /** Drain the request and decode it: JSON if it parses, raw text otherwise, `null` when empty. */
  private async _readBody(req: import('node:http').IncomingMessage): Promise<unknown> {
    const chunks: Buffer[] = [];
    for await (const chunk of req) chunks.push(chunk as Buffer);
    const text = Buffer.concat(chunks).toString('utf8');
    if (text.length === 0) return null;
    try {
      return JSON.parse(text) as unknown;
    } catch {
      return text;
    }
  }

  /** Dispatch a decoded request to the matching endpoint; unknown routes get 404. */
  private _route(
    method: string | undefined,
    url: string,
    body: unknown,
    res: import('node:http').ServerResponse,
  ): void {
    // GET /agent.json
    if (method === 'GET' && url.startsWith('/agent.json')) {
      this._sendJson(res, 200, {
        protocolVersion: '0.3.0',
        name: 'openwop-conformance-fake-a2a',
        description: 'Synthetic A2A peer for openwop conformance suite',
        url: this.endpoint(),
        version: '1.0.0',
        capabilities: { streaming: false },
        skills: [
          {
            id: 'echo',
            name: 'echo',
            description: 'Returns input verbatim',
          },
        ],
      });
      return;
    }

    // POST /tasks — create task
    if (method === 'POST' && url === '/tasks') {
      const taskId = `task-${++this._taskIdCounter}`;
      const initial: A2ATaskState = this._nextStateOverride ?? 'SUBMITTED';
      this._nextStateOverride = null;
      const task: A2ATask = {
        taskId,
        state: initial,
        // Only object bodies (including arrays) are kept as the first message.
        messages: body && typeof body === 'object' ? [{ role: 'user', content: body }] : [],
      };
      this._tasks.set(taskId, task);
      this._sendJson(res, 200, { taskId, state: task.state });
      return;
    }

    // GET /tasks/{taskId}
    const getMatch = /^\/tasks\/([^/?]+)$/.exec(url);
    if (method === 'GET' && getMatch) {
      // decodeURIComponent throws URIError on malformed escapes; _handle maps that to 400.
      const task = this._tasks.get(decodeURIComponent(getMatch[1] ?? ''));
      if (!task) {
        this._sendJson(res, 404, { error: 'not_found' });
        return;
      }
      this._sendJson(res, 200, task);
      return;
    }

    // POST /tasks/{taskId}/messages — host resumes an INPUT_REQUIRED task
    const msgMatch = /^\/tasks\/([^/?]+)\/messages$/.exec(url);
    if (method === 'POST' && msgMatch) {
      const task = this._tasks.get(decodeURIComponent(msgMatch[1] ?? ''));
      if (!task) {
        this._sendJson(res, 404, { error: 'not_found' });
        return;
      }
      task.messages.push({ role: 'user', content: body });
      // Simple roundtrip: the message completes the task. Tests that need a
      // different outcome queue it with setNextState() before posting.
      task.state = this._nextStateOverride ?? 'COMPLETED';
      this._nextStateOverride = null;
      this._sendJson(res, 200, { taskId: task.taskId, state: task.state });
      return;
    }

    this._sendJson(res, 404, { error: 'not_found' });
  }

  /** Write a JSON response with the given status code. */
  private _sendJson(
    res: import('node:http').ServerResponse,
    status: number,
    payload: unknown,
  ): void {
    res.writeHead(status, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(payload));
  }
}
|
|
224
|
+
|
|
225
|
+
/** Module-level slot holding the peer shared between setup code and scenarios. */
let _instance: A2AFakePeer | null = null;

/**
 * Register the shared fake peer.
 *
 * @param p Peer to expose through `getA2AFakePeer()`, or `null` to clear the slot.
 */
export function setA2AFakePeer(p: A2AFakePeer | null): void {
  _instance = p;
}

/** Return the peer registered with `setA2AFakePeer`, or `null` when none is set. */
export function getA2AFakePeer(): A2AFakePeer | null {
  return _instance;
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vendor-neutral canary fixtures + leak detector for redaction
|
|
3
|
+
* conformance scenarios.
|
|
4
|
+
*
|
|
5
|
+
* This is the spec-side companion to host-specific redaction harnesses.
|
|
6
|
+
* Hosts running `@openwop/openwop-conformance` get vendor-neutral assertions that
|
|
7
|
+
* their server doesn't leak secrets in observable surfaces — without
|
|
8
|
+
* pulling in any host-specific code.
|
|
9
|
+
*
|
|
10
|
+
* **Why this is here:** spec rule NFR-7 (`capabilities.md` §"Secrets")
|
|
11
|
+
* — any code path that emits a `RunEvent`, OTel span, log line, error
|
|
12
|
+
* message, or exported artifact MUST NOT contain raw key material. The
|
|
13
|
+
* conformance suite needs to verify this against the live HTTP surface
|
|
14
|
+
* of any OpenWOP-compliant server.
|
|
15
|
+
*
|
|
16
|
+
* Canary values are built via runtime string concatenation, NOT as
|
|
17
|
+
* contiguous string literals, so static-analysis secret scanners
|
|
18
|
+
* (TruffleHog, gitleaks) don't flag this file. The runtime-assembled
|
|
19
|
+
* strings have the exact same shape that real-world keys do.
|
|
20
|
+
*
|
|
21
|
+
* @see capabilities.md §"Secrets" + NFR-7
|
|
22
|
+
* @see scenarios/redaction.test.ts
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/**
 * Marker substring embedded in every canary. The detector searches for it,
 * and anyone reading a leaked log line can tell at a glance that the value is
 * a test fixture rather than a real key.
 */
export const CANARY_MARKER = 'CANARY-openwop-CONFORMANCE-NEVER-SECRET';

/**
 * Assemble a canary as `<prefix><marker>-<body>`.
 *
 * Pure and deterministic. The value is put together at runtime, not written
 * as one contiguous literal, so static secret scanners do not flag this file.
 *
 * @param prefix Provider-style key prefix, e.g. `sk-`.
 * @param body Fixed tail appended after the marker and a dash.
 */
function buildCanary(prefix: string, body: string): string {
  return [prefix, CANARY_MARKER, '-', body].join('');
}
|
|
39
|
+
|
|
40
|
+
/**
 * A labelled canary. Wrapping the string in an object keeps callers from
 * passing a canary around as if it were an ordinary string.
 */
export interface Canary {
  /** Provider or format name, e.g. "openai" or "jwt-bearer". */
  readonly label: string;
  /** Synthetic key/token text; always contains the canary marker. */
  readonly value: string;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Canonical canary set. Every value imitates the prefix shape of a common
 * provider credential and embeds the marker substring, so a hit is
 * unambiguous: real production secrets do not contain the marker.
 *
 * Shapes imitated (approximate — server-side regex redactors should key on
 * the prefix, not on an exact length):
 *   - OpenAI:          `sk-...` or `sk-proj-...`
 *   - Anthropic:       `sk-ant-...`
 *   - Google API key:  `AIza...`
 *   - JWT:             `base64url.base64url.base64url`
 *   - Opaque BYOK secret IDs: vendor-defined
 */
export const CANARIES: readonly Canary[] = [
  { label: 'openai', value: buildCanary('sk-', 'oai9Lt7Nw2QrZ0aB8mYjPpQe') },
  // Prefix is split in two so the literal never appears contiguously in source.
  { label: 'anthropic', value: buildCanary('sk-' + 'ant-', 'ant3Ko0LqFqzv9Sb1J7mNcR') },
  { label: 'google', value: 'AIza' + CANARY_MARKER + 'Goog12345abcdef9876' },
  {
    label: 'jwt-bearer',
    // header segment, payload segment, then the marker inside the signature segment
    value: [
      'eyJhbGciOiJIUzI1NiJ9.',
      'eyJjYW5hcnkiOnRydWV9.',
      CANARY_MARKER,
      '-jwt-signature-xyz',
    ].join(''),
  },
  { label: 'byok-credential-ref', value: buildCanary('cred_', 'OpaqueRefAlphaNumX9Y8Z7') },
];
|
|
86
|
+
|
|
87
|
+
/** One detected leak: which canary (or bare marker) was found, and where. */
export interface CanaryLeak {
  /** Label of the matched canary, or `marker-only` for a bare marker hit. */
  readonly label: string;
  /** The text that matched: a full canary value or the marker itself. */
  readonly value: string;
  /** Zero-based offset in the scanned text where the match starts. */
  readonly position: number;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Scan a captured surface (response body, header value, etc.) for canary
 * content and return one entry per occurrence.
 *
 * Two passes:
 *   1. Every occurrence of every full canary value, labelled with that
 *      canary's label.
 *   2. Every occurrence of the bare marker that is not already inside a
 *      pass-1 match, labelled `marker-only`. This catches partial leaks,
 *      e.g. a server that echoed only a substring of the key.
 *
 * @param text Text to scan. An empty string yields no leaks.
 * @returns Pass-1 hits (in `CANARIES` order, then by position) followed by
 *   `marker-only` hits in position order.
 */
export function findCanaryLeaks(text: string): readonly CanaryLeak[] {
  const exact: CanaryLeak[] = [];

  // Pass 1 — all occurrences, not just the first, so a repeated full canary
  // is not downgraded to a `marker-only` hit by pass 2.
  for (const canary of CANARIES) {
    if (canary.value.length === 0) continue; // an empty needle would never advance
    let from = 0;
    for (;;) {
      const pos = text.indexOf(canary.value, from);
      if (pos === -1) break;
      exact.push({ label: canary.label, value: canary.value, position: pos });
      from = pos + canary.value.length;
    }
  }

  // Pass 2 — marker-only fallback, skipping markers covered by an exact match.
  const partial: CanaryLeak[] = [];
  let scanFrom = 0;
  while (scanFrom < text.length) {
    const pos = text.indexOf(CANARY_MARKER, scanFrom);
    if (pos === -1) break;
    const covered = exact.some(
      (hit) => hit.position <= pos && pos < hit.position + hit.value.length,
    );
    if (!covered) {
      partial.push({ label: 'marker-only', value: CANARY_MARKER, position: pos });
    }
    scanFrom = pos + CANARY_MARKER.length;
  }

  return [...exact, ...partial];
}
|
|
138
|
+
|
|
139
|
+
/**
 * Look up a canary by its label.
 *
 * @throws Error when no canary in `CANARIES` carries that label.
 */
export function getCanary(label: string): Canary {
  for (const candidate of CANARIES) {
    if (candidate.label === label) return candidate;
  }
  throw new Error(`Unknown canary label: ${label}`);
}
|
|
145
|
+
|
|
146
|
+
/**
 * Flatten a captured value (HTTP response, JSON body, etc.) into one string
 * the leak detector can scan.
 *
 * - Strings are returned unchanged.
 * - Anything `JSON.stringify` can encode is returned as its JSON text.
 * - Values `JSON.stringify` rejects (circular references, BigInt) are
 *   re-encoded with a tolerant replacer so their string content is still
 *   scannable, not collapsed to `[object Object]`.
 * - Values `JSON.stringify` encodes as nothing (`undefined`, functions,
 *   symbols) fall back to `String(captured)`, so the result is always a string.
 */
export function captureToText(captured: unknown): string {
  if (typeof captured === 'string') return captured;

  let encoded: string | undefined;
  try {
    // Returns undefined (despite its declared type) for undefined/function/symbol input.
    encoded = JSON.stringify(captured);
  } catch {
    encoded = stringifyLossy(captured);
  }
  return encoded ?? String(captured);
}

/**
 * JSON-encode a value that plain `JSON.stringify` rejected: BigInts become
 * decimal strings and any object seen a second time becomes `"[Circular]"`.
 * Returns `undefined` if encoding still fails.
 */
function stringifyLossy(value: unknown): string | undefined {
  const seen = new WeakSet<object>();
  try {
    return JSON.stringify(value, (_key, item: unknown) => {
      if (typeof item === 'bigint') return item.toString();
      if (typeof item === 'object' && item !== null) {
        if (seen.has(item)) return '[Circular]';
        seen.add(item);
      }
      return item;
    });
  } catch {
    return undefined;
  }
}
|
|
159
|
+
|
|
160
|
+
/**
 * Conformance assertion: fail with a descriptive error when the captured
 * text contains any canary content.
 *
 * @param capturedText Text of the surface under inspection.
 * @param surfaceLabel Human-readable surface name, quoted in the error.
 * @throws Error listing each leak with up to 32 characters of context on
 *   either side of the match.
 */
export function assertNoCanaryLeak(
  capturedText: string,
  surfaceLabel: string,
): void {
  const found = findCanaryLeaks(capturedText);
  if (found.length > 0) {
    const lines: string[] = [];
    for (const leak of found) {
      // Excerpt window: 32 chars before the match through 32 chars after it.
      const from = Math.max(0, leak.position - 32);
      const to = Math.min(capturedText.length, leak.position + leak.value.length + 32);
      lines.push(` - [${leak.label}] @${leak.position}: ...${capturedText.slice(from, to)}...`);
    }
    const details = lines.join('\n');

    throw new Error(
      `Canary leak detected in surface "${surfaceLabel}":\n${details}\n\n` +
        `Per capabilities.md §"Secrets" + NFR-7, this surface MUST NOT ` +
        `echo back canary content. Either redact at the emission boundary ` +
        `or document the surface as out-of-scope for redaction obligations.`,
    );
  }
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenWOPDriver — thin HTTP client wrapper used by all conformance scenarios.
|
|
3
|
+
*
|
|
4
|
+
* Why a wrapper rather than raw fetch in every test:
|
|
5
|
+
* 1. Auth header is applied once.
|
|
6
|
+
* 2. URL composition is consistent (base + path).
|
|
7
|
+
* 3. Failure messages cite the implementation name + version so log
|
|
8
|
+
* output identifies the server under test.
|
|
9
|
+
* 4. JSON decoding is best-effort: a non-JSON body leaves `json` undefined, and the raw body is always kept in `text` for debugging.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { loadEnv } from './env.js';
|
|
13
|
+
|
|
14
|
+
/** Decoded result of a driver request. */
export interface OpenWOPResponse {
  /** HTTP status code of the response. */
  readonly status: number;
  /** Response headers as returned by `fetch`. */
  readonly headers: Headers;
  /** Raw response body. */
  readonly text: string;
  /** Parsed body, or `undefined` when the body was empty or not valid JSON. */
  readonly json: unknown;
}
|
|
20
|
+
|
|
21
|
+
/** Per-request options accepted by the driver. */
export interface OpenWOPRequestInit {
  /** Extra request headers; these take precedence over the driver's defaults. */
  readonly headers?: Record<string, string>;
  /** Request payload; JSON-encoded before sending when present. */
  readonly body?: unknown;
  /** Set to `false` to send the request without the bearer credential. */
  readonly authenticated?: boolean;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Thin `fetch` wrapper that applies the base URL, default headers and the
 * bearer credential from the conformance environment.
 */
class OpenWOPDriver {
  /**
   * Issue a request and return the decoded response.
   *
   * Header names are matched case-insensitively, so a caller-supplied
   * `content-type` or `accept` replaces the driver default and is never sent
   * alongside it. The bearer credential replaces any caller-supplied
   * `Authorization` header unless `init.authenticated` is `false`.
   *
   * @param method HTTP method.
   * @param path Path appended verbatim to the configured base URL.
   * @param init Optional headers, body and auth toggle.
   * @returns Status, headers, raw text and best-effort JSON; `json` is
   *   `undefined` when the body was empty or not valid JSON.
   */
  async request(
    method: string,
    path: string,
    init: OpenWOPRequestInit = {},
  ): Promise<OpenWOPResponse> {
    const env = loadEnv();
    const url = `${env.baseUrl}${path}`;

    const headers: Record<string, string> = { ...(init.headers ?? {}) };
    if (this.matchingHeaderNames(headers, 'accept').length === 0) {
      headers.Accept = 'application/json';
    }
    if (
      init.body !== undefined &&
      this.matchingHeaderNames(headers, 'content-type').length === 0
    ) {
      headers['Content-Type'] = 'application/json';
    }
    if (init.authenticated !== false) {
      // Drop every case-variant first; fetch would otherwise merge the
      // caller's value and ours into a single combined header.
      for (const name of this.matchingHeaderNames(headers, 'authorization')) {
        delete headers[name];
      }
      headers.Authorization = `Bearer ${env.apiKey}`;
    }

    const fetchInit: RequestInit = { method, headers };
    if (init.body !== undefined) {
      fetchInit.body = JSON.stringify(init.body);
    }
    const res = await fetch(url, fetchInit);

    const text = await res.text();
    let json: unknown;
    try {
      json = text.length > 0 ? JSON.parse(text) : undefined;
    } catch {
      // Not JSON — callers still have the raw body in `text`.
      json = undefined;
    }

    return {
      status: res.status,
      headers: res.headers,
      text,
      json,
    };
  }

  /** Shorthand for `request('GET', ...)`. */
  get(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
    return this.request('GET', path, init);
  }

  /** Shorthand for `request('POST', ...)`; `body` overrides any `init.body`. */
  post(path: string, body: unknown, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
    return this.request('POST', path, { ...init, body });
  }

  /** Shorthand for `request('DELETE', ...)`. */
  delete(path: string, init: OpenWOPRequestInit = {}): Promise<OpenWOPResponse> {
    return this.request('DELETE', path, init);
  }

  /**
   * Compose a "spec failure" message naming the implementation under test
   * and the spec section that requires the assertion. Intended as the
   * message argument of an assertion.
   */
  describe(specSection: string, requirement: string): string {
    const env = loadEnv();
    return `[${env.implementationName}@${env.implementationVersion}] ${specSection}: ${requirement}`;
  }

  /** Keys of `headers` equal to `lowerName` ignoring case. */
  private matchingHeaderNames(headers: Record<string, string>, lowerName: string): string[] {
    return Object.keys(headers).filter((name) => name.toLowerCase() === lowerName);
  }
}
|
|
95
|
+
|
|
96
|
+
/** Module-level driver instance exported for the conformance scenarios to import. */
export const driver = new OpenWOPDriver();
|
package/src/lib/env.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Env-var validation for the openwop conformance suite.
|
|
3
|
+
*
|
|
4
|
+
* Required:
|
|
5
|
+
* OPENWOP_BASE_URL — the server root, e.g., https://api.example.com
|
|
6
|
+
* OPENWOP_API_KEY — credential for runs:read / manifest:read scopes
|
|
7
|
+
*
|
|
8
|
+
* Optional (cosmetic — surfaced in failure messages):
|
|
9
|
+
* OPENWOP_IMPLEMENTATION_NAME — e.g., "acme-openwop-server"
|
|
10
|
+
* OPENWOP_IMPLEMENTATION_VERSION — e.g., "1.0"
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/** Validated settings read from the `OPENWOP_*` environment variables. */
export interface ConformanceEnv {
  /** Server root with the trailing slash removed. */
  readonly baseUrl: string;
  /** Credential used for authenticated requests. */
  readonly apiKey: string;
  /** Name of the implementation under test; "unknown" when not supplied. */
  readonly implementationName: string;
  /** Version of the implementation under test; "unknown" when not supplied. */
  readonly implementationVersion: string;
}
|
|
19
|
+
|
|
20
|
+
/** Memoised result of the first successful `loadEnv()` call. */
let cached: ConformanceEnv | null = null;

/**
 * Read and validate the conformance environment variables.
 *
 * The first successful result is cached for the life of the process; later
 * calls return the same object without re-reading `process.env`. A failed
 * call caches nothing, so it can be retried once the variables are set.
 *
 * @returns The validated environment.
 * @throws Error when `OPENWOP_BASE_URL` or `OPENWOP_API_KEY` is missing or blank.
 */
export function loadEnv(): ConformanceEnv {
  if (cached) return cached;

  const baseUrl = process.env.OPENWOP_BASE_URL?.trim();
  const apiKey = process.env.OPENWOP_API_KEY?.trim();

  if (!baseUrl) {
    throw new Error(
      'OPENWOP_BASE_URL env var is required. Example: OPENWOP_BASE_URL=https://api.example.com',
    );
  }
  if (!apiKey) {
    throw new Error(
      'OPENWOP_API_KEY env var is required. See auth.md for credential format.',
    );
  }

  // Strip every trailing slash so `${baseUrl}${path}` never produces `//`.
  const normalizedBase = baseUrl.replace(/\/+$/, '');

  cached = {
    baseUrl: normalizedBase,
    apiKey,
    // `||` is deliberate: a variable set to an empty or whitespace-only
    // string should fall back to "unknown" just like an unset one.
    implementationName: process.env.OPENWOP_IMPLEMENTATION_NAME?.trim() || 'unknown',
    implementationVersion: process.env.OPENWOP_IMPLEMENTATION_VERSION?.trim() || 'unknown',
  };
  return cached;
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture-gating helper (RFC 0003).
|
|
3
|
+
*
|
|
4
|
+
* Reads the host's `/.well-known/openwop` `fixtures` array at suite init,
|
|
5
|
+
* caches the advertised fixture-id set, and exposes a sync predicate
|
|
6
|
+
* (`isFixtureAdvertised`) so each scenario can gate with
|
|
7
|
+
* `it.skipIf(!isFixtureAdvertised('conformance-noop'))`.
|
|
8
|
+
*
|
|
9
|
+
* Why a separate module from `profiles.ts`:
|
|
10
|
+
* - Profiles answer "does the host claim this surface?" (binary).
|
|
11
|
+
* - Fixtures answer "did the host claim THIS specific fixture id?"
|
|
12
|
+
* (per-id). Different shape; profile-style derivation can't carry it.
|
|
13
|
+
*
|
|
14
|
+
* Cache lifecycle:
|
|
15
|
+
* - `setAdvertisedFixtures(...)` populates the cache from a discovery
|
|
16
|
+
* payload. Idempotent — calling repeatedly with the same payload is
|
|
17
|
+
* a no-op. Calling with a different payload replaces the cache.
|
|
18
|
+
* - `isFixtureAdvertised(...)` returns false until the cache is set
|
|
19
|
+
* (defensive default — when the cache hasn't loaded yet, scenarios
|
|
20
|
+
* skip rather than fail). The setupFile populates it before any
|
|
21
|
+
* `describe()` runs.
|
|
22
|
+
* - `getAdvertisedFixtures()` returns the cached set or null. Used by
|
|
23
|
+
* `discovery.test.ts` to assert the field shape end-to-end.
|
|
24
|
+
* - `__resetForTests()` clears the cache for unit tests of this module.
|
|
25
|
+
*
|
|
26
|
+
* This module is sync. The async fetch lives in `setup.ts` which calls
|
|
27
|
+
* `setAdvertisedFixtures(...)` from a top-level `await`.
|
|
28
|
+
*
|
|
29
|
+
* @see spec/v1/capabilities.md §`fixtures`
|
|
30
|
+
* @see spec/v1/profiles.md §`openwop-fixtures`
|
|
31
|
+
* @see RFCS/0003-fixture-gating.md
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
import type { DiscoveryPayload } from './profiles.js';
|
|
35
|
+
|
|
36
|
+
// Module-level cache of advertised fixture ids. Starts as `null`; assigned a
// set by `setAdvertisedFixtures(...)` and put back to `null` by `__resetForTests()`.
let _advertisedFixtures: ReadonlySet<string> | null = null;
|
|
37
|
+
|
|
38
|
+
/**
 * Replace the cached fixture-id set using the `fixtures` field of a
 * discovery payload.
 *
 * A `null`/`undefined` payload, or a `fixtures` value that is not an
 * array, results in an empty set ("no fixtures advertised") rather than
 * an error. Within an array, only non-empty strings are kept; every
 * other entry is dropped.
 *
 * @param c - Discovery payload, or `null`/`undefined` when none is available.
 */
export function setAdvertisedFixtures(c: DiscoveryPayload | null | undefined): void {
  const advertised = new Set<string>();
  const candidates: unknown = c?.fixtures;
  if (Array.isArray(candidates)) {
    for (const entry of candidates) {
      if (typeof entry === 'string' && entry.length > 0) {
        advertised.add(entry);
      }
    }
  }
  _advertisedFixtures = advertised;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Synchronous check for whether a fixture id is in the cached set.
 *
 * Returns `false` while the cache is still `null`, i.e. before
 * `setAdvertisedFixtures(...)` has been called.
 *
 * Intended as the predicate for `it.skipIf(...)` / `describe.skipIf(...)`:
 *
 *   describe.skipIf(!isFixtureAdvertised('conformance-noop'))(
 *     'runs-lifecycle: ...',
 *     () => { it('...', async () => { ... }); },
 *   );
 *
 * @param id - Fixture id to look up.
 * @returns `true` only when the cache is populated and contains `id`.
 */
export function isFixtureAdvertised(id: string): boolean {
  if (_advertisedFixtures === null) {
    return false;
  }
  return _advertisedFixtures.has(id);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Expose the cached fixture-id set.
 *
 * @returns The cached set, or `null` when `setAdvertisedFixtures(...)`
 *   has not been called. Used by `discovery.test.ts` and by consumers
 *   that need the full advertised set.
 */
export function getAdvertisedFixtures(): ReadonlySet<string> | null {
  const current = _advertisedFixtures;
  return current;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Report whether the fixture cache has been populated.
 *
 * An empty set still counts as populated, which lets callers tell "host
 * advertises no fixtures" apart from "setAdvertisedFixtures has not been
 * called yet."
 *
 * @returns `true` when the cache is anything other than `null`.
 */
export function isFixtureCacheReady(): boolean {
  if (_advertisedFixtures === null) {
    return false;
  }
  return true;
}
|
|
89
|
+
|
|
90
|
+
/** Test-only: set the module-level fixture cache back to `null`, its initial value. */
|
|
91
|
+
export function __resetForTests(): void {
|
|
92
|
+
_advertisedFixtures = null;
|
|
93
|
+
}
|