@openwop/openwop-conformance 1.0.0 → 1.1.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/CHANGELOG.md +17 -0
  2. package/README.md +31 -6
  3. package/api/grpc/openwop.proto +251 -0
  4. package/api/openapi.yaml +109 -3
  5. package/coverage.md +48 -9
  6. package/fixtures/conformance-configurable-schema.json +39 -0
  7. package/fixtures/conformance-subworkflow-parent.json +1 -1
  8. package/fixtures/conformance-wasm-pack-memory-cap-breach.json +23 -0
  9. package/fixtures/openwop-smoke-byok-roundtrip.json +25 -0
  10. package/fixtures.md +21 -0
  11. package/package.json +3 -1
  12. package/schemas/README.md +4 -0
  13. package/schemas/audit-verify-result.schema.json +90 -0
  14. package/schemas/capabilities.schema.json +293 -1
  15. package/schemas/node-pack-manifest.schema.json +4 -4
  16. package/schemas/pack-lockfile.schema.json +92 -0
  17. package/schemas/registry-version-manifest.schema.json +145 -0
  18. package/schemas/run-event-payloads.schema.json +2 -2
  19. package/schemas/security-advisory.schema.json +109 -0
  20. package/src/lib/a2a-fake-peer.ts +143 -56
  21. package/src/lib/behavior-gate.ts +68 -0
  22. package/src/lib/env.ts +10 -0
  23. package/src/lib/grpc-framing.test.ts +96 -0
  24. package/src/lib/grpc-framing.ts +76 -0
  25. package/src/lib/oidc-issuer.test.ts +328 -0
  26. package/src/lib/oidc-issuer.ts +241 -0
  27. package/src/lib/otel-collector-grpc.test.ts +191 -0
  28. package/src/lib/otel-collector.test.ts +303 -0
  29. package/src/lib/otel-collector.ts +318 -14
  30. package/src/lib/otlp-protobuf.test.ts +461 -0
  31. package/src/lib/otlp-protobuf.ts +529 -0
  32. package/src/scenarios/a2a-task-roundtrip.test.ts +147 -28
  33. package/src/scenarios/agentConfidenceEscalation.test.ts +1 -0
  34. package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +1 -0
  35. package/src/scenarios/agentMemoryRedactionContract.test.ts +1 -0
  36. package/src/scenarios/agentMemoryRoundTrip.test.ts +1 -0
  37. package/src/scenarios/agentMemoryTtlExpiry.test.ts +1 -0
  38. package/src/scenarios/agentMessageReducer.test.ts +1 -0
  39. package/src/scenarios/agentMetadata.test.ts +1 -0
  40. package/src/scenarios/agentPackExport.test.ts +1 -0
  41. package/src/scenarios/agentPackInstall.test.ts +1 -0
  42. package/src/scenarios/agentPackProvenance.test.ts +1 -0
  43. package/src/scenarios/audit-log-integrity.test.ts +3 -6
  44. package/src/scenarios/auth-api-key-rotation.test.ts +182 -0
  45. package/src/scenarios/auth-mtls.test.ts +274 -0
  46. package/src/scenarios/auth-oauth2-client-credentials.test.ts +259 -0
  47. package/src/scenarios/auth-oidc-user-bearer.test.ts +361 -0
  48. package/src/scenarios/bulk-cancel.test.ts +111 -0
  49. package/src/scenarios/configurable-schema.test.ts +48 -0
  50. package/src/scenarios/conversationCapabilityNegotiation.test.ts +1 -0
  51. package/src/scenarios/conversationLifecycle.test.ts +1 -0
  52. package/src/scenarios/conversationReplayDeterminism.test.ts +1 -0
  53. package/src/scenarios/conversationVsLegacySuspend.test.ts +1 -0
  54. package/src/scenarios/debug-bundle-truncation.test.ts +95 -0
  55. package/src/scenarios/discovery.test.ts +183 -0
  56. package/src/scenarios/http-client-ssrf.test.ts +71 -0
  57. package/src/scenarios/idempotency.test.ts +6 -0
  58. package/src/scenarios/idempotencyRetry.test.ts +3 -0
  59. package/src/scenarios/mcp-tool-roundtrip.test.ts +198 -34
  60. package/src/scenarios/mcp-toolcall-redaction.test.ts +66 -0
  61. package/src/scenarios/metric-emission.test.ts +113 -0
  62. package/src/scenarios/orchestratorConservativePath.test.ts +1 -0
  63. package/src/scenarios/orchestratorDispatch.test.ts +1 -0
  64. package/src/scenarios/orchestratorTermination.test.ts +1 -0
  65. package/src/scenarios/otel-emission-grpc.test.ts +98 -0
  66. package/src/scenarios/pause-resume.test.ts +119 -0
  67. package/src/scenarios/production-backpressure.test.ts +342 -0
  68. package/src/scenarios/production-retention-expiry.test.ts +164 -0
  69. package/src/scenarios/registry-public.test.ts +131 -0
  70. package/src/scenarios/replay-llm-cache-key.test.ts +35 -0
  71. package/src/scenarios/replay-retention-expiry.test.ts +178 -0
  72. package/src/scenarios/restart-during-run.test.ts +177 -0
  73. package/src/scenarios/spec-corpus-validity.test.ts +54 -26
  74. package/src/scenarios/staleClaim.test.ts +3 -0
  75. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +67 -10
  76. package/src/scenarios/wasm-pack-memory-cap.test.ts +64 -9
  77. package/src/scenarios/webhook-negative.test.ts +90 -0
  78. package/src/scenarios/webhook-signed-delivery.test.ts +178 -0
  79. package/src/setup.ts +25 -1
  80. package/vitest.config.ts +5 -1
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Track 11: metric-emission verification.
3
+ *
4
+ * Verifies that hosts claiming `capabilities.observability.metrics`
5
+ * emit the canonical `openwop.run.backlog`, `openwop.queue.depth`, and
6
+ * (after at least one completed run) `openwop.run.duration` metrics
7
+ * documented in `spec/v1/observability.md`.
8
+ *
9
+ * Operator contract (same as `otel-emission.test.ts`):
10
+ * 1. Start the conformance suite with `OPENWOP_OTEL_COLLECTOR=true`
11
+ * and `OPENWOP_OTEL_COLLECTOR_PORT=<port>`.
12
+ * 2. Boot the host with `OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:<port>`.
13
+ *
14
+ * Skip conditions:
15
+ * - Collector disabled (`OPENWOP_OTEL_COLLECTOR` unset / false).
16
+ * - Host doesn't advertise `capabilities.observability.metrics.supported` or the `conformance-noop` fixture.
17
+ *
18
+ * @see spec/v1/observability.md §"Metrics"
19
+ */
20
+
21
+ import { describe, it, expect } from 'vitest';
22
+ import { driver } from '../lib/driver.js';
23
+ import { pollUntilTerminal } from '../lib/polling.js';
24
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
25
+ import { getCollector } from '../lib/otel-collector.js';
26
+
27
+ const FIXTURE = 'conformance-noop';
28
+
29
+ interface MetricsCaps {
30
+ supported?: boolean;
31
+ names?: ReadonlyArray<string>;
32
+ }
33
+
34
+ async function metricsAdvertised(): Promise<MetricsCaps | null> {
35
+ try {
36
+ const disco = await driver.get('/.well-known/openwop');
37
+ const caps = (disco.json as {
38
+ capabilities?: { observability?: { metrics?: MetricsCaps } };
39
+ }).capabilities;
40
+ return caps?.observability?.metrics ?? null;
41
+ } catch {
42
+ return null;
43
+ }
44
+ }
45
+
46
+ async function waitForMetric(name: string, timeoutMs = 5_000): Promise<boolean> {
47
+ const collector = getCollector();
48
+ if (!collector) return false;
49
+ const deadline = Date.now() + timeoutMs;
50
+ while (Date.now() < deadline) {
51
+ if (collector.metricByName(name)) return true;
52
+ await new Promise((r) => setTimeout(r, 100));
53
+ }
54
+ return false;
55
+ }
56
+
57
+ describe('metric-emission: canonical openwop.* metrics arrive at the collector', () => {
58
+ it('host emits openwop.run.backlog, openwop.queue.depth, and openwop.run.duration', async () => {
59
+ if (!getCollector()) {
60
+ // eslint-disable-next-line no-console
61
+ console.warn(
62
+ '[metric-emission] collector not started; set OPENWOP_OTEL_COLLECTOR=true to run',
63
+ );
64
+ return;
65
+ }
66
+ const metricsCaps = await metricsAdvertised();
67
+ if (!metricsCaps?.supported) {
68
+ // eslint-disable-next-line no-console
69
+ console.warn(
70
+ '[metric-emission] host does not advertise observability.metrics.supported; skipping',
71
+ );
72
+ return;
73
+ }
74
+ if (!isFixtureAdvertised(FIXTURE)) {
75
+ // eslint-disable-next-line no-console
76
+ console.warn(`[metric-emission] ${FIXTURE} not advertised; skipping`);
77
+ return;
78
+ }
79
+
80
+ const collector = getCollector()!;
81
+ collector.reset();
82
+
83
+ // Drive at least one completed run so openwop.run.duration has a sample.
84
+ const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
85
+ expect(create.status).toBe(201);
86
+ const runId = (create.json as { runId: string }).runId;
87
+ await pollUntilTerminal(runId, { timeoutMs: 10_000 });
88
+
89
+ // Wait for the host's metric-emit tick to land at the collector.
90
+ const sawBacklog = await waitForMetric('openwop.run.backlog', 5_000);
91
+ expect(sawBacklog, driver.describe(
92
+ 'observability.md §"Metrics"',
93
+ 'host claiming metrics MUST emit openwop.run.backlog',
94
+ )).toBe(true);
95
+
96
+ const sawQueueDepth = await waitForMetric('openwop.queue.depth', 5_000);
97
+ expect(sawQueueDepth, driver.describe(
98
+ 'observability.md §"Metrics"',
99
+ 'host claiming metrics MUST emit openwop.queue.depth',
100
+ )).toBe(true);
101
+
102
+ const sawDuration = await waitForMetric('openwop.run.duration', 5_000);
103
+ expect(sawDuration, driver.describe(
104
+ 'observability.md §"Metrics"',
105
+ 'host claiming metrics MUST emit openwop.run.duration after a completed run',
106
+ )).toBe(true);
107
+
108
+ // Shape spot-check: backlog gauge data point has a numeric value.
109
+ const backlog = collector.metricByName('openwop.run.backlog')!;
110
+ expect(backlog.kind).toBe('gauge');
111
+ expect(typeof backlog.dataPoint.value).toBe('number');
112
+ });
113
+ });
@@ -1,5 +1,6 @@
1
1
  /**
2
2
  * Multi-Agent Shift Phase 5 — CP-1 conservative-path orchestrator suspend.
3
+ * Normative reference: RFCS/0006-orchestrator.md
3
4
  *
4
5
  * Verifies the CP-1 invariant: when a `core.orchestrator.supervisor`
5
6
  * would emit a decision with `confidence < escalationThreshold`, the
@@ -1,5 +1,6 @@
1
1
  /**
2
2
  * Multi-Agent Shift Phase 5 — orchestrator → dispatch → next-worker round-trip.
3
+ * Normative reference: RFCS/0006-orchestrator.md
3
4
  *
4
5
  * Verifies that a workflow with `core.orchestrator.supervisor` →
5
6
  * `core.dispatch` topology emits the canonical event sequence:
@@ -1,5 +1,6 @@
1
1
  /**
2
2
  * Multi-Agent Shift Phase 5 — orchestrator terminate decision (CO-3).
3
+ * Normative reference: RFCS/0006-orchestrator.md
3
4
  *
4
5
  * Verifies that when an `core.orchestrator.supervisor` emits a decision
5
6
  * with `kind: 'terminate'`:
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Track 11: OTel span emission over OTLP/gRPC.
3
+ *
4
+ * Verifies that hosts advertising `capabilities.observability.otel.exportProtocols`
5
+ * including `"grpc"` can emit `openwop.*` spans over OTLP/gRPC to the
6
+ * in-suite collector and that the gRPC framing path captures the same
7
+ * `openwop.run` + `openwop.node.*` shape as the HTTP-JSON path.
8
+ *
9
+ * The gRPC collector is the parallel HTTP/2 server inside
10
+ * `OtelCollector` (started via `startGrpc()` in setup.ts when the
11
+ * `OPENWOP_OTEL_COLLECTOR=true` flag is set). The host points its
12
+ * exporter at the printed `:<grpcPort>` (h2c) with
13
+ * `OTEL_EXPORTER_OTLP_PROTOCOL=grpc`. Spans captured over gRPC land
14
+ * in the same store as HTTP — `getCollector().spans()` returns the
15
+ * union.
16
+ *
17
+ * Skip conditions:
18
+ * - Collector disabled (`OPENWOP_OTEL_COLLECTOR` unset / false).
19
+ * - Host does not advertise `capabilities.observability.otel.exportProtocols`
20
+ * including `"grpc"` (presumed not configured for gRPC emission).
21
+ * - Required fixture (`conformance-noop`) not advertised.
22
+ *
23
+ * @see spec/v1/observability.md §"Export protocols"
24
+ * @see conformance/src/lib/otel-collector.ts §_handleGrpcStream
25
+ * @see conformance/src/lib/grpc-framing.ts
26
+ */
27
+
28
+ import { describe, it, expect } from 'vitest';
29
+ import { driver } from '../lib/driver.js';
30
+ import { pollUntilTerminal } from '../lib/polling.js';
31
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
32
+ import { getCollector, waitForRunSpans } from '../lib/otel-collector.js';
33
+
34
+ const FIXTURE = 'conformance-noop';
35
+
36
+ async function advertisesGrpcExport(): Promise<boolean> {
37
+ try {
38
+ const disco = await driver.get('/.well-known/openwop');
39
+ const caps = (disco.json as {
40
+ capabilities?: {
41
+ observability?: { otel?: { exportProtocols?: unknown } };
42
+ };
43
+ }).capabilities;
44
+ const protocols = caps?.observability?.otel?.exportProtocols;
45
+ return Array.isArray(protocols) && protocols.includes('grpc');
46
+ } catch {
47
+ return false;
48
+ }
49
+ }
50
+
51
+ describe('otel-emission-grpc: OTLP/gRPC export path', () => {
52
+ it('host emits openwop.run spans over OTLP/gRPC; collector captures them via the shared store', async () => {
53
+ if (!getCollector()) {
54
+ // eslint-disable-next-line no-console
55
+ console.warn('[otel-emission-grpc] collector not started; set OPENWOP_OTEL_COLLECTOR=true to run');
56
+ return;
57
+ }
58
+ if (!isFixtureAdvertised(FIXTURE)) {
59
+ // eslint-disable-next-line no-console
60
+ console.warn(`[otel-emission-grpc] fixture ${FIXTURE} not advertised; skipping`);
61
+ return;
62
+ }
63
+ if (!(await advertisesGrpcExport())) {
64
+ // eslint-disable-next-line no-console
65
+ console.warn(
66
+ '[otel-emission-grpc] host does not advertise capabilities.observability.otel.exportProtocols including "grpc"; skipping. ' +
67
+ 'Hosts MAY opt into gRPC export by emitting OTLP via the OTLP/gRPC transport and adding `"grpc"` to the array.',
68
+ );
69
+ return;
70
+ }
71
+
72
+ const collector = getCollector()!;
73
+ collector.reset();
74
+
75
+ const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
76
+ expect(create.status).toBe(201);
77
+ const runId = (create.json as { runId: string }).runId;
78
+
79
+ await pollUntilTerminal(runId, { timeoutMs: 15_000 });
80
+
81
+ // gRPC and HTTP-JSON spans both land in `_spans`, so the existing
82
+ // span-query helpers work transparently. If the host emits over
83
+ // both transports, we capture both; the assertion only requires
84
+ // at least one openwop.run span correlated by runId.
85
+ const runSpans = await waitForRunSpans(runId, { timeoutMs: 5_000, minCount: 1 });
86
+
87
+ expect(runSpans.length, driver.describe(
88
+ 'observability.md §"Export protocols" + RFC 0008/0009 Track 11',
89
+ 'host advertising exportProtocols ∋ "grpc" MUST emit openwop.* spans over OTLP/gRPC',
90
+ )).toBeGreaterThan(0);
91
+
92
+ const runSpan = runSpans.find((s) => s.name === 'openwop.run');
93
+ expect(runSpan?.attributes.get('openwop.run_id'), driver.describe(
94
+ 'observability.md §"Run-level attributes"',
95
+ 'openwop.run span MUST carry openwop.run_id attribute',
96
+ )).toBe(runId);
97
+ });
98
+ });
@@ -107,3 +107,122 @@ describe.skipIf(SKIP)('pause/resume: :resume on a non-paused run returns 409', (
107
107
  });
108
108
  });
109
109
  });
110
+
111
+ describe.skipIf(SKIP)('pause/resume: pause is idempotent when already paused', () => {
112
+ it(':pause on an already-paused run is a no-op (200/202) — idempotent', async () => {
113
+ const create = await driver.post('/v1/runs', {
114
+ workflowId: FIXTURE!,
115
+ inputs: { delaySeconds: 30 },
116
+ });
117
+ expect(create.status).toBe(201);
118
+ const runId = (create.json as { runId: string }).runId;
119
+ await pollUntilStatus(runId, 'running', { timeoutMs: 10_000 });
120
+
121
+ const first = await driver.post(`/v1/runs/${encodeURIComponent(runId)}:pause`, {});
122
+ if (first.status === 404) {
123
+ await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
124
+ reason: 'conformance-cleanup',
125
+ });
126
+ return;
127
+ }
128
+ expect([200, 202]).toContain(first.status);
129
+ await pollUntilStatus(runId, 'paused', { timeoutMs: 10_000 });
130
+
131
+ // Idempotent second :pause — MUST NOT 409 just because the run is
132
+ // already paused. 200/202 are both acceptable per the additive
133
+ // contract; 409 would force callers to read state before calling.
134
+ const second = await driver.post(`/v1/runs/${encodeURIComponent(runId)}:pause`, {});
135
+ expect(
136
+ [200, 202].includes(second.status),
137
+ driver.describe(
138
+ 'rest-endpoints.md POST /v1/runs/{runId}:pause',
139
+ ':pause on an already-paused run MUST be idempotent (200/202), not 409',
140
+ ),
141
+ ).toBe(true);
142
+
143
+ await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
144
+ reason: 'conformance-cleanup',
145
+ });
146
+ });
147
+ });
148
+
149
+ describe.skipIf(SKIP)('pause/resume: :pause on a terminal run returns 409', () => {
150
+ it(':pause on a completed/cancelled/failed run MUST return 409', async () => {
151
+ const create = await driver.post('/v1/runs', {
152
+ workflowId: 'conformance-noop',
153
+ });
154
+ if (create.status !== 201) return; // conformance-noop not seeded; skip cleanly
155
+ const runId = (create.json as { runId: string }).runId;
156
+ await pollUntilTerminal(runId, { timeoutMs: 10_000 });
157
+
158
+ const pause = await driver.post(`/v1/runs/${encodeURIComponent(runId)}:pause`, {});
159
+ if (pause.status === 404) return;
160
+ expect(pause.status, driver.describe(
161
+ 'rest-endpoints.md POST /v1/runs/{runId}:pause',
162
+ ':pause on a terminal run MUST return 409',
163
+ )).toBe(409);
164
+
165
+ const body = pause.json as { error?: string; details?: { runStatus?: string } };
166
+ expect(body.error).toBe('conflict');
167
+ // Spec requires `details.runStatus` to disclose the terminal state so
168
+ // the caller can decide whether to retry or surface the conflict.
169
+ expect(['completed', 'failed', 'cancelled']).toContain(body.details?.runStatus);
170
+ });
171
+ });
172
+
173
+ describe.skipIf(SKIP)('pause/resume: :pause-during-suspend race', () => {
174
+ it(':pause MUST NOT silently override an active interrupt suspend', async () => {
175
+ // If the host seeds an approval fixture, drive a suspend then attempt
176
+ // :pause. The expected behavior is that :pause either (a) is rejected with
177
+ // 409 because the run is already waiting-approval (not in a pausable
178
+ // state), or (b) accepts and stacks pause atop the suspend with the
179
+ // run's reported status still being waiting-approval. Either is
180
+ // acceptable; what's NOT acceptable is the host quietly flipping
181
+ // status to `paused` and discarding the suspended interrupt.
182
+ if (!isFixtureAdvertised('conformance-approval')) {
183
+ // eslint-disable-next-line no-console
184
+ console.warn(
185
+ '[pause-resume] conformance-approval not advertised; skipping :pause-during-suspend race subtest',
186
+ );
187
+ return;
188
+ }
189
+ const create = await driver.post('/v1/runs', { workflowId: 'conformance-approval' });
190
+ expect(create.status).toBe(201);
191
+ const runId = (create.json as { runId: string }).runId;
192
+ await pollUntilStatus(runId, 'waiting-approval', { timeoutMs: 10_000 });
193
+
194
+ const pause = await driver.post(`/v1/runs/${encodeURIComponent(runId)}:pause`, {
195
+ reason: 'race-test',
196
+ });
197
+ if (pause.status === 404) {
198
+ // Cleanup.
199
+ await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
200
+ reason: 'conformance-cleanup',
201
+ });
202
+ return;
203
+ }
204
+
205
+ // Either rejection (preferred) or stacked-pause is OK; silent override is not.
206
+ if (pause.status === 409) {
207
+ const body = pause.json as { details?: { runStatus?: string } };
208
+ expect(body.details?.runStatus, driver.describe(
209
+ 'rest-endpoints.md POST /v1/runs/{runId}:pause',
210
+ ':pause-during-suspend MUST surface the active waiting-* status in the conflict envelope',
211
+ )).toMatch(/^waiting-/);
212
+ } else {
213
+ // Stacked-pause accepted: verify the run's reported status still
214
+ // surfaces the underlying suspend — the host MUST NOT lose track
215
+ // of the interrupt waiting for resolution.
216
+ const snap = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
217
+ const status = (snap.json as { status: string }).status;
218
+ expect(
219
+ status === 'paused' || status.startsWith('waiting-'),
220
+ ':pause-during-suspend MUST NOT silently discard the active interrupt',
221
+ ).toBe(true);
222
+ }
223
+
224
+ await driver.post(`/v1/runs/${encodeURIComponent(runId)}/cancel`, {
225
+ reason: 'conformance-cleanup',
226
+ });
227
+ });
228
+ });