@openwop/openwop-conformance 1.15.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/README.md +2 -2
- package/coverage.md +4 -2
- package/package.json +1 -1
- package/schemas/run-event-payloads.schema.json +2 -2
- package/src/lib/budgetPolicy.ts +63 -0
- package/src/lib/event-log-query.ts +18 -0
- package/src/lib/otel-collector.ts +34 -4
- package/src/scenarios/agent-deployment-lifecycle.test.ts +82 -59
- package/src/scenarios/agent-eval-run.test.ts +95 -68
- package/src/scenarios/agent-platform-aggregate-evidence.test.ts +68 -0
- package/src/scenarios/agent-platform-profile.test.ts +5 -4
- package/src/scenarios/budget-enforcement.test.ts +152 -0
- package/src/scenarios/otel-collector-canary-inspection.test.ts +50 -0
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +35 -10
- package/src/scenarios/trigger-bridge-delivery.test.ts +92 -56
|
@@ -104,6 +104,44 @@ function metricsPayload(metricName: string, attrs: Record<string, string>): unkn
|
|
|
104
104
|
};
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
+
// NOTE: assertions here intentionally use bare `expect(...)` rather than
|
|
108
|
+
// `expect(..., driver.describe('spec.md §section', 'requirement'))`. This is a
|
|
109
|
+
// HARNESS self-test — it verifies the conformance collector's own
|
|
110
|
+
// `findCanaryLeakage()` inspector, not a host's compliance with a spec
|
|
111
|
+
// requirement, so there is no spec section to cite (consistent with other
|
|
112
|
+
// library-level tests, e.g. `sandbox-wasm-isolation.test.ts`). The
|
|
113
|
+
// host-facing, spec-citing assertion lives in the collector-export block of
|
|
114
|
+
// `secret-leakage-otel-attribute.test.ts`.
|
|
115
|
+
/**
 * Build an OTLP/JSON traces export with `spanCount` spans that all hang off
 * ONE shared resource (hence one set of resource attributes). Used to prove
 * that a resource-attribute leak is deduped to a single hit rather than being
 * reported once per span.
 *
 * Span ids are emitted as 16 lowercase hex characters (the OTLP/JSON encoding
 * of an 8-byte id). Numbering starts at 1 so every id is unique and none is the
 * all-zero id, which the OpenTelemetry spec defines as invalid.
 *
 * @param spanCount - Number of spans to place under the single shared resource.
 * @param resourceAttrs - Resource attributes, encoded as OTLP string key/values.
 * @returns An export body with exactly one `resourceSpans` entry.
 */
function multiSpanSharedResourcePayload(spanCount: number, resourceAttrs: Record<string, string>): unknown {
  // OTLP/JSON encodes attributes as a list of { key, value: { stringValue } } pairs.
  const toAttrs = (m: Record<string, string>) =>
    Object.entries(m).map(([key, value]) => ({ key, value: { stringValue: value } }));
  return {
    resourceSpans: [
      {
        resource: { attributes: toAttrs(resourceAttrs) },
        scopeSpans: [
          {
            scope: { name: 'openwop' },
            spans: Array.from({ length: spanCount }, (_unused, i) => ({
              traceId: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
              // Hex-encoded 8-byte id; `i + 1` keeps the first span off the invalid all-zero id.
              spanId: (i + 1).toString(16).padStart(16, '0'),
              name: `openwop.node.execute.${i}`,
              startTimeUnixNano: '1',
              endTimeUnixNano: '2',
              attributes: toAttrs({ 'openwop.node.id': `n${i}` }),
            })),
          },
        ],
      },
    ],
  };
}
|
|
144
|
+
|
|
107
145
|
describe('otel-collector-canary-inspection: collector inspects real OTLP exports', () => {
|
|
108
146
|
let collector: OtelCollector | null = null;
|
|
109
147
|
|
|
@@ -179,6 +217,18 @@ describe('otel-collector-canary-inspection: collector inspects real OTLP exports
|
|
|
179
217
|
expect(metricLeak!.emitterName).toBe('openwop.node.duration');
|
|
180
218
|
});
|
|
181
219
|
|
|
220
|
+
it('dedups a resource-attribute leak to ONE hit even when shared across many spans', async () => {
|
|
221
|
+
collector = new OtelCollector();
|
|
222
|
+
await collector.start();
|
|
223
|
+
// 5 spans sharing one resource whose attribute leaks the canary. Without
|
|
224
|
+
// dedup this would report 5 identical resource-attribute hits.
|
|
225
|
+
await postTraces(multiSpanSharedResourcePayload(5, { 'service.name': 'host', 'deployment.token': CANARY }));
|
|
226
|
+
|
|
227
|
+
const leaks = collector.findCanaryLeakage(CANARY);
|
|
228
|
+
const resourceLeaks = leaks.filter((l) => l.surface === 'span.resourceAttribute' && l.key === 'deployment.token');
|
|
229
|
+
expect(resourceLeaks.length).toBe(1);
|
|
230
|
+
});
|
|
231
|
+
|
|
182
232
|
it('reports ZERO hits when the host redacts the canary before export (positive control)', async () => {
|
|
183
233
|
collector = new OtelCollector();
|
|
184
234
|
await collector.start();
|
|
@@ -109,17 +109,42 @@ async function readEvents(runId: string): Promise<RunEventDoc[]> {
|
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
/**
 * Field names whose values may legitimately differ between an original run
 * and its replay: freshly-minted event ids/ULIDs, the run id, and per-region
 * clock fields (RFC 0036 §E carve-out). These names are removed wherever they
 * occur — including NESTED inside payloads — so the byte-equivalence
 * comparison tolerates only these carve-outs and flags any other divergence.
 */
const VOLATILE_KEYS = new Set<string>([
  // Per-run identifiers.
  'eventId', 'runId',
  // Clock fields.
  'observedAt', 'timestamp', 'occurredAt', 'emittedAt',
  // Generic identifier field.
  'id',
]);
|
|
127
|
+
|
|
128
|
+
/**
 * Recursively strip {@link VOLATILE_KEYS} from an event so two runs of the
 * same workflow are comparable. Recurses into nested objects and arrays (a
 * host that buries a clock or ULID inside a payload is normalized too),
 * leaving every non-volatile field intact for the equivalence assertion.
 *
 * The event is first round-tripped through JSON, so the walk only ever sees
 * plain JSON values and the input is never mutated. Objects are rebuilt with
 * `Object.fromEntries`, which defines own data properties: a payload key
 * literally named `__proto__` is kept as ordinary data rather than being
 * swallowed by the prototype setter, which would hide it from the comparison.
 *
 * @param ev - The run event to normalize.
 * @returns A deep copy of `ev` with every volatile key removed at every depth.
 */
function stripVolatile(ev: RunEventDoc): unknown {
  const walk = (node: unknown): unknown => {
    if (Array.isArray(node)) return node.map(walk);
    if (node !== null && typeof node === 'object') {
      return Object.fromEntries(
        Object.entries(node as Record<string, unknown>)
          .filter(([k]) => !VOLATILE_KEYS.has(k))
          .map(([k, v]): [string, unknown] => [k, walk(v)]),
      );
    }
    // Primitives (string, number, boolean, null) pass through unchanged.
    return node;
  };
  return walk(JSON.parse(JSON.stringify(ev)));
}
|
|
124
149
|
|
|
125
150
|
/** Create the fixture run; returns null (with a skip) if it isn't advertised. */
|
|
@@ -36,7 +36,7 @@ import {
|
|
|
36
36
|
DELIVERY_OUTCOMES,
|
|
37
37
|
SUBSCRIPTION_STATES,
|
|
38
38
|
} from '../lib/triggerBridge.js';
|
|
39
|
-
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
39
|
+
import { queryTestEvents, requireEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
40
40
|
|
|
41
41
|
const CONTENT_FREE_FORBIDDEN = ['body', 'headers', 'payload', 'secret', 'credentials', 'token', 'apiKey'];
|
|
42
42
|
|
|
@@ -57,69 +57,105 @@ describe('trigger-bridge-delivery (RFC 0083 §C)', () => {
|
|
|
57
57
|
// ---- Leg 1: dedup → effectively-once (§C-1) ---------------------------
|
|
58
58
|
const dedup = await driveDelivery({ scenario: 'dedup', dedupKey: 'conformance-dedup-key', source: 'queue' });
|
|
59
59
|
if (dedup === null) return; // delivery seam unwired — soft-skip the whole behavioral suite
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
60
|
+
|
|
61
|
+
// The profile is derived AND the seam is wired — missing evidence is a
|
|
62
|
+
// FAILURE, not a soft-skip. A repeated dedupKey MUST be effectively-once:
|
|
63
|
+
// EXACTLY one delivered attempt for the key (zero would mean no delivery at all).
|
|
64
|
+
const dedupQueryId = dedup.runId ?? '__dedup__';
|
|
65
|
+
const dedupEvents = requireEvents(
|
|
66
|
+
await queryTestEvents(dedupQueryId, { type: 'trigger.delivery.attempted' }),
|
|
67
|
+
'trigger.delivery.attempted (dedup)',
|
|
68
|
+
);
|
|
69
|
+
const deliveredForKey = dedupEvents.filter(
|
|
70
|
+
(e) => e.payload.dedupKey === 'conformance-dedup-key' && e.payload.outcome === 'delivered',
|
|
71
|
+
);
|
|
72
|
+
expect(
|
|
73
|
+
deliveredForKey.length === 1,
|
|
74
|
+
driver.describe('trigger-bridge.md §C-1', 'a repeated dedupKey MUST be effectively-once — EXACTLY one delivered attempt (not zero, not two)'),
|
|
75
|
+
).toBe(true);
|
|
76
|
+
for (const e of dedupEvents) {
|
|
77
|
+
expect(
|
|
78
|
+
typeof e.payload.outcome === 'string' && DELIVERY_OUTCOMES.includes(e.payload.outcome as string),
|
|
79
|
+
driver.describe('run-event-payloads.schema.json#triggerDeliveryAttempted', 'outcome MUST be delivered|retrying|dead-lettered'),
|
|
80
|
+
).toBe(true);
|
|
81
|
+
expectContentFree(e.payload, 'trigger.delivery.attempted');
|
|
81
82
|
}
|
|
82
83
|
|
|
83
84
|
// ---- Leg 2: retry → dead-letter (§C-2 + RFC 0053) --------------------
|
|
84
85
|
const exhaust = await driveDelivery({ scenario: 'exhaust', source: 'webhook' });
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
86
|
+
expect(
|
|
87
|
+
exhaust !== null,
|
|
88
|
+
driver.describe('trigger-bridge.md §C-2', 'the exhaust scenario MUST be wired when the delivery seam is'),
|
|
89
|
+
).toBe(true);
|
|
90
|
+
const exKey = exhaust!.runId ?? '__exhaust__';
|
|
91
|
+
const exhaustEvents = requireEvents(
|
|
92
|
+
await queryTestEvents(exKey, { type: 'trigger.delivery.attempted' }),
|
|
93
|
+
'trigger.delivery.attempted (exhaust)',
|
|
94
|
+
);
|
|
95
|
+
expect(
|
|
96
|
+
exhaustEvents.length >= 1,
|
|
97
|
+
driver.describe('trigger-bridge.md §C-2', 'an exhausted delivery MUST emit ≥1 trigger.delivery.attempted'),
|
|
98
|
+
).toBe(true);
|
|
99
|
+
const terminal = exhaustEvents.sort((a, b) => a.sequence - b.sequence)[exhaustEvents.length - 1]!;
|
|
100
|
+
expect(
|
|
101
|
+
terminal.payload.outcome === 'dead-lettered',
|
|
102
|
+
driver.describe('trigger-bridge.md §C-2', 'an exhausted retry policy MUST terminate in a dead-lettered delivery'),
|
|
103
|
+
).toBe(true);
|
|
104
|
+
const stateEvents = requireEvents(
|
|
105
|
+
await queryTestEvents(exKey, { type: 'trigger.subscription.state.changed' }),
|
|
106
|
+
'trigger.subscription.state.changed (exhaust)',
|
|
107
|
+
);
|
|
108
|
+
expect(
|
|
109
|
+
stateEvents.length >= 1,
|
|
110
|
+
driver.describe('trigger-bridge.md §B', 'exhaustion MUST emit ≥1 trigger.subscription.state.changed'),
|
|
111
|
+
).toBe(true);
|
|
112
|
+
expect(
|
|
113
|
+
stateEvents.some((e) => e.payload.toState === 'dead-lettered'),
|
|
114
|
+
driver.describe('trigger-bridge.md §B', 'the subscription MUST transition to dead-lettered on exhaustion'),
|
|
115
|
+
).toBe(true);
|
|
116
|
+
for (const e of stateEvents) {
|
|
117
|
+
expect(
|
|
118
|
+
typeof e.payload.toState === 'string' && SUBSCRIPTION_STATES.includes(e.payload.toState as string),
|
|
119
|
+
driver.describe('trigger-bridge.md §B', 'toState MUST be in the four-state vocabulary'),
|
|
120
|
+
).toBe(true);
|
|
121
|
+
expectContentFree(e.payload, 'trigger.subscription.state.changed');
|
|
110
122
|
}
|
|
111
123
|
|
|
112
124
|
// ---- Leg 3: delivery → run causation (§C / RFC 0040) -----------------
|
|
125
|
+
// §C: "the run started by a successful delivery MUST carry the delivery's
|
|
126
|
+
// id as causationId on its run.started." The delivery's id is the
|
|
127
|
+
// trigger.delivery.attempted{delivered} event's id, so we assert EQUALITY
|
|
128
|
+
// (not merely "a causation id exists") — the trigger→run link MUST resolve.
|
|
113
129
|
const delivered = await driveDelivery({ scenario: 'deliver', source: 'schedule' });
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
130
|
+
expect(
|
|
131
|
+
delivered !== null && typeof delivered.runId === 'string' && (delivered.runId as string).length > 0,
|
|
132
|
+
driver.describe('trigger-bridge.md §C', 'a successful delivery MUST create a run'),
|
|
133
|
+
).toBe(true);
|
|
134
|
+
const deliveredRunId = delivered!.runId as string;
|
|
135
|
+
const attemptEvents = requireEvents(
|
|
136
|
+
await queryTestEvents(deliveredRunId, { type: 'trigger.delivery.attempted' }),
|
|
137
|
+
'trigger.delivery.attempted (deliver)',
|
|
138
|
+
);
|
|
139
|
+
const deliveredEvent = attemptEvents.find((e) => e.payload.outcome === 'delivered');
|
|
140
|
+
expect(
|
|
141
|
+
deliveredEvent !== undefined,
|
|
142
|
+
driver.describe('trigger-bridge.md §C-1', 'a successful delivery MUST emit a trigger.delivery.attempted{outcome:delivered}'),
|
|
143
|
+
).toBe(true);
|
|
144
|
+
const runStartedEvents = requireEvents(
|
|
145
|
+
await queryTestEvents(deliveredRunId, { type: 'run.started' }),
|
|
146
|
+
'run.started (deliver)',
|
|
147
|
+
);
|
|
148
|
+
expect(
|
|
149
|
+
runStartedEvents.length >= 1,
|
|
150
|
+
driver.describe('trigger-bridge.md §C', 'a delivered run MUST emit run.started'),
|
|
151
|
+
).toBe(true);
|
|
152
|
+
const runStarted = runStartedEvents.sort((a, b) => a.sequence - b.sequence)[0]!;
|
|
153
|
+
expect(
|
|
154
|
+
typeof runStarted.causationId === 'string' &&
|
|
155
|
+
(runStarted.causationId as string).length > 0 &&
|
|
156
|
+
runStarted.causationId === deliveredEvent!.eventId,
|
|
157
|
+
driver.describe('trigger-bridge.md §C / RFC 0040', 'run.started.causationId MUST EQUAL the delivery id (the trigger.delivery.attempted{delivered} eventId) — resolvable via /ancestry'),
|
|
158
|
+
).toBe(true);
|
|
123
159
|
|
|
124
160
|
await resetTestSeam();
|
|
125
161
|
});
|