@openwop/openwop-conformance 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +70 -0
- package/api/openapi.yaml +268 -1
- package/coverage.md +30 -2
- package/fixtures/oauth-providers/synthetic.json +38 -0
- package/fixtures.md +10 -0
- package/package.json +1 -1
- package/schemas/README.md +12 -0
- package/schemas/agent-deployment-transition.schema.json +49 -0
- package/schemas/agent-deployment.schema.json +54 -0
- package/schemas/agent-eval-suite.schema.json +140 -0
- package/schemas/agent-inventory-response.schema.json +25 -0
- package/schemas/agent-manifest.schema.json +5 -0
- package/schemas/agent-org-chart.schema.json +82 -0
- package/schemas/agent-ref.schema.json +12 -2
- package/schemas/agent-roster-entry.schema.json +81 -0
- package/schemas/agent-roster-response.schema.json +21 -0
- package/schemas/budget-policy.schema.json +18 -0
- package/schemas/capabilities.schema.json +277 -0
- package/schemas/credential-provenance.schema.json +18 -0
- package/schemas/eval-summary.schema.json +92 -0
- package/schemas/node-pack-manifest.schema.json +17 -0
- package/schemas/org-chart-responsibility-view.schema.json +26 -0
- package/schemas/run-event-payloads.schema.json +286 -3
- package/schemas/run-event.schema.json +19 -0
- package/schemas/tool-descriptor.schema.json +63 -0
- package/schemas/trigger-subscription.schema.json +26 -0
- package/src/lib/agentRoster.ts +76 -0
- package/src/lib/liveRuntime.ts +59 -0
- package/src/lib/profiles.ts +157 -0
- package/src/lib/runtimeRequires.ts +38 -0
- package/src/lib/safeFetch.ts +87 -0
- package/src/scenarios/agent-deployment-shape.test.ts +139 -0
- package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
- package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
- package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
- package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
- package/src/scenarios/agent-live-structured-output.test.ts +58 -0
- package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
- package/src/scenarios/agent-platform-profile.test.ts +158 -0
- package/src/scenarios/agent-roster-attribution.test.ts +179 -0
- package/src/scenarios/agent-roster-shape.test.ts +146 -0
- package/src/scenarios/budget-policy-shape.test.ts +136 -0
- package/src/scenarios/egress-provenance-shape.test.ts +137 -0
- package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
- package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
- package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
- package/src/scenarios/runtime-requires-shape.test.ts +134 -0
- package/src/scenarios/safefetch-behavior.test.ts +99 -0
- package/src/scenarios/safefetch-live-audit.test.ts +175 -0
- package/src/scenarios/spec-corpus-validity.test.ts +19 -3
- package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
- package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
- package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent evaluation — suite + summary + event shapes (RFC 0081).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free schema-shape probe. Verifies that:
|
|
5
|
+
* - `capabilities.agents.evalSuite` is declared with its `supported` / `modes`
|
|
6
|
+
* sub-flags.
|
|
7
|
+
* - the `AgentEvalSuite` + `EvalSummary` schemas compile and round-trip a
|
|
8
|
+
* conforming artifact, and reject malformed ones (a bad `suiteId`; a
|
|
9
|
+
* `thresholds.passScore` out of 0..1).
|
|
10
|
+
* - the `eval.started` / `eval.scored` / `eval.completed` payload $defs
|
|
11
|
+
* validate conforming content-free payloads and reject malformed ones.
|
|
12
|
+
* - both the summary and the per-task `eval.scored` payload are CONTENT-FREE:
|
|
13
|
+
* an `EvalSummary` carrying a task-output body and a `safetyFinding` carrying
|
|
14
|
+
* an excerpt are rejected. This is the public test for the protocol-tier
|
|
15
|
+
* SECURITY invariant `eval-summary-no-content-leak`.
|
|
16
|
+
* - all three event names appear in the RunEventType enum.
|
|
17
|
+
*
|
|
18
|
+
* Behavioral assertions (the eval-run event ordering, per-task scoring, the
|
|
19
|
+
* EvalSummary round-trip against a live host, the `mode: "eval"` 501 on
|
|
20
|
+
* unadvertised hosts) are gated on `capabilities.agents.evalSuite.supported` and
|
|
21
|
+
* land in `agent-eval-run.test.ts` (deferred per RFC 0081 §Conformance — reference
|
|
22
|
+
* host deferred). This scenario asserts the wire contract, not host behavior.
|
|
23
|
+
*
|
|
24
|
+
* Spec references:
|
|
25
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-evaluation.md
|
|
26
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0081-agent-evaluation-and-scorecards.md
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (eval-summary-no-content-leak)
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { describe, it, expect } from 'vitest';
|
|
31
|
+
import { readFileSync } from 'node:fs';
|
|
32
|
+
import { join } from 'node:path';
|
|
33
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
34
|
+
import addFormats from 'ajv-formats';
|
|
35
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
36
|
+
|
|
37
|
+
/**
 * Formats an assertion message as "spec — requirement".
 *
 * Server-free counterpart of `driver.describe`: it yields the same message
 * shape but does not need OPENWOP_BASE_URL to be set.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
|
|
39
|
+
|
|
40
|
+
/**
 * Reads a schema file from the directory named by `SCHEMAS_DIR` and parses it as JSON.
 *
 * @param name - File name inside `SCHEMAS_DIR`, e.g. `capabilities.schema.json`.
 * @returns The parsed document, typed as a loose record; its shape is not validated here.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
|
|
43
|
+
|
|
44
|
+
describe('agent-eval-suite-shape: capability advertisement (RFC 0081, server-free)', () => {
  // Minimal view of a JSON Schema object node: only the nested `properties` map is read.
  type SchemaNode = { properties?: Record<string, SchemaNode> };

  it('the capabilities schema declares agents.evalSuite with its sub-flags', () => {
    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const evalSuiteNode = topLevel.agents?.properties?.evalSuite;

    // The capability object itself must be declared...
    expect(evalSuiteNode, why('capabilities.md §agents', 'agents.evalSuite MUST be declared')).toBeDefined();

    // ...and so must each of its sub-flags.
    ['supported', 'modes'].forEach((flag) => {
      const declared = evalSuiteNode?.properties?.[flag];
      expect(
        declared,
        why('agent-evaluation.md §Capability advertisement', `agents.evalSuite.${flag} MUST be declared`),
      ).toBeDefined();
    });
  });
});
|
|
61
|
+
|
|
62
|
+
describe('agent-eval-suite-shape: AgentEvalSuite + EvalSummary schemas (RFC 0081, server-free)', () => {
  // One Ajv instance compiles both schemas; `allErrors` makes it collect every violation.
  const schemaCompiler = new Ajv2020({ strict: false, allErrors: true });
  addFormats(schemaCompiler);
  const validateSuite = schemaCompiler.compile(loadSchema('agent-eval-suite.schema.json'));
  const validateSummary = schemaCompiler.compile(loadSchema('eval-summary.schema.json'));

  it('AgentEvalSuite validates a conforming suite and rejects a malformed suiteId / out-of-range threshold', () => {
    const conformingSuite = {
      suiteId: 'core.openwop.evals.support-resolver',
      version: '1.0.0',
      modes: ['golden', 'regression'],
      thresholds: { passScore: 0.8 },
      tasks: [
        {
          taskId: 'refund-window',
          input: { q: 'refund policy?' },
          expected: { kind: 'golden', match: { strategy: 'contains', value: '30 days' } },
        },
      ],
    };

    const acceptsConforming = validateSuite(conformingSuite);
    expect(acceptsConforming, why('RFC 0081 §A', 'a conforming AgentEvalSuite MUST validate')).toBe(true);

    // Negative: the suiteId lacks the `.evals.` infix.
    const acceptsMissingInfix = validateSuite({ ...conformingSuite, suiteId: 'core.openwop.support-resolver' });
    expect(
      acceptsMissingInfix,
      why('RFC 0081 §A', 'a suiteId without the `.evals.` infix MUST be rejected'),
    ).toBe(false);

    // Negative: passScore lies outside 0..1.
    const acceptsHighThreshold = validateSuite({ ...conformingSuite, thresholds: { passScore: 1.5 } });
    expect(acceptsHighThreshold, why('RFC 0081 §A', 'thresholds.passScore > 1 MUST be rejected')).toBe(false);
  });

  it('EvalSummary validates a conforming scorecard and rejects an out-of-range score', () => {
    const conformingSummary = {
      suiteId: 'core.openwop.evals.support-resolver',
      suiteVersion: '1.0.0',
      aggregateScore: 0.86,
      passed: true,
      taskCount: 2,
      passedCount: 2,
      tasks: [
        {
          taskId: 'refund-window',
          score: 0.9,
          passed: true,
          safetyFindings: [{ kind: 'jailbreak', severity: 'low' }],
        },
      ],
    };

    const acceptsConforming = validateSummary(conformingSummary);
    expect(acceptsConforming, why('RFC 0081 §C', 'a conforming EvalSummary MUST validate')).toBe(true);

    const acceptsHighScore = validateSummary({ ...conformingSummary, aggregateScore: 1.4 });
    expect(acceptsHighScore, why('RFC 0081 §C', 'aggregateScore > 1 MUST be rejected')).toBe(false);
  });

  it('EvalSummary is content-free — a task-output body and a safety-finding excerpt are rejected (eval-summary-no-content-leak)', () => {
    const summaryHeader = {
      suiteId: 'core.openwop.evals.x',
      suiteVersion: '1.0.0',
      aggregateScore: 0.5,
      passed: false,
      taskCount: 1,
      passedCount: 0,
    };

    // Negative: the per-task entry carries the output body.
    const taskWithOutputBody = { taskId: 't1', score: 0.5, passed: false, taskOutput: 'the model said …' };
    expect(
      validateSummary({ ...summaryHeader, tasks: [taskWithOutputBody] }),
      why('SECURITY invariant eval-summary-no-content-leak', 'an EvalSummary task entry MUST NOT carry an output body'),
    ).toBe(false);

    // Negative: the safety finding carries excerpted content instead of a bare {kind, severity} descriptor.
    const taskWithExcerpt = {
      taskId: 't1',
      score: 0.5,
      passed: false,
      safetyFindings: [{ kind: 'pii-leak', severity: 'high', excerpt: 'SSN 123-45-6789' }],
    };
    expect(
      validateSummary({ ...summaryHeader, tasks: [taskWithExcerpt] }),
      why('SECURITY invariant eval-summary-no-content-leak', 'a safetyFinding MUST NOT carry excerpted content'),
    ).toBe(false);
  });
});
|
|
113
|
+
|
|
114
|
+
describe('agent-eval-suite-shape: eval event payloads (RFC 0081, server-free)', () => {
  const payloads = loadSchema('run-event-payloads.schema.json');
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  // Registered under the key 'payloads' so each payload $def can be fetched by JSON-pointer fragment.
  ajv.addSchema(payloads, 'payloads');

  // getSchema returns undefined for a missing $def; every test asserts existence before calling.
  const started = ajv.getSchema('payloads#/$defs/evalStarted');
  const scored = ajv.getSchema('payloads#/$defs/evalScored');
  const completed = ajv.getSchema('payloads#/$defs/evalCompleted');

  it('eval.started validates a content-free start record and requires the suite provenance', () => {
    expect(started, 'the evalStarted $def MUST exist').toBeTruthy();
    expect(
      started!({ suiteId: 'core.openwop.evals.support-resolver', suiteVersion: '1.0.0', taskCount: 12, modes: ['golden'] }),
      why('RFC 0081 §C', 'a conforming eval.started payload MUST validate'),
    ).toBe(true);
    // Negative: only suiteId present — the provenance fields are deliberately all missing.
    expect(
      started!({ suiteId: 'core.openwop.evals.x' }),
      why('RFC 0081 §C', 'eval.started without suiteVersion/taskCount/modes MUST be rejected'),
    ).toBe(false);
  });

  it('eval.scored validates a content-free per-task score and requires score + passed', () => {
    expect(scored, 'the evalScored $def MUST exist').toBeTruthy();
    expect(
      scored!({ taskId: 'refund-window', score: 0.9, passed: true, costUsd: 0.012 }),
      why('RFC 0081 §C', 'a conforming eval.scored payload MUST validate'),
    ).toBe(true);
    // Negative: score and passed are deliberately both missing.
    expect(
      scored!({ taskId: 'refund-window' }),
      why('RFC 0081 §C', 'eval.scored without score/passed MUST be rejected'),
    ).toBe(false);
  });

  it('eval.completed validates a content-free aggregate record', () => {
    expect(completed, 'the evalCompleted $def MUST exist').toBeTruthy();
    const conforming = { aggregateScore: 0.86, passed: true, taskCount: 12, passedCount: 11, regressionVsBaseline: 0.04 };
    expect(
      completed!(conforming),
      why('RFC 0081 §C', 'a conforming eval.completed payload MUST validate'),
    ).toBe(true);
    // Negative: identical to the conforming record except for aggregateScore, so a
    // rejection can only come from the range constraint — not from fields that a
    // bare `{ aggregateScore: 2 }` would also be missing.
    expect(
      completed!({ ...conforming, aggregateScore: 2 }),
      why('RFC 0081 §C', 'eval.completed with an out-of-range aggregateScore MUST be rejected'),
    ).toBe(false);
  });

  it('all three eval event names appear in the RunEventType enum', () => {
    const runEvent = loadSchema('run-event.schema.json');
    // Falls back to an empty list when the $def or its enum is absent, so the assertions below fail cleanly.
    const enumVals = (runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum ?? [];
    expect(enumVals).toContain('eval.started');
    expect(enumVals).toContain('eval.scored');
    expect(enumVals).toContain('eval.completed');
  });
});
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live manifest-dispatch tool-allowlist enforcement (RFC 0077 §F-1) —
|
|
3
|
+
* behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Gated on `capabilities.agents.liveRuntime.supported` (root-first per RFC 0073).
|
|
6
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
7
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`.
|
|
8
|
+
*
|
|
9
|
+
* Asserts the §F-1 safety carry-forward: a live invocation MUST NOT call a tool
|
|
10
|
+
* outside the agent's `toolAllowlist` (the per-tool application of the RFC 0002
|
|
11
|
+
* §A14 mandatory-allowlist floor). Driven by the `attemptTool` seam param naming
|
|
12
|
+
* a disallowed tool; the invocation MUST NOT emit an `agent.toolCalled` for it
|
|
13
|
+
* (a refused/failed outcome is acceptable, a silent successful call is not).
|
|
14
|
+
* Soft-skips when the seam/hook is unwired.
|
|
15
|
+
*
|
|
16
|
+
* Spec references:
|
|
17
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
|
|
18
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md (§F-1)
|
|
19
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0002-agent-identity-and-handoff.md (§A14 toolAllowlist)
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { describe, it, expect } from 'vitest';
|
|
23
|
+
import { driver } from '../lib/driver.js';
|
|
24
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
25
|
+
import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
|
|
26
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
27
|
+
|
|
28
|
+
/** Tool id passed to the live-invoke seam as `attemptTool`; it must never show up in an `agent.toolCalled` event. */
const DISALLOWED_TOOL = 'conformance-disallowed-tool';

describe('agent-live-allowlist-enforced (RFC 0077 §F-1)', () => {
  it('does not call a tool outside the agent toolAllowlist', async () => {
    const cap = await readLiveRuntimeCap();
    if (!behaviorGate('openwop-live-allowlist-enforced', cap?.supported === true)) return;

    if (!(await isEventLogSeamAvailable())) return; // soft-skip
    const res = await invokeLive({ source: 'run-api', attemptTool: DISALLOWED_TOOL });
    if (res === null || !res.runId) return; // seam/hook absent — soft-skip

    // A run now exists on the host. Reset the seam on every exit path from here
    // (early return, failed assertion, success) so a failing assertion does not
    // skip the cleanup that the success path performs.
    try {
      const q = await queryTestEvents(res.runId, { type: 'agent.toolCalled' });
      if (!q.ok) return;

      // Hosts may name the tool under `tool`, `toolId` or `name`; accept any of them.
      const calledDisallowed = q.events.some((e) => {
        const tool = e.payload.tool ?? e.payload.toolId ?? e.payload.name;
        return tool === DISALLOWED_TOOL;
      });
      expect(
        calledDisallowed === false,
        driver.describe('RFC 0077 §F-1 / RFC 0002 §A14', 'a live invocation MUST NOT call a tool outside the agent toolAllowlist'),
      ).toBe(true);
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live manifest-dispatch invocation bracket (RFC 0077 §E) — behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Gated on `capabilities.agents.liveRuntime.supported` (root-first per RFC 0073).
|
|
5
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
6
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
7
|
+
* `agent-live-runtime-shape.test.ts`; this asserts host BEHAVIOR: a live
|
|
8
|
+
* invocation brackets its `agent.*` family with
|
|
9
|
+
* `agent.invocation.started` (FIRST agent-scoped event) and
|
|
10
|
+
* `agent.invocation.completed` (LAST), with a matching `invocationId`, a
|
|
11
|
+
* `source` in the enum, an `outcome` in the enum, and both events content-free
|
|
12
|
+
* (no prompt/result body).
|
|
13
|
+
*
|
|
14
|
+
* Drives the OPTIONAL `POST /v1/host/sample/agents/live-invoke` seam + reads the
|
|
15
|
+
* bracket back via the test event-log seam (both deferred per RFC 0077
|
|
16
|
+
* §Conformance — soft-skip on 404).
|
|
17
|
+
*
|
|
18
|
+
* Spec references:
|
|
19
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
|
|
20
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { describe, it, expect } from 'vitest';
|
|
24
|
+
import { driver } from '../lib/driver.js';
|
|
25
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
26
|
+
import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
|
|
27
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
28
|
+
|
|
29
|
+
/** Values accepted for `agent.invocation.started.source`. */
const SOURCES = ['workflow-node', 'run-api', 'chat-mention'];
/** Values accepted for `agent.invocation.completed.outcome`. */
const OUTCOMES = ['completed', 'handed-off', 'escalated', 'refused', 'failed'];
/**
 * True for every event type in the `agent.` namespace. Both bracket events
 * (`agent.invocation.started` / `agent.invocation.completed`) share that prefix,
 * so no separate equality checks are needed.
 */
const AGENT_SCOPED = (t: string): boolean => t.startsWith('agent.');
|
|
32
|
+
|
|
33
|
+
describe('agent-live-invocation-bracket (RFC 0077 §E)', () => {
  it('brackets a live invocation with started-first / completed-last + matching invocationId, content-free', async () => {
    const cap = await readLiveRuntimeCap();
    if (!behaviorGate('openwop-live-invocation-bracket', cap?.supported === true)) return;

    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip
    const res = await invokeLive({ source: 'run-api' });
    if (res === null || !res.runId) return; // live-invoke seam absent — soft-skip

    // A run now exists on the host. Reset the seam on every exit path from here
    // (early return, failed assertion, success) so a failing assertion does not
    // skip the cleanup that the success path performs.
    try {
      const q = await queryTestEvents(res.runId);
      if (!q.ok) return;
      // Sort a copy by sequence so the ordering checks do not depend on query order.
      const events = q.events.slice().sort((a, b) => a.sequence - b.sequence);

      const started = events.filter((e) => e.type === 'agent.invocation.started');
      const completed = events.filter((e) => e.type === 'agent.invocation.completed');
      expect(
        started.length >= 1 && completed.length >= 1,
        driver.describe('multi-agent-execution.md §"Live manifest dispatch"', 'a live invocation MUST emit agent.invocation.started + agent.invocation.completed'),
      ).toBe(true);
      if (started.length === 0 || completed.length === 0) return;

      // The bracket is the earliest `started` and the latest `completed`.
      const start = started[0]!;
      const end = completed[completed.length - 1]!;

      // §E ordering: started is the FIRST agent-scoped event, completed the LAST.
      const agentScoped = events.filter((e) => AGENT_SCOPED(e.type));
      expect(
        agentScoped[0]?.type === 'agent.invocation.started',
        driver.describe('RFC 0077 §E', 'agent.invocation.started MUST be the first agent-scoped event of the invocation'),
      ).toBe(true);
      expect(
        agentScoped[agentScoped.length - 1]?.type === 'agent.invocation.completed',
        driver.describe('RFC 0077 §E', 'agent.invocation.completed MUST be the last agent-scoped event of the invocation'),
      ).toBe(true);

      // Matching invocationId across the bracket.
      const startId = start.payload.invocationId;
      const endId = end.payload.invocationId;
      expect(
        typeof startId === 'string' && startId === endId,
        driver.describe('run-event-payloads.schema.json#agentInvocation*', 'the bracket MUST share one invocationId'),
      ).toBe(true);

      // Enum discipline.
      expect(
        typeof start.payload.source === 'string' && SOURCES.includes(start.payload.source as string),
        driver.describe('run-event-payloads.schema.json#agentInvocationStarted', 'source MUST be workflow-node|run-api|chat-mention'),
      ).toBe(true);
      expect(
        typeof end.payload.outcome === 'string' && OUTCOMES.includes(end.payload.outcome as string),
        driver.describe('run-event-payloads.schema.json#agentInvocationCompleted', 'outcome MUST be in the closed enum'),
      ).toBe(true);

      // Content-free: identifiers + metadata only, never prompt/result body.
      for (const evt of [start, end]) {
        for (const forbidden of ['prompt', 'result', 'body', 'input', 'output', 'apiKey', 'secret', 'credentials', 'token']) {
          expect(
            !(forbidden in evt.payload),
            driver.describe('RFC 0077', `agent.invocation.* MUST be content-free (no ${forbidden})`),
          ).toBe(true);
        }
      }
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live manifest dispatch — capability + invocation-event shapes (RFC 0077).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free schema-shape probe. Verifies that:
|
|
5
|
+
* - `capabilities.agents.liveRuntime` is declared on the capabilities schema
|
|
6
|
+
* (with the `supported` / `structuredOutput` / `confidenceEscalation` /
|
|
7
|
+
* `sources` sub-flags).
|
|
8
|
+
* - the `agent.invocation.started` + `agent.invocation.completed` payload
|
|
9
|
+
* $defs validate conforming content-free payloads and reject malformed
|
|
10
|
+
* ones (a `started` missing `source`; a `completed` with an out-of-enum
|
|
11
|
+
* `outcome`).
|
|
12
|
+
* - both event names appear in the RunEventType enum.
|
|
13
|
+
*
|
|
14
|
+
* Behavioral assertions (the started→completed bracket ordering, structured-
|
|
15
|
+
* output enforcement, toolAllowlist enforcement) are gated on
|
|
16
|
+
* `capabilities.agents.liveRuntime.supported` and soft-skip until a reference
|
|
17
|
+
* host wires the live-invoke seam (RFC 0077 §Conformance — reference host
|
|
18
|
+
* deferred). This scenario asserts the wire contract, not host behavior.
|
|
19
|
+
*
|
|
20
|
+
* Spec references:
|
|
21
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md §"Live manifest dispatch"
|
|
22
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from 'vitest';
|
|
26
|
+
import { readFileSync } from 'node:fs';
|
|
27
|
+
import { join } from 'node:path';
|
|
28
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
29
|
+
import addFormats from 'ajv-formats';
|
|
30
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
31
|
+
|
|
32
|
+
/**
 * Formats an assertion message as "spec — requirement".
 *
 * Server-free counterpart of `driver.describe`: it yields the same message
 * shape but does not need OPENWOP_BASE_URL to be set.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
|
|
34
|
+
|
|
35
|
+
/**
 * Reads a schema file from the directory named by `SCHEMAS_DIR` and parses it as JSON.
 *
 * @param name - File name inside `SCHEMAS_DIR`, e.g. `capabilities.schema.json`.
 * @returns The parsed document, typed as a loose record; its shape is not validated here.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
|
|
38
|
+
|
|
39
|
+
describe('agent-live-runtime-shape: capability advertisement (RFC 0077, server-free)', () => {
  // Minimal view of a JSON Schema object node: only the nested `properties` map is read.
  type SchemaNode = { properties?: Record<string, SchemaNode> };

  it('the capabilities schema declares agents.liveRuntime with its sub-flags', () => {
    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const liveRuntimeNode = topLevel.agents?.properties?.liveRuntime;

    // The capability object itself must be declared...
    expect(liveRuntimeNode, why('capabilities.md §agents', 'agents.liveRuntime MUST be declared')).toBeDefined();

    // ...and so must each of its sub-flags.
    ['supported', 'structuredOutput', 'confidenceEscalation', 'sources'].forEach((flag) => {
      const declared = liveRuntimeNode?.properties?.[flag];
      expect(
        declared,
        why('multi-agent-execution.md §Live manifest dispatch', `agents.liveRuntime.${flag} MUST be declared`),
      ).toBeDefined();
    });
  });
});
|
|
56
|
+
|
|
57
|
+
describe('agent-live-runtime-shape: invocation event payloads (RFC 0077, server-free)', () => {
  const payloads = loadSchema('run-event-payloads.schema.json');
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  // Registered under the key 'payloads' so each payload $def can be fetched by JSON-pointer fragment.
  ajv.addSchema(payloads, 'payloads');

  // getSchema returns undefined for a missing $def; every test asserts existence before calling.
  const started = ajv.getSchema('payloads#/$defs/agentInvocationStarted');
  const completed = ajv.getSchema('payloads#/$defs/agentInvocationCompleted');

  it('agent.invocation.started validates a content-free start record and requires source', () => {
    expect(started, 'the agentInvocationStarted $def MUST exist').toBeTruthy();
    expect(
      started!({ invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', source: 'run-api', modelClass: 'coding', toolSurfaceCount: 3, memoryBound: false }),
      why('RFC 0077 §C', 'a conforming agent.invocation.started payload MUST validate'),
    ).toBe(true);
    // Negative: missing source — every invocation must record its entry point.
    // Every other field of the conforming record is kept, so the rejection can
    // only be caused by the absent `source`.
    expect(
      started!({ invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', modelClass: 'coding', toolSurfaceCount: 3, memoryBound: false }),
      why('RFC 0077 §C', 'agent.invocation.started without source MUST be rejected'),
    ).toBe(false);
  });

  it('agent.invocation.completed validates a content-free outcome record and pins the outcome enum', () => {
    expect(completed, 'the agentInvocationCompleted $def MUST exist').toBeTruthy();
    const conforming = { invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', outcome: 'completed', schemaValidated: true, confidence: 0.91 };
    expect(
      completed!(conforming),
      why('RFC 0077 §C', 'a conforming agent.invocation.completed payload MUST validate'),
    ).toBe(true);
    // Negative: out-of-enum outcome — the canonical value is `completed`, not `done`.
    // Only `outcome` differs from the conforming record, so neither a short
    // agentId nor a dropped field can be what triggers the rejection.
    expect(
      completed!({ ...conforming, outcome: 'done' }),
      why('RFC 0077 §C', 'agent.invocation.completed with an out-of-enum outcome MUST be rejected'),
    ).toBe(false);
  });

  it('both invocation event names appear in the RunEventType enum', () => {
    const runEvent = loadSchema('run-event.schema.json');
    // Falls back to an empty list when the $def or its enum is absent, so the assertions below fail cleanly.
    const enumVals = (runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum ?? [];
    expect(enumVals).toContain('agent.invocation.started');
    expect(enumVals).toContain('agent.invocation.completed');
  });
});
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live manifest-dispatch structured-output enforcement (RFC 0077 §B step 6) —
|
|
3
|
+
* behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Gated on `capabilities.agents.liveRuntime.structuredOutput` (root-first per
|
|
6
|
+
* RFC 0073) — itself meaningful only alongside `liveRuntime.supported`.
|
|
7
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
8
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`.
|
|
9
|
+
*
|
|
10
|
+
* Asserts the §B step-6 MUST: when the host advertises `structuredOutput` and an
|
|
11
|
+
* agent declares a `handoff.returnSchemaRef`, a terminal result that VIOLATES
|
|
12
|
+
* that schema MUST fail the invocation (`agent.invocation.completed.outcome ===
|
|
13
|
+
* "failed"`, `schemaValidated !== true`) rather than ship a non-conforming
|
|
14
|
+
* result as `completed`. Driven by the `forceInvalidResult` seam param so the
|
|
15
|
+
* assertion is deterministic; soft-skips when the seam/hook is unwired.
|
|
16
|
+
*
|
|
17
|
+
* Spec references:
|
|
18
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
|
|
19
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md (§B step 6)
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { describe, it, expect } from 'vitest';
|
|
23
|
+
import { driver } from '../lib/driver.js';
|
|
24
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
25
|
+
import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
|
|
26
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
27
|
+
|
|
28
|
+
describe('agent-live-structured-output (RFC 0077 §B step 6)', () => {
  it('fails the invocation on a result that violates handoff.returnSchemaRef', async () => {
    const cap = await readLiveRuntimeCap();
    // structuredOutput is a sub-flag of a supported liveRuntime; gate on both.
    const advertised = cap?.supported === true && cap?.structuredOutput === true;
    if (!behaviorGate('openwop-live-structured-output', advertised)) return;

    if (!(await isEventLogSeamAvailable())) return; // soft-skip
    const res = await invokeLive({
      source: 'run-api',
      returnSchemaRef: 'conformance-strict-handoff',
      forceInvalidResult: true,
    });
    if (res === null || !res.runId) return; // seam/hook absent — soft-skip

    // A run now exists on the host. Reset the seam on every exit path from here
    // (early return, failed assertion, success) so a failing assertion does not
    // skip the cleanup that the success path performs.
    try {
      const q = await queryTestEvents(res.runId, { type: 'agent.invocation.completed' });
      // No completed event to inspect — soft-skip rather than fail here.
      if (!q.ok || !q.events[0]) return;
      // Inspect the last completed event returned by the query.
      const payload = q.events[q.events.length - 1]!.payload;

      expect(
        payload.outcome === 'failed',
        driver.describe('RFC 0077 §B step 6', 'a result violating handoff.returnSchemaRef MUST fail the invocation (outcome "failed"), not ship as completed'),
      ).toBe(true);
      expect(
        payload.schemaValidated !== true,
        driver.describe('RFC 0077 §B step 6', 'schemaValidated MUST NOT be true for a schema-violating result'),
      ).toBe(true);
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent org-chart — record + capability + the non-authority guarantee (RFC 0087).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free schema-shape probe. Verifies that:
|
|
5
|
+
* - `capabilities.agents.orgChart` is declared with its `supported` /
|
|
6
|
+
* `installScope` / `departmentNesting` / `responsibilityView` sub-flags.
|
|
7
|
+
* - `agent-org-chart.schema.json` compiles and round-trips a conforming
|
|
8
|
+
* chart, and rejects malformed ones (a non-`host:` member rosterId).
|
|
9
|
+
* - the §B structural non-authority guarantee: the schema REJECTS an
|
|
10
|
+
* authority-bearing field on a member (`scopes` / `canDispatch` /
|
|
11
|
+
* `permissions`) — every object is `additionalProperties:false`, so a
|
|
12
|
+
* host cannot express position-as-authority through it. This is the public
|
|
13
|
+
* test for the protocol-tier SECURITY invariant
|
|
14
|
+
* `org-position-no-authority-escalation`.
|
|
15
|
+
*
|
|
16
|
+
* Behavioral assertions (a manager's tool over-reach is refused; an RFC 0049
|
|
17
|
+
* decision is invariant to org position; the cross-tenant 404; the §D roll-up
|
|
18
|
+
* over live roster portfolios) are gated on `capabilities.agents.orgChart.supported`
|
|
19
|
+
* and land at Active → Accepted (reference-host org store deferred per RFC 0087
|
|
20
|
+
* §Conformance — the host-extension at `/v1/host/sample/org-chart`, #371, is the
|
|
21
|
+
* reference demonstration). This scenario asserts the wire contract, not host behavior.
|
|
22
|
+
*
|
|
23
|
+
* Spec references:
|
|
24
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-org-chart.md
|
|
25
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0087-agent-org-chart.md
|
|
26
|
+
* - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (org-position-no-authority-escalation)
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { describe, it, expect } from 'vitest';
|
|
30
|
+
import { readFileSync } from 'node:fs';
|
|
31
|
+
import { join } from 'node:path';
|
|
32
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
33
|
+
import addFormats from 'ajv-formats';
|
|
34
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
|
|
36
|
+
/** Server-free assertion-message helper. */
|
|
37
|
+
function why(specRef: string, requirement: string): string {
  // Failure messages read "<spec reference> — <requirement>".
  return specRef + ' — ' + requirement;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reads a JSON schema file from the schemas directory and parses it.
 *
 * @param name - File name relative to `SCHEMAS_DIR` (e.g. `capabilities.schema.json`).
 * @returns The parsed JSON document, typed as a plain string-keyed record.
 *   The cast is unchecked: the parsed content is not validated here.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawJson = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawJson) as Record<string, unknown>;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Org-chart fixture shared by the scenarios in this file: one top-level
 * department with two roles, and two members where the first reports to the
 * second.
 */
const CHART = {
  owner: {
    tenantId: 'acme',
    workspaceId: 'growth',
  },
  departments: [
    {
      departmentId: 'dept-marketing',
      name: 'Marketing',
      // No parent: this is a top-level department.
      parentDepartmentId: null,
      roles: [
        { roleId: 'role-cm', name: 'Campaign Manager' },
        { roleId: 'role-bw', name: 'Brief Writer' },
      ],
    },
  ],
  members: [
    // Index 0: reports to the member at index 1.
    {
      rosterId: 'host:sally-marketing',
      departmentId: 'dept-marketing',
      roleId: 'role-bw',
      reportsTo: 'host:morgan-cmo',
    },
    // Index 1: has no manager (`reportsTo: null`).
    {
      rosterId: 'host:morgan-cmo',
      departmentId: 'dept-marketing',
      roleId: 'role-cm',
      reportsTo: null,
    },
  ],
};
|
|
61
|
+
|
|
62
|
+
/**
 * Checks that the capabilities schema declares `agents.orgChart` together
 * with each of its expected sub-flags. Reads the schema file only.
 */
describe('agent-org-chart-shape: capability advertisement (RFC 0087, server-free)', () => {
  it('the capabilities schema declares agents.orgChart with its sub-flags', () => {
    /** Minimal view of a JSON-schema object node: only `properties` is walked. */
    type SchemaNode = { properties?: Record<string, SchemaNode> };

    const expectedFlags = ['supported', 'installScope', 'departmentNesting', 'responsibilityView'];
    const topLevelProps = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const orgChartNode = topLevelProps.agents?.properties?.orgChart;

    expect(orgChartNode, why('capabilities.md §agents', 'agents.orgChart MUST be declared')).toBeDefined();

    expectedFlags.forEach((flag) => {
      const flagNode = orgChartNode?.properties?.[flag];
      expect(flagNode, why('agent-org-chart.md §E', `agents.orgChart.${flag} MUST be declared`)).toBeDefined();
    });
  });
});
|
|
73
|
+
|
|
74
|
+
/**
 * Validates the `agent-org-chart.schema.json` record shape: the shared
 * `CHART` fixture must validate, and malformed charts must be rejected.
 */
describe('agent-org-chart-shape: chart record (RFC 0087 §A, server-free)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  const chart = ajv.compile(loadSchema('agent-org-chart.schema.json'));

  it('AgentOrgChart validates a conforming chart', () => {
    expect(chart(CHART), why('RFC 0087 §A', 'a conforming org-chart MUST validate')).toBe(true);
  });

  it('rejects a non-host: member rosterId and a chart missing required arrays', () => {
    // Identical to a conforming member except for the rosterId prefix.
    const badMember = { ...CHART, members: [{ rosterId: 'core.openwop.agents.sally', departmentId: 'dept-marketing', roleId: 'role-bw', reportsTo: null }] };
    expect(chart(badMember), why('RFC 0087 §A', 'a non-`host:` member rosterId MUST be rejected')).toBe(false);

    expect(chart({ owner: { tenantId: 'acme' }, departments: [] }), why('RFC 0087 §A', 'a chart without `members` MUST be rejected')).toBe(false);

    // The fixture above also differs from CHART in `owner` and `departments`,
    // so on its own it cannot show that the absent `members` array caused the
    // rejection. This one differs from the conforming CHART only by omitting
    // `members`, which isolates that requirement.
    const withoutMembers = { owner: CHART.owner, departments: CHART.departments };
    expect(chart(withoutMembers), why('RFC 0087 §A', 'a chart without `members` MUST be rejected')).toBe(false);
  });
});
|
|
89
|
+
|
|
90
|
+
/**
 * Schema-level checks for the non-authority guarantee: a member object may
 * not carry extra, authority-style fields, and the conforming fixture member
 * has exactly the four descriptive keys. Also validates the
 * responsibility-view response schema against a conforming and two
 * non-conforming documents.
 */
describe('agent-org-chart-shape: §B non-authority guarantee (RFC 0087, server-free)', () => {
  const chartAjv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(chartAjv);
  const validateChart = chartAjv.compile(loadSchema('agent-org-chart.schema.json'));

  it('the schema rejects an authority-bearing field on a member (org-position-no-authority-escalation)', () => {
    const forbiddenFields = ['scopes', 'canDispatch', 'permissions', 'authority'];
    forbiddenFields.forEach((authorityField) => {
      // A single otherwise-conforming member plus one extra field.
      const taintedMember = { ...CHART.members[1], [authorityField]: ['anything'] };
      const withAuthority = { ...CHART, members: [taintedMember] };
      const accepted = validateChart(withAuthority);
      expect(
        accepted,
        why('SECURITY invariant org-position-no-authority-escalation', `a member carrying \`${authorityField}\` MUST be rejected (additionalProperties:false — position confers no authority)`),
      ).toBe(false);
    });
  });

  it('a conforming member object carries exactly the descriptive key set — nothing authority-bearing', () => {
    const conformingMember = CHART.members[1]!;
    const memberKeys = Object.keys(conformingMember).sort();
    const descriptiveKeys = ['departmentId', 'reportsTo', 'roleId', 'rosterId'];
    expect(memberKeys, why('RFC 0087 §B', 'a member is descriptive only: {departmentId, reportsTo, roleId, rosterId}')).toEqual(descriptiveKeys);
  });

  it('the GET /v1/agents/org-chart/{departmentId} responsibility-view response validates (RFC 0087 §D)', () => {
    // Separate Ajv instance: the org-chart schema is registered under an
    // explicit key before the responsibility-view schema is compiled.
    const viewAjv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(viewAjv);
    viewAjv.addSchema(loadSchema('agent-org-chart.schema.json'), 'https://openwop.dev/spec/v1/agent-org-chart.schema.json');
    const validateView = viewAjv.compile(loadSchema('org-chart-responsibility-view.schema.json'));

    const department = CHART.departments[0];
    const members = CHART.members;
    const good = {
      department,
      members,
      responsibilities: ['marketing-email-campaign', 'social-post-scheduler'],
    };
    const withExtraProperty = { ...good, unexpected: true };
    const withoutResponsibilities = { department, members };

    expect(validateView(good), why('RFC 0087 §D', 'a conforming responsibility-view response MUST validate')).toBe(true);
    expect(validateView(withExtraProperty), why('RFC 0087 §D', 'an extra top-level property MUST be rejected')).toBe(false);
    expect(validateView(withoutResponsibilities), why('RFC 0087 §D', '`responsibilities` is required')).toBe(false);
  });
});
|