@openwop/openwop-conformance 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +70 -0
  4. package/api/openapi.yaml +268 -1
  5. package/coverage.md +30 -2
  6. package/fixtures/oauth-providers/synthetic.json +38 -0
  7. package/fixtures.md +10 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +12 -0
  10. package/schemas/agent-deployment-transition.schema.json +49 -0
  11. package/schemas/agent-deployment.schema.json +54 -0
  12. package/schemas/agent-eval-suite.schema.json +140 -0
  13. package/schemas/agent-inventory-response.schema.json +25 -0
  14. package/schemas/agent-manifest.schema.json +5 -0
  15. package/schemas/agent-org-chart.schema.json +82 -0
  16. package/schemas/agent-ref.schema.json +12 -2
  17. package/schemas/agent-roster-entry.schema.json +81 -0
  18. package/schemas/agent-roster-response.schema.json +21 -0
  19. package/schemas/budget-policy.schema.json +18 -0
  20. package/schemas/capabilities.schema.json +277 -0
  21. package/schemas/credential-provenance.schema.json +18 -0
  22. package/schemas/eval-summary.schema.json +92 -0
  23. package/schemas/node-pack-manifest.schema.json +17 -0
  24. package/schemas/org-chart-responsibility-view.schema.json +26 -0
  25. package/schemas/run-event-payloads.schema.json +286 -3
  26. package/schemas/run-event.schema.json +19 -0
  27. package/schemas/tool-descriptor.schema.json +63 -0
  28. package/schemas/trigger-subscription.schema.json +26 -0
  29. package/src/lib/agentRoster.ts +76 -0
  30. package/src/lib/liveRuntime.ts +59 -0
  31. package/src/lib/profiles.ts +157 -0
  32. package/src/lib/runtimeRequires.ts +38 -0
  33. package/src/lib/safeFetch.ts +87 -0
  34. package/src/scenarios/agent-deployment-shape.test.ts +139 -0
  35. package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
  36. package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
  37. package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
  38. package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
  39. package/src/scenarios/agent-live-structured-output.test.ts +58 -0
  40. package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
  41. package/src/scenarios/agent-platform-profile.test.ts +158 -0
  42. package/src/scenarios/agent-roster-attribution.test.ts +179 -0
  43. package/src/scenarios/agent-roster-shape.test.ts +146 -0
  44. package/src/scenarios/budget-policy-shape.test.ts +136 -0
  45. package/src/scenarios/egress-provenance-shape.test.ts +137 -0
  46. package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
  47. package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
  48. package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
  49. package/src/scenarios/runtime-requires-shape.test.ts +134 -0
  50. package/src/scenarios/safefetch-behavior.test.ts +99 -0
  51. package/src/scenarios/safefetch-live-audit.test.ts +175 -0
  52. package/src/scenarios/spec-corpus-validity.test.ts +19 -3
  53. package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
  54. package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
  55. package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Agent evaluation — suite + summary + event shapes (RFC 0081).
3
+ *
4
+ * Always-on, server-free schema-shape probe. Verifies that:
5
+ * - `capabilities.agents.evalSuite` is declared with its `supported` / `modes`
6
+ * sub-flags.
7
+ * - the `AgentEvalSuite` + `EvalSummary` schemas compile and round-trip a
8
+ * conforming artifact, and reject malformed ones (a bad `suiteId`; a
9
+ * `thresholds.passScore` out of 0..1).
10
+ * - the `eval.started` / `eval.scored` / `eval.completed` payload $defs
11
+ * validate conforming content-free payloads and reject malformed ones.
12
+ * - both the summary and the per-task `eval.scored` payload are CONTENT-FREE:
13
+ * an `EvalSummary` carrying a task-output body and a `safetyFinding` carrying
14
+ * an excerpt are rejected. This is the public test for the protocol-tier
15
+ * SECURITY invariant `eval-summary-no-content-leak`.
16
+ * - all three event names appear in the RunEventType enum.
17
+ *
18
+ * Behavioral assertions (the eval-run event ordering, per-task scoring, the
19
+ * EvalSummary round-trip against a live host, the `mode: "eval"` 501 on
20
+ * unadvertised hosts) are gated on `capabilities.agents.evalSuite.supported` and
21
+ * land in `agent-eval-run.test.ts` (deferred per RFC 0081 §Conformance — reference
22
+ * host deferred). This scenario asserts the wire contract, not host behavior.
23
+ *
24
+ * Spec references:
25
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/agent-evaluation.md
26
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0081-agent-evaluation-and-scorecards.md
27
+ * - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (eval-summary-no-content-leak)
28
+ */
29
+
30
+ import { describe, it, expect } from 'vitest';
31
+ import { readFileSync } from 'node:fs';
32
+ import { join } from 'node:path';
33
+ import Ajv2020 from 'ajv/dist/2020.js';
34
+ import addFormats from 'ajv-formats';
35
+ import { SCHEMAS_DIR } from '../lib/paths.js';
36
+
37
/**
 * Server-free assertion-message helper: renders "<specRef> — <requirement>",
 * mirroring driver.describe's message shape without requiring OPENWOP_BASE_URL.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
39
+
40
/**
 * Reads the file `name` under SCHEMAS_DIR as UTF-8 and returns the parsed JSON.
 * The result is cast to a plain record; no structural validation is performed here.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
43
+
44
// Capability advertisement: the capabilities schema must declare
// `agents.evalSuite` together with its `supported` and `modes` sub-flags.
describe('agent-eval-suite-shape: capability advertisement (RFC 0081, server-free)', () => {
  it('the capabilities schema declares agents.evalSuite with its sub-flags', () => {
    type SchemaNode = { properties?: Record<string, { properties?: Record<string, unknown> }> };

    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const evalSuite = topLevel.agents?.properties?.evalSuite;

    expect(evalSuite, why('capabilities.md §agents', 'agents.evalSuite MUST be declared')).toBeDefined();

    ['supported', 'modes'].forEach((flag) => {
      const declared = evalSuite?.properties?.[flag];
      expect(
        declared,
        why('agent-evaluation.md §Capability advertisement', `agents.evalSuite.${flag} MUST be declared`),
      ).toBeDefined();
    });
  });
});
61
+
62
// AgentEvalSuite + EvalSummary: both schemas must compile, accept a conforming
// artifact, and reject malformed or content-bearing ones.
describe('agent-eval-suite-shape: AgentEvalSuite + EvalSummary schemas (RFC 0081, server-free)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  const validateSuite = ajv.compile(loadSchema('agent-eval-suite.schema.json'));
  const validateSummary = ajv.compile(loadSchema('eval-summary.schema.json'));

  it('AgentEvalSuite validates a conforming suite and rejects a malformed suiteId / out-of-range threshold', () => {
    const conforming = {
      suiteId: 'core.openwop.evals.support-resolver',
      version: '1.0.0',
      modes: ['golden', 'regression'],
      thresholds: { passScore: 0.8 },
      tasks: [
        {
          taskId: 'refund-window',
          input: { q: 'refund policy?' },
          expected: { kind: 'golden', match: { strategy: 'contains', value: '30 days' } },
        },
      ],
    };
    // Negative fixtures: each differs from `conforming` in exactly one field.
    const missingEvalsInfix = { ...conforming, suiteId: 'core.openwop.support-resolver' };
    const passScoreTooHigh = { ...conforming, thresholds: { passScore: 1.5 } };

    expect(validateSuite(conforming), why('RFC 0081 §A', 'a conforming AgentEvalSuite MUST validate')).toBe(true);
    expect(
      validateSuite(missingEvalsInfix),
      why('RFC 0081 §A', 'a suiteId without the `.evals.` infix MUST be rejected'),
    ).toBe(false);
    expect(
      validateSuite(passScoreTooHigh),
      why('RFC 0081 §A', 'thresholds.passScore > 1 MUST be rejected'),
    ).toBe(false);
  });

  it('EvalSummary validates a conforming scorecard and rejects an out-of-range score', () => {
    const scorecard = {
      suiteId: 'core.openwop.evals.support-resolver',
      suiteVersion: '1.0.0',
      aggregateScore: 0.86,
      passed: true,
      taskCount: 2,
      passedCount: 2,
      tasks: [
        {
          taskId: 'refund-window',
          score: 0.9,
          passed: true,
          safetyFindings: [{ kind: 'jailbreak', severity: 'low' }],
        },
      ],
    };
    const scoreTooHigh = { ...scorecard, aggregateScore: 1.4 };

    expect(validateSummary(scorecard), why('RFC 0081 §C', 'a conforming EvalSummary MUST validate')).toBe(true);
    expect(validateSummary(scoreTooHigh), why('RFC 0081 §C', 'aggregateScore > 1 MUST be rejected')).toBe(false);
  });

  it('EvalSummary is content-free — a task-output body and a safety-finding excerpt are rejected (eval-summary-no-content-leak)', () => {
    const base = {
      suiteId: 'core.openwop.evals.x',
      suiteVersion: '1.0.0',
      aggregateScore: 0.5,
      passed: false,
      taskCount: 1,
      passedCount: 0,
    };

    // Negative: a per-task entry carrying the output body.
    const taskWithOutputBody = { taskId: 't1', score: 0.5, passed: false, taskOutput: 'the model said …' };
    expect(
      validateSummary({ ...base, tasks: [taskWithOutputBody] }),
      why('SECURITY invariant eval-summary-no-content-leak', 'an EvalSummary task entry MUST NOT carry an output body'),
    ).toBe(false);

    // Negative: a safety finding carrying excerpted content rather than a {kind, severity} descriptor.
    const taskWithExcerpt = {
      taskId: 't1',
      score: 0.5,
      passed: false,
      safetyFindings: [{ kind: 'pii-leak', severity: 'high', excerpt: 'SSN 123-45-6789' }],
    };
    expect(
      validateSummary({ ...base, tasks: [taskWithExcerpt] }),
      why('SECURITY invariant eval-summary-no-content-leak', 'a safetyFinding MUST NOT carry excerpted content'),
    ).toBe(false);
  });
});
113
+
114
// Eval event payloads: the `evalStarted` / `evalScored` / `evalCompleted` $defs
// of run-event-payloads.schema.json must exist, accept conforming content-free
// payloads, and reject malformed ones; all three event names must be listed in
// the RunEventType enum.
describe('agent-eval-suite-shape: eval event payloads (RFC 0081, server-free)', () => {
  const payloads = loadSchema('run-event-payloads.schema.json');
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  // Registered under the key 'payloads' so each $def can be fetched by JSON pointer.
  ajv.addSchema(payloads, 'payloads');

  const started = ajv.getSchema('payloads#/$defs/evalStarted');
  const scored = ajv.getSchema('payloads#/$defs/evalScored');
  const completed = ajv.getSchema('payloads#/$defs/evalCompleted');

  it('eval.started validates a content-free start record and requires the suite provenance', () => {
    expect(started, 'the evalStarted $def MUST exist').toBeTruthy();
    expect(
      started!({ suiteId: 'core.openwop.evals.support-resolver', suiteVersion: '1.0.0', taskCount: 12, modes: ['golden'] }),
      why('RFC 0081 §C', 'a conforming eval.started payload MUST validate'),
    ).toBe(true);
    expect(
      started!({ suiteId: 'core.openwop.evals.x' }),
      why('RFC 0081 §C', 'eval.started without suiteVersion/taskCount/modes MUST be rejected'),
    ).toBe(false);
  });

  it('eval.scored validates a content-free per-task score and requires score + passed', () => {
    expect(scored, 'the evalScored $def MUST exist').toBeTruthy();
    expect(
      scored!({ taskId: 'refund-window', score: 0.9, passed: true, costUsd: 0.012 }),
      why('RFC 0081 §C', 'a conforming eval.scored payload MUST validate'),
    ).toBe(true);
    expect(
      scored!({ taskId: 'refund-window' }),
      why('RFC 0081 §C', 'eval.scored without score/passed MUST be rejected'),
    ).toBe(false);
  });

  it('eval.completed validates a content-free aggregate record', () => {
    expect(completed, 'the evalCompleted $def MUST exist').toBeTruthy();
    const conforming = { aggregateScore: 0.86, passed: true, taskCount: 12, passedCount: 11, regressionVsBaseline: 0.04 };
    expect(
      completed!(conforming),
      why('RFC 0081 §C', 'a conforming eval.completed payload MUST validate'),
    ).toBe(true);
    // Negative: vary ONLY aggregateScore, so a rejection can be attributed to
    // the range constraint rather than to any other missing field.
    expect(
      completed!({ ...conforming, aggregateScore: 2 }),
      why('RFC 0081 §C', 'eval.completed with an out-of-range aggregateScore MUST be rejected'),
    ).toBe(false);
  });

  it('all three eval event names appear in the RunEventType enum', () => {
    const runEvent = loadSchema('run-event.schema.json');
    const enumVals = (runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum ?? [];
    expect(enumVals).toContain('eval.started');
    expect(enumVals).toContain('eval.scored');
    expect(enumVals).toContain('eval.completed');
  });
});
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Live manifest-dispatch tool-allowlist enforcement (RFC 0077 §F-1) —
3
+ * behavioral.
4
+ *
5
+ * Gated on `capabilities.agents.liveRuntime.supported` (root-first per RFC 0073).
6
+ * Soft-skips when unadvertised (default) / hard-fails under
7
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`.
8
+ *
9
+ * Asserts the §F-1 safety carry-forward: a live invocation MUST NOT call a tool
10
+ * outside the agent's `toolAllowlist` (the per-tool application of the RFC 0002
11
+ * §A14 mandatory-allowlist floor). Driven by the `attemptTool` seam param naming
12
+ * a disallowed tool; the invocation MUST NOT emit an `agent.toolCalled` for it
13
+ * (a refused/failed outcome is acceptable, a silent successful call is not).
14
+ * Soft-skips when the seam/hook is unwired.
15
+ *
16
+ * Spec references:
17
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
18
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md (§F-1)
19
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0002-agent-identity-and-handoff.md (§A14 toolAllowlist)
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import { driver } from '../lib/driver.js';
24
+ import { behaviorGate } from '../lib/behavior-gate.js';
25
+ import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
26
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
27
+
28
/** Tool name handed to the live-invoke seam via `attemptTool`; it stands for a tool outside the agent's toolAllowlist. */
const DISALLOWED_TOOL = 'conformance-disallowed-tool';

// Behavioral check: after asking the seam to attempt DISALLOWED_TOOL, the run's
// event log must contain no `agent.toolCalled` event naming that tool.
describe('agent-live-allowlist-enforced (RFC 0077 §F-1)', () => {
  it('does not call a tool outside the agent toolAllowlist', async () => {
    const cap = await readLiveRuntimeCap();
    if (!behaviorGate('openwop-live-allowlist-enforced', cap?.supported === true)) return;

    if (!(await isEventLogSeamAvailable())) return; // soft-skip
    const res = await invokeLive({ source: 'run-api', attemptTool: DISALLOWED_TOOL });
    if (res === null || !res.runId) return; // seam/hook absent — soft-skip

    // From here on the seam has been driven, so always reset it — including
    // when the query soft-skips or the assertion below throws.
    try {
      const q = await queryTestEvents(res.runId, { type: 'agent.toolCalled' });
      if (!q.ok) return;

      // Hosts may carry the tool name under `tool`, `toolId`, or `name`.
      const calledDisallowed = q.events.some((e) => {
        const tool = e.payload.tool ?? e.payload.toolId ?? e.payload.name;
        return tool === DISALLOWED_TOOL;
      });
      expect(
        calledDisallowed,
        driver.describe('RFC 0077 §F-1 / RFC 0002 §A14', 'a live invocation MUST NOT call a tool outside the agent toolAllowlist'),
      ).toBe(false);
    } finally {
      await resetTestSeam();
    }
  });
});
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Live manifest-dispatch invocation bracket (RFC 0077 §E) — behavioral.
3
+ *
4
+ * Gated on `capabilities.agents.liveRuntime.supported` (root-first per RFC 0073).
5
+ * Soft-skips when unadvertised (default) / hard-fails under
6
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
7
+ * `agent-live-runtime-shape.test.ts`; this asserts host BEHAVIOR: a live
8
+ * invocation brackets its `agent.*` family with
9
+ * `agent.invocation.started` (FIRST agent-scoped event) and
10
+ * `agent.invocation.completed` (LAST), with a matching `invocationId`, a
11
+ * `source` in the enum, an `outcome` in the enum, and both events content-free
12
+ * (no prompt/result body).
13
+ *
14
+ * Drives the OPTIONAL `POST /v1/host/sample/agents/live-invoke` seam + reads the
15
+ * bracket back via the test event-log seam (both deferred per RFC 0077
16
+ * §Conformance — soft-skip on 404).
17
+ *
18
+ * Spec references:
19
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
20
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md
21
+ */
22
+
23
+ import { describe, it, expect } from 'vitest';
24
+ import { driver } from '../lib/driver.js';
25
+ import { behaviorGate } from '../lib/behavior-gate.js';
26
+ import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
27
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
28
+
29
/** Entry points accepted for `agent.invocation.started.source`. */
const SOURCES = [
  'workflow-node',
  'run-api',
  'chat-mention',
];

/** Outcomes accepted for `agent.invocation.completed.outcome`. */
const OUTCOMES = [
  'completed',
  'handed-off',
  'escalated',
  'refused',
  'failed',
];

/**
 * True when an event type belongs to the `agent.` family. The two bracket
 * events (`agent.invocation.started` / `.completed`) share that prefix, so the
 * prefix test alone covers them.
 */
const AGENT_SCOPED = (t: string): boolean => t.startsWith('agent.');
32
+
33
// Behavioral check: a live invocation's agent-scoped events are bracketed by
// `agent.invocation.started` (first) and `agent.invocation.completed` (last),
// share one invocationId, use in-enum `source` / `outcome`, and carry no
// content-bearing keys.
describe('agent-live-invocation-bracket (RFC 0077 §E)', () => {
  it('brackets a live invocation with started-first / completed-last + matching invocationId, content-free', async () => {
    const cap = await readLiveRuntimeCap();
    if (!behaviorGate('openwop-live-invocation-bracket', cap?.supported === true)) return;

    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip
    const res = await invokeLive({ source: 'run-api' });
    if (res === null || !res.runId) return; // live-invoke seam absent — soft-skip

    // From here on the seam has been driven, so always reset it — including
    // when the query soft-skips or any assertion below throws.
    try {
      const q = await queryTestEvents(res.runId);
      if (!q.ok) return;
      // Sort a copy by sequence so ordering checks do not depend on query order.
      const events = q.events.slice().sort((a, b) => a.sequence - b.sequence);

      const started = events.filter((e) => e.type === 'agent.invocation.started');
      const completed = events.filter((e) => e.type === 'agent.invocation.completed');
      // A failing expect throws, so both arrays are non-empty past this point.
      expect(
        started.length >= 1 && completed.length >= 1,
        driver.describe('multi-agent-execution.md §"Live manifest dispatch"', 'a live invocation MUST emit agent.invocation.started + agent.invocation.completed'),
      ).toBe(true);

      const start = started[0]!;
      const end = completed[completed.length - 1]!;

      // §E ordering: started is the FIRST agent-scoped event, completed the LAST.
      const agentScoped = events.filter((e) => AGENT_SCOPED(e.type));
      expect(
        agentScoped[0]?.type,
        driver.describe('RFC 0077 §E', 'agent.invocation.started MUST be the first agent-scoped event of the invocation'),
      ).toBe('agent.invocation.started');
      expect(
        agentScoped[agentScoped.length - 1]?.type,
        driver.describe('RFC 0077 §E', 'agent.invocation.completed MUST be the last agent-scoped event of the invocation'),
      ).toBe('agent.invocation.completed');

      // Matching invocationId across the bracket.
      const startId = start.payload.invocationId;
      const endId = end.payload.invocationId;
      expect(
        typeof startId === 'string' && startId === endId,
        driver.describe('run-event-payloads.schema.json#agentInvocation*', 'the bracket MUST share one invocationId'),
      ).toBe(true);

      // Enum discipline.
      expect(
        typeof start.payload.source === 'string' && SOURCES.includes(start.payload.source as string),
        driver.describe('run-event-payloads.schema.json#agentInvocationStarted', 'source MUST be workflow-node|run-api|chat-mention'),
      ).toBe(true);
      expect(
        typeof end.payload.outcome === 'string' && OUTCOMES.includes(end.payload.outcome as string),
        driver.describe('run-event-payloads.schema.json#agentInvocationCompleted', 'outcome MUST be in the closed enum'),
      ).toBe(true);

      // Content-free: identifiers + metadata only, never prompt/result body.
      for (const evt of [start, end]) {
        for (const forbidden of ['prompt', 'result', 'body', 'input', 'output', 'apiKey', 'secret', 'credentials', 'token']) {
          expect(
            forbidden in evt.payload,
            driver.describe('RFC 0077', `agent.invocation.* MUST be content-free (no ${forbidden})`),
          ).toBe(false);
        }
      }
    } finally {
      await resetTestSeam();
    }
  });
});
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Live manifest dispatch — capability + invocation-event shapes (RFC 0077).
3
+ *
4
+ * Always-on, server-free schema-shape probe. Verifies that:
5
+ * - `capabilities.agents.liveRuntime` is declared on the capabilities schema
6
+ * (with the `supported` / `structuredOutput` / `confidenceEscalation` /
7
+ * `sources` sub-flags).
8
+ * - the `agent.invocation.started` + `agent.invocation.completed` payload
9
+ * $defs validate conforming content-free payloads and reject malformed
10
+ * ones (a `started` missing `source`; a `completed` with an out-of-enum
11
+ * `outcome`).
12
+ * - both event names appear in the RunEventType enum.
13
+ *
14
+ * Behavioral assertions (the started→completed bracket ordering, structured-
15
+ * output enforcement, toolAllowlist enforcement) are gated on
16
+ * `capabilities.agents.liveRuntime.supported` and soft-skip until a reference
17
+ * host wires the live-invoke seam (RFC 0077 §Conformance — reference host
18
+ * deferred). This scenario asserts the wire contract, not host behavior.
19
+ *
20
+ * Spec references:
21
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md §"Live manifest dispatch"
22
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md
23
+ */
24
+
25
+ import { describe, it, expect } from 'vitest';
26
+ import { readFileSync } from 'node:fs';
27
+ import { join } from 'node:path';
28
+ import Ajv2020 from 'ajv/dist/2020.js';
29
+ import addFormats from 'ajv-formats';
30
+ import { SCHEMAS_DIR } from '../lib/paths.js';
31
+
32
/**
 * Server-free assertion-message helper: renders "<specRef> — <requirement>",
 * mirroring driver.describe's message shape without requiring OPENWOP_BASE_URL.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
34
+
35
/**
 * Reads the file `name` under SCHEMAS_DIR as UTF-8 and returns the parsed JSON.
 * The result is cast to a plain record; no structural validation is performed here.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
38
+
39
// Capability advertisement: the capabilities schema must declare
// `agents.liveRuntime` together with each of its four sub-flags.
describe('agent-live-runtime-shape: capability advertisement (RFC 0077, server-free)', () => {
  it('the capabilities schema declares agents.liveRuntime with its sub-flags', () => {
    type SchemaNode = { properties?: Record<string, { properties?: Record<string, unknown> }> };

    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const liveRuntime = topLevel.agents?.properties?.liveRuntime;

    expect(liveRuntime, why('capabilities.md §agents', 'agents.liveRuntime MUST be declared')).toBeDefined();

    ['supported', 'structuredOutput', 'confidenceEscalation', 'sources'].forEach((flag) => {
      const declared = liveRuntime?.properties?.[flag];
      expect(
        declared,
        why('multi-agent-execution.md §Live manifest dispatch', `agents.liveRuntime.${flag} MUST be declared`),
      ).toBeDefined();
    });
  });
});
56
+
57
// Invocation event payloads: the `agentInvocationStarted` /
// `agentInvocationCompleted` $defs must exist, accept conforming content-free
// payloads, and reject a missing `source` / an out-of-enum `outcome`; both
// event names must be listed in the RunEventType enum.
describe('agent-live-runtime-shape: invocation event payloads (RFC 0077, server-free)', () => {
  const payloads = loadSchema('run-event-payloads.schema.json');
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  // Registered under the key 'payloads' so each $def can be fetched by JSON pointer.
  ajv.addSchema(payloads, 'payloads');

  const started = ajv.getSchema('payloads#/$defs/agentInvocationStarted');
  const completed = ajv.getSchema('payloads#/$defs/agentInvocationCompleted');

  it('agent.invocation.started validates a content-free start record and requires source', () => {
    expect(started, 'the agentInvocationStarted $def MUST exist').toBeTruthy();
    expect(
      started!({ invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', source: 'run-api', modelClass: 'coding', toolSurfaceCount: 3, memoryBound: false }),
      why('RFC 0077 §C', 'a conforming agent.invocation.started payload MUST validate'),
    ).toBe(true);
    // Negative: missing source — every invocation must record its entry point.
    // This is the conforming payload minus `source` only, so a rejection can be
    // attributed to the missing `source` and nothing else.
    expect(
      started!({ invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', modelClass: 'coding', toolSurfaceCount: 3, memoryBound: false }),
      why('RFC 0077 §C', 'agent.invocation.started without source MUST be rejected'),
    ).toBe(false);
  });

  it('agent.invocation.completed validates a content-free outcome record and pins the outcome enum', () => {
    expect(completed, 'the agentInvocationCompleted $def MUST exist').toBeTruthy();
    const conforming = { invocationId: 'inv-1', agentId: 'vendor.acme.review.code-reviewer', outcome: 'completed', schemaValidated: true, confidence: 0.91 };
    expect(
      completed!(conforming),
      why('RFC 0077 §C', 'a conforming agent.invocation.completed payload MUST validate'),
    ).toBe(true);
    // Negative: out-of-enum outcome — the canonical value is `completed`, not `done`.
    // Only `outcome` differs from the conforming payload.
    expect(
      completed!({ ...conforming, outcome: 'done' }),
      why('RFC 0077 §C', 'agent.invocation.completed with an out-of-enum outcome MUST be rejected'),
    ).toBe(false);
  });

  it('both invocation event names appear in the RunEventType enum', () => {
    const runEvent = loadSchema('run-event.schema.json');
    const enumVals = (runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum ?? [];
    expect(enumVals).toContain('agent.invocation.started');
    expect(enumVals).toContain('agent.invocation.completed');
  });
});
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Live manifest-dispatch structured-output enforcement (RFC 0077 §B step 6) —
3
+ * behavioral.
4
+ *
5
+ * Gated on `capabilities.agents.liveRuntime.structuredOutput` (root-first per
6
+ * RFC 0073) — itself meaningful only alongside `liveRuntime.supported`.
7
+ * Soft-skips when unadvertised (default) / hard-fails under
8
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`.
9
+ *
10
+ * Asserts the §B step-6 MUST: when the host advertises `structuredOutput` and an
11
+ * agent declares a `handoff.returnSchemaRef`, a terminal result that VIOLATES
12
+ * that schema MUST fail the invocation (`agent.invocation.completed.outcome ===
13
+ * "failed"`, `schemaValidated !== true`) rather than ship a non-conforming
14
+ * result as `completed`. Driven by the `forceInvalidResult` seam param so the
15
+ * assertion is deterministic; soft-skips when the seam/hook is unwired.
16
+ *
17
+ * Spec references:
18
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/multi-agent-execution.md (§"Live manifest dispatch")
19
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md (§B step 6)
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import { driver } from '../lib/driver.js';
24
+ import { behaviorGate } from '../lib/behavior-gate.js';
25
+ import { readLiveRuntimeCap, invokeLive } from '../lib/liveRuntime.js';
26
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
27
+
28
// Behavioral check: when the seam is told to force a schema-violating result,
// the last `agent.invocation.completed` event must report outcome "failed" and
// must not report `schemaValidated: true`.
describe('agent-live-structured-output (RFC 0077 §B step 6)', () => {
  it('fails the invocation on a result that violates handoff.returnSchemaRef', async () => {
    const cap = await readLiveRuntimeCap();
    // structuredOutput is a sub-flag of a supported liveRuntime; gate on both.
    const advertised = cap?.supported === true && cap?.structuredOutput === true;
    if (!behaviorGate('openwop-live-structured-output', advertised)) return;

    if (!(await isEventLogSeamAvailable())) return; // soft-skip
    const res = await invokeLive({
      source: 'run-api',
      returnSchemaRef: 'conformance-strict-handoff',
      forceInvalidResult: true,
    });
    if (res === null || !res.runId) return; // seam/hook absent — soft-skip

    // From here on the seam has been driven, so always reset it — including
    // when the query soft-skips or an assertion below throws.
    try {
      const q = await queryTestEvents(res.runId, { type: 'agent.invocation.completed' });
      if (!q.ok || !q.events[0]) return;
      // Judge the invocation by its last completed event.
      const payload = q.events[q.events.length - 1]!.payload;

      expect(
        payload.outcome,
        driver.describe('RFC 0077 §B step 6', 'a result violating handoff.returnSchemaRef MUST fail the invocation (outcome "failed"), not ship as completed'),
      ).toBe('failed');
      expect(
        payload.schemaValidated,
        driver.describe('RFC 0077 §B step 6', 'schemaValidated MUST NOT be true for a schema-violating result'),
      ).not.toBe(true);
    } finally {
      await resetTestSeam();
    }
  });
});
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Agent org-chart — record + capability + the non-authority guarantee (RFC 0087).
3
+ *
4
+ * Always-on, server-free schema-shape probe. Verifies that:
5
+ * - `capabilities.agents.orgChart` is declared with its `supported` /
6
+ * `installScope` / `departmentNesting` / `responsibilityView` sub-flags.
7
+ * - `agent-org-chart.schema.json` compiles and round-trips a conforming
8
+ * chart, and rejects malformed ones (a non-`host:` member rosterId).
9
+ * - the §B structural non-authority guarantee: the schema REJECTS an
10
+ * authority-bearing field on a member (`scopes` / `canDispatch` /
11
+ * `permissions`) — every object is `additionalProperties:false`, so a
12
+ * host cannot express position-as-authority through it. This is the public
13
+ * test for the protocol-tier SECURITY invariant
14
+ * `org-position-no-authority-escalation`.
15
+ *
16
+ * Behavioral assertions (a manager's tool over-reach is refused; an RFC 0049
17
+ * decision is invariant to org position; the cross-tenant 404; the §D roll-up
18
+ * over live roster portfolios) are gated on `capabilities.agents.orgChart.supported`
19
+ * and land at Active → Accepted (reference-host org store deferred per RFC 0087
20
+ * §Conformance — the host-extension at `/v1/host/sample/org-chart`, #371, is the
21
+ * reference demonstration). This scenario asserts the wire contract, not host behavior.
22
+ *
23
+ * Spec references:
24
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/agent-org-chart.md
25
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0087-agent-org-chart.md
26
+ * - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (org-position-no-authority-escalation)
27
+ */
28
+
29
+ import { describe, it, expect } from 'vitest';
30
+ import { readFileSync } from 'node:fs';
31
+ import { join } from 'node:path';
32
+ import Ajv2020 from 'ajv/dist/2020.js';
33
+ import addFormats from 'ajv-formats';
34
+ import { SCHEMAS_DIR } from '../lib/paths.js';
35
+
36
/** Server-free assertion-message helper: renders "<specRef> — <requirement>". */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
38
+
39
/**
 * Reads the file `name` under SCHEMAS_DIR as UTF-8 and returns the parsed JSON.
 * The result is cast to a plain record; no structural validation is performed here.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
42
+
43
/**
 * Shared org-chart fixture used by every scenario in this file: one department
 * with two roles, and two members — the second has no manager (`reportsTo: null`)
 * and the first reports to it.
 */
const CHART = {
  owner: { tenantId: 'acme', workspaceId: 'growth' },
  departments: [
    {
      departmentId: 'dept-marketing',
      name: 'Marketing',
      parentDepartmentId: null,
      roles: [
        { roleId: 'role-cm', name: 'Campaign Manager' },
        { roleId: 'role-bw', name: 'Brief Writer' },
      ],
    },
  ],
  members: [
    {
      rosterId: 'host:sally-marketing',
      departmentId: 'dept-marketing',
      roleId: 'role-bw',
      reportsTo: 'host:morgan-cmo',
    },
    {
      rosterId: 'host:morgan-cmo',
      departmentId: 'dept-marketing',
      roleId: 'role-cm',
      reportsTo: null,
    },
  ],
};
61
+
62
// Capability advertisement: the capabilities schema must declare
// `agents.orgChart` together with each of its four sub-flags.
describe('agent-org-chart-shape: capability advertisement (RFC 0087, server-free)', () => {
  it('the capabilities schema declares agents.orgChart with its sub-flags', () => {
    type SchemaNode = { properties?: Record<string, { properties?: Record<string, unknown> }> };

    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, SchemaNode>;
    const orgChart = topLevel.agents?.properties?.orgChart;

    expect(orgChart, why('capabilities.md §agents', 'agents.orgChart MUST be declared')).toBeDefined();

    ['supported', 'installScope', 'departmentNesting', 'responsibilityView'].forEach((flag) => {
      const declared = orgChart?.properties?.[flag];
      expect(declared, why('agent-org-chart.md §E', `agents.orgChart.${flag} MUST be declared`)).toBeDefined();
    });
  });
});
73
+
74
// Chart record: agent-org-chart.schema.json must compile, accept the CHART
// fixture, and reject a non-`host:` member rosterId and a chart with no `members`.
describe('agent-org-chart-shape: chart record (RFC 0087 §A, server-free)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  const chart = ajv.compile(loadSchema('agent-org-chart.schema.json'));

  it('AgentOrgChart validates a conforming chart', () => {
    expect(chart(CHART), why('RFC 0087 §A', 'a conforming org-chart MUST validate')).toBe(true);
  });

  it('rejects a non-host: member rosterId and a chart missing required arrays', () => {
    const badMember = { ...CHART, members: [{ rosterId: 'core.openwop.agents.sally', departmentId: 'dept-marketing', roleId: 'role-bw', reportsTo: null }] };
    expect(chart(badMember), why('RFC 0087 §A', 'a non-`host:` member rosterId MUST be rejected')).toBe(false);

    // Negative: the conforming chart minus `members` only, so a rejection can be
    // attributed to the missing array rather than to a reduced owner/departments.
    const withoutMembers = { owner: CHART.owner, departments: CHART.departments };
    expect(chart(withoutMembers), why('RFC 0087 §A', 'a chart without `members` MUST be rejected')).toBe(false);
  });
});
89
+
90
// §B non-authority guarantee: a member may not carry authority-bearing fields,
// the fixture member is purely descriptive, and the §D responsibility-view
// response schema validates a conforming body.
describe('agent-org-chart-shape: §B non-authority guarantee (RFC 0087, server-free)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  const validateChart = ajv.compile(loadSchema('agent-org-chart.schema.json'));

  it('the schema rejects an authority-bearing field on a member (org-position-no-authority-escalation)', () => {
    const authorityFields = ['scopes', 'canDispatch', 'permissions', 'authority'];
    authorityFields.forEach((authorityField) => {
      // Conforming member plus exactly one extra, authority-bearing key.
      const escalatedMember = { ...CHART.members[1], [authorityField]: ['anything'] };
      const withAuthority = { ...CHART, members: [escalatedMember] };
      expect(
        validateChart(withAuthority),
        why('SECURITY invariant org-position-no-authority-escalation', `a member carrying \`${authorityField}\` MUST be rejected (additionalProperties:false — position confers no authority)`),
      ).toBe(false);
    });
  });

  it('a conforming member object carries exactly the descriptive key set — nothing authority-bearing', () => {
    const sortedKeys = Object.keys(CHART.members[1]!).sort();
    expect(
      sortedKeys,
      why('RFC 0087 §B', 'a member is descriptive only: {departmentId, reportsTo, roleId, rosterId}'),
    ).toEqual(['departmentId', 'reportsTo', 'roleId', 'rosterId']);
  });

  it('the GET /v1/agents/org-chart/{departmentId} responsibility-view response validates (RFC 0087 §D)', () => {
    // Separate Ajv instance with the org-chart schema registered under the
    // explicit URL key below before the view schema is compiled.
    const viewAjv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(viewAjv);
    viewAjv.addSchema(loadSchema('agent-org-chart.schema.json'), 'https://openwop.dev/spec/v1/agent-org-chart.schema.json');
    const validateView = viewAjv.compile(loadSchema('org-chart-responsibility-view.schema.json'));

    const conforming = {
      department: CHART.departments[0],
      members: CHART.members,
      responsibilities: ['marketing-email-campaign', 'social-post-scheduler'],
    };
    const withExtraProperty = { ...conforming, unexpected: true };
    const withoutResponsibilities = { department: CHART.departments[0], members: CHART.members };

    expect(validateView(conforming), why('RFC 0087 §D', 'a conforming responsibility-view response MUST validate')).toBe(true);
    expect(validateView(withExtraProperty), why('RFC 0087 §D', 'an extra top-level property MUST be rejected')).toBe(false);
    expect(validateView(withoutResponsibilities), why('RFC 0087 §D', '`responsibilities` is required')).toBe(false);
  });
});