@openwop/openwop-conformance 1.11.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +2 -2
- package/coverage.md +11 -6
- package/package.json +1 -1
- package/src/lib/agentDeployment.ts +117 -0
- package/src/lib/agentEval.ts +83 -0
- package/src/lib/agentOrgChart.ts +82 -0
- package/src/lib/triggerBridge.ts +74 -0
- package/src/scenarios/agent-deployment-lifecycle.test.ts +147 -0
- package/src/scenarios/agent-eval-run.test.ts +145 -0
- package/src/scenarios/agent-org-chart-scoping.test.ts +137 -0
- package/src/scenarios/org-position-no-authority-escalation.test.ts +78 -0
- package/src/scenarios/trigger-bridge-delivery.test.ts +126 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0087 `agents.orgChart` conformance scenarios.
|
|
3
|
+
* Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/agentOrgChart.js`.
|
|
5
|
+
*
|
|
6
|
+
* The org-chart is structure + a read (like the RFC 0072 inventory), not an
|
|
7
|
+
* event surface — so these helpers wrap the two NORMATIVE reads
|
|
8
|
+
* (`GET /v1/agents/org-chart` + `GET /v1/agents/org-chart/{departmentId}`),
|
|
9
|
+
* exercised black-box against any conformant host. Tenant scoping (RFC 0074)
|
|
10
|
+
* is probed with the `OPENWOP_CROSS_TENANT_ORG_CHART_DEPARTMENT_ID` env var (a
|
|
11
|
+
* department id outside the caller's owner triple), the org-chart analog of the
|
|
12
|
+
* roster scenario's `OPENWOP_CROSS_TENANT_ROSTER_ID`.
|
|
13
|
+
*
|
|
14
|
+
* @see RFCS/0087-agent-org-chart.md
|
|
15
|
+
* @see spec/v1/agent-org-chart.md
|
|
16
|
+
*/
|
|
17
|
+
import { driver } from './driver.js';
|
|
18
|
+
import { readCapabilityFamily } from './discovery-capabilities.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Looks up the `orgChart` entry of the `agents` capability family through
 * `readCapabilityFamily` (root-first per RFC 0073).
 *
 * @returns the entry when it is a truthy value of type `object`; `null` when
 *   it is missing or of any other type (i.e. unadvertised).
 */
export async function readOrgChartCap(): Promise<Record<string, unknown> | null> {
  const family = await readCapabilityFamily<{ orgChart?: unknown }>('agents');
  const entry = family?.orgChart;
  if (!entry || typeof entry !== 'object') {
    return null;
  }
  return entry as Record<string, unknown>;
}
|
|
27
|
+
|
|
28
|
+
/**
 * A department entry as read off the org-chart wire. Every named field is
 * optional because the helpers do not validate the host's response.
 */
export interface OrgDepartment {
  /** Identifier of this department. */
  departmentId?: string;
  /** Identifier of the parent department; `null` or absent when there is none. */
  parentDepartmentId?: string | null;
  /** Any further wire fields are retained, typed as `unknown`. */
  [key: string]: unknown;
}
|
|
33
|
+
|
|
34
|
+
/**
 * A member entry as read off the org-chart wire. The four named fields match
 * the descriptive key set (`rosterId`, `departmentId`, `roleId`, `reportsTo`);
 * all are optional because the response is not validated here.
 */
export interface OrgMember {
  /** Roster entry this member refers to. */
  rosterId?: string;
  /** Department the member is placed in. */
  departmentId?: string;
  /** Role identifier of the member. */
  roleId?: string;
  /** Reporting edge; `null` or absent when there is none. */
  reportsTo?: string | null;
  /** Any further wire fields are retained, typed as `unknown`. */
  [key: string]: unknown;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Body of the org-chart read (`GET /v1/agents/org-chart`). All fields are
 * optional because the response is cast, not validated.
 */
export interface OrgChart {
  /** Owner of the chart, identified by tenant and workspace ids. */
  owner?: {
    tenantId?: string;
    workspaceId?: string;
  };
  /** Department entries of the chart. */
  departments?: OrgDepartment[];
  /** Member entries of the chart. */
  members?: OrgMember[];
}
|
|
47
|
+
|
|
48
|
+
/**
 * Body of the per-department read (`GET /v1/agents/org-chart/{departmentId}`).
 * All fields are optional because the response is cast, not validated.
 */
export interface ResponsibilityView {
  /** The department the view was requested for; extra wire fields are kept as `unknown`. */
  department?: {
    departmentId?: string;
    [key: string]: unknown;
  };
  /** Member entries included in the view. */
  members?: OrgMember[];
  /** The responsibility roll-up, as a list of strings. */
  responsibilities?: string[];
}
|
|
53
|
+
|
|
54
|
+
/**
 * Issues `GET /v1/agents/org-chart`, the NORMATIVE org-chart read
 * (RFC 0087 §A).
 *
 * @returns `null` when the host answers 404, 405 or 501 (read not served);
 *   for any other status the response JSON cast to `OrgChart`, or an empty
 *   object when the response carries no JSON.
 */
export async function getOrgChart(): Promise<OrgChart | null> {
  const response = await driver.get('/v1/agents/org-chart');
  switch (response.status) {
    case 404:
    case 405:
    case 501:
      return null;
    default: {
      const body = response.json as OrgChart | undefined;
      return body ?? {};
    }
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Issues `GET /v1/agents/org-chart/{departmentId}` for a department's §D
 * responsibility roll-up.
 *
 * @param departmentId department to read; URI-encoded before it is placed in the path
 * @param recursive when `undefined` no `recursive` query parameter is sent, so the
 *   host default applies (true, per the original note); otherwise sent as
 *   `?recursive=true` / `?recursive=false`
 * @returns the HTTP status together with the response JSON cast to a view. The
 *   status is always surfaced so a caller can tell a cross-tenant 404 from a
 *   served view.
 */
export async function getDepartmentView(
  departmentId: string,
  recursive?: boolean,
): Promise<{ status: number; view: ResponsibilityView | undefined }> {
  let path = `/v1/agents/org-chart/${encodeURIComponent(departmentId)}`;
  if (recursive !== undefined) {
    path += recursive ? '?recursive=true' : '?recursive=false';
  }
  const response = await driver.get(path);
  const view = response.json as ResponsibilityView | undefined;
  return { status: response.status, view };
}
|
|
73
|
+
|
|
74
|
+
/**
 * Descriptive keys a member object may carry on the wire (RFC 0087 §A).
 * A key outside this set, in particular an authority-bearing one, is a §B
 * `org-position-no-authority-escalation` violation.
 */
export const MEMBER_DESCRIPTIVE_KEYS = new Set<string>([
  'rosterId',
  'departmentId',
  'roleId',
  'reportsTo',
]);
|
|
78
|
+
|
|
79
|
+
/**
 * Field names that bear authority and therefore MUST NEVER appear on an
 * org-chart wire object (member, department or responsibility view):
 * position confers no authority (RFC 0087 §B).
 */
export const AUTHORITY_FIELDS: string[] = [
  'scopes',
  'canDispatch',
  'permissions',
  'authority',
  'roleGrants',
  'capabilities',
];
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0083 `triggerBridge` conformance scenario.
|
|
3
|
+
* Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/triggerBridge.js`.
|
|
5
|
+
*
|
|
6
|
+
* Two surfaces:
|
|
7
|
+
* - the NORMATIVE read (`GET /v1/trigger-subscriptions[/{subscriptionId}]`,
|
|
8
|
+
* RFC 0083 §A), exercised black-box; and
|
|
9
|
+
* - the host-sample delivery seam (`POST /v1/host/sample/trigger-bridge/deliver`),
|
|
10
|
+
* used to drive the §C delivery model (dedup → retry → dead-letter →
|
|
11
|
+
* causation) so the two `trigger.*` events can be asserted against the test
|
|
12
|
+
* event-log seam. The seam is OPTIONAL — scenarios soft-skip on 404/405
|
|
13
|
+
* (reference durable-delivery is deferred per RFC 0083 §Conformance).
|
|
14
|
+
*
|
|
15
|
+
* Gating uses the `openwop-trigger-bridge` PROFILE derived from the live
|
|
16
|
+
* discovery doc (the bridge + a dead-letter sink + a durable source, §D), not a
|
|
17
|
+
* bare capability flag.
|
|
18
|
+
*
|
|
19
|
+
* @see RFCS/0083-durable-trigger-and-channel-bridge-profile.md
|
|
20
|
+
* @see spec/v1/trigger-bridge.md
|
|
21
|
+
* @see spec/v1/profiles.md (§openwop-trigger-bridge)
|
|
22
|
+
*/
|
|
23
|
+
import { driver } from './driver.js';
|
|
24
|
+
import { deriveProfiles, type DiscoveryPayload } from './profiles.js';
|
|
25
|
+
|
|
26
|
+
/**
 * Fetches `/.well-known/openwop` and checks whether `deriveProfiles` yields
 * the `openwop-trigger-bridge` profile for it (RFC 0083 §D predicate: bridge
 * advertised + dead-letter sink + a durable source).
 *
 * @returns `false` when discovery does not answer 200 or has no JSON body;
 *   otherwise whether the derived profile list includes the profile.
 */
export async function isTriggerBridgeProfileAdvertised(): Promise<boolean> {
  const response = await driver.get('/.well-known/openwop');
  const usable = response.status === 200 && Boolean(response.json);
  if (!usable) {
    return false;
  }
  const profiles = deriveProfiles(response.json as DiscoveryPayload);
  return profiles.includes('openwop-trigger-bridge');
}
|
|
34
|
+
|
|
35
|
+
/**
 * A subscription entry as read from `GET /v1/trigger-subscriptions`. Every
 * named field is optional because the response is cast, not validated.
 */
export interface TriggerSubscription {
  /** Identifier of the subscription. */
  subscriptionId?: string;
  /** Source the subscription is bound to. */
  source?: string;
  /** State of the subscription, as a string. */
  state?: string;
  /** Any further wire fields are retained, typed as `unknown`. */
  [key: string]: unknown;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Issues `GET /v1/trigger-subscriptions`, the NORMATIVE subscription read
 * surface (RFC 0083 §A).
 *
 * @returns `null` when the host answers 404, 405 or 501 (not served); for any
 *   other status the response JSON, or an empty object when there is none.
 */
export async function listTriggerSubscriptions(): Promise<{ subscriptions?: TriggerSubscription[] } | null> {
  const response = await driver.get('/v1/trigger-subscriptions');
  switch (response.status) {
    case 404:
    case 405:
    case 501:
      return null;
    default: {
      const body = response.json as { subscriptions?: TriggerSubscription[] } | undefined;
      return body ?? {};
    }
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Response body of the host-sample delivery seam. All fields are optional
 * because the response is cast, not validated.
 */
export interface DeliveryResult {
  /** Run the delivery is associated with. */
  runId?: string;
  /** Subscription the delivery went through. */
  subscriptionId?: string;
  /** Delivery outcome, as a string. */
  outcome?: string;
  /** Count of deliveries reported by the seam. */
  deliveredCount?: number;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Posts one delivery request to the host-sample bridge seam
 * (`POST /v1/host/sample/trigger-bridge/deliver`). The `scenario` field selects
 * what the seam is asked to do:
 * - `dedup` — deliver the same `dedupKey` twice; effectively-once (§C-1).
 * - `exhaust` — exhaust the retry policy → `dead-lettered` (§C-2 + RFC 0053).
 * - `deliver` — a single successful delivery whose run's `run.started`
 *   carries the delivery `causationId` (§C / RFC 0040).
 *
 * @param body request payload forwarded to the seam unchanged
 * @returns `null` when the seam is unwired (404/405); for any other status
 *   the response JSON, or an empty object when there is none.
 */
export async function driveDelivery(
  body: { scenario: 'dedup' | 'exhaust' | 'deliver'; dedupKey?: string; source?: string },
): Promise<DeliveryResult | null> {
  const response = await driver.post('/v1/host/sample/trigger-bridge/deliver', body);
  const unwired = response.status === 404 || response.status === 405;
  if (unwired) {
    return null;
  }
  const result = response.json as DeliveryResult | undefined;
  return result ?? {};
}
|
|
72
|
+
|
|
73
|
+
/** The subscription state names the scenarios check against. */
export const SUBSCRIPTION_STATES: string[] = [
  'active',
  'paused',
  'failed',
  'dead-lettered',
];

/** The delivery outcome names the scenarios check against. */
export const DELIVERY_OUTCOMES: string[] = [
  'delivered',
  'retrying',
  'dead-lettered',
];
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent deployment lifecycle — the §E promotion contract + §B channel pin
|
|
3
|
+
* (RFC 0082) — behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Capability-gated on `agents.deployment.supported` (root-first per RFC 0073).
|
|
6
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
7
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
8
|
+
* `agent-deployment-shape.test.ts`; this asserts host BEHAVIOR via the
|
|
9
|
+
* `POST /v1/host/sample/agents/deployment-transition` seam + the test event-log
|
|
10
|
+
* seam + the NORMATIVE `GET /v1/agents/{agentId}/deployments` read:
|
|
11
|
+
*
|
|
12
|
+
* 1. PROMOTE (§E) — authorize → approvalGate → eval-verify → a content-free
|
|
13
|
+
* `deployment.promoted` with `toState` in the seven-state vocabulary; the
|
|
14
|
+
* returned record validates against `agent-deployment.schema.json`.
|
|
15
|
+
* 2. FAIL-CLOSED (§E-1, `deployment-promotion-fail-closed`) — a principal
|
|
16
|
+
* lacking `deploy:promote` is denied (`allowed !== true`) and emits NO
|
|
17
|
+
* `deployment.promoted`.
|
|
18
|
+
* 3. EVAL-GATE-UNMET (§E-3) — a promote whose `evalRunId` has `passed:false`
|
|
19
|
+
* is denied with `eval_gate_unmet` and emits NO `deployment.promoted`.
|
|
20
|
+
* 4. CHANNEL PIN (§B) — a `@channel`-bound run records the resolved version as
|
|
21
|
+
* `resolvedAgentVersion` on `agent.invocation.started` (the recorded fact a
|
|
22
|
+
* replay re-reads rather than re-resolving).
|
|
23
|
+
*
|
|
24
|
+
* Each leg soft-skips independently (seam absent / event-log seam absent).
|
|
25
|
+
*
|
|
26
|
+
* Spec references:
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-deployment.md (§B/§E)
|
|
28
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0082-agent-deployment-lifecycle.md
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { readFileSync } from 'node:fs';
|
|
33
|
+
import { join } from 'node:path';
|
|
34
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
35
|
+
import addFormats from 'ajv-formats';
|
|
36
|
+
import { driver } from '../lib/driver.js';
|
|
37
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
38
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
39
|
+
import {
|
|
40
|
+
readDeploymentCap,
|
|
41
|
+
driveDeploymentTransition,
|
|
42
|
+
DEPLOYMENT_STATES,
|
|
43
|
+
DEPLOYMENT_CONTENT_FORBIDDEN,
|
|
44
|
+
} from '../lib/agentDeployment.js';
|
|
45
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
46
|
+
|
|
47
|
+
/**
 * Reads a JSON schema file from `SCHEMAS_DIR` and parses it.
 *
 * @param name file name of the schema, relative to `SCHEMAS_DIR`
 * @returns the parsed JSON document
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const raw = readFileSync(schemaPath, 'utf8');
  return JSON.parse(raw) as Record<string, unknown>;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Asserts that `payload` has none of the keys listed in
 * `DEPLOYMENT_CONTENT_FORBIDDEN`, one assertion per forbidden key.
 *
 * @param payload event payload (or record) to inspect
 * @param where label naming the inspected object in the failure message
 */
function expectContentFree(payload: Record<string, unknown>, where: string): void {
  for (const field of DEPLOYMENT_CONTENT_FORBIDDEN) {
    const absent = !(field in payload);
    const message = driver.describe(
      'RFC 0082 §D (deployment-event-no-content-leak)',
      `${where} MUST be content-free (no ${field})`,
    );
    expect(absent, message).toBe(true);
  }
}
|
|
59
|
+
|
|
60
|
+
describe('agent-deployment-lifecycle (RFC 0082 §B/§E)', () => {
  // Drives four legs through the deployment-transition seam: promote,
  // unauthorized, eval-gate-unmet and channel-pin. Each leg is skipped on its
  // own when the seam returns no run id or the event-log query is not `ok`.
  it('promotes via the eval+RBAC+approval gate, fails closed without scope/eval, and pins the channel version', async () => {
    const cap = await readDeploymentCap();
    if (!behaviorGate('openwop-deployment-lifecycle', cap?.supported === true)) return;
    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip

    const ajv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(ajv);
    const validateRecord = ajv.compile(loadSchema('agent-deployment.schema.json'));

    // ---- Leg 1: eval+RBAC+approval-gated promotion (§E) ------------------
    const promote = await driveDeploymentTransition({ scenario: 'promote' });
    if (promote === null) return; // deployment seam unwired — soft-skip the whole behavioral suite

    // From here on the seam has been driven, so the reset below runs in a
    // `finally`: a failing assertion must not leave seam state behind.
    try {
      if (promote.record) {
        expect(
          validateRecord(promote.record),
          driver.describe(
            'agent-deployment.schema.json',
            `a promoted deployment record MUST validate (${ajv.errorsText(validateRecord.errors)})`,
          ),
        ).toBe(true);
      }
      if (promote.runId) {
        const pq = await queryTestEvents(promote.runId, { type: 'deployment.promoted' });
        if (pq.ok) {
          for (const e of pq.events) {
            expectContentFree(e.payload, 'deployment.promoted');
            expect(
              typeof e.payload.toState === 'string' && DEPLOYMENT_STATES.includes(e.payload.toState as string),
              driver.describe('run-event-payloads.schema.json#/$defs/deploymentPromoted', 'toState MUST be in the seven-state vocabulary'),
            ).toBe(true);
            expect(
              typeof e.payload.toVersion === 'string' && (e.payload.toVersion as string).length > 0,
              driver.describe('agent-deployment.md §D', 'deployment.promoted MUST carry the promoted toVersion'),
            ).toBe(true);
          }
        }
      }

      // ---- Leg 2: fail-closed authz (§E-1; deployment-promotion-fail-closed) -
      const unauth = await driveDeploymentTransition({ scenario: 'unauthorized' });
      if (unauth && unauth.runId) {
        expect(
          unauth.allowed !== true,
          driver.describe('agent-deployment.md §E-1', 'a principal without deploy:promote MUST be denied (fail-closed)'),
        ).toBe(true);
        const uq = await queryTestEvents(unauth.runId, { type: 'deployment.promoted' });
        if (uq.ok) {
          expect(
            uq.events.length === 0,
            driver.describe('SECURITY invariant deployment-promotion-fail-closed', 'a denied transition MUST emit NO deployment.promoted'),
          ).toBe(true);
        }
      }

      // ---- Leg 3: eval-gate-unmet denial (§E-3) ----------------------------
      const evalUnmet = await driveDeploymentTransition({ scenario: 'eval-gate-unmet' });
      if (evalUnmet && evalUnmet.runId) {
        expect(
          evalUnmet.error === 'eval_gate_unmet' || evalUnmet.allowed !== true,
          driver.describe('agent-deployment.md §E-3', 'a promote whose eval evidence has passed:false MUST be denied (eval_gate_unmet)'),
        ).toBe(true);
        const eq = await queryTestEvents(evalUnmet.runId, { type: 'deployment.promoted' });
        if (eq.ok) {
          expect(
            eq.events.length === 0,
            driver.describe('agent-deployment.md §E-3', 'an unmet eval gate MUST emit NO deployment.promoted'),
          ).toBe(true);
        }
      }

      // ---- Leg 4: channel-resolution pin (§B) ------------------------------
      const pin = await driveDeploymentTransition({ scenario: 'channel-pin', channel: 'stable' });
      if (pin && pin.runId) {
        const iq = await queryTestEvents(pin.runId, { type: 'agent.invocation.started' });
        if (iq.ok && iq.events.length > 0) {
          // Sort a copy: Array.prototype.sort reorders in place and would
          // otherwise mutate the query result.
          const started = [...iq.events].sort((a, b) => a.sequence - b.sequence)[0]!;
          expect(
            typeof started.payload.resolvedAgentVersion === 'string' && (started.payload.resolvedAgentVersion as string).length > 0,
            driver.describe('agent-deployment.md §B', 'a @channel-bound run MUST record resolvedAgentVersion on agent.invocation.started (the recorded fact a replay re-reads)'),
          ).toBe(true);
        }
      }
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent eval-run — the `mode:"eval"` projection (RFC 0081 §B/§C) — behavioral.
|
|
3
|
+
*
|
|
4
|
+
* Capability-gated on `agents.evalSuite.supported` (root-first per RFC 0073).
|
|
5
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
6
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`. The always-on wire-shape coverage lives in
|
|
7
|
+
* `agent-eval-suite-shape.test.ts`; this asserts host BEHAVIOR via the
|
|
8
|
+
* `POST /v1/host/sample/agents/eval-run` seam + the test event-log seam + the
|
|
9
|
+
* NORMATIVE `GET /v1/runs/{runId}/eval-summary` read:
|
|
10
|
+
*
|
|
11
|
+
* 1. ORDERING (§C) — an eval run emits `eval.started` FIRST, one `eval.scored`
|
|
12
|
+
* per task, then `eval.completed` once (count == eval.completed.taskCount).
|
|
13
|
+
* 2. CONTENT-FREE (SR-1 / `eval-summary-no-content-leak`) — every `eval.scored`
|
|
14
|
+
* carries scores / ids / scalars ONLY (never task output / rubric / prose);
|
|
15
|
+
* `score` ∈ 0..1; `passed` is a boolean.
|
|
16
|
+
* 3. NORMATIVE SUMMARY (§C) — `GET /v1/runs/{runId}/eval-summary` returns a
|
|
17
|
+
* schema-valid `EvalSummary` whose `passedCount <= taskCount` and whose
|
|
18
|
+
* task entries carry no output body.
|
|
19
|
+
*
|
|
20
|
+
* Each leg soft-skips independently (seam absent / event-log seam absent).
|
|
21
|
+
*
|
|
22
|
+
* Spec references:
|
|
23
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-evaluation.md (§B/§C)
|
|
24
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0081-agent-evaluation-and-scorecards.md
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, it, expect } from 'vitest';
|
|
28
|
+
import { readFileSync } from 'node:fs';
|
|
29
|
+
import { join } from 'node:path';
|
|
30
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
31
|
+
import addFormats from 'ajv-formats';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
import {
|
|
36
|
+
readEvalSuiteCap,
|
|
37
|
+
driveEvalRun,
|
|
38
|
+
getEvalSummary,
|
|
39
|
+
EVAL_CONTENT_FORBIDDEN,
|
|
40
|
+
} from '../lib/agentEval.js';
|
|
41
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
42
|
+
|
|
43
|
+
/**
 * Loads and parses one JSON schema file located under `SCHEMAS_DIR`.
 *
 * @param name schema file name, resolved against `SCHEMAS_DIR`
 * @returns the parsed JSON document
 */
function loadSchema(name: string): Record<string, unknown> {
  const text = readFileSync(join(SCHEMAS_DIR, name), 'utf8');
  const parsed = JSON.parse(text) as Record<string, unknown>;
  return parsed;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Asserts that `payload` has none of the keys listed in
 * `EVAL_CONTENT_FORBIDDEN`, one assertion per forbidden key.
 *
 * @param payload event payload (or summary task entry) to inspect
 * @param where label naming the inspected object in the failure message
 */
function expectContentFree(payload: Record<string, unknown>, where: string): void {
  for (const field of EVAL_CONTENT_FORBIDDEN) {
    const absent = !(field in payload);
    const message = driver.describe(
      'RFC 0081 §C (eval-summary-no-content-leak)',
      `${where} MUST be content-free (no ${field})`,
    );
    expect(absent, message).toBe(true);
  }
}
|
|
55
|
+
|
|
56
|
+
describe('agent-eval-run (RFC 0081 §B/§C)', () => {
  // Drives one eval run through the eval-run seam, then checks event
  // ordering, content-freeness of the eval.* payloads, and the eval-summary
  // read. The event legs are skipped when the event-log queries are not `ok`.
  it('emits eval.started → per-task eval.scored → eval.completed and serves a content-free EvalSummary', async () => {
    const cap = await readEvalSuiteCap();
    if (!behaviorGate('openwop-eval-run', cap?.supported === true)) return;
    if (!(await isEventLogSeamAvailable())) return; // event-log seam absent — soft-skip

    const run = await driveEvalRun({ modes: ['golden'] });
    if (run === null) return; // eval-run seam unwired — soft-skip the whole behavioral suite
    if (!run.runId) return;

    // From here on the seam has been driven, so the reset below runs in a
    // `finally`: a failing assertion must not leave seam state behind.
    try {
      // ---- Legs 1+2: eval.* ordering + content-free (§C) -------------------
      const startedQ = await queryTestEvents(run.runId, { type: 'eval.started' });
      const scoredQ = await queryTestEvents(run.runId, { type: 'eval.scored' });
      const completedQ = await queryTestEvents(run.runId, { type: 'eval.completed' });

      if (startedQ.ok && scoredQ.ok && startedQ.events.length > 0) {
        // Sort copies: Array.prototype.sort reorders in place and would
        // otherwise mutate the query results.
        const started = [...startedQ.events].sort((a, b) => a.sequence - b.sequence)[0]!;

        // eval.started precedes every eval.scored (§C ordering).
        for (const s of scoredQ.events) {
          expect(
            started.sequence < s.sequence,
            driver.describe('agent-evaluation.md §C', 'eval.started MUST precede every eval.scored'),
          ).toBe(true);
        }

        if (completedQ.ok && completedQ.events.length > 0) {
          // The last eval.completed by sequence is the one compared against.
          const completedSorted = [...completedQ.events].sort((a, b) => a.sequence - b.sequence);
          const completed = completedSorted[completedSorted.length - 1]!;
          for (const s of scoredQ.events) {
            expect(
              s.sequence < completed.sequence,
              driver.describe('agent-evaluation.md §C', 'every eval.scored MUST precede eval.completed'),
            ).toBe(true);
          }
          // eval.scored is emitted once per task (count == eval.completed.taskCount).
          if (typeof completed.payload.taskCount === 'number') {
            expect(
              scoredQ.events.length === completed.payload.taskCount,
              driver.describe('agent-evaluation.md §C', 'one eval.scored per task (count == eval.completed.taskCount)'),
            ).toBe(true);
          }
          expectContentFree(completed.payload, 'eval.completed');
        }

        // each eval.scored content-free + score ∈ 0..1, passed boolean.
        for (const s of scoredQ.events) {
          expectContentFree(s.payload, 'eval.scored');
          expect(
            typeof s.payload.score === 'number' && (s.payload.score as number) >= 0 && (s.payload.score as number) <= 1,
            driver.describe('run-event-payloads.schema.json#/$defs/evalScored', 'eval.scored.score MUST be in 0..1'),
          ).toBe(true);
          expect(
            typeof s.payload.passed === 'boolean',
            driver.describe('run-event-payloads.schema.json#/$defs/evalScored', 'eval.scored.passed MUST be a boolean'),
          ).toBe(true);
        }
        expectContentFree(started.payload, 'eval.started');
      }

      // ---- Leg 3: NORMATIVE EvalSummary read (§C) --------------------------
      const { status, summary } = await getEvalSummary(run.runId);
      if (status === 200 && summary) {
        const ajv = new Ajv2020({ strict: false, allErrors: true });
        addFormats(ajv);
        const validate = ajv.compile(loadSchema('eval-summary.schema.json'));
        expect(
          validate(summary),
          driver.describe(
            'eval-summary.schema.json',
            `GET /v1/runs/{runId}/eval-summary MUST return a schema-valid EvalSummary (${ajv.errorsText(validate.errors)})`,
          ),
        ).toBe(true);

        const tasks = (summary.tasks as Array<Record<string, unknown>> | undefined) ?? [];
        const passedCount = summary.passedCount as number | undefined;
        const taskCount = summary.taskCount as number | undefined;
        if (typeof passedCount === 'number' && typeof taskCount === 'number') {
          expect(
            passedCount <= taskCount,
            driver.describe('agent-evaluation.md §C', 'EvalSummary.passedCount MUST NOT exceed taskCount'),
          ).toBe(true);
        }
        for (const t of tasks) {
          expectContentFree(t, 'EvalSummary.tasks[]');
        }
      }
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent org-chart — normative read, responsibility roll-up + tenant scoping
|
|
3
|
+
* (RFC 0087 §A/§C/§D) — behavioral.
|
|
4
|
+
*
|
|
5
|
+
* Gated on `capabilities.agents.orgChart.supported` (root-first per RFC 0073).
|
|
6
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
7
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true` via `behaviorGate`. The always-on wire-shape
|
|
8
|
+
* coverage lives in `agent-org-chart-shape.test.ts`; this asserts host
|
|
9
|
+
* BEHAVIOR against the live `/v1/agents/org-chart` surface:
|
|
10
|
+
*
|
|
11
|
+
* 1. NORMATIVE read — `GET /v1/agents/org-chart` returns the
|
|
12
|
+
* `agent-org-chart.schema.json` shape `{ owner, departments, members }`;
|
|
13
|
+
* departments form a tree (every `parentDepartmentId` resolves; no cycle);
|
|
14
|
+
* members reference roster entries (`host:<id>` rosterId) and the
|
|
15
|
+
* `reportsTo` graph is acyclic. Black-box on any org-chart host.
|
|
16
|
+
* 2. §D RESPONSIBILITY ROLL-UP — `GET /v1/agents/org-chart/{departmentId}`
|
|
17
|
+
* returns `{ department, members, responsibilities }` where
|
|
18
|
+
* `responsibilities` is a deduped `string[]` (the union of the subtree
|
|
19
|
+
* members' RFC 0086 portfolios); `recursive=false` scopes to direct
|
|
20
|
+
* members without changing the response shape.
|
|
21
|
+
* 3. TENANT SCOPING (§C / RFC 0074) — a `GET /v1/agents/org-chart/{id}` for a
|
|
22
|
+
* department outside the caller's owner triple 404s (probed only when
|
|
23
|
+
* `OPENWOP_CROSS_TENANT_ORG_CHART_DEPARTMENT_ID` is supplied; soft-skip
|
|
24
|
+
* otherwise — the org-chart analog of the roster scoping env var).
|
|
25
|
+
*
|
|
26
|
+
* Spec references:
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-org-chart.md
|
|
28
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0087-agent-org-chart.md
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { readOrgChartCap, getOrgChart, getDepartmentView } from '../lib/agentOrgChart.js';
|
|
35
|
+
|
|
36
|
+
/** Shape a member's `rosterId` must have: `host:` followed by a lower-case id. */
const ROSTER_ID_RE = /^host:[a-z0-9][a-z0-9._-]*$/;

/**
 * Reports whether following parent links from `startId` ever revisits a
 * department id, i.e. whether `startId` sits on or leads into a cycle.
 *
 * The walk ends when the current id has no string parent (a root, or a parent
 * id that is not in the chart). It always terminates because every step adds
 * a new id to the visited set.
 *
 * @param startId department id to start from
 * @param parentOf parent id per department id
 * @returns `true` when a cycle is reachable from `startId`
 */
function hasParentCycle(startId: string, parentOf: ReadonlyMap<string, string | null | undefined>): boolean {
  const visited = new Set<string>();
  let cur: string | null | undefined = startId;
  while (typeof cur === 'string') {
    if (visited.has(cur)) return true;
    visited.add(cur);
    cur = parentOf.get(cur);
  }
  return false;
}

describe('agent-org-chart-scoping (RFC 0087 §A/§C/§D)', () => {
  // Three legs against the live org-chart surface: the normative read
  // (tree-shaped departments, roster-referencing members), the per-department
  // responsibility roll-up, and the optional cross-tenant 404 probe.
  it('serves the normative org-chart + responsibility roll-up, tree-shaped and tenant-scoped', async () => {
    const cap = await readOrgChartCap();
    if (!behaviorGate('openwop-org-chart-scoping', cap?.supported === true)) return;

    const installScope = typeof cap?.installScope === 'string' ? cap.installScope : 'tenant';
    expect(
      installScope === 'host' || installScope === 'tenant',
      driver.describe('RFC 0087 §E / RFC 0074', "agents.orgChart.installScope (when present) MUST be 'host' or 'tenant'"),
    ).toBe(true);

    // ---- Leg 1: normative read (black-box) -------------------------------
    const chart = await getOrgChart();
    if (chart === null) return; // advertised but read not served yet — soft-skip
    const departments = chart.departments ?? [];
    const members = chart.members ?? [];
    expect(
      Array.isArray(departments) && Array.isArray(members),
      driver.describe('agent-org-chart.schema.json', 'GET /v1/agents/org-chart MUST return departments[] + members[]'),
    ).toBe(true);

    const deptIds = new Set(departments.map((d) => d.departmentId).filter((x): x is string => typeof x === 'string'));
    for (const d of departments) {
      const parent = d.parentDepartmentId;
      if (parent !== undefined && parent !== null) {
        expect(
          deptIds.has(parent),
          driver.describe('agent-org-chart.md §A', 'every parentDepartmentId MUST resolve to a department in the chart (a tree)'),
        ).toBe(true);
      }
    }

    // Department tree is acyclic. The parent lookup is built once; when a
    // departmentId is duplicated the first entry wins.
    const parentOf = new Map<string, string | null | undefined>();
    for (const d of departments) {
      if (typeof d.departmentId === 'string' && !parentOf.has(d.departmentId)) {
        parentOf.set(d.departmentId, d.parentDepartmentId);
      }
    }
    for (const d of departments) {
      // A revisited id is the cycle signal. Comparing a step counter with the
      // department count can never fail once the walk stops on a revisit.
      const cyclic = typeof d.departmentId === 'string' && hasParentCycle(d.departmentId, parentOf);
      expect(
        !cyclic,
        driver.describe('agent-org-chart.md §A', 'the department parent graph MUST be acyclic'),
      ).toBe(true);
    }

    for (const m of members) {
      expect(
        typeof m.rosterId === 'string' && ROSTER_ID_RE.test(m.rosterId),
        driver.describe('agent-org-chart.md §A', 'each member MUST reference a roster entry (host:<id> rosterId)'),
      ).toBe(true);
      if (typeof m.departmentId === 'string') {
        expect(
          deptIds.size === 0 || deptIds.has(m.departmentId),
          driver.describe('agent-org-chart.md §A', "a member's departmentId MUST be a department in the chart"),
        ).toBe(true);
      }
    }

    // ---- Leg 2: §D responsibility roll-up --------------------------------
    const probeDeptId = departments[0]?.departmentId;
    if (typeof probeDeptId === 'string') {
      const { status, view } = await getDepartmentView(probeDeptId);
      if (status === 200 && view) {
        expect(
          Array.isArray(view.responsibilities),
          driver.describe('agent-org-chart.md §D', 'the responsibility view MUST carry a responsibilities[] roll-up'),
        ).toBe(true);
        const r = view.responsibilities ?? [];
        expect(
          r.length === new Set(r).size,
          driver.describe('agent-org-chart.md §D', 'responsibilities MUST be a deduped union (no duplicate workflow ids)'),
        ).toBe(true);
        expect(
          r.every((w) => typeof w === 'string'),
          driver.describe('org-chart-responsibility-view.schema.json', 'responsibilities[] entries MUST be workflow-id strings'),
        ).toBe(true);
        // recursive=false MUST keep the response shape (a subset roll-up).
        const direct = await getDepartmentView(probeDeptId, false);
        if (direct.status === 200 && direct.view) {
          expect(
            Array.isArray(direct.view.responsibilities),
            driver.describe('agent-org-chart.md §D', 'recursive=false MUST return the same shape, scoped to direct members'),
          ).toBe(true);
        }
      }
    }

    // ---- Leg 3: tenant scoping (RFC 0074) --------------------------------
    // Probed only when the env var names a department outside the caller's tenant.
    const crossTenantDept = process.env.OPENWOP_CROSS_TENANT_ORG_CHART_DEPARTMENT_ID;
    if (typeof crossTenantDept === 'string' && crossTenantDept.length > 0) {
      const probe = await getDepartmentView(crossTenantDept);
      expect(
        probe.status === 404,
        driver.describe('agent-org-chart.md §C / RFC 0074', 'GET /v1/agents/org-chart/{id} for a cross-tenant department MUST 404 (no cross-tenant disclosure)'),
      ).toBe(true);
    }
  });
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Org position confers no authority — the §B invariant, behavioral leg
|
|
3
|
+
* (RFC 0087 §B) — the protocol-tier `org-position-no-authority-escalation`.
|
|
4
|
+
*
|
|
5
|
+
* The STRUCTURAL leg (the `agent-org-chart.schema.json` is `additionalProperties:
|
|
6
|
+
* false` and rejects an authority-bearing field on a member) is always-on /
|
|
7
|
+
* server-free in `agent-org-chart-shape.test.ts`. This scenario is the
|
|
8
|
+
* BEHAVIORAL leg, gated on `capabilities.agents.orgChart.supported`: it proves
|
|
9
|
+
* against the LIVE host that the org-chart projector strips position-as-authority
|
|
10
|
+
* — no member, department, or responsibility-view object served on the wire
|
|
11
|
+
* carries an authority-bearing field (`scopes` / `canDispatch` / `permissions` /
|
|
12
|
+
* `authority` / `roleGrants` / `capabilities`), at every install scope. An org
|
|
13
|
+
* edge is an *ownership + reporting* record, never an authority grant.
|
|
14
|
+
*
|
|
15
|
+
* Soft-skips when unadvertised (default) / hard-fails under
|
|
16
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true`.
|
|
17
|
+
*
|
|
18
|
+
* The deeper authority-invariance legs — a manager agent cannot dispatch a
|
|
19
|
+
* report's tools (RFC 0002 §A14), an RFC 0049 authorization decision is
|
|
20
|
+
* invariant to org position, an RFC 0051 approval gate is not satisfied by org
|
|
21
|
+
* seniority — require a non-normative host authorization-decide hook to force
|
|
22
|
+
* black-box; a conformant host need not expose one, so (mirroring the RFC 0070
|
|
23
|
+
* `agent-manifest-runtime` confidence-escalation note) they stay reference-impl
|
|
24
|
+
* tier and are NOT asserted here. The wire-projection proof below is the
|
|
25
|
+
* load-bearing, hook-free behavioral guarantee.
|
|
26
|
+
*
|
|
27
|
+
* Spec references:
|
|
28
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/agent-org-chart.md (§B)
|
|
29
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0087-agent-org-chart.md (§B)
|
|
30
|
+
* - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (org-position-no-authority-escalation)
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { driver } from '../lib/driver.js';
|
|
35
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
36
|
+
import { readOrgChartCap, getOrgChart, getDepartmentView, AUTHORITY_FIELDS } from '../lib/agentOrgChart.js';
|
|
37
|
+
|
|
38
|
+
/**
 * Checks that an org-chart wire object exposes none of the authority-bearing
 * fields listed in `AUTHORITY_FIELDS`.
 *
 * A missing or non-object value is skipped without asserting anything. Every
 * listed field gets its own expectation, and the message handed to `expect`
 * names both the inspected object and the offending field.
 *
 * @param obj - The wire object to inspect; may be `undefined`.
 * @param where - Human-readable label for the object, interpolated into the message.
 */
function expectNoAuthority(obj: Record<string, unknown> | undefined, where: string): void {
  if (obj && typeof obj === 'object') {
    for (const field of AUTHORITY_FIELDS) {
      // `in` is used on purpose: presence of the key alone is the violation,
      // regardless of the value stored under it.
      const absent = !(field in obj);
      expect(
        absent,
        driver.describe(
          'RFC 0087 §B / org-position-no-authority-escalation',
          `${where} MUST NOT carry the authority field "${field}" — org position confers no authority`,
        ),
      ).toBe(true);
    }
  }
}
|
|
48
|
+
|
|
49
|
+
describe('org-position-no-authority-escalation (RFC 0087 §B, behavioral)', () => {
  it('the live org-chart wire carries no authority-bearing field on any member/department/view', async () => {
    // Gate on the advertised capability; `behaviorGate` decides whether the
    // scenario proceeds when the capability is not reported as supported.
    const orgChartCap = await readOrgChartCap();
    const advertised = orgChartCap?.supported === true;
    if (!behaviorGate('openwop-org-position-no-authority', advertised)) return;

    const orgChart = await getOrgChart();
    // Advertised, but the read yielded no chart — nothing to assert (soft-skip).
    if (orgChart === null) return;

    const members = orgChart.members ?? [];
    const departments = orgChart.departments ?? [];

    // Top-level chart: every member first, then every department.
    for (const member of members) {
      expectNoAuthority(member as Record<string, unknown>, 'an org-chart member');
    }
    for (const department of departments) {
      expectNoAuthority(department as Record<string, unknown>, 'an org-chart department');
    }

    // The §D responsibility roll-up is probed through the first department only;
    // without a string department id there is nothing further to check.
    const probeDeptId = departments[0]?.departmentId;
    if (typeof probeDeptId !== 'string') return;

    const response = await getDepartmentView(probeDeptId);
    if (response.status !== 200 || !response.view) return;

    // The view object itself, its embedded department, and each of its members
    // must all be free of authority-bearing fields.
    const view = response.view;
    expectNoAuthority(view as unknown as Record<string, unknown>, 'the responsibility view');
    expectNoAuthority(view.department as Record<string, unknown> | undefined, "the responsibility view's department");
    for (const viewMember of view.members ?? []) {
      expectNoAuthority(viewMember as Record<string, unknown>, 'a responsibility-view member');
    }
  });
});
|