npm - @openwop/openwop-conformance - Versions diffs - 1.10.0 → 1.11.0 - Mend

@openwop/openwop-conformance 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/CHANGELOG.md +34 -0
package/README.md +2 -2
package/api/asyncapi.yaml +70 -0
package/api/openapi.yaml +268 -1
package/coverage.md +30 -2
package/fixtures/oauth-providers/synthetic.json +38 -0
package/fixtures.md +10 -0
package/package.json +1 -1
package/schemas/README.md +12 -0
package/schemas/agent-deployment-transition.schema.json +49 -0
package/schemas/agent-deployment.schema.json +54 -0
package/schemas/agent-eval-suite.schema.json +140 -0
package/schemas/agent-inventory-response.schema.json +25 -0
package/schemas/agent-manifest.schema.json +5 -0
package/schemas/agent-org-chart.schema.json +82 -0
package/schemas/agent-ref.schema.json +12 -2
package/schemas/agent-roster-entry.schema.json +81 -0
package/schemas/agent-roster-response.schema.json +21 -0
package/schemas/budget-policy.schema.json +18 -0
package/schemas/capabilities.schema.json +277 -0
package/schemas/credential-provenance.schema.json +18 -0
package/schemas/eval-summary.schema.json +92 -0
package/schemas/node-pack-manifest.schema.json +17 -0
package/schemas/org-chart-responsibility-view.schema.json +26 -0
package/schemas/run-event-payloads.schema.json +286 -3
package/schemas/run-event.schema.json +19 -0
package/schemas/tool-descriptor.schema.json +63 -0
package/schemas/trigger-subscription.schema.json +26 -0
package/src/lib/agentRoster.ts +76 -0
package/src/lib/liveRuntime.ts +59 -0
package/src/lib/profiles.ts +157 -0
package/src/lib/runtimeRequires.ts +38 -0
package/src/lib/safeFetch.ts +87 -0
package/src/scenarios/agent-deployment-shape.test.ts +139 -0
package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
package/src/scenarios/agent-live-structured-output.test.ts +58 -0
package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
package/src/scenarios/agent-platform-profile.test.ts +158 -0
package/src/scenarios/agent-roster-attribution.test.ts +179 -0
package/src/scenarios/agent-roster-shape.test.ts +146 -0
package/src/scenarios/budget-policy-shape.test.ts +136 -0
package/src/scenarios/egress-provenance-shape.test.ts +137 -0
package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
package/src/scenarios/runtime-requires-shape.test.ts +134 -0
package/src/scenarios/safefetch-behavior.test.ts +99 -0
package/src/scenarios/safefetch-live-audit.test.ts +175 -0
package/src/scenarios/spec-corpus-validity.test.ts +19 -3
package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0

package/src/scenarios/agent-platform-profile.test.ts ADDED Viewed

@@ -0,0 +1,158 @@
+/**
+ * openwop-agent-platform — operational-annex predicate + status derivation (RFC 0085).
+ *
+ * Always-on, server-free derivation probe. Verifies that:
+ *   - `isAgentPlatformPartial` / `isAgentPlatformFull` / `agentPlatformStatus`
+ *     derive `none` / `partial` / `full` correctly from representative discovery
+ *     payloads (RFC 0085 §B).
+ *   - the floor's replay-OR-nondeterminism term is honored: a host with no
+ *     `replay.supported` but `nondeterminismPolicy.declared: true` still meets the
+ *     floor.
+ *   - the `full` tier requires the governance terms (RBAC + tenant installScope +
+ *     memory.attribution + debug-bundle + trigger-bridge + egress-policy); a host
+ *     missing any reports `partial`, never `full` (the honest-advertisement rule).
+ *   - `capabilities.nondeterminismPolicy.declared` is declared in the schema.
+ *
+ * The LIVE aggregate-evidence assertion (does every required constituent scenario
+ * actually pass against a host claiming `full`?) is the `Active → Accepted` step
+ * per RFC 0085 §C — naturally gated on a reference host reaching partial/full, and
+ * deferred here. This scenario asserts the discovery-predicate derivation only.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/agent-platform-profile.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0085-agent-platform-meta-profile.md
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+import { isAgentPlatformPartial, isAgentPlatformFull, agentPlatformStatus, agentPlatformSatisfiedTerms } from '../lib/profiles.js';
+/** Formats an assertion message as `<specRef> — <requirement>`. */
+const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
+/** Core discovery fields spread first into every synthetic payload built in this file. */
+const CORE = {
+  protocolVersion: '1.0',
+  supportedEnvelopes: ['clarification.request'],
+  schemaVersions: {},
+  limits: {
+    clarificationRounds: 1,
+    schemaRounds: 1,
+    envelopesPerTurn: 1,
+  },
+};
+/**
+ * Builds a discovery payload meeting the §B floor (partial).
+ *
+ * The result is the CORE fields, then one `supported: true` flag per floor
+ * capability, then the caller's `extra` entries. `extra` is applied last, so
+ * a key it supplies replaces the default top-level entry of the same name
+ * wholesale (there is no deep merge).
+ */
+function floorPayload(extra: Record<string, unknown> = {}): Record<string, unknown> {
+  // A fresh object per flag, so no two capabilities share a reference.
+  const on = (): { supported: boolean } => ({ supported: true });
+  const floorFlags: Record<string, unknown> = {
+    agents: { manifestRuntime: on(), liveRuntime: on() },
+    toolCatalog: on(),
+    toolHooks: on(),
+    httpClient: { safeFetch: on() },
+    providerUsage: on(),
+    prompts: on(),
+    memory: on(),
+    feedback: on(),
+    replay: on(),
+  };
+  return Object.assign({}, CORE, floorFlags, extra);
+}
+describe('agent-platform-profile: floor (partial) predicate (RFC 0085 §B, server-free)', () => {
+  // Every assertion message in this suite cites the same spec section.
+  const SECTION_B = 'agent-platform-profile.md §B';
+  it('a host meeting all floor flags is partial', () => {
+    const payload = floorPayload();
+    expect(isAgentPlatformPartial(payload), why(SECTION_B, 'all floor flags ⇒ partial')).toBe(true);
+    expect(agentPlatformStatus(payload)).toBe('partial');
+  });
+  it('missing a single floor flag (feedback) ⇒ none', () => {
+    const withoutFeedback = floorPayload({ feedback: { supported: false } });
+    expect(isAgentPlatformPartial(withoutFeedback), why(SECTION_B, 'a missing floor flag ⇒ not partial')).toBe(false);
+    expect(agentPlatformStatus(withoutFeedback)).toBe('none');
+  });
+  it('replay-OR-nondeterminism: no replay but declared nondeterminism still meets the floor', () => {
+    // Replay is switched off; the declared nondeterminism policy stands in for it.
+    const overrides = { replay: { supported: false }, nondeterminismPolicy: { declared: true } };
+    const payload = floorPayload(overrides);
+    expect(isAgentPlatformPartial(payload), why(SECTION_B, 'declared nondeterminism satisfies the replay-OR term')).toBe(true);
+  });
+  it('neither replay nor declared nondeterminism ⇒ floor unmet', () => {
+    const payload = floorPayload({ replay: { supported: false } });
+    expect(isAgentPlatformPartial(payload), why(SECTION_B, 'neither replay nor declared policy ⇒ not partial')).toBe(false);
+  });
+});
+describe('agent-platform-profile: full predicate + honest-advertisement (RFC 0085 §B/§D, server-free)', () => {
+  // Governance-tier flags layered over the floor payload. `agents`, `memory`
+  // and `httpClient` replace the floor's entries of the same name wholesale,
+  // so each one restates the floor flags it still needs.
+  const governance = {
+    authorization: { supported: true },
+    agents: { manifestRuntime: { supported: true, installScope: 'tenant' }, liveRuntime: { supported: true } },
+    memory: { supported: true, attribution: { supported: true } },
+    debugBundle: { supported: true },
+    triggerBridge: { supported: true },
+    httpClient: { safeFetch: { supported: true }, egressPolicy: { supported: true } },
+  };
+  it('a host meeting floor + all governance terms is full', () => {
+    const host = floorPayload(governance);
+    expect(isAgentPlatformFull(host), why('agent-platform-profile.md §B', 'floor + governance ⇒ full')).toBe(true);
+    expect(agentPlatformStatus(host)).toBe('full');
+  });
+  it('a host advertising governance flags but missing tenant installScope reports partial, not full', () => {
+    // Same governance flags, but the manifest runtime is host-scoped instead of tenant-scoped.
+    const hostScopedAgents = { manifestRuntime: { supported: true, installScope: 'host' }, liveRuntime: { supported: true } };
+    const host = floorPayload({ ...governance, agents: hostScopedAgents });
+    expect(isAgentPlatformFull(host), why('agent-platform-profile.md §D', 'missing a governance term ⇒ MUST NOT be full')).toBe(false);
+    expect(agentPlatformStatus(host)).toBe('partial');
+  });
+  it('eval/deploy/budget are NOT hard full terms (a full host without them is still full)', () => {
+    // The payload carries no agents.evalSuite / agents.deployment / budget entries.
+    const host = floorPayload(governance);
+    expect(isAgentPlatformFull(host), why('agent-platform-profile.md §B', 'platform-plus tier is advisory, not a hard full term')).toBe(true);
+  });
+});
+describe('agent-platform-profile: satisfiedTerms[] non-contiguous adoption (RFC 0085 §D, server-free)', () => {
+  it('a host honoring full-tier terms but failing floor terms is status none yet has a non-empty satisfiedTerms[]', () => {
+    // The real-host (MyndHyve) shape: three full terms are satisfied (RBAC,
+    // memory.attribution, tenant installScope) while the liveRuntime /
+    // toolCatalog / providerUsage / memory floor terms are absent.
+    const agentsWithoutLiveRuntime = { manifestRuntime: { supported: true, installScope: 'tenant' } };
+    const attributionOnlyMemory = { attribution: { supported: true } }; // attribution but NOT memory.supported
+    const host = {
+      ...CORE,
+      agents: agentsWithoutLiveRuntime,
+      authorization: { supported: true },
+      memory: attributionOnlyMemory,
+      toolHooks: { supported: true },
+      httpClient: { safeFetch: { supported: true } },
+      prompts: { supported: true },
+      feedback: { supported: true },
+      replay: { supported: true },
+    } as Record<string, unknown>;
+    expect(agentPlatformStatus(host), why('agent-platform-profile.md §D', 'floor unmet ⇒ none')).toBe('none');
+    const satisfied = agentPlatformSatisfiedTerms(host);
+    expect(satisfied.includes('full:authorization'), why('§D', 'a satisfied full term is reported even at none')).toBe(true);
+    expect(satisfied.includes('full:memory.attribution')).toBe(true);
+    expect(satisfied.includes('full:tenant-installScope')).toBe(true);
+    expect(satisfied.includes('floor:agents.liveRuntime'), why('§D', 'an unmet floor term is NOT reported')).toBe(false);
+    // Non-empty, so this host is distinguishable from a 0/16 do-nothing host.
+    expect(satisfied.length).toBeGreaterThan(0);
+  });
+  it('a full host reports all sixteen terms satisfied', () => {
+    const governance = {
+      authorization: { supported: true },
+      agents: { manifestRuntime: { supported: true, installScope: 'tenant' }, liveRuntime: { supported: true } },
+      memory: { supported: true, attribution: { supported: true } },
+      debugBundle: { supported: true },
+      triggerBridge: { supported: true },
+      httpClient: { safeFetch: { supported: true }, egressPolicy: { supported: true } },
+    };
+    const satisfiedCount = agentPlatformSatisfiedTerms(floorPayload(governance)).length;
+    expect(satisfiedCount, why('§D', 'a full host satisfies all 16 terms')).toBe(16);
+  });
+});
+describe('agent-platform-profile: capability shape (RFC 0085, server-free)', () => {
+  it('capabilities.nondeterminismPolicy.declared is declared', () => {
+    // Only the two `properties` levels this test walks are typed.
+    type CapabilitiesSchema = { properties?: Record<string, { properties?: Record<string, unknown> }> };
+    const schemaText = readFileSync(join(SCHEMAS_DIR, 'capabilities.schema.json'), 'utf8');
+    const caps = JSON.parse(schemaText) as CapabilitiesSchema;
+    const declared = caps.properties?.nondeterminismPolicy?.properties?.declared;
+    const message = why('agent-platform-profile.md §B', 'capabilities.nondeterminismPolicy.declared MUST be declared');
+    expect(declared, message).toBeDefined();
+  });
+});

package/src/scenarios/agent-roster-attribution.test.ts ADDED Viewed

@@ -0,0 +1,179 @@
+/**
+ * Standing-agent roster attribution + ordering (RFC 0086 §B/§C) — behavioral.
+ *
+ * Gated on `capabilities.agents.roster.supported` (root-first per RFC 0073).
+ * Soft-skips when unadvertised (default) / hard-fails under
+ * `OPENWOP_REQUIRE_BEHAVIOR=true` via `behaviorGate`. The companion always-on
+ * wire-shape coverage lives in `agent-roster-shape.test.ts`; this scenario
+ * asserts host BEHAVIOR:
+ *
+ *   1. NORMATIVE read — `GET /v1/agents/roster` (RFC 0086 §B) returns the
+ *      `agent-roster-response` shape (roster[] + `total == roster.length`), and
+ *      every entry carries a `host:<id>` `rosterId`, a `persona`, an
+ *      `agentRef.agentId`, and an `owner.tenantId`. Runs black-box against the
+ *      normative path on any roster host.
+ *   2. ATTRIBUTION + ORDERING (seam-gated) — a portfolio fire emits
+ *      `roster.run.initiated` as the run's FIRST attribution event, BEFORE any
+ *      `agent.invocation.*` / `agent.*` event (§C), content-free (no work-item
+ *      `body`/`prompt`/credential — the `roster-attribution-no-content`
+ *      invariant), with `rosterId`/`persona`/`agentId`/`workflowId`/
+ *      `triggerSource`. A durable work-item fire additionally carries
+ *      `triggerSubscriptionId` (RFC 0083) traceable on the run's `causationId`.
+ *   3. TENANT SCOPING (§B / RFC 0074) — a `GET /v1/agents/roster/{id}` for an id
+ *      outside the caller's owner triple 404s (probed only when a cross-tenant id
+ *      is supplied via `OPENWOP_CROSS_TENANT_ROSTER_ID`; soft-skip otherwise).
+ *
+ * The fire + event-log seams are OPTIONAL (reference roster store deferred per
+ * RFC 0086 §Conformance); each leg soft-skips independently so a host that
+ * serves only the normative read still exercises leg 1.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/agent-roster.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0086-standing-agent-roster-and-workflow-portfolio.md
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (roster-attribution-no-content)
+ */
+import { describe, it, expect } from 'vitest';
+import { driver } from '../lib/driver.js';
+import { behaviorGate } from '../lib/behavior-gate.js';
+import { readRosterCap, listRoster, getRosterEntry, fireRosterPortfolio } from '../lib/agentRoster.js';
+import {
+  queryTestEvents,
+  isEventLogSeamAvailable,
+  resetTestSeam,
+  type TestEvent,
+} from '../lib/event-log-query.js';
+/** Roster id shape: `host:`, one lowercase alphanumeric, then any run of lowercase alphanumerics, `.`, `_` or `-`. */
+const ROSTER_ID_RE = /^host:[a-z0-9][a-z0-9._-]*$/;
+/**
+ * Returns the event with the lowest `sequence` among those whose `type` is
+ * listed in `types`, or `undefined` when none is present. Sorting happens on
+ * the filtered copy, so the caller's `events` array keeps its order.
+ */
+function firstOf(events: TestEvent[], types: string[]): TestEvent | undefined {
+  const candidates = events.filter((event) => types.includes(event.type));
+  candidates.sort((left, right) => left.sequence - right.sequence);
+  return candidates[0];
+}
+describe('agent-roster-attribution (RFC 0086 §B/§C)', () => {
+  // One scenario with three legs: the normative read, the seam-gated
+  // attribution + ordering check, and the opt-in cross-tenant probe.
+  // The whole scenario returns early when `behaviorGate` declines it or when
+  // `listRoster()` yields null.
+  it('serves the normative roster, attributes a portfolio fire content-free + ordered, and tenant-scopes', async () => {
+    const cap = await readRosterCap();
+    if (!behaviorGate('openwop-roster-attribution', cap?.supported === true)) return;
+    // RFC 0074 carry-forward: installScope MUST be host|tenant when present.
+    const installScope = typeof cap?.installScope === 'string' ? cap.installScope : 'host';
+    expect(
+      installScope === 'host' || installScope === 'tenant',
+      driver.describe('RFC 0086 §F / RFC 0074 §B', "agents.roster.installScope (when present) MUST be 'host' or 'tenant'"),
+    ).toBe(true);
+    // ---- Leg 1: normative read (black-box on any roster host) -------------
+    const body = await listRoster();
+    if (body === null) return; // host advertises roster but doesn't serve the read yet — soft-skip
+    // Assert on the raw response value. Defaulting a missing `roster` to []
+    // before this check would let the assertion pass vacuously.
+    expect(
+      Array.isArray(body.roster),
+      driver.describe('agent-roster.md §B', 'GET /v1/agents/roster MUST return a roster[] array'),
+    ).toBe(true);
+    // The fallback is unreachable at runtime (the assertion above throws
+    // first); it only narrows the type for the loop below.
+    const roster = Array.isArray(body.roster) ? body.roster : [];
+    expect(
+      body.total === roster.length,
+      driver.describe('agent-roster-response.schema.json', 'total MUST equal roster.length'),
+    ).toBe(true);
+    for (const entry of roster) {
+      expect(
+        typeof entry.rosterId === 'string' && ROSTER_ID_RE.test(entry.rosterId),
+        driver.describe('agent-roster-entry.schema.json', 'each entry MUST carry a host:<id> rosterId'),
+      ).toBe(true);
+      expect(
+        typeof entry.persona === 'string' && entry.persona.length > 0,
+        driver.describe('agent-roster.md §A', 'each entry MUST carry a non-empty persona'),
+      ).toBe(true);
+      expect(
+        typeof entry.agentRef?.agentId === 'string',
+        driver.describe('agent-roster.md §A', 'each entry MUST reference an agentRef.agentId'),
+      ).toBe(true);
+      expect(
+        typeof entry.owner?.tenantId === 'string',
+        driver.describe('agent-roster.md §B / RFC 0074', 'each entry MUST carry an owner.tenantId scope'),
+      ).toBe(true);
+      // RFC 0082 §A XOR: an agentRef MUST NOT pin both version and channel.
+      expect(
+        !(entry.agentRef?.version !== undefined && entry.agentRef?.channel !== undefined),
+        driver.describe('RFC 0082 §A', 'agentRef MUST NOT carry both version and channel'),
+      ).toBe(true);
+    }
+    // ---- Leg 2: attribution + ordering (seam-gated) ----------------------
+    if (await isEventLogSeamAvailable()) {
+      try {
+        // Scheduled portfolio fire.
+        const fired = await fireRosterPortfolio({ triggerSource: 'schedule' });
+        if (fired?.runId) {
+          const q = await queryTestEvents(fired.runId);
+          if (q.ok) {
+            const init = firstOf(q.events, ['roster.run.initiated']);
+            expect(
+              init !== undefined,
+              driver.describe('agent-roster.md §C', 'a portfolio fire MUST emit roster.run.initiated'),
+            ).toBe(true);
+            if (init) {
+              // Ordering: roster.run.initiated precedes ANY agent invocation/event.
+              const firstAgent = firstOf(q.events, [
+                'agent.invocation.started',
+                'agent.reasoned',
+                'agent.decided',
+              ]);
+              if (firstAgent) {
+                expect(
+                  init.sequence < firstAgent.sequence,
+                  driver.describe('agent-roster.md §C', 'roster.run.initiated MUST precede any agent.* event in the run'),
+                ).toBe(true);
+              }
+              // Content-free: required ids present; NO work-item body/prompt/credential.
+              const p = init.payload;
+              for (const key of ['rosterId', 'persona', 'agentId', 'workflowId', 'triggerSource']) {
+                expect(
+                  typeof p[key] === 'string' && (p[key] as string).length > 0,
+                  driver.describe('run-event-payloads.schema.json#rosterRunInitiated', `roster.run.initiated MUST carry ${key}`),
+                ).toBe(true);
+              }
+              for (const forbidden of ['body', 'prompt', 'input', 'payload', 'apiKey', 'secret', 'credentials', 'token']) {
+                expect(
+                  !(forbidden in p),
+                  driver.describe('SECURITY roster-attribution-no-content', `roster.run.initiated MUST be content-free (no ${forbidden})`),
+                ).toBe(true);
+              }
+              expect(
+                typeof p.rosterId === 'string' && ROSTER_ID_RE.test(p.rosterId),
+                driver.describe('agent-roster.md §C', 'roster.run.initiated.rosterId MUST be a host:<id> AgentRef id'),
+              ).toBe(true);
+            }
+          }
+        }
+        // Durable work-item fire: carries the RFC 0083 triggerSubscriptionId + causation.
+        const work = await fireRosterPortfolio({ triggerSource: 'webhook', asWorkItem: true });
+        if (work?.runId) {
+          const q = await queryTestEvents(work.runId, { type: 'roster.run.initiated' });
+          if (q.ok && q.events[0]) {
+            const p = q.events[0].payload;
+            expect(
+              typeof p.triggerSubscriptionId === 'string' && (p.triggerSubscriptionId as string).length > 0,
+              driver.describe('agent-roster.md §D / RFC 0083', 'a durable work-item fire MUST carry triggerSubscriptionId for trigger→run→roster ancestry'),
+            ).toBe(true);
+          }
+        }
+      } finally {
+        // Reset even when an assertion above throws, so a failing leg does
+        // not skip the seam reset.
+        await resetTestSeam();
+      }
+    }
+    // ---- Leg 3: tenant scoping (RFC 0074) --------------------------------
+    // Probed only when a cross-tenant id is supplied through the environment.
+    const crossTenantId = process.env.OPENWOP_CROSS_TENANT_ROSTER_ID;
+    if (typeof crossTenantId === 'string' && crossTenantId.length > 0) {
+      const probe = await getRosterEntry(crossTenantId);
+      expect(
+        probe.status === 404,
+        driver.describe('agent-roster.md §B / RFC 0074', "GET /v1/agents/roster/{id} for a cross-tenant id MUST 404 (no cross-tenant disclosure)"),
+      ).toBe(true);
+    }
+  });
+});

package/src/scenarios/agent-roster-shape.test.ts ADDED Viewed

@@ -0,0 +1,146 @@
+/**
+ * Standing agent roster — entry + capability + attribution-event shapes (RFC 0086).
+ *
+ * Always-on, server-free schema-shape probe. Verifies that:
+ *   - `capabilities.agents.roster` is declared with its `supported` /
+ *     `installScope` / `portfolioTriggerSources` sub-flags.
+ *   - `agent-roster-entry.schema.json` compiles and round-trips a conforming
+ *     entry, and rejects malformed ones (a non-`host:` rosterId; an `agentRef`
+ *     carrying BOTH `version` and `channel` — the RFC 0082 §A XOR rule).
+ *   - the `roster.run.initiated` payload $def validates a conforming
+ *     content-free attribution record and requires its ids + persona.
+ *   - `roster.run.initiated` is CONTENT-FREE: a payload carrying a work-item
+ *     `body` or a `prompt` is rejected (`additionalProperties:false`). This is
+ *     the public test for the protocol-tier SECURITY invariant
+ *     `roster-attribution-no-content`.
+ *   - the `AgentInventoryEntry` carries the additive optional `roster`
+ *     portfolio projection (RFC 0086 §B).
+ *   - `roster.run.initiated` appears in the RunEventType enum.
+ *
+ * Behavioral assertions (a scheduled portfolio fire emitting roster.run.initiated
+ * before agent.invocation.started; the work-item causation chain; the replay
+ * re-read; cross-tenant 404) are gated on `capabilities.agents.roster.supported`
+ * and land at Active → Accepted (reference-host roster store deferred per RFC 0086
+ * §Conformance). This scenario asserts the wire contract, not host behavior.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/agent-roster.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0086-standing-agent-roster-and-workflow-portfolio.md
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (roster-attribution-no-content)
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+/** Server-free assertion-message helper: renders `<specRef> — <requirement>`. */
+const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
+/**
+ * Reads the file `name` under SCHEMAS_DIR as UTF-8 text and returns the
+ * parsed JSON, typed as a plain string-keyed record.
+ */
+function loadSchema(name: string): Record<string, unknown> {
+  const schemaText = readFileSync(join(SCHEMAS_DIR, name), 'utf8');
+  return JSON.parse(schemaText) as Record<string, unknown>;
+}
+describe('agent-roster-shape: capability advertisement (RFC 0086, server-free)', () => {
+  it('the capabilities schema declares agents.roster with its sub-flags', () => {
+    // Only the `properties` levels this test walks are typed.
+    type FlagHolder = { properties?: Record<string, unknown> };
+    type CapabilityGroup = { properties?: Record<string, FlagHolder> };
+    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, CapabilityGroup>;
+    const roster = topLevel.agents?.properties?.roster;
+    expect(roster, why('capabilities.md §agents', 'agents.roster MUST be declared')).toBeDefined();
+    // Each advertised sub-flag must have a declaration under agents.roster.
+    ['supported', 'installScope', 'portfolioTriggerSources'].forEach((flag) => {
+      expect(roster?.properties?.[flag], why('agent-roster.md §F', `agents.roster.${flag} MUST be declared`)).toBeDefined();
+    });
+  });
+});
+describe('agent-roster-shape: roster entry (RFC 0086 §A, server-free)', () => {
+  // The validator is compiled once, in the describe body, and shared by both tests.
+  const ajv = new Ajv2020({ strict: false, allErrors: true });
+  addFormats(ajv);
+  const validateEntry = ajv.compile(loadSchema('agent-roster-entry.schema.json'));
+  it('AgentRosterEntry validates a conforming entry', () => {
+    const conforming = {
+      rosterId: 'host:sally-marketing',
+      persona: 'Sally',
+      agentRef: { agentId: 'core.openwop.agents.brief-writer', channel: 'stable' },
+      workflows: ['marketing-email-campaign'],
+      owner: { tenantId: 'acme', workspaceId: 'growth' },
+      enabled: true,
+    };
+    expect(validateEntry(conforming), why('RFC 0086 §A', 'a conforming roster entry MUST validate')).toBe(true);
+  });
+  it('rejects a non-host: rosterId and an agentRef carrying both version and channel', () => {
+    const minimal = {
+      rosterId: 'host:sally-marketing',
+      persona: 'Sally',
+      agentRef: { agentId: 'core.openwop.agents.brief-writer' },
+      owner: { tenantId: 'acme' },
+    };
+    // Each candidate below departs from `minimal` in exactly one way.
+    const wrongPrefix = { ...minimal, rosterId: 'core.openwop.agents.sally' };
+    expect(validateEntry(wrongPrefix), why('RFC 0086 §A', 'a non-`host:` rosterId MUST be rejected')).toBe(false);
+    const versionAndChannel = { ...minimal, agentRef: { agentId: 'core.x.y.z', version: '1.0.0', channel: 'stable' } };
+    expect(
+      validateEntry(versionAndChannel),
+      why('RFC 0082 §A', 'an agentRef with BOTH version and channel MUST be rejected'),
+    ).toBe(false);
+    const noRosterId = { persona: 'x', agentRef: { agentId: 'core.x.y.z' }, owner: { tenantId: 'acme' } };
+    expect(validateEntry(noRosterId), why('RFC 0086 §A', 'a roster entry without rosterId MUST be rejected')).toBe(false);
+  });
+});
+describe('agent-roster-shape: roster.run.initiated event (RFC 0086 §C, server-free)', () => {
+  // The payload schema is registered under the key 'payloads' so the
+  // rosterRunInitiated $def can be fetched by JSON-pointer reference.
+  const payloadSchema = loadSchema('run-event-payloads.schema.json');
+  const ajv = new Ajv2020({ strict: false, allErrors: true });
+  addFormats(ajv);
+  ajv.addSchema(payloadSchema, 'payloads');
+  const validateInitiated = ajv.getSchema('payloads#/$defs/rosterRunInitiated');
+  it('roster.run.initiated validates a content-free attribution record and requires its ids', () => {
+    expect(validateInitiated, 'the rosterRunInitiated $def MUST exist').toBeTruthy();
+    const conforming = {
+      rosterId: 'host:sally-marketing',
+      persona: 'Sally',
+      agentId: 'core.openwop.agents.brief-writer',
+      workflowId: 'marketing-email-campaign',
+      triggerSource: 'schedule',
+    };
+    expect(
+      validateInitiated!(conforming),
+      why('RFC 0086 §C', 'a conforming roster.run.initiated payload MUST validate'),
+    ).toBe(true);
+    const idsMissing = { rosterId: 'host:s', persona: 'S' };
+    expect(validateInitiated!(idsMissing), why('RFC 0086 §C', 'roster.run.initiated without agentId/workflowId/triggerSource MUST be rejected')).toBe(false);
+  });
+  it('roster.run.initiated is content-free — a work-item body and a prompt are rejected (roster-attribution-no-content)', () => {
+    const attribution = { rosterId: 'host:s', persona: 'S', agentId: 'a.b.c.d', workflowId: 'wf', triggerSource: 'queue' };
+    const withBody = { ...attribution, body: 'the card description' };
+    expect(
+      validateInitiated!(withBody),
+      why('SECURITY invariant roster-attribution-no-content', 'roster.run.initiated MUST NOT carry the work-item body'),
+    ).toBe(false);
+    const withPrompt = { ...attribution, prompt: 'system: …' };
+    expect(
+      validateInitiated!(withPrompt),
+      why('SECURITY invariant roster-attribution-no-content', 'roster.run.initiated MUST NOT carry prompt content'),
+    ).toBe(false);
+  });
+});
+describe('agent-roster-shape: inventory projection + enum (RFC 0086 §B, server-free)', () => {
+  it('AgentInventoryEntry carries the additive optional roster portfolio projection', () => {
+    const inventorySchema = loadSchema('agent-inventory-response.schema.json');
+    const defs = inventorySchema.$defs as Record<string, { properties?: Record<string, unknown> }>;
+    // Falls back to an empty record so a missing $def fails the assertion rather than throwing.
+    const entryProps = defs.AgentInventoryEntry?.properties ?? {};
+    expect(entryProps.roster, why('RFC 0086 §B', 'AgentInventoryEntry.roster (the portfolio projection) MUST be declared')).toBeDefined();
+  });
+  it('roster.run.initiated appears in the RunEventType enum', () => {
+    const defs = loadSchema('run-event.schema.json').$defs as Record<string, { enum?: string[] }>;
+    const eventTypes = defs.RunEventType?.enum ?? [];
+    expect(eventTypes).toContain('roster.run.initiated');
+  });
+  it('the GET /v1/agents/roster response schema validates + rejects extras (RFC 0086 §B)', () => {
+    const ajv = new Ajv2020({ strict: false, allErrors: true });
+    addFormats(ajv);
+    // The entry schema is registered under this URL before the response schema is compiled.
+    const entrySchemaUrl = 'https://openwop.dev/spec/v1/agent-roster-entry.schema.json';
+    ajv.addSchema(loadSchema('agent-roster-entry.schema.json'), entrySchemaUrl);
+    const validateResponse = ajv.compile(loadSchema('agent-roster-response.schema.json'));
+    const conforming = {
+      roster: [{ rosterId: 'host:sally', persona: 'Sally', agentRef: { agentId: 'core.x.y.z' }, owner: { tenantId: 'acme' } }],
+      total: 1,
+    };
+    expect(validateResponse(conforming), why('RFC 0086 §B', 'a conforming GET /v1/agents/roster response MUST validate')).toBe(true);
+    const withExtra = { ...conforming, unexpected: true };
+    expect(validateResponse(withExtra), why('RFC 0086 §B', 'an extra top-level property MUST be rejected')).toBe(false);
+    const withoutTotal = { roster: [] };
+    expect(validateResponse(withoutTotal), why('RFC 0086 §B', 'the response MUST require `total`')).toBe(false);
+  });
+});

package/src/scenarios/budget-policy-shape.test.ts ADDED Viewed

@@ -0,0 +1,136 @@
+/**
+ * Budget, quota, and cost policy — policy + events + cap.breached kinds (RFC 0084).
+ *
+ * Always-on, server-free schema-shape probe. Verifies that:
+ *   - `budget-policy.schema.json` round-trips a conforming `BudgetPolicy` and
+ *     rejects malformed ones: a wall-time field (the §A orthogonality guard —
+ *     rejected by `additionalProperties:false`, because wall-time is RFC 0058's
+ *     `runTimeoutMs`), a `thresholdPercent` outside 0..100, and an out-of-enum
+ *     `onExhaustion`.
+ *   - the four `budget.{reserved,consumed,threshold.crossed,exhausted}` payload
+ *     $defs validate conforming content-free records and reject malformed ones.
+ *   - the four new `cap.breached.kind` values (`budget-tokens`/`budget-cost`/
+ *     `budget-tool-calls`/`budget-retries`) are present in the enum.
+ *   - the four `budget.*` event names appear in the RunEventType enum.
+ *   - the `budget.*` payloads are CONTENT-FREE OF PRICING: none declares a
+ *     rate-card / per-token-price / model-prose property (the public test for the
+ *     protocol-tier SECURITY invariant `budget-no-pricing-leak`).
+ *   - `capabilities.budget` + `limits.maxBudget{Tokens,CostUsd}` are declared.
+ *
+ * Behavioral assertions (accrue → threshold → exhaust → `cap.breached{budget-cost}`
+ * → `run.failed{budget_exhausted}`; `budget_model_denied`; the advisory no-stop
+ * path) are gated on `capabilities.budget.supported` + `enforce` and land in
+ * `budget-enforcement.test.ts` (deferred per RFC 0084 §Conformance — reference host
+ * deferred). This scenario asserts the wire contract, not host behavior.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/budget-policy.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0084-budget-quota-and-cost-policy.md
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (budget-no-pricing-leak)
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+const why = (specRef: string, requirement: string): string => `${specRef} — ${requirement}`; // assertion-message helper: "<specRef> — <requirement>"
+function loadSchema(name: string): Record<string, unknown> { // reads the file `name` under SCHEMAS_DIR as UTF-8 and JSON-parses it; read or parse failures throw
+  return JSON.parse(readFileSync(join(SCHEMAS_DIR, name), 'utf8')) as Record<string, unknown>; // unchecked cast: the parsed value is not verified to be an object
+}
+/**
+ * Property names that would betray pricing / credential prose leaking onto a budget event.
+ * Entries are all-lowercase: the declared-property check in this file lowercases each schema
+ * property name and looks for an exact match in this list.
+ * NOTE(review): exact matching means compound names (e.g. `ratePerToken` -> `ratepertoken`)
+ * are not covered by these entries — confirm whether that is intended.
+ */
+const PRICING_PROP_NAMES = ['ratecard', 'pricepertoken', 'unitprice', 'pricing', 'rate', 'credential', 'apikey', 'model'];
+describe('budget-policy-shape: BudgetPolicy (RFC 0084 §A, server-free)', () => {
+  const ajv = addFormats(new Ajv2020({ strict: false })); // the value returned by addFormats is used as the Ajv instance
+  const validate = ajv.compile(loadSchema('budget-policy.schema.json')); // compiled once, shared by every test in this describe
+  it('a conforming budget policy validates', () => {
+    expect(
+      validate({ maxTokens: 200000, maxCostUsd: 1.0, maxToolCalls: 50, maxRetries: 10, modelAllow: ['claude-*'], modelDeny: ['gpt-4-32k'], thresholdPercent: 80, onExhaustion: 'fail' }),
+      why('budget-policy.md §A', 'a conforming BudgetPolicy MUST validate'),
+    ).toBe(true);
+  });
+  it('the orthogonality guard: a wall-time field is rejected (it is RFC 0058 runTimeoutMs)', () => {
+    expect(validate({ maxCostUsd: 1.0, maxWallTimeMs: 60000 }), why('budget-policy.md §A/§E', 'wall-time is NOT a budget dimension')).toBe(false); // maxWallTimeMs is the extra field; per the file header the rejection comes from additionalProperties:false
+  });
+  it('rejects an out-of-range thresholdPercent and an out-of-enum onExhaustion', () => {
+    expect(validate({ thresholdPercent: 120 }), why('budget-policy.md §A', 'thresholdPercent MUST be 0..100')).toBe(false); // 120 is above the 0..100 range
+    expect(validate({ onExhaustion: 'explode' }), why('budget-policy.md §A', 'onExhaustion is a closed enum')).toBe(false);
+  });
+});
+describe('budget-policy-shape: budget.* events + cap.breached kinds (RFC 0084 §C/§D, server-free)', () => {
+  const payloads = loadSchema('run-event-payloads.schema.json'); // loaded once; reused for every $defs lookup below
+  const ajv = addFormats(new Ajv2020({ strict: false }));
+  const compile = (defName: string) => ajv.compile({ // each call builds and compiles a new wrapper schema rooted at one $def
+    $schema: 'https://json-schema.org/draft/2020-12/schema',
+    $defs: (payloads as { $defs: Record<string, unknown> }).$defs, // the whole $defs map is carried over (presumably so refs between defs resolve — confirm)
+    $ref: `#/$defs/${defName}`,
+  } as Record<string, unknown>);
+  it('the four budget.* payloads validate conforming content-free records', () => {
+    expect(compile('budgetReserved')({ effectiveBudget: { maxCostUsd: 1.0 }, scope: 'run' }), why('budget-policy.md §C', 'budget.reserved MUST validate')).toBe(true);
+    expect(compile('budgetConsumed')({ dimension: 'cost', consumed: 0.7, limit: 1.0, remaining: 0.3 }), why('budget-policy.md §C', 'budget.consumed MUST validate')).toBe(true);
+    expect(compile('budgetThresholdCrossed')({ dimension: 'cost', consumed: 0.8, limit: 1.0, percent: 80 }), why('budget-policy.md §C', 'budget.threshold.crossed MUST validate')).toBe(true);
+    expect(compile('budgetExhausted')({ dimension: 'cost', consumed: 1.02, limit: 1.0 }), why('budget-policy.md §C', 'budget.exhausted MUST validate')).toBe(true); // consumed (1.02) above limit (1.0) is still expected to validate
+  });
+  it('rejects an out-of-enum dimension and a missing required field', () => {
+    expect(compile('budgetConsumed')({ dimension: 'vibes', consumed: 1, limit: 2 }), why('budget-policy.md §C', 'dimension is a closed enum')).toBe(false);
+    expect(compile('budgetExhausted')({ dimension: 'cost', consumed: 1.0 }), why('budget-policy.md §C', 'limit is REQUIRED')).toBe(false); // `limit` deliberately omitted
+  });
+  it('the cap.breached kind enum carries the four budget-* values', () => {
+    const kinds = ((payloads.$defs as Record<string, { properties?: Record<string, { enum?: string[] }> }>).capBreached.properties?.kind?.enum) ?? []; // NOTE(review): capBreached itself is not optional-chained — a missing def throws a TypeError rather than failing an expect
+    for (const k of ['budget-tokens', 'budget-cost', 'budget-tool-calls', 'budget-retries']) {
+      expect(kinds.includes(k), why('budget-policy.md §D', `cap.breached.kind MUST include ${k}`)).toBe(true);
+    }
+  });
+  it('all four budget.* event names appear in the RunEventType enum', () => {
+    const runEvent = loadSchema('run-event.schema.json');
+    const enumVals = ((runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum) ?? []; // [] when RunEventType or its enum is absent, so each expect below fails cleanly
+    for (const name of ['budget.reserved', 'budget.consumed', 'budget.threshold.crossed', 'budget.exhausted']) {
+      expect(enumVals.includes(name), why('run-event.schema.json', `${name} MUST be in the RunEventType enum`)).toBe(true);
+    }
+  });
+  it('the budget.* payloads declare no pricing/credential property (budget-no-pricing-leak)', () => {
+    const defs = payloads.$defs as Record<string, { properties?: Record<string, unknown> }>;
+    for (const def of ['budgetReserved', 'budgetConsumed', 'budgetThresholdCrossed', 'budgetExhausted']) {
+      for (const p of Object.keys(defs[def].properties ?? {})) { // NOTE(review): defs[def] is dereferenced unguarded — a missing def throws here; only top-level property names of each def are inspected
+        expect(PRICING_PROP_NAMES.includes(p.toLowerCase()), why('budget-no-pricing-leak', `${def} MUST NOT declare a pricing-bearing property (${p})`)).toBe(false); // exact match on the lowercased name; NOTE(review): e.g. 'ratePerToken' -> 'ratepertoken' is not in PRICING_PROP_NAMES
+      }
+    }
+  });
+  it('the budget.* payloads are additionalProperties:false — a rate-card field on an INSTANCE is rejected', () => {
+    // The aggregate cost total (the user's own budget) is permitted; the host's per-unit rate card is not.
+    // additionalProperties:false makes the rejection structural, not just a declared-property check.
+    expect(compile('budgetConsumed')({ dimension: 'cost', consumed: 0.8, limit: 1.0 }), why('budget-policy.md §F', 'an aggregate cost total (the user budget) MUST validate')).toBe(true);
+    expect(
+      compile('budgetConsumed')({ dimension: 'cost', consumed: 0.8, limit: 1.0, ratePerToken: 0.000003 }), // same record as above plus one undeclared rate field; only budgetConsumed is exercised this way
+      why('budget-no-pricing-leak', 'a rate-card / per-token-price field MUST be rejected (additionalProperties:false)'),
+    ).toBe(false);
+  });
+});
+describe('budget-policy-shape: capability advertisement (RFC 0084 §E, server-free)', () => {
+  it('capabilities.budget + limits.maxBudget{Tokens,CostUsd} are declared', () => {
+    const caps = loadSchema('capabilities.schema.json');
+    const props = caps.properties as Record<string, { properties?: Record<string, unknown> }>; // top-level properties of the capabilities schema; `budget` and `limits` are looked up here
+    for (const flag of ['supported', 'dimensions', 'enforce', 'scopes']) {
+      expect(props.budget?.properties?.[flag], why('budget-policy.md §E', `capabilities.budget.${flag} MUST be declared`)).toBeDefined(); // optional chaining: a missing `budget` block yields undefined and fails the expect
+    }
+    for (const ceiling of ['maxBudgetTokens', 'maxBudgetCostUsd']) {
+      expect(props.limits?.properties?.[ceiling], why('budget-policy.md §E', `limits.${ceiling} MUST be declared`)).toBeDefined(); // checks declaration only, not the property's type or bounds
+    }
+  });
+});