npm - @openwop/openwop-conformance - Versions diffs - 1.10.0 → 1.11.0 - Mend

@openwop/openwop-conformance 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/CHANGELOG.md +34 -0
package/README.md +2 -2
package/api/asyncapi.yaml +70 -0
package/api/openapi.yaml +268 -1
package/coverage.md +30 -2
package/fixtures/oauth-providers/synthetic.json +38 -0
package/fixtures.md +10 -0
package/package.json +1 -1
package/schemas/README.md +12 -0
package/schemas/agent-deployment-transition.schema.json +49 -0
package/schemas/agent-deployment.schema.json +54 -0
package/schemas/agent-eval-suite.schema.json +140 -0
package/schemas/agent-inventory-response.schema.json +25 -0
package/schemas/agent-manifest.schema.json +5 -0
package/schemas/agent-org-chart.schema.json +82 -0
package/schemas/agent-ref.schema.json +12 -2
package/schemas/agent-roster-entry.schema.json +81 -0
package/schemas/agent-roster-response.schema.json +21 -0
package/schemas/budget-policy.schema.json +18 -0
package/schemas/capabilities.schema.json +277 -0
package/schemas/credential-provenance.schema.json +18 -0
package/schemas/eval-summary.schema.json +92 -0
package/schemas/node-pack-manifest.schema.json +17 -0
package/schemas/org-chart-responsibility-view.schema.json +26 -0
package/schemas/run-event-payloads.schema.json +286 -3
package/schemas/run-event.schema.json +19 -0
package/schemas/tool-descriptor.schema.json +63 -0
package/schemas/trigger-subscription.schema.json +26 -0
package/src/lib/agentRoster.ts +76 -0
package/src/lib/liveRuntime.ts +59 -0
package/src/lib/profiles.ts +157 -0
package/src/lib/runtimeRequires.ts +38 -0
package/src/lib/safeFetch.ts +87 -0
package/src/scenarios/agent-deployment-shape.test.ts +139 -0
package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
package/src/scenarios/agent-live-structured-output.test.ts +58 -0
package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
package/src/scenarios/agent-platform-profile.test.ts +158 -0
package/src/scenarios/agent-roster-attribution.test.ts +179 -0
package/src/scenarios/agent-roster-shape.test.ts +146 -0
package/src/scenarios/budget-policy-shape.test.ts +136 -0
package/src/scenarios/egress-provenance-shape.test.ts +137 -0
package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
package/src/scenarios/runtime-requires-shape.test.ts +134 -0
package/src/scenarios/safefetch-behavior.test.ts +99 -0
package/src/scenarios/safefetch-live-audit.test.ts +175 -0
package/src/scenarios/spec-corpus-validity.test.ts +19 -3
package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0

package/schemas/trigger-subscription.schema.json ADDED Viewed

@@ -0,0 +1,26 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://openwop.dev/spec/v1/trigger-subscription.schema.json",
+  "title": "TriggerSubscription",
+  "description": "RFC 0083 §B. A durable inbound-trigger subscription record (a webhook registration, a schedule, a queue consumer) with a standardized four-state machine layered over the existing per-source registration. Composes RFC 0052/0053/0017 + webhooks.md + RFC 0040 causation; the channel wire format stays a vendor extension (§E). Content-free of inbound payloads/credentials (SR-1).",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["subscriptionId", "source", "state"],
+  "properties": {
+    "subscriptionId": { "type": "string", "minLength": 1, "description": "Stable host-unique id for the subscription. Correlates the §C delivery events + the management surface." },
+    "source": { "type": "string", "enum": ["webhook", "schedule", "queue", "email", "form"], "description": "Which trigger source backs the subscription. Channels beyond these (Slack/Discord/SMS) bridge as a vendor extension by registering a subscription of the closest source kind (§E)." },
+    "state": { "type": "string", "enum": ["active", "paused", "failed", "dead-lettered"], "description": "The §B state. `active`: accepting + delivering; `paused`: retained, not delivering (operator-held); `failed`: delivery failing past policy (the webhooks.md circuit-breaker generalized); `dead-lettered`: terminal, deliveries routed to the RFC 0053 sink." },
+    "dedupEnabled": { "type": "boolean", "description": "When true, the host de-duplicates inbound events by `dedupKey` within the retention window (§C-1; the idempotency.md Layer-1 model applied to inbound triggers)." },
+    "retryPolicy": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Delivery retry policy (§C-2). On exhaustion the subscription/delivery transitions to `dead-lettered` (RFC 0053).",
+      "properties": {
+        "maxAttempts": { "type": "integer", "minimum": 1, "description": "Maximum delivery attempts before dead-lettering." },
+        "backoff": { "type": "string", "enum": ["none", "fixed", "exponential"], "description": "Backoff strategy between attempts." }
+      }
+    },
+    "webhookId": { "type": "string", "minLength": 1, "description": "MAY — for `source: \"webhook\"`, the existing webhooks.md register key (unchanged; the state machine layers over it)." },
+    "secretFingerprint": { "type": "string", "minLength": 1, "maxLength": 32, "description": "MAY — for `source: \"webhook\"`, an identifier for the signing secret (the `(webhookId, secretFingerprint)` register key). It MUST be a **salted or host-keyed, TRUNCATED** one-way digest (e.g. the first 8–16 hex of `HMAC(hostKey, secret)`) — NOT the raw secret (SR-1) and NOT a full unsalted `SHA256(secret)` (a full unsalted hash of a low-entropy secret is an offline brute-force / confirmation oracle). The `maxLength: 32` ceiling structurally rejects a full 64-hex digest." }
+  }
+}

package/src/lib/agentRoster.ts ADDED Viewed

@@ -0,0 +1,76 @@
+/**
+ * Shared helpers for the RFC 0086 `agents.roster` conformance scenarios.
+ * Lives in lib/ (not a `*.test.ts`) so scenarios import it via
+ * `../lib/agentRoster.js`.
+ *
+ * Two surfaces:
+ *   - the NORMATIVE read (`GET /v1/agents/roster[/{rosterId}]`, RFC 0086 §B),
+ *     exercised black-box against any conformant host; and
+ *   - the host-sample fire seam (`POST /v1/host/sample/roster/fire`), used to
+ *     drive a portfolio trigger so the `roster.run.initiated` attribution +
+ *     ordering can be asserted against the test event-log seam. The fire seam
+ *     is OPTIONAL — scenarios soft-skip on 404/405 (the reference roster store
+ *     is deferred per RFC 0086 §Conformance).
+ *
+ * @see RFCS/0086-standing-agent-roster-and-workflow-portfolio.md
+ * @see spec/v1/agent-roster.md
+ */
+import { driver } from './driver.js';
+import { readCapabilityFamily } from './discovery-capabilities.js';
+/** Reads `agents.roster` from discovery (root-first per RFC 0073); null when
+ *  unadvertised.
+ *
+ *  @returns the `roster` member of the `agents` capability family when it is a
+ *    non-null object (arrays also pass the `typeof` check); otherwise null —
+ *    including when the member is a primitive such as `true`. */
+export async function readRosterCap(): Promise<Record<string, unknown> | null> {
+  const agents = await readCapabilityFamily<{ roster?: unknown }>('agents');
+  const r = agents?.roster;
+  return r && typeof r === 'object' ? (r as Record<string, unknown>) : null;
+}
+export interface RosterEntry { // Loosely-typed roster entry as the helpers in this file read it; every named field is optional.
+  rosterId?: string;
+  persona?: string;
+  agentRef?: { agentId?: string; version?: string; channel?: string };
+  workflows?: string[];
+  owner?: { tenantId?: string; workspaceId?: string };
+  [k: string]: unknown; // open shape: any additional property is admitted as `unknown`
+}
+export interface RosterResponse { // Shape the roster-list JSON body is cast to; both fields optional, nothing is validated by the type.
+  roster?: RosterEntry[];
+  total?: number; // presumably the total entry count — not read anywhere in this file; confirm against the spec
+}
+/** GET the NORMATIVE standing roster (RFC 0086 §B `GET /v1/agents/roster`);
+ *  null when the host doesn't serve it (404/405/501).
+ *
+ *  @returns null on 404/405/501; otherwise the response JSON cast (unvalidated)
+ *    to `RosterResponse`, or `{}` when the JSON is null/undefined. No other
+ *    status is special-cased here — the body is handed back as-is. */
+export async function listRoster(): Promise<RosterResponse | null> {
+  const res = await driver.get('/v1/agents/roster');
+  if (res.status === 404 || res.status === 405 || res.status === 501) return null;
+  return (res.json as RosterResponse | undefined) ?? {};
+}
+/** GET a single roster entry by id. Returns `{ status, entry }` so a caller can
+ *  distinguish a 404 (cross-tenant / unknown) from a served entry.
+ *
+ *  @param rosterId - entry id; passed through `encodeURIComponent` before it is
+ *    placed in the request path.
+ *  @returns the raw HTTP status plus the response JSON cast (unvalidated) to
+ *    `RosterEntry`. No status is special-cased here, so `entry` may be
+ *    undefined or hold a non-entry body — callers should check `status` first. */
+export async function getRosterEntry(
+  rosterId: string,
+): Promise<{ status: number; entry: RosterEntry | undefined }> {
+  const res = await driver.get(`/v1/agents/roster/${encodeURIComponent(rosterId)}`);
+  return { status: res.status, entry: res.json as RosterEntry | undefined };
+}
+export interface RosterFireResult { // Shape the fire-seam JSON body is cast to; all fields optional.
+  runId?: string;
+  rosterId?: string;
+  triggerSubscriptionId?: string;
+}
+/** Drive a portfolio trigger for a roster member via the host-sample fire seam.
+ *  `asWorkItem:true` requests the RFC 0083 durable-work-item path (carries a
+ *  `triggerSubscriptionId` + run `causationId`). Returns null when the seam is
+ *  unwired (404/405).
+ *
+ *  @param body - POSTed as-is to `/v1/host/sample/roster/fire`; defaults to
+ *    `{}` when omitted.
+ *  @returns null on 404/405; otherwise the response JSON cast (unvalidated) to
+ *    `RosterFireResult`, or `{}` when the JSON is null/undefined. */
+export async function fireRosterPortfolio(
+  body: { rosterId?: string; triggerSource?: string; asWorkItem?: boolean } = {},
+): Promise<RosterFireResult | null> {
+  const res = await driver.post('/v1/host/sample/roster/fire', body);
+  if (res.status === 404 || res.status === 405) return null;
+  return (res.json as RosterFireResult | undefined) ?? {};
+}

package/src/lib/liveRuntime.ts ADDED Viewed

@@ -0,0 +1,59 @@
+/**
+ * Shared helpers for the RFC 0077 `agents.liveRuntime` conformance scenarios.
+ * Lives in lib/ (not a `*.test.ts`) so scenarios import it via
+ * `../lib/liveRuntime.js`.
+ *
+ * RFC 0077 adds NO new endpoint — a live manifest invocation rides the existing
+ * run surface (agent as root of `POST /v1/runs`, a `WorkflowNode.agent` step, or
+ * a chat `@mention`) and brackets the existing `agent.*` family with
+ * `agent.invocation.started` / `agent.invocation.completed`. To drive one
+ * deterministically in conformance, the host exposes the OPTIONAL sample seam
+ * `POST /v1/host/sample/agents/live-invoke` returning `{ runId, invocationId }`;
+ * the bracketed events are read back via the test event-log seam. The seam is
+ * deferred per RFC 0077 §Conformance, so scenarios soft-skip on 404/405.
+ *
+ * @see RFCS/0077-agent-run-lifecycle-and-live-manifest-dispatch.md
+ * @see spec/v1/multi-agent-execution.md §"Live manifest dispatch"
+ */
+import { driver } from './driver.js';
+import { readCapabilityFamily } from './discovery-capabilities.js';
+/** Reads `agents.liveRuntime` from discovery (root-first per RFC 0073); null
+ *  when unadvertised.
+ *
+ *  @returns the `liveRuntime` member of the `agents` capability family when it
+ *    is a non-null object (arrays also pass the `typeof` check); otherwise
+ *    null — including when the member is a primitive such as `true`. */
+export async function readLiveRuntimeCap(): Promise<Record<string, unknown> | null> {
+  const agents = await readCapabilityFamily<{ liveRuntime?: unknown }>('agents');
+  const lr = agents?.liveRuntime;
+  return lr && typeof lr === 'object' ? (lr as Record<string, unknown>) : null;
+}
+export interface LiveInvokeResult { // Shape the live-invoke seam JSON body is cast to; all fields optional.
+  runId?: string;
+  invocationId?: string;
+  outcome?: string; // free-form string at the type level; the allowed values are not constrained here
+}
+/**
+ * Drive one live manifest invocation via the host-sample seam. Body fields:
+ *   - `agentId` (optional): the manifest agent to invoke; host picks a default
+ *     when omitted.
+ *   - `source` (optional): `workflow-node` | `run-api` | `chat-mention`.
+ *   - `returnSchemaRef` (optional) + `forceInvalidResult` (optional): exercise
+ *     the §B step-6 structured-output enforcement — force a result that violates
+ *     the handoff schema so a `structuredOutput` host fails the run.
+ *   - `attemptTool` (optional): the id of a tool OUTSIDE the agent's
+ *     `toolAllowlist` the invocation should attempt (the §F-1 allowlist floor).
+ * Returns null when the seam is unwired (404/405).
+ *
+ * @param body - POSTed as-is to `/v1/host/sample/agents/live-invoke`; defaults
+ *   to `{}` when omitted. `source` is typed as a plain `string`, so the three
+ *   values listed above are not enforced by the compiler.
+ * @returns null on 404/405; otherwise the response JSON cast (unvalidated) to
+ *   `LiveInvokeResult`, or `{}` when the JSON is null/undefined.
+ */
+export async function invokeLive(
+  body: {
+    agentId?: string;
+    source?: string;
+    returnSchemaRef?: string;
+    forceInvalidResult?: boolean;
+    attemptTool?: string;
+  } = {},
+): Promise<LiveInvokeResult | null> {
+  const res = await driver.post('/v1/host/sample/agents/live-invoke', body);
+  if (res.status === 404 || res.status === 405) return null;
+  return (res.json as LiveInvokeResult | undefined) ?? {};
+}

package/src/lib/profiles.ts CHANGED Viewed

@@ -30,6 +30,8 @@ export const PROFILE_NAMES = [
   'openwop-node-packs',
   'openwop-replay-fork',
   'openwop-fixtures',
+  'openwop-memory',
+  'openwop-trigger-bridge',
 ] as const;
 export type ProfileName = (typeof PROFILE_NAMES)[number];
@@ -211,6 +213,155 @@ export function isFixtures(c: DiscoveryPayload): boolean {
   return c.fixtures.every((id) => typeof id === 'string' && id.length > 0);
 }
+/**
+ * `openwop-memory` predicate (RFC 0080). Host implements the reconciled
+ * memory-capability model at the core tier: a read/write `MemoryAdapter`
+ * (`memory.supported: true` and `memory.writable !== false`) plus a cross-run
+ * durable store (`agents.memoryBackends` includes `'long-term'`). Capability
+ * families are document-root properties of the discovery payload (RFC 0073),
+ * so this reads `c.memory` / `c.agents`, matching `isReplayFork`.
+ *
+ * Evaluation order (the first failing check returns false): `isCore(c)`;
+ * `c.memory` is a non-null object; `memory.supported === true`;
+ * `memory.writable` is not explicitly `false` (an absent `writable` passes);
+ * `c.agents` is non-null and `agents.memoryBackends` satisfies
+ * `isStringArray`; finally the backends include `'long-term'`.
+ *
+ * @param c - the discovery payload to classify.
+ * @returns true only when every check above holds.
+ *
+ * @see spec/v1/profiles.md §`openwop-memory`
+ * @see spec/v1/agent-memory.md §"Memory capability model"
+ */
+export function isMemory(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const memory = c.memory as { supported?: unknown; writable?: unknown } | undefined;
+  if (memory == null || typeof memory !== 'object') return false;
+  if (memory.supported !== true) return false;
+  if (memory.writable === false) return false;
+  const agents = c.agents as { memoryBackends?: unknown } | undefined;
+  if (agents == null || !isStringArray(agents.memoryBackends)) return false;
+  return agents.memoryBackends.includes('long-term');
+}
+/**
+ * `openwop-trigger-bridge` predicate (RFC 0083). Host composes the durable
+ * inbound-work contract: advertises the `triggerBridge`, has a `deadLetter`
+ * sink for exhausted deliveries, and has at least one durable inbound source
+ * (queue bus, durable webhooks, or scheduling). Capability families are
+ * document-root properties (RFC 0073), so this reads `c.triggerBridge` /
+ * `c.deadLetter` / `c.queueBus` / `c.webhooks` / `c.scheduling`.
+ *
+ * Note the asymmetry in the source check: `queueBus` / `scheduling` count when
+ * their block has `supported === true`, whereas `webhooks` counts only when
+ * `webhooks.durable === true` (its `supported` flag is not consulted here).
+ *
+ * @param c - the discovery payload to classify.
+ * @returns true when `isCore(c)` holds, `triggerBridge` and `deadLetter` are
+ *   both supported, and at least one durable source qualifies.
+ *
+ * @see spec/v1/profiles.md §`openwop-trigger-bridge`
+ * @see spec/v1/trigger-bridge.md
+ */
+export function isTriggerBridge(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const supported = (v: unknown): boolean => // local predicate: non-null object whose `supported` is strictly true
+    v != null && typeof v === 'object' && (v as { supported?: unknown }).supported === true;
+  if (!supported(c.triggerBridge)) return false;
+  if (!supported(c.deadLetter)) return false;
+  const webhooks = c.webhooks as { durable?: unknown } | undefined;
+  const durableSource =
+    supported(c.queueBus) ||
+    supported(c.scheduling) ||
+    (webhooks != null && typeof webhooks === 'object' && webhooks.durable === true);
+  return durableSource;
+}
+// ─────────────────────────────────────────────────────────────────────────────
+// Operational annex: openwop-agent-platform (RFC 0085).
+//
+// NOT part of the closed `profiles.md` predicate catalog (PROFILE_NAMES /
+// deriveProfiles above) — it is an operational ANNEX (the production-profile.md /
+// auth-profiles.md pattern) combining a discovery predicate with required runtime
+// conformance evidence + documentation + a badge. These helpers compute only the
+// discovery-PREDICATE part; the live aggregate-evidence assertion (does every
+// constituent scenario actually pass?) lives in agent-platform-profile.test.ts.
+//
+// @see spec/v1/agent-platform-profile.md
+// ─────────────────────────────────────────────────────────────────────────────
+/** Narrow helper: a capability sub-block with `supported === true`.
+ *
+ *  @param v - any value read off the discovery payload.
+ *  @returns true only for a non-null object whose `supported` property is
+ *    strictly `true`; truthy non-boolean values (e.g. `"true"`, `1`) do not
+ *    count, and null/undefined/primitives return false. */
+function blockSupported(v: unknown): boolean {
+  return v != null && typeof v === 'object' && (v as { supported?: unknown }).supported === true;
+}
+/** The `openwop-agent-platform` FLOOR (`partial`) discovery predicate — RFC 0085 §B.
+ *
+ *  Requires `isCore(c)` and then ALL of: `agents.manifestRuntime`,
+ *  `agents.liveRuntime`, `toolCatalog`, `toolHooks`, `httpClient.safeFetch`,
+ *  `providerUsage`, `prompts`, `memory`, `feedback` each being a block with
+ *  `supported === true`, plus EITHER `replay.supported === true` OR
+ *  `nondeterminismPolicy.declared === true`.
+ *
+ *  @param c - the discovery payload to classify.
+ *  @returns true when every floor term above holds; false otherwise. */
+export function isAgentPlatformPartial(c: DiscoveryPayload): boolean {
+  if (!isCore(c)) return false;
+  const agents = c.agents as { manifestRuntime?: unknown; liveRuntime?: unknown } | undefined;
+  const httpClient = c.httpClient as { safeFetch?: unknown; egressPolicy?: unknown } | undefined; // `egressPolicy` is declared in the cast but not read by this predicate
+  const replay = c.replay as { supported?: unknown } | undefined;
+  const nondet = c.nondeterminismPolicy as { declared?: unknown } | undefined;
+  return (
+    blockSupported(agents?.manifestRuntime) &&
+    blockSupported(agents?.liveRuntime) &&
+    blockSupported(c.toolCatalog) &&
+    blockSupported(c.toolHooks) &&
+    blockSupported(httpClient?.safeFetch) &&
+    blockSupported(c.providerUsage) &&
+    blockSupported(c.prompts) &&
+    blockSupported(c.memory) &&
+    blockSupported(c.feedback) &&
+    (replay?.supported === true || nondet?.declared === true)
+  );
+}
+/** The `openwop-agent-platform` `full` discovery predicate (floor + governance tier) — RFC 0085 §B.
+ *
+ *  Requires the floor predicate (`isAgentPlatformPartial`) and then ALL of:
+ *  `authorization` supported, `agents.manifestRuntime.installScope === 'tenant'`,
+ *  `memory.attribution` supported, `debugBundle` supported, `triggerBridge`
+ *  supported, and `httpClient.egressPolicy` supported.
+ *
+ *  @param c - the discovery payload to classify.
+ *  @returns true when the floor and every governance term above hold. */
+export function isAgentPlatformFull(c: DiscoveryPayload): boolean {
+  if (!isAgentPlatformPartial(c)) return false;
+  const agents = c.agents as { manifestRuntime?: { installScope?: unknown } } | undefined;
+  const memory = c.memory as { attribution?: unknown } | undefined;
+  // Re the `debugBundle` term in the return below: debug bundle is advertised at `capabilities.debugBundle.supported` (debug-bundle.md /
+  // RFC 0009), NOT under `production.*` — the production block only adds stricter truncation MUSTs.
+  const httpClient = c.httpClient as { egressPolicy?: unknown } | undefined;
+  return (
+    blockSupported(c.authorization) &&
+    agents?.manifestRuntime?.installScope === 'tenant' &&
+    blockSupported(memory?.attribution) &&
+    blockSupported(c.debugBundle) &&
+    blockSupported(c.triggerBridge) &&
+    blockSupported(httpClient?.egressPolicy)
+  );
+}
+/** The host-reported annex status: `full` ⊃ `partial` ⊃ `none` (discovery-predicate only).
+ *
+ *  @param c - the discovery payload to classify.
+ *  @returns `'full'` when `isAgentPlatformFull(c)`, else `'partial'` when
+ *    `isAgentPlatformPartial(c)`, else `'none'`. The full check runs first, so
+ *    the highest satisfied tier wins. */
+export function agentPlatformStatus(c: DiscoveryPayload): 'none' | 'partial' | 'full' {
+  if (isAgentPlatformFull(c)) return 'full';
+  if (isAgentPlatformPartial(c)) return 'partial';
+  return 'none';
+}
+/**
+ * The per-term satisfaction breakdown (RFC 0085 §D) — the richer interop signal
+ * alongside the flat `none`/`partial`/`full` ladder. Adoption is NON-CONTIGUOUS:
+ * a real host built feature-by-feature can satisfy `full`-tier terms (RBAC,
+ * memory-attribution, tenant-scoping) while still failing `floor` terms, so the
+ * flat status would understate it (reads identical to a do-nothing host). This
+ * returns exactly the term ids a host satisfies, so a `none` host honoring 6/16
+ * terms is distinguishable from one honoring 0/16.
+ *
+ * Unlike `isAgentPlatformPartial` / `isAgentPlatformFull`, this function does
+ * NOT gate on `isCore(c)`: each of the 16 terms (10 `floor:*`, 6 `full:*`) is
+ * evaluated independently against the payload.
+ *
+ * @param c - the discovery payload to inspect.
+ * @returns the ids of the satisfied terms, in the declaration order of the
+ *   table below (floor terms first, then full terms); empty when none hold.
+ */
+export function agentPlatformSatisfiedTerms(c: DiscoveryPayload): readonly string[] {
+  const agents = c.agents as { manifestRuntime?: { installScope?: unknown }; liveRuntime?: unknown } | undefined;
+  const httpClient = c.httpClient as { safeFetch?: unknown; egressPolicy?: unknown } | undefined;
+  const memory = c.memory as { attribution?: unknown } | undefined;
+  const replay = c.replay as { supported?: unknown } | undefined;
+  const nondet = c.nondeterminismPolicy as { declared?: unknown } | undefined;
+  const checks: ReadonlyArray<readonly [string, boolean]> = [
+    // floor
+    ['floor:agents.manifestRuntime', blockSupported(agents?.manifestRuntime)],
+    ['floor:agents.liveRuntime', blockSupported(agents?.liveRuntime)],
+    ['floor:toolCatalog', blockSupported(c.toolCatalog)],
+    ['floor:toolHooks', blockSupported(c.toolHooks)],
+    ['floor:httpClient.safeFetch', blockSupported(httpClient?.safeFetch)],
+    ['floor:providerUsage', blockSupported(c.providerUsage)],
+    ['floor:prompts', blockSupported(c.prompts)],
+    ['floor:memory', blockSupported(c.memory)],
+    ['floor:feedback', blockSupported(c.feedback)],
+    ['floor:replay-or-nondeterminism', replay?.supported === true || nondet?.declared === true],
+    // full (governance)
+    ['full:authorization', blockSupported(c.authorization)],
+    ['full:tenant-installScope', agents?.manifestRuntime?.installScope === 'tenant'],
+    ['full:memory.attribution', blockSupported(memory?.attribution)],
+    ['full:debugBundle', blockSupported(c.debugBundle)],
+    ['full:triggerBridge', blockSupported(c.triggerBridge)],
+    ['full:egressPolicy', blockSupported(httpClient?.egressPolicy)],
+  ];
+  return checks.filter(([, ok]) => ok).map(([id]) => id); // keep only satisfied rows, then project to their ids
+}
 /**
  * Derive the full profile set from a discovery payload.
  *
@@ -228,6 +379,8 @@ export function deriveProfiles(c: DiscoveryPayload): readonly ProfileName[] {
   if (isNodePacksDiscovery(c)) result.push('openwop-node-packs');
   if (isReplayFork(c)) result.push('openwop-replay-fork');
   if (isFixtures(c)) result.push('openwop-fixtures');
+  if (isMemory(c)) result.push('openwop-memory');
+  if (isTriggerBridge(c)) result.push('openwop-trigger-bridge');
   return result;
 }
@@ -254,5 +407,9 @@ export function hasProfile(c: DiscoveryPayload, profile: ProfileName): boolean {
       return isReplayFork(c);
     case 'openwop-fixtures':
       return isFixtures(c);
+    case 'openwop-memory':
+      return isMemory(c);
+    case 'openwop-trigger-bridge':
+      return isTriggerBridge(c);
   }
 }

package/src/lib/runtimeRequires.ts ADDED Viewed

@@ -0,0 +1,38 @@
+/**
+ * Shared helper for the RFC 0076 §A `runtime.requires[]` install-gate
+ * conformance scenarios. Lives in lib/ (not a *.test.ts) so scenarios import it
+ * via `../lib/runtimeRequires.js`.
+ *
+ * Drives the conformance-only host seam specified in host-sample-test-seams.md
+ * §"Open seams": `POST /v1/host/sample/packs/install-gate`. The seam evaluates a
+ * manifest's `runtime.requires[]` against a simulated host grant-set and returns
+ * the install-time outcome the host would produce — letting a single seam
+ * exercise the grant / refuse / non-sandbox-projection behaviors deterministically.
+ */
+import { driver } from './driver.js';
+export interface InstallGateRequest { // Request body for the install-gate seam; only `manifest` is required.
+  /** The candidate pack manifest (carrying runtime.requires[]). */
+  manifest: Record<string, unknown>;
+  /** Primitives the simulated sandbox grants. Ignored when `gating === false`. */
+  grantSet?: string[];
+  /** Whether the simulated host gates platform access. Default true (sandbox host).
+   *  NOTE(review): the default is not applied in this file — presumably the
+   *  host applies it when the field is omitted; confirm against the seam spec. */
+  gating?: boolean;
+}
+export interface InstallGateResponse { // What `installGate` hands back when the seam answered (status + parsed body).
+  /** HTTP status the seam returned (200 install, 400 refuse). */
+  status: number;
+  /** Parsed response body; `{}` when the response carried no JSON. */
+  body: Record<string, unknown>;
+}
+/**
+ * Drives one install-gate evaluation via the host-sample seam, or null
+ * (soft-skip) when the host doesn't expose it.
+ *
+ * @param req - sent as the POST body unchanged (the double cast only widens
+ *   the static type for the driver; no fields are added or defaulted here).
+ * @returns null on 404/405; otherwise the HTTP status together with the
+ *   response JSON (or `{}` when the JSON is null/undefined). Every other
+ *   status, including a refusal, is returned to the caller rather than
+ *   interpreted by this helper.
+ */
+export async function installGate(req: InstallGateRequest): Promise<InstallGateResponse | null> {
+  const res = await driver.post('/v1/host/sample/packs/install-gate', req as unknown as Record<string, unknown>);
+  if (res.status === 404 || res.status === 405) return null; // seam absent — soft-skip
+  return { status: res.status, body: (res.json as Record<string, unknown> | undefined) ?? {} };
+}

package/src/lib/safeFetch.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Shared helper for the RFC 0076 §B `ctx.http.safeFetch` conformance scenarios.
+ * Lives in lib/ (not a *.test.ts) so scenarios import it via `../lib/safeFetch.js`.
+ *
+ * Reads `capabilities.httpClient.safeFetch` (root-first, wrapper-fallback) and
+ * drives the conformance-only host seam `POST /v1/host/sample/http/safe-fetch`
+ * (host-sample-test-seams.md §"Open seams").
+ */
+import { driver } from './driver.js';
+import { capabilityFamily } from './discovery-capabilities.js';
+interface HttpClientCap { // Minimal view of the `httpClient` capability family used by the helpers in this file.
+  supported?: boolean;
+  safeFetch?: { supported?: boolean }; // only `safeFetch.supported` is inspected by the visible helpers
+}
+/** True when the host advertises `capabilities.httpClient.safeFetch.supported`.
+ *
+ *  Issues its own `GET /.well-known/openwop` on every call (no caching here).
+ *
+ *  @returns true only when the flag is strictly `true`; false when the family,
+ *    the `safeFetch` block, or the flag is missing or has any other value. */
+export async function isSafeFetchSupported(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  return capabilityFamily<HttpClientCap>(disco.json, 'httpClient')?.safeFetch?.supported === true;
+}
+/** True when the host also advertises `capabilities.toolHooks.prePostEvents`.
+ *
+ *  Issues its own `GET /.well-known/openwop` on every call (no caching here).
+ *
+ *  @returns true only when `prePostEvents` is strictly `true`; false when the
+ *    `toolHooks` family or the flag is missing or has any other value. */
+export async function isToolHookAuditOn(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  return capabilityFamily<{ prePostEvents?: boolean }>(disco.json, 'toolHooks')?.prePostEvents === true;
+}
+export interface SafeFetchResult { // Shape the safe-fetch seam JSON body is cast to; all fields optional.
+  outcome?: 'fetched' | 'blocked';
+  status?: number;
+  blocked?: 'ssrf' | 'upgrade' | string; // the `| string` arm widens this to any string; the two literals are illustrative only
+  toolCalled?: Record<string, unknown>;
+  toolReturned?: Record<string, unknown>;
+}
+/**
+ * Drives one safeFetch evaluation via the host-sample seam, or null (soft-skip)
+ * when the host doesn't expose it.
+ *
+ * @param body - POSTed as-is to `/v1/host/sample/http/safe-fetch`; its fields
+ *   are not inspected by this helper.
+ * @returns null on 404/405; otherwise the response JSON cast (unvalidated) to
+ *   `SafeFetchResult`, or `{}` when the JSON is null/undefined.
+ */
+export async function safeFetch(body: Record<string, unknown>): Promise<SafeFetchResult | null> {
+  const res = await driver.post('/v1/host/sample/http/safe-fetch', body);
+  if (res.status === 404 || res.status === 405) return null; // seam absent — soft-skip
+  return (res.json as SafeFetchResult | undefined) ?? {};
+}
+/**
+ * True when the host advertises BOTH `httpClient.safeFetch.supported` AND
+ * `toolHooks.prePostEvents` — the co-advertisement that, per
+ * `host-capabilities.md` §host.http + RFC 0076 §B, makes live audit-pair
+ * emission a MUST. One discovery fetch (the two single-flag helpers above each
+ * fetch; this avoids the double round-trip for the live-audit gate).
+ *
+ * @returns true only when both flags are strictly `true` in the same
+ *   discovery document; false if either is missing or has any other value.
+ */
+export async function isSafeFetchLiveAuditAdvertised(): Promise<boolean> {
+  const disco = await driver.get('/.well-known/openwop');
+  const safeFetchOn =
+    capabilityFamily<HttpClientCap>(disco.json, 'httpClient')?.safeFetch?.supported === true;
+  const auditOn =
+    capabilityFamily<{ prePostEvents?: boolean }>(disco.json, 'toolHooks')?.prePostEvents === true;
+  return safeFetchOn && auditOn;
+}
+/** Result of the live-run safe-fetch seam: the host executed one
+ *  `ctx.http.safeFetch` call inside a real run via the production injection
+ *  path, and returns the run's id so the caller can read the durable event
+ *  log. `null` ⇒ the run seam is unwired (soft-skip, host-pending).
+ *
+ *  Both fields are optional at the type level: the body is cast from JSON
+ *  without validation, so callers should check `runId` before using it. */
+export interface SafeFetchRunResult {
+  runId?: string;
+  outcome?: 'fetched' | 'blocked';
+}
+/**
+ * Drives one `ctx.http.safeFetch` call **inside a real run** via the open seam
+ * `POST /v1/host/sample/http/safe-fetch-run`, returning `{ runId, outcome }`,
+ * or null (soft-skip) when the run seam isn't wired. Distinct from `safeFetch`
+ * (which returns the audit pair INLINE from the seam): this exercises the
+ * production per-ctx `ctx.http.safeFetch` path so the caller can assert the
+ * `agent.toolCalled`/`agent.toolReturned` pair landed in the DURABLE run event
+ * log — closing the seam-vs-production gap in `safefetch-behavior.test.ts`.
+ *
+ * @param body - POSTed as-is to the run seam; its fields are not inspected by
+ *   this helper.
+ * @returns null on 404/405; otherwise the response JSON cast (unvalidated) to
+ *   `SafeFetchRunResult`, or `{}` when the JSON is null/undefined.
+ */
+export async function safeFetchViaRun(
+  body: Record<string, unknown>,
+): Promise<SafeFetchRunResult | null> {
+  const res = await driver.post('/v1/host/sample/http/safe-fetch-run', body);
+  if (res.status === 404 || res.status === 405) return null; // run seam unwired — soft-skip
+  return (res.json as SafeFetchRunResult | undefined) ?? {};
+}

package/src/scenarios/agent-deployment-shape.test.ts ADDED Viewed

@@ -0,0 +1,139 @@
+/**
+ * Agent deployment lifecycle — record + binding + event shapes (RFC 0082).
+ *
+ * Always-on, server-free schema-shape probe. Verifies that:
+ *   - `capabilities.agents.deployment` is declared with its `supported` /
+ *     `channels` / `canary` / `rollback` / `states` sub-flags.
+ *   - `agent-deployment.schema.json` compiles and round-trips a conforming
+ *     deployment record, and rejects malformed ones (an out-of-enum `state`;
+ *     `canaryPercent` out of 0..100).
+ *   - the `AgentRef` `channel` XOR `version` rule holds: each alone (and
+ *     neither) validates; both together is rejected (the `not` clause).
+ *   - the four `deployment.*` payload $defs validate conforming content-free
+ *     payloads and reject malformed ones.
+ *   - the four `deployment.*` payloads are CONTENT-FREE: a `deployment.promoted`
+ *     carrying a `manifestBody`, and a `deployment.state.changed` carrying a
+ *     `prompt`, are rejected (`additionalProperties:false`). This is the public
+ *     test for the protocol-tier SECURITY invariant `deployment-event-no-content-leak`.
+ *   - `agent.invocation.started` carries the additive recorded-fact
+ *     `resolvedAgentVersion` / `resolvedChannel` fields (RFC 0082 §B).
+ *   - all four event names appear in the RunEventType enum.
+ *
+ * Behavioral assertions (the authz → approvalGate → eval-verify → promotion path,
+ * the fail-closed denial, the §B replay re-read of `resolvedAgentVersion`) are
+ * gated on `capabilities.agents.deployment.supported` and land in
+ * `agent-deployment-lifecycle.test.ts` (deferred per RFC 0082 §Conformance —
+ * reference host deferred). This scenario asserts the wire contract, not host
+ * behavior.
+ *
+ * Spec references:
+ *   - https://github.com/openwop/openwop/blob/main/spec/v1/agent-deployment.md
+ *   - https://github.com/openwop/openwop/blob/main/RFCS/0082-agent-deployment-lifecycle.md
+ *   - https://github.com/openwop/openwop/blob/main/SECURITY/invariants.yaml (deployment-event-no-content-leak)
+ */
+import { describe, it, expect } from 'vitest';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import Ajv2020 from 'ajv/dist/2020.js';
+import addFormats from 'ajv-formats';
+import { SCHEMAS_DIR } from '../lib/paths.js';
+/** Builds an assertion message of the form `<specRef> — <requirement>`; needs no server. */
+const why = (specRef: string, requirement: string): string => {
+  return [specRef, requirement].join(' — ');
+};
+/** Reads the schema file `name` from `SCHEMAS_DIR` as UTF-8 and returns its parsed JSON. */
+function loadSchema(name: string): Record<string, unknown> {
+  const schemaPath = join(SCHEMAS_DIR, name);
+  const rawText = readFileSync(schemaPath, 'utf8');
+  return JSON.parse(rawText) as Record<string, unknown>;
+}
+describe('agent-deployment-shape: capability advertisement (RFC 0082, server-free)', () => {
+  // Minimal structural views of the schema tree: only `properties` is walked.
+  type LeafNode = { properties?: Record<string, unknown> };
+  type BranchNode = { properties?: Record<string, LeafNode> };
+  // Sub-flags that must be declared under `agents.deployment`.
+  const requiredFlags = ['supported', 'channels', 'canary', 'rollback', 'states'];
+  it('the capabilities schema declares agents.deployment with its sub-flags', () => {
+    const topLevel = loadSchema('capabilities.schema.json').properties as Record<string, BranchNode>;
+    const deployment = topLevel.agents?.properties?.deployment;
+    expect(deployment, why('capabilities.md §agents', 'agents.deployment MUST be declared')).toBeDefined();
+    const declared = deployment?.properties;
+    requiredFlags.forEach((flag) => {
+      const requirement = `agents.deployment.${flag} MUST be declared`;
+      expect(declared?.[flag], why('agent-deployment.md §F', requirement)).toBeDefined();
+    });
+  });
+});
+describe('agent-deployment-shape: deployment record + AgentRef binding (RFC 0082, server-free)', () => {
+  // One Ajv instance; both validators are compiled once when the suite is collected.
+  const ajv = new Ajv2020({ strict: false, allErrors: true });
+  addFormats(ajv);
+  const validateRecord = ajv.compile(loadSchema('agent-deployment.schema.json'));
+  const validateAgentRef = ajv.compile(loadSchema('agent-ref.schema.json'));
+  // Each row: candidate document, the requirement it exercises, expected verdict.
+  type Case = readonly [Record<string, unknown>, string, boolean];
+  it('AgentDeployment validates a conforming record and rejects a bad state / out-of-range canary', () => {
+    const conforming = {
+      agentId: 'core.openwop.agents.support-resolver',
+      version: '2.4.0',
+      state: 'active',
+      canaryPercent: 10,
+      channels: ['stable'],
+    };
+    const cases: readonly Case[] = [
+      [conforming, 'a conforming deployment record MUST validate', true],
+      [{ ...conforming, state: 'live' }, 'an out-of-enum state MUST be rejected', false],
+      [{ ...conforming, canaryPercent: 150 }, 'canaryPercent > 100 MUST be rejected', false],
+    ];
+    for (const [candidate, requirement, expected] of cases) {
+      expect(validateRecord(candidate), why('RFC 0082 §C', requirement)).toBe(expected);
+    }
+  });
+  it('AgentRef channel XOR version: each alone and neither validate; both is rejected (RFC 0082 §A)', () => {
+    const cases: readonly Case[] = [
+      [{ agentId: 'core.x.y.z', version: '1.0.0' }, 'version-only AgentRef MUST validate', true],
+      [{ agentId: 'core.x.y.z', channel: 'stable' }, 'channel-only AgentRef MUST validate', true],
+      [{ agentId: 'core.x.y.z' }, 'a ref with neither version nor channel MUST validate (host default)', true],
+      [{ agentId: 'core.x.y.z', version: '1.0.0', channel: 'stable' }, 'a ref with BOTH version and channel MUST be rejected', false],
+    ];
+    for (const [candidate, requirement, expected] of cases) {
+      expect(validateAgentRef(candidate), why('RFC 0082 §A', requirement)).toBe(expected);
+    }
+  });
+});
+describe('agent-deployment-shape: deployment.* event payloads (RFC 0082, server-free)', () => {
+  // The payload schema is registered under the key `payloads` so that each
+  // `$defs` entry can be fetched as its own validator via a JSON-pointer fragment.
+  const payloads = loadSchema('run-event-payloads.schema.json');
+  const ajv = new Ajv2020({ strict: false, allErrors: true });
+  addFormats(ajv);
+  ajv.addSchema(payloads, 'payloads');
+  const promoted = ajv.getSchema('payloads#/$defs/deploymentPromoted');
+  const rolledBack = ajv.getSchema('payloads#/$defs/deploymentRolledBack');
+  const canary = ajv.getSchema('payloads#/$defs/deploymentCanaryAdjusted');
+  const stateChanged = ajv.getSchema('payloads#/$defs/deploymentStateChanged');
+  /**
+   * Asserts that a `$defs` validator was resolved and returns it narrowed.
+   * Every test goes through this guard so that a missing $def fails with a
+   * readable assertion message rather than a bare "is not a function"
+   * TypeError, and so no test relies on a check made in a sibling test.
+   */
+  const requireDef = <T>(validator: T | undefined, defName: string): T => {
+    expect(validator, `the ${defName} $def MUST exist`).toBeTruthy();
+    return validator as T;
+  };
+  it('deployment.promoted validates a content-free promotion record and requires toVersion + toState', () => {
+    const validatePromoted = requireDef(promoted, 'deploymentPromoted');
+    expect(
+      validatePromoted({ agentId: 'core.openwop.agents.support-resolver', toVersion: '2.4.0', toState: 'active', channel: 'stable', canaryPercent: 10, evalRunId: 'run_abc' }),
+      why('RFC 0082 §D', 'a conforming deployment.promoted payload MUST validate'),
+    ).toBe(true);
+    expect(validatePromoted({ agentId: 'a' }), why('RFC 0082 §D', 'deployment.promoted without toVersion/toState MUST be rejected')).toBe(false);
+  });
+  it('deployment.rolled-back / canary.adjusted / state.changed validate conforming records', () => {
+    const validateRolledBack = requireDef(rolledBack, 'deploymentRolledBack');
+    const validateCanary = requireDef(canary, 'deploymentCanaryAdjusted');
+    const validateStateChanged = requireDef(stateChanged, 'deploymentStateChanged');
+    expect(validateRolledBack({ agentId: 'a', fromVersion: '2.4.0', toVersion: '2.3.1', rollbackPointer: '2.3.1' }), why('RFC 0082 §D', 'a conforming deployment.rolled-back MUST validate')).toBe(true);
+    expect(validateCanary({ agentId: 'a', version: '2.4.0', fromPercent: 10, toPercent: 50 }), why('RFC 0082 §D', 'a conforming deployment.canary.adjusted MUST validate')).toBe(true);
+    expect(validateStateChanged({ agentId: 'a', version: '2.4.0', fromState: 'active', toState: 'paused' }), why('RFC 0082 §D', 'a conforming deployment.state.changed MUST validate')).toBe(true);
+    expect(validateStateChanged({ agentId: 'a', version: '2.4.0', fromState: 'active', toState: 'live' }), why('RFC 0082 §D', 'an out-of-enum toState MUST be rejected')).toBe(false);
+  });
+  it('deployment.* events are content-free — a manifest body and a prompt are rejected (deployment-event-no-content-leak)', () => {
+    const validatePromoted = requireDef(promoted, 'deploymentPromoted');
+    const validateStateChanged = requireDef(stateChanged, 'deploymentStateChanged');
+    // Otherwise-valid payloads plus one extra content-bearing key: the schema must refuse the extra key.
+    expect(
+      validatePromoted({ agentId: 'a', toVersion: '2.4.0', toState: 'active', manifestBody: '{...}' }),
+      why('SECURITY invariant deployment-event-no-content-leak', 'a deployment.promoted MUST NOT carry a manifest body'),
+    ).toBe(false);
+    expect(
+      validateStateChanged({ agentId: 'a', version: '2.4.0', fromState: 'active', toState: 'paused', prompt: 'system: …' }),
+      why('SECURITY invariant deployment-event-no-content-leak', 'a deployment.state.changed MUST NOT carry prompt content'),
+    ).toBe(false);
+  });
+});
+describe('agent-deployment-shape: §B recorded-fact pin + enum (RFC 0082, server-free)', () => {
+  it('agent.invocation.started carries the additive recorded-fact resolvedAgentVersion / resolvedChannel', () => {
+    const payloads = loadSchema('run-event-payloads.schema.json');
+    // `$defs` is treated as possibly absent so that a malformed schema fails the
+    // assertions below (with their spec references) instead of throwing a TypeError.
+    const defs = payloads.$defs as Record<string, { properties?: Record<string, unknown> }> | undefined;
+    const started = defs?.agentInvocationStarted?.properties ?? {};
+    expect(started.resolvedAgentVersion, why('RFC 0082 §B', 'agent.invocation.started.resolvedAgentVersion MUST be declared (the channel pin)')).toBeDefined();
+    expect(started.resolvedChannel, why('RFC 0082 §B', 'agent.invocation.started.resolvedChannel MUST be declared')).toBeDefined();
+  });
+  it('all four deployment event names appear in the RunEventType enum', () => {
+    const runEvent = loadSchema('run-event.schema.json');
+    const defs = runEvent.$defs as Record<string, { enum?: string[] }> | undefined;
+    // A missing enum degrades to an empty list, so every name below is reported as absent.
+    const enumVals = defs?.RunEventType?.enum ?? [];
+    for (const e of ['deployment.promoted', 'deployment.rolled-back', 'deployment.canary.adjusted', 'deployment.state.changed']) {
+      // Name the missing event in the failure message, like the other assertions in this file.
+      expect(enumVals, why('RFC 0082', `RunEventType MUST include '${e}'`)).toContain(e);
+    }
+  });
+});