@openwop/openwop-conformance 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +34 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +70 -0
  4. package/api/openapi.yaml +268 -1
  5. package/coverage.md +30 -2
  6. package/fixtures/oauth-providers/synthetic.json +38 -0
  7. package/fixtures.md +10 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +12 -0
  10. package/schemas/agent-deployment-transition.schema.json +49 -0
  11. package/schemas/agent-deployment.schema.json +54 -0
  12. package/schemas/agent-eval-suite.schema.json +140 -0
  13. package/schemas/agent-inventory-response.schema.json +25 -0
  14. package/schemas/agent-manifest.schema.json +5 -0
  15. package/schemas/agent-org-chart.schema.json +82 -0
  16. package/schemas/agent-ref.schema.json +12 -2
  17. package/schemas/agent-roster-entry.schema.json +81 -0
  18. package/schemas/agent-roster-response.schema.json +21 -0
  19. package/schemas/budget-policy.schema.json +18 -0
  20. package/schemas/capabilities.schema.json +277 -0
  21. package/schemas/credential-provenance.schema.json +18 -0
  22. package/schemas/eval-summary.schema.json +92 -0
  23. package/schemas/node-pack-manifest.schema.json +17 -0
  24. package/schemas/org-chart-responsibility-view.schema.json +26 -0
  25. package/schemas/run-event-payloads.schema.json +286 -3
  26. package/schemas/run-event.schema.json +19 -0
  27. package/schemas/tool-descriptor.schema.json +63 -0
  28. package/schemas/trigger-subscription.schema.json +26 -0
  29. package/src/lib/agentRoster.ts +76 -0
  30. package/src/lib/liveRuntime.ts +59 -0
  31. package/src/lib/profiles.ts +157 -0
  32. package/src/lib/runtimeRequires.ts +38 -0
  33. package/src/lib/safeFetch.ts +87 -0
  34. package/src/scenarios/agent-deployment-shape.test.ts +139 -0
  35. package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
  36. package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
  37. package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
  38. package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
  39. package/src/scenarios/agent-live-structured-output.test.ts +58 -0
  40. package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
  41. package/src/scenarios/agent-platform-profile.test.ts +158 -0
  42. package/src/scenarios/agent-roster-attribution.test.ts +179 -0
  43. package/src/scenarios/agent-roster-shape.test.ts +146 -0
  44. package/src/scenarios/budget-policy-shape.test.ts +136 -0
  45. package/src/scenarios/egress-provenance-shape.test.ts +137 -0
  46. package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
  47. package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
  48. package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
  49. package/src/scenarios/runtime-requires-shape.test.ts +134 -0
  50. package/src/scenarios/safefetch-behavior.test.ts +99 -0
  51. package/src/scenarios/safefetch-live-audit.test.ts +175 -0
  52. package/src/scenarios/spec-corpus-validity.test.ts +19 -3
  53. package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
  54. package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
  55. package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Pack runtime-requirements vocabulary + shape — `node-packs.md`
3
+ * §"Runtime platform requirements" + `schemas/node-pack-manifest.schema.json`
4
+ * `$defs/Runtime.requires` (RFC 0076 §A).
5
+ *
6
+ * Server-free schema-validation scenario. The `runtime.requires[]` field is an
7
+ * OPTIONAL, closed, runtime-agnostic vocabulary a pack uses to declare the
8
+ * platform primitives its code exercises, so a sandbox host can gate at install
9
+ * time instead of trial-load. This file exercises the schema layer (the §A
10
+ * "vocabulary-validation" normative behavior — a raw builtin name is rejected —
11
+ * plus the additive/empty-array shape contract):
12
+ *
13
+ * 1. Positive: a manifest declaring valid primitives validates cleanly.
14
+ * 2. Positive: the field is OPTIONAL — a manifest omitting it validates.
15
+ * 3. Positive: an empty array (`requires: []`) validates and is equivalent to
16
+ * omission (no host may read a distinct meaning into it; §A).
17
+ * 4. Positive: every one of the 8 vocabulary tokens individually validates.
18
+ * 5. Negative — raw builtin name: `"node:dns/promises"` (the value that
19
+ * motivated the abstract vocabulary) is rejected; the registry/host
20
+ * surfaces this as `invalid_manifest`.
21
+ * 6. Negative — duplicate token: `uniqueItems` is enforced.
22
+ *
23
+ * The install-time GATE behavior (grant / refuse → `pack_runtime_requirement_unmet`,
24
+ * and the non-sandbox-host SHOULD-projection) is host behavior and lives in the
25
+ * seam-gated `runtime-requires-install-gate.test.ts`.
26
+ *
27
+ * @see spec/v1/node-packs.md §"Runtime platform requirements"
28
+ * @see spec/v1/registry-operations.md §"Runtime-requirement install gate"
29
+ * @see schemas/node-pack-manifest.schema.json
30
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md
31
+ */
32
+
33
+ import { describe, it, expect } from 'vitest';
34
+ import { readFileSync, readdirSync } from 'node:fs';
35
+ import { join } from 'node:path';
36
+ import Ajv2020 from 'ajv/dist/2020.js';
37
+ import addFormats from 'ajv-formats';
38
+ import type { ErrorObject, ValidateFunction } from 'ajv';
39
+ import { SCHEMAS_DIR } from '../lib/paths.js';
40
+
41
+ const SCHEMA_PATH = join(SCHEMAS_DIR, 'node-pack-manifest.schema.json');
42
+
43
+ const VOCABULARY = [
44
+ 'net.dns',
45
+ 'net.outbound',
46
+ 'crypto',
47
+ 'subprocess',
48
+ 'fs.read',
49
+ 'fs.write',
50
+ 'env.read',
51
+ 'clock',
52
+ ] as const;
53
+
54
+ function manifest(requires?: unknown) {
55
+ const runtime: Record<string, unknown> = { language: 'javascript', entry: 'index.mjs' };
56
+ if (requires !== undefined) runtime.requires = requires;
57
+ return {
58
+ name: 'vendor.example.http',
59
+ version: '1.0.0',
60
+ engines: { openwop: '>=1.1 <2.0.0' },
61
+ runtime,
62
+ nodes: [{ typeId: 'vendor.example.http.fetch', version: '1.0.0', category: 'integration', role: 'side-effect' }],
63
+ };
64
+ }
65
+
66
+ describe('category: runtime.requires vocabulary + shape (RFC 0076 §A)', () => {
67
+ const ajv = new Ajv2020({ allErrors: true, strict: false });
68
+ addFormats(ajv);
69
+ // Register every schema first so cross-$refs resolve (node-pack-manifest
70
+ // references agent-manifest.schema.json for its agents[] branch). addSchema
71
+ // registers without compiling; the target compiles below.
72
+ for (const file of readdirSync(SCHEMAS_DIR)) {
73
+ if (!file.endsWith('.schema.json')) continue;
74
+ try {
75
+ ajv.addSchema(JSON.parse(readFileSync(join(SCHEMAS_DIR, file), 'utf8')) as Record<string, unknown>);
76
+ } catch {
77
+ /* duplicate/already-registered — the target is compiled below */
78
+ }
79
+ }
80
+ const schema = JSON.parse(readFileSync(SCHEMA_PATH, 'utf8'));
81
+ const validate = (ajv.getSchema(schema['$id'] as string) ?? ajv.compile(schema)) as ValidateFunction;
82
+
83
+ const errorsOn = (m: unknown): ErrorObject[] => {
84
+ expect(validate(m)).toBe(false);
85
+ return validate.errors ?? [];
86
+ };
87
+
88
+ it('positive: a manifest declaring valid primitives validates cleanly', () => {
89
+ const ok = validate(manifest(['net.dns', 'net.outbound']));
90
+ expect(
91
+ ok,
92
+ `node-packs.md §"Runtime platform requirements": a well-formed runtime.requires MUST validate. Errors: ${JSON.stringify(validate.errors)}`,
93
+ ).toBe(true);
94
+ });
95
+
96
+ it('positive: runtime.requires is OPTIONAL — a manifest omitting it validates (additive)', () => {
97
+ expect(
98
+ validate(manifest(undefined)),
99
+ 'node-pack-manifest.schema.json: runtime.requires is additive/OPTIONAL — packs predating RFC 0076 validate unchanged',
100
+ ).toBe(true);
101
+ });
102
+
103
+ it('positive: an empty requires[] validates (equivalent to omission per §A)', () => {
104
+ expect(
105
+ validate(manifest([])),
106
+ 'node-packs.md §"Runtime platform requirements": runtime.requires:[] is valid and equivalent to omission',
107
+ ).toBe(true);
108
+ });
109
+
110
+ it('positive: every vocabulary token individually validates', () => {
111
+ for (const token of VOCABULARY) {
112
+ expect(
113
+ validate(manifest([token])),
114
+ `node-pack-manifest.schema.json: "${token}" is in the RFC 0076 §A vocabulary. Errors: ${JSON.stringify(validate.errors)}`,
115
+ ).toBe(true);
116
+ }
117
+ });
118
+
119
+ it('negative: a raw builtin name (node:dns/promises) is rejected (→ invalid_manifest)', () => {
120
+ const errs = errorsOn(manifest(['node:dns/promises']));
121
+ expect(
122
+ errs.some((e) => e.instancePath.includes('/runtime/requires')),
123
+ 'node-packs.md §"Runtime platform requirements": raw language builtin names are NOT in the closed vocabulary — the abstract net.dns is the portable equivalent; the registry/host surfaces this as invalid_manifest',
124
+ ).toBe(true);
125
+ });
126
+
127
+ it('negative: a duplicate token is rejected (uniqueItems)', () => {
128
+ const errs = errorsOn(manifest(['net.dns', 'net.dns']));
129
+ expect(
130
+ errs.some((e) => e.keyword === 'uniqueItems'),
131
+ 'node-pack-manifest.schema.json: runtime.requires has uniqueItems:true',
132
+ ).toBe(true);
133
+ });
134
+ });
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Host-provided safe-fetch behavior — `host-capabilities.md` §host.http
3
+ * (`ctx.http.safeFetch`) + RFC 0076 §B.
4
+ *
5
+ * Seam-gated behavioral scenarios for the pack-facing `ctx.http.safeFetch`. When
6
+ * a host advertises `capabilities.httpClient.safeFetch.supported`, the
7
+ * host-mediated fetch MUST apply the §host.http SSRF guard (resolve→pin→connect)
8
+ * so a pack can do outbound HTTP without reaching for `node:dns` / raw sockets:
9
+ *
10
+ * 1. SSRF block — a loopback / RFC 1918 / cloud-metadata target ⇒
11
+ * `{ outcome: "blocked", blocked: "ssrf" }`; the host MUST NOT connect.
12
+ * 2. DNS-rebinding — a public name re-resolving to a blocked address
13
+ * (`simulateRebindTo`) ⇒ also blocked (the resolved IP is pinned).
14
+ * 3. Connection-upgrade refusal — `Connection: upgrade` ⇒
15
+ * `{ outcome: "blocked", blocked: "upgrade" }` (no 101 socket-hijack escape).
16
+ * 4. Audit-when-both — when `toolHooks.prePostEvents` is also advertised, a
17
+ * fetched call emits the `agent.toolCalled` / `agent.toolReturned` pair
18
+ * (`transport: "http"`).
19
+ *
20
+ * All drive `POST /v1/host/sample/http/safe-fetch` and soft-skip when the host
21
+ * doesn't advertise `safeFetch` or doesn't wire the seam (404). Behavior grade
22
+ * is `host-pending` until a `safeFetch` host lights it up. The SSRF *guarantee*
23
+ * reuses the `http-client-ssrf-guard` SECURITY invariant — no new invariant.
24
+ *
25
+ * @see spec/v1/host-capabilities.md §host.http
26
+ * @see spec/v1/host-sample-test-seams.md §"Open seams"
27
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
28
+ * @see SECURITY/invariants.yaml id: http-client-ssrf-guard
29
+ */
30
+
31
+ import { describe, it, expect } from 'vitest';
32
+ import { driver } from '../lib/driver.js';
33
+ import { isSafeFetchSupported, isToolHookAuditOn, safeFetch } from '../lib/safeFetch.js';
34
+
35
+ describe('safefetch-behavior (RFC 0076 §B / §host.http)', () => {
36
+ it('blocks a metadata-endpoint target (SSRF guard)', async () => {
37
+ if (!(await isSafeFetchSupported())) return; // capability absent — soft-skip
38
+ const res = await safeFetch({ url: 'http://169.254.169.254/latest/meta-data/' });
39
+ if (res === null) return; // seam absent — soft-skip
40
+ expect(
41
+ res.outcome,
42
+ driver.describe('host-capabilities.md §host.http', 'safeFetch MUST NOT connect to a cloud-metadata address'),
43
+ ).toBe('blocked');
44
+ expect(
45
+ res.blocked,
46
+ driver.describe('host-capabilities.md §host.http', 'a blocked SSRF target reports blocked:"ssrf" (http-client-ssrf-guard invariant)'),
47
+ ).toBe('ssrf');
48
+ });
49
+
50
+ it('blocks a loopback target (SSRF guard)', async () => {
51
+ if (!(await isSafeFetchSupported())) return;
52
+ const res = await safeFetch({ url: 'http://127.0.0.1:6379/' });
53
+ if (res === null) return;
54
+ expect(
55
+ res.outcome,
56
+ driver.describe('host-capabilities.md §host.http', 'safeFetch MUST NOT connect to loopback'),
57
+ ).toBe('blocked');
58
+ });
59
+
60
+ it('blocks DNS-rebinding (resolved IP is pinned for the connection)', async () => {
61
+ if (!(await isSafeFetchSupported())) return;
62
+ const res = await safeFetch({ url: 'http://example.com/', simulateRebindTo: '169.254.169.254' });
63
+ if (res === null) return;
64
+ expect(
65
+ res.outcome,
66
+ driver.describe('host-capabilities.md §host.http', 'a public name that re-resolves to a blocked address MUST be blocked (rebinding defeat)'),
67
+ ).toBe('blocked');
68
+ });
69
+
70
+ it('refuses a Connection: upgrade request (no 101 socket-hijack escape)', async () => {
71
+ if (!(await isSafeFetchSupported())) return;
72
+ const res = await safeFetch({ url: 'https://example.com/', init: { headers: { Connection: 'upgrade' } } });
73
+ if (res === null) return;
74
+ expect(
75
+ res.outcome,
76
+ driver.describe('host-capabilities.md §host.http', 'safeFetch MUST refuse a connection-upgrade attempt'),
77
+ ).toBe('blocked');
78
+ expect(
79
+ res.blocked,
80
+ driver.describe('host-capabilities.md §host.http', 'a refused upgrade reports blocked:"upgrade"'),
81
+ ).toBe('upgrade');
82
+ });
83
+
84
+ it('emits the tool-hooks audit pair when prePostEvents is also advertised', async () => {
85
+ if (!(await isSafeFetchSupported())) return;
86
+ if (!(await isToolHookAuditOn())) return; // audit MUST applies only when both advertised
87
+ const res = await safeFetch({ url: 'https://example.com/' });
88
+ if (res === null) return;
89
+ if (res.outcome !== 'fetched') return; // only a completed call carries the pair
90
+ expect(
91
+ res.toolCalled !== undefined && res.toolReturned !== undefined,
92
+ driver.describe('host-capabilities.md §host.http', 'when toolHooks.prePostEvents + safeFetch are both advertised, a safeFetch call MUST emit the agent.toolCalled/agent.toolReturned pair'),
93
+ ).toBe(true);
94
+ expect(
95
+ (res.toolCalled as { transport?: string } | undefined)?.transport,
96
+ driver.describe('host-capabilities.md §host.http', 'the audit pair carries transport:"http"'),
97
+ ).toBe('http');
98
+ });
99
+ });
@@ -0,0 +1,175 @@
1
+ /**
2
+ * Live-run safe-fetch audit emission — `host-capabilities.md` §host.http
3
+ * (`ctx.http.safeFetch`) + RFC 0076 §B + RFC 0064 §B.
4
+ *
5
+ * Closes the seam-vs-production gap left by `safefetch-behavior.test.ts`. That
6
+ * scenario drives `POST /v1/host/sample/http/safe-fetch` and reads the audit
7
+ * pair the SEAM returns INLINE — it never proves the *production* per-ctx
8
+ * `ctx.http.safeFetch` (the client injected into a real run) emits anything. A
9
+ * host can co-advertise `toolHooks.prePostEvents` + `httpClient.safeFetch`,
10
+ * pass the seam, and still ship a production `createSafeFetch()` with no audit
11
+ * hooks — the "quiet bypass" §host.http line "centralizing egress in the host
12
+ * must increase auditability, not become a quiet bypass" forbids.
13
+ *
14
+ * The normative MUST (host-capabilities.md §host.http; RFC 0076 §B):
15
+ * When `toolHooks.prePostEvents: true` AND `httpClient.safeFetch.supported:
16
+ * true` are BOTH advertised, the host MUST emit the `agent.toolCalled` /
17
+ * `agent.toolReturned` pair (`transport: "http"`) **for every `safeFetch`
18
+ * invocation** — including a *refused* one (a blocked egress attempt is
19
+ * exactly the security-relevant event the audit log must capture).
20
+ *
21
+ * This scenario verifies that MUST against the DURABLE run event log, not the
22
+ * seam's inline echo, and does so **without depending on outbound egress** so
23
+ * the bar can never pass vacuously:
24
+ * 1. EGRESS-FREE FLOOR (required): drive one `ctx.http.safeFetch` to a
25
+ * guaranteed-blocked link-local / cloud-metadata URL inside a REAL run via
26
+ * `POST /v1/host/sample/http/safe-fetch-run`. A conformant SSRF guard
27
+ * refuses it on every host with zero connectivity, yet the production
28
+ * injection + auditHooks path is still exercised, so the durable pair MUST
29
+ * be present. This removes the "no public egress ⇒ green-but-proves-nothing"
30
+ * hole that a `fetched`-only assertion left.
31
+ * 2. SUCCESS-PATH COVERAGE (best-effort): drive a public URL; when it actually
32
+ * `fetched`, assert the same durable pair (catches a host that audits only
33
+ * the reject path). Skipped — not failed — where the environment has no
34
+ * public egress; the floor already proved emission.
35
+ * 3. Read each run's persisted events via the test event-log seam
36
+ * (`GET /v1/host/sample/test/runs/:runId/events`) and assert a `callId`-
37
+ * paired `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned`.
38
+ *
39
+ * Gating: `behaviorGate('openwop-safefetch-live-audit', <both flags>)` — NOT an
40
+ * inline soft-skip. So it skips-with-reason in default mode but FAILS under
41
+ * `OPENWOP_REQUIRE_BEHAVIOR=true` when a host advertises both flags yet does not
42
+ * emit. This is the RFC 0076 §B → Accepted bar a non-steward host validates
43
+ * against. The run seam itself (`safe-fetch-run`) is host-pending: a 404 from a
44
+ * not-yet-wired seam soft-skips even in strict mode (the seam is test-only
45
+ * infrastructure, distinct from the advertised production capability).
46
+ * The SSRF guarantee reuses the existing `http-client-ssrf-guard` invariant —
47
+ * no new SECURITY invariant; the audit MUST is RFC 0064's existing posture.
48
+ *
49
+ * @see spec/v1/host-capabilities.md §host.http
50
+ * @see spec/v1/host-sample-test-seams.md §"Open seams" (safe-fetch-run)
51
+ * @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
52
+ * @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
53
+ */
54
+
55
+ import { describe, it, expect } from 'vitest';
56
+ import { driver } from '../lib/driver.js';
57
+ import { behaviorGate } from '../lib/behavior-gate.js';
58
+ import { isSafeFetchLiveAuditAdvertised, safeFetchViaRun } from '../lib/safeFetch.js';
59
+ import { queryTestEvents } from '../lib/event-log-query.js';
60
+
61
+ const PROFILE = 'openwop-safefetch-live-audit';
62
+ const CITE = 'host-capabilities.md §host.http';
63
+
64
+ // A link-local / cloud-metadata URL the SSRF guard MUST refuse — blocked on
65
+ // EVERY host regardless of outbound egress, so the durable-pair assertion never
66
+ // passes vacuously. Per §host.http the audit MUST is per-invocation: a *blocked*
67
+ // safeFetch still emits the agent.toolCalled/agent.toolReturned pair (the
68
+ // toolReturned carries the forbidden status). cf. `http-client-ssrf-guard`.
69
+ const BLOCKED_URL = 'http://169.254.169.254/latest/meta-data/';
70
+ // A public URL the guard SHOULD allow — best-effort coverage of the *success*
71
+ // path; skipped (not failed) where the environment has no public egress.
72
+ const FETCH_URL = 'https://example.com/';
73
+
74
+ /**
75
+ * Read the durable run event log for `runId` and assert a `callId`-paired
76
+ * `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned` exists, with
77
+ * the RFC 0002 §B causation chain verified when the host surfaces it. Returns
78
+ * `false` (caller treats as host-pending soft-skip) only when the event-log
79
+ * query seam is unavailable; otherwise asserts and returns `true`.
80
+ */
81
+ async function assertDurableHttpPair(runId: string, label: string): Promise<boolean> {
82
+ const calledQ = await queryTestEvents(runId, { type: 'agent.toolCalled' });
83
+ const returnedQ = await queryTestEvents(runId, { type: 'agent.toolReturned' });
84
+ if (!calledQ.ok || !returnedQ.ok) {
85
+ // eslint-disable-next-line no-console
86
+ console.warn(`[${PROFILE}] event-log query seam unavailable; host-pending — skipping`);
87
+ return false;
88
+ }
89
+
90
+ // The HTTP-transport tool call: a durable agent.toolCalled with transport:"http".
91
+ const httpCall = calledQ.events.find((e) => (e.payload as { transport?: string }).transport === 'http');
92
+ expect(
93
+ httpCall !== undefined,
94
+ driver.describe(
95
+ CITE,
96
+ `(${label}) when toolHooks.prePostEvents + safeFetch are both advertised, a production ctx.http.safeFetch call MUST persist an agent.toolCalled with transport:"http" to the durable run event log (not just the seam echo), for EVERY invocation incl. blocked ones`,
97
+ ),
98
+ ).toBe(true);
99
+ if (!httpCall) return true;
100
+
101
+ const callId = (httpCall.payload as { callId?: string }).callId;
102
+ expect(
103
+ typeof callId === 'string' && callId.length > 0,
104
+ driver.describe(CITE, `(${label}) the persisted agent.toolCalled MUST carry the required callId (run-event-payloads.schema.json §agentToolCalled)`),
105
+ ).toBe(true);
106
+
107
+ // The paired agent.toolReturned — matched by the required callId (RFC 0002 §B pairing).
108
+ const paired = returnedQ.events.find((e) => (e.payload as { callId?: string }).callId === callId);
109
+ expect(
110
+ paired !== undefined,
111
+ driver.describe(CITE, `(${label}) the agent.toolCalled MUST be followed by a callId-paired agent.toolReturned in the durable log (no quiet bypass)`),
112
+ ).toBe(true);
113
+
114
+ // Stricter, when the host surfaces causation: RFC 0002 §B says
115
+ // toolReturned.causationId === the paired toolCalled.eventId. Tolerate
116
+ // hosts that omit causationId (callId pairing already proven above).
117
+ if (paired && typeof paired.causationId === 'string') {
118
+ expect(
119
+ paired.causationId,
120
+ driver.describe('RFC 0002 §B', 'agent.toolReturned.causationId MUST equal the paired agent.toolCalled.eventId when surfaced'),
121
+ ).toBe(httpCall.eventId);
122
+ }
123
+ return true;
124
+ }
125
+
126
+ describe('safefetch-live-audit (RFC 0076 §B / RFC 0064 §B — production path, durable log)', () => {
127
+ it('a BLOCKED real-run safeFetch emits the durable agent.toolCalled/agent.toolReturned pair (transport:"http") — egress-free floor', async () => {
128
+ const advertised = await isSafeFetchLiveAuditAdvertised();
129
+ if (!behaviorGate(PROFILE, advertised)) return; // default-skip; strict-fail when both flags advertised
130
+
131
+ // Run seam is host-pending infrastructure — soft-skip (even in strict mode)
132
+ // until a safeFetch host wires it. behaviorGate above already enforced the
133
+ // capability co-advertisement; this only gates on the test vehicle.
134
+ const run = await safeFetchViaRun({ url: BLOCKED_URL });
135
+ if (run === null) {
136
+ // eslint-disable-next-line no-console
137
+ console.warn(`[${PROFILE}] safe-fetch-run seam unwired (404); host-pending — skipping`);
138
+ return;
139
+ }
140
+
141
+ // The metadata IP MUST be refused by a conformant SSRF guard
142
+ // (http-client-ssrf.test.ts owns that contract). Regardless of the exact
143
+ // outcome, the production injection path ran, so the durable audit pair MUST
144
+ // exist — this is the egress-independent floor that makes the bar non-vacuous.
145
+ expect(
146
+ typeof run.runId === 'string' && (run.runId as string).length > 0,
147
+ driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the safeFetch in'),
148
+ ).toBe(true);
149
+ await assertDurableHttpPair(run.runId as string, 'blocked');
150
+ });
151
+
152
+ it('a FETCHED real-run safeFetch also emits the durable pair (success-path coverage — skipped without public egress)', async () => {
153
+ const advertised = await isSafeFetchLiveAuditAdvertised();
154
+ if (!behaviorGate(PROFILE, advertised)) return;
155
+
156
+ const run = await safeFetchViaRun({ url: FETCH_URL });
157
+ if (run === null) return; // seam unwired — already warned by the floor test
158
+
159
+ if (run.outcome !== 'fetched') {
160
+ // No public egress in this environment — the blocked-path floor already
161
+ // proved the production audit path emits. Skip success-path coverage
162
+ // rather than fail; this is coverage, not the floor.
163
+ // eslint-disable-next-line no-console
164
+ console.warn(
165
+ `[${PROFILE}] ${FETCH_URL} did not fetch (outcome=${run.outcome ?? 'n/a'}); no public egress — success-path coverage skipped (the blocked floor covers emission)`,
166
+ );
167
+ return;
168
+ }
169
+ expect(
170
+ typeof run.runId === 'string' && (run.runId as string).length > 0,
171
+ driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the fetch in'),
172
+ ).toBe(true);
173
+ await assertDurableHttpPair(run.runId as string, 'fetched');
174
+ });
175
+ });
@@ -384,16 +384,32 @@ function extractReadmeDocumentIndex(readme: string): string {
384
384
  return readme.slice(start, end);
385
385
  }
386
386
 
387
- function listMarkdownFilesRecursive(dir: string): string[] {
387
+ function listMarkdownFilesRecursive(dir: string, repoRoot: string = dir): string[] {
388
388
  const ignoredDirs = new Set(['.git', 'node_modules', 'dist']);
389
+ // Repo-relative directory paths to prune. These are subtrees whose
390
+ // content shouldn't be link-checked because either (a) they're
391
+ // generated build output (`site/out`) or (b) they're a vendored
392
+ // mirror of a canonical source whose READMEs use links relative to
393
+ // the canonical path, not the vendored path:
394
+ //
395
+ // - `apps/workflow-engine/packs/` mirrors repo-root `packs/`, synced
396
+ // via `apps/workflow-engine/scripts/sync-packs.sh` so the Cloud
397
+ // Run image's `apps/workflow-engine/` build context can ship them.
398
+ // Pack READMEs use `../../RFCS/...` / `../../spec/v1/...` links
399
+ // that resolve from the canonical location (which this walker
400
+ // DOES check) but break from the deeper vendored path. The
401
+ // canonical copies are authoritative; the vendored copies are
402
+ // byte-for-byte identical via cp -R.
403
+ const prunedRepoRelative = new Set(['site/out', 'apps/workflow-engine/packs']);
389
404
  const files: string[] = [];
390
405
 
391
406
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
392
407
  if (entry.isDirectory()) {
393
408
  if (ignoredDirs.has(entry.name)) continue;
394
409
  const child = join(dir, entry.name);
395
- if (relative(dir, child).startsWith('site/out')) continue;
396
- files.push(...listMarkdownFilesRecursive(child));
410
+ const repoRelChild = relative(repoRoot, child);
411
+ if (prunedRepoRelative.has(repoRelChild)) continue;
412
+ files.push(...listMarkdownFilesRecursive(child, repoRoot));
397
413
  continue;
398
414
  }
399
415
  if (entry.isFile() && entry.name.endsWith('.md')) {
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Portable tool catalog — descriptor + capability + session-event shapes (RFC 0078).
3
+ *
4
+ * Always-on, server-free schema-shape probe. Verifies that:
5
+ * - `tool-descriptor.schema.json` compiles and round-trips a conforming
6
+ * `ToolDescriptor`, and rejects a descriptor missing the REQUIRED
7
+ * `safetyTier`.
8
+ * - the §C-1 / §F-4 cross-field MUST is enforced: a `safetyTier: "exec"`
9
+ * descriptor MUST carry `source: "host-extension"` (RFC 0069 — exec is never
10
+ * protocol-tier); an `exec` + `node-pack` descriptor is rejected, an `exec`
11
+ * + `host-extension` descriptor is accepted.
12
+ * - `capabilities.toolCatalog` is declared with its `supported` / `sources` /
13
+ * `sessionLifecycle` sub-flags.
14
+ * - the `tool.session.opened` / `tool.session.closed` payload $defs validate
15
+ * conforming content-free records and reject malformed ones (a `closed`
16
+ * missing `outcome`; an out-of-enum `outcome`), and both event names appear
17
+ * in the RunEventType enum.
18
+ *
19
+ * Behavioral assertions (a live `GET /v1/tools` returning authorization-scoped
20
+ * descriptors, the `404` non-disclosure, the `tool.session.*` bracket ordering)
21
+ * are gated on `capabilities.toolCatalog.supported` and land in
22
+ * `tool-catalog-projection.test.ts` + `tool-session-lifecycle.test.ts` (deferred
23
+ * per RFC 0078 §Conformance — reference host deferred). This scenario asserts the
24
+ * wire contract, not host behavior.
25
+ *
26
+ * Spec references:
27
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md
28
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
29
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md (exec ⇒ host-extension)
30
+ */
31
+
32
+ import { describe, it, expect } from 'vitest';
33
+ import { readFileSync } from 'node:fs';
34
+ import { join } from 'node:path';
35
+ import Ajv2020 from 'ajv/dist/2020.js';
36
+ import addFormats from 'ajv-formats';
37
+ import { SCHEMAS_DIR } from '../lib/paths.js';
38
+
39
+ const why = (specRef: string, requirement: string): string => `${specRef} — ${requirement}`;
40
+
41
+ function loadSchema(name: string): Record<string, unknown> {
42
+ return JSON.parse(readFileSync(join(SCHEMAS_DIR, name), 'utf8')) as Record<string, unknown>;
43
+ }
44
+
45
+ describe('tool-descriptor-shape: ToolDescriptor (RFC 0078 §C, server-free)', () => {
46
+ const ajv = addFormats(new Ajv2020({ strict: false }));
47
+ const validate = ajv.compile(loadSchema('tool-descriptor.schema.json'));
48
+
49
+ it('a conforming descriptor validates', () => {
50
+ expect(
51
+ validate({
52
+ toolId: 'mcp:fs.read', source: 'mcp', title: 'Read file',
53
+ inputSchema: { type: 'object' }, auth: { scopes: ['tools:fs:read'] },
54
+ egress: 'none', approval: 'never', replayPolicy: 'idempotent',
55
+ safetyTier: 'read', costHint: 'low', latencyHint: 'low',
56
+ }),
57
+ why('tool-catalog.md §C', 'a conforming ToolDescriptor MUST validate'),
58
+ ).toBe(true);
59
+ });
60
+
61
+ it('a descriptor missing the REQUIRED safetyTier is rejected', () => {
62
+ expect(
63
+ validate({ toolId: 'x', source: 'mcp' }),
64
+ why('tool-catalog.md §C', 'safetyTier is REQUIRED'),
65
+ ).toBe(false);
66
+ });
67
+
68
+ it('enforces exec ⇒ host-extension (RFC 0069; §C-1/§F-4)', () => {
69
+ expect(
70
+ validate({ toolId: 'x-host-acme-shell', source: 'host-extension', safetyTier: 'exec', approval: 'always', egress: 'host-owned' }),
71
+ why('tool-catalog.md §C-1', 'an exec tool sourced from host-extension MUST validate'),
72
+ ).toBe(true);
73
+ expect(
74
+ validate({ toolId: 'openwop:run-shell', source: 'node-pack', safetyTier: 'exec' }),
75
+ why('tool-catalog.md §C-1 / RFC 0069', 'an exec tool MUST NOT be protocol-tier (node-pack)'),
76
+ ).toBe(false);
77
+ });
78
+
79
+ it('rejects an unknown property (additionalProperties:false)', () => {
80
+ expect(
81
+ validate({ toolId: 'x', source: 'mcp', safetyTier: 'read', danger: true }),
82
+ why('tool-catalog.md §C', 'ToolDescriptor MUST be additionalProperties:false'),
83
+ ).toBe(false);
84
+ });
85
+ });
86
+
87
+ describe('tool-descriptor-shape: capability advertisement (RFC 0078 §A, server-free)', () => {
88
+ it('capabilities.toolCatalog is declared with its sub-flags', () => {
89
+ const caps = loadSchema('capabilities.schema.json');
90
+ const toolCatalog = (caps.properties as Record<string, { properties?: Record<string, unknown> }>).toolCatalog;
91
+ expect(
92
+ toolCatalog,
93
+ why('capabilities.md §toolCatalog', 'capabilities.toolCatalog MUST be declared'),
94
+ ).toBeDefined();
95
+ for (const flag of ['supported', 'sources', 'sessionLifecycle']) {
96
+ expect(
97
+ toolCatalog?.properties?.[flag],
98
+ why('tool-catalog.md §A', `capabilities.toolCatalog.${flag} MUST be declared`),
99
+ ).toBeDefined();
100
+ }
101
+ });
102
+ });
103
+
104
+ describe('tool-descriptor-shape: session lifecycle events (RFC 0078 §D, server-free)', () => {
105
+ const payloads = loadSchema('run-event-payloads.schema.json');
106
+ const ajv = addFormats(new Ajv2020({ strict: false }));
107
+ const compile = (defName: string) => ajv.compile({
108
+ $schema: 'https://json-schema.org/draft/2020-12/schema',
109
+ $defs: (payloads as { $defs: Record<string, unknown> }).$defs,
110
+ $ref: `#/$defs/${defName}`,
111
+ } as Record<string, unknown>);
112
+
113
+ it('tool.session.opened validates a content-free record', () => {
114
+ const v = compile('toolSessionOpened');
115
+ expect(v({ sessionId: 's1', toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'opened MUST validate')).toBe(true);
116
+ expect(v({ toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'opened requires sessionId')).toBe(false);
117
+ });
118
+
119
+ it('tool.session.closed validates + enforces the closed outcome enum', () => {
120
+ const v = compile('toolSessionClosed');
121
+ expect(v({ sessionId: 's1', toolId: 'mcp:fs.read', outcome: 'completed' }), why('tool-catalog.md §D', 'closed MUST validate')).toBe(true);
122
+ expect(v({ sessionId: 's1', toolId: 'mcp:fs.read' }), why('tool-catalog.md §D', 'closed requires outcome')).toBe(false);
123
+ expect(v({ sessionId: 's1', toolId: 'mcp:fs.read', outcome: 'exploded' }), why('tool-catalog.md §D', 'outcome is a closed enum')).toBe(false);
124
+ });
125
+
126
+ it('both session event names appear in the RunEventType enum', () => {
127
+ const runEvent = loadSchema('run-event.schema.json');
128
+ const enumVals = ((runEvent.$defs as Record<string, { enum?: string[] }>).RunEventType?.enum) ?? [];
129
+ for (const name of ['tool.session.opened', 'tool.session.closed']) {
130
+ expect(enumVals.includes(name), why('run-event.schema.json', `${name} MUST be in the RunEventType enum`)).toBe(true);
131
+ }
132
+ });
133
+ });