@openwop/openwop-conformance 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +70 -0
- package/api/openapi.yaml +268 -1
- package/coverage.md +30 -2
- package/fixtures/oauth-providers/synthetic.json +38 -0
- package/fixtures.md +10 -0
- package/package.json +1 -1
- package/schemas/README.md +12 -0
- package/schemas/agent-deployment-transition.schema.json +49 -0
- package/schemas/agent-deployment.schema.json +54 -0
- package/schemas/agent-eval-suite.schema.json +140 -0
- package/schemas/agent-inventory-response.schema.json +25 -0
- package/schemas/agent-manifest.schema.json +5 -0
- package/schemas/agent-org-chart.schema.json +82 -0
- package/schemas/agent-ref.schema.json +12 -2
- package/schemas/agent-roster-entry.schema.json +81 -0
- package/schemas/agent-roster-response.schema.json +21 -0
- package/schemas/budget-policy.schema.json +18 -0
- package/schemas/capabilities.schema.json +277 -0
- package/schemas/credential-provenance.schema.json +18 -0
- package/schemas/eval-summary.schema.json +92 -0
- package/schemas/node-pack-manifest.schema.json +17 -0
- package/schemas/org-chart-responsibility-view.schema.json +26 -0
- package/schemas/run-event-payloads.schema.json +286 -3
- package/schemas/run-event.schema.json +19 -0
- package/schemas/tool-descriptor.schema.json +63 -0
- package/schemas/trigger-subscription.schema.json +26 -0
- package/src/lib/agentRoster.ts +76 -0
- package/src/lib/liveRuntime.ts +59 -0
- package/src/lib/profiles.ts +157 -0
- package/src/lib/runtimeRequires.ts +38 -0
- package/src/lib/safeFetch.ts +87 -0
- package/src/scenarios/agent-deployment-shape.test.ts +139 -0
- package/src/scenarios/agent-eval-suite-shape.test.ts +167 -0
- package/src/scenarios/agent-live-allowlist-enforced.test.ts +53 -0
- package/src/scenarios/agent-live-invocation-bracket.test.ts +98 -0
- package/src/scenarios/agent-live-runtime-shape.test.ts +98 -0
- package/src/scenarios/agent-live-structured-output.test.ts +58 -0
- package/src/scenarios/agent-org-chart-shape.test.ts +127 -0
- package/src/scenarios/agent-platform-profile.test.ts +158 -0
- package/src/scenarios/agent-roster-attribution.test.ts +179 -0
- package/src/scenarios/agent-roster-shape.test.ts +146 -0
- package/src/scenarios/budget-policy-shape.test.ts +136 -0
- package/src/scenarios/egress-provenance-shape.test.ts +137 -0
- package/src/scenarios/memory-capability-model-shape.test.ts +186 -0
- package/src/scenarios/oauth-authorization-code-roundtrip.test.ts +145 -0
- package/src/scenarios/runtime-requires-install-gate.test.ts +92 -0
- package/src/scenarios/runtime-requires-shape.test.ts +134 -0
- package/src/scenarios/safefetch-behavior.test.ts +99 -0
- package/src/scenarios/safefetch-live-audit.test.ts +175 -0
- package/src/scenarios/spec-corpus-validity.test.ts +19 -3
- package/src/scenarios/tool-descriptor-shape.test.ts +133 -0
- package/src/scenarios/trigger-bridge-shape.test.ts +135 -0
- package/src/scenarios/x-openwop-form-pack-manifest.test.ts +155 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pack runtime-requirements vocabulary + shape — `node-packs.md`
|
|
3
|
+
* §"Runtime platform requirements" + `schemas/node-pack-manifest.schema.json`
|
|
4
|
+
* `$defs/Runtime.requires` (RFC 0076 §A).
|
|
5
|
+
*
|
|
6
|
+
* Server-free schema-validation scenario. The `runtime.requires[]` field is an
|
|
7
|
+
* OPTIONAL, closed, runtime-agnostic vocabulary a pack uses to declare the
|
|
8
|
+
* platform primitives its code exercises, so a sandbox host can gate at install
|
|
9
|
+
* time instead of trial-load. This file exercises the schema layer (the §A
|
|
10
|
+
* "vocabulary-validation" normative behavior — a raw builtin name is rejected —
|
|
11
|
+
* plus the additive/empty-array shape contract):
|
|
12
|
+
*
|
|
13
|
+
* 1. Positive: a manifest declaring valid primitives validates cleanly.
|
|
14
|
+
* 2. Positive: the field is OPTIONAL — a manifest omitting it validates.
|
|
15
|
+
* 3. Positive: an empty array (`requires: []`) validates and is equivalent to
|
|
16
|
+
* omission (no host may read a distinct meaning into it; §A).
|
|
17
|
+
* 4. Positive: every one of the 8 vocabulary tokens individually validates.
|
|
18
|
+
* 5. Negative — raw builtin name: `"node:dns/promises"` (the value that
|
|
19
|
+
* motivated the abstract vocabulary) is rejected; the registry/host
|
|
20
|
+
* surfaces this as `invalid_manifest`.
|
|
21
|
+
* 6. Negative — duplicate token: `uniqueItems` is enforced.
|
|
22
|
+
*
|
|
23
|
+
* The install-time GATE behavior (grant / refuse → `pack_runtime_requirement_unmet`,
|
|
24
|
+
* and the non-sandbox-host SHOULD-projection) is host behavior and lives in the
|
|
25
|
+
* seam-gated `runtime-requires-install-gate.test.ts`.
|
|
26
|
+
*
|
|
27
|
+
* @see spec/v1/node-packs.md §"Runtime platform requirements"
|
|
28
|
+
* @see spec/v1/registry-operations.md §"Runtime-requirement install gate"
|
|
29
|
+
* @see schemas/node-pack-manifest.schema.json
|
|
30
|
+
* @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { readFileSync, readdirSync } from 'node:fs';
|
|
35
|
+
import { join } from 'node:path';
|
|
36
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
37
|
+
import addFormats from 'ajv-formats';
|
|
38
|
+
import type { ErrorObject, ValidateFunction } from 'ajv';
|
|
39
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
40
|
+
|
|
41
|
+
const SCHEMA_PATH = join(SCHEMAS_DIR, 'node-pack-manifest.schema.json');
|
|
42
|
+
|
|
43
|
+
const VOCABULARY = [
|
|
44
|
+
'net.dns',
|
|
45
|
+
'net.outbound',
|
|
46
|
+
'crypto',
|
|
47
|
+
'subprocess',
|
|
48
|
+
'fs.read',
|
|
49
|
+
'fs.write',
|
|
50
|
+
'env.read',
|
|
51
|
+
'clock',
|
|
52
|
+
] as const;
|
|
53
|
+
|
|
54
|
+
/**
 * Build a minimal pack-manifest fixture for the schema validator.
 *
 * `requires` is copied verbatim onto `runtime.requires` whenever it is not
 * `undefined` (so invalid values can be passed deliberately for negative
 * cases); when it is `undefined` the `requires` key is left off `runtime`
 * entirely.
 */
function manifest(requires?: unknown) {
  const runtime: Record<string, unknown> =
    requires === undefined
      ? { language: 'javascript', entry: 'index.mjs' }
      : { language: 'javascript', entry: 'index.mjs', requires };

  const nodes = [
    { typeId: 'vendor.example.http.fetch', version: '1.0.0', category: 'integration', role: 'side-effect' },
  ];

  return {
    name: 'vendor.example.http',
    version: '1.0.0',
    engines: { openwop: '>=1.1 <2.0.0' },
    runtime,
    nodes,
  };
}
|
|
65
|
+
|
|
66
|
+
describe('category: runtime.requires vocabulary + shape (RFC 0076 §A)', () => {
  const ajv = new Ajv2020({ allErrors: true, strict: false });
  addFormats(ajv);

  // Pre-register every sibling schema so cross-file $refs can resolve when the
  // manifest schema is compiled. addSchema only registers; compilation of the
  // target happens further down.
  const schemaFiles = readdirSync(SCHEMAS_DIR).filter((file) => file.endsWith('.schema.json'));
  for (const file of schemaFiles) {
    try {
      const raw = readFileSync(join(SCHEMAS_DIR, file), 'utf8');
      ajv.addSchema(JSON.parse(raw) as Record<string, unknown>);
    } catch {
      /* best-effort registration (e.g. duplicate/already-registered) — the target is compiled below */
    }
  }

  // Prefer the validator registered under the schema's $id; fall back to
  // compiling the schema directly when it was not registered.
  const schema = JSON.parse(readFileSync(SCHEMA_PATH, 'utf8'));
  const registered = ajv.getSchema(schema['$id'] as string);
  const validate = (registered ?? ajv.compile(schema)) as ValidateFunction;

  /** Assert that `m` is rejected and hand back the validator's error list. */
  function errorsOn(m: unknown): ErrorObject[] {
    const accepted = validate(m);
    expect(accepted).toBe(false);
    return validate.errors ?? [];
  }

  it('positive: a manifest declaring valid primitives validates cleanly', () => {
    const accepted = validate(manifest(['net.dns', 'net.outbound']));
    const message = `node-packs.md §"Runtime platform requirements": a well-formed runtime.requires MUST validate. Errors: ${JSON.stringify(validate.errors)}`;
    expect(accepted, message).toBe(true);
  });

  it('positive: runtime.requires is OPTIONAL — a manifest omitting it validates (additive)', () => {
    const accepted = validate(manifest());
    expect(
      accepted,
      'node-pack-manifest.schema.json: runtime.requires is additive/OPTIONAL — packs predating RFC 0076 validate unchanged',
    ).toBe(true);
  });

  it('positive: an empty requires[] validates (equivalent to omission per §A)', () => {
    const accepted = validate(manifest([]));
    expect(
      accepted,
      'node-packs.md §"Runtime platform requirements": runtime.requires:[] is valid and equivalent to omission',
    ).toBe(true);
  });

  it('positive: every vocabulary token individually validates', () => {
    VOCABULARY.forEach((token) => {
      const accepted = validate(manifest([token]));
      const message = `node-pack-manifest.schema.json: "${token}" is in the RFC 0076 §A vocabulary. Errors: ${JSON.stringify(validate.errors)}`;
      expect(accepted, message).toBe(true);
    });
  });

  it('negative: a raw builtin name (node:dns/promises) is rejected (→ invalid_manifest)', () => {
    const problems = errorsOn(manifest(['node:dns/promises']));
    // At least one error must point at the runtime.requires subtree.
    const pointsAtRequires = problems.some((problem) => problem.instancePath.includes('/runtime/requires'));
    expect(
      pointsAtRequires,
      'node-packs.md §"Runtime platform requirements": raw language builtin names are NOT in the closed vocabulary — the abstract net.dns is the portable equivalent; the registry/host surfaces this as invalid_manifest',
    ).toBe(true);
  });

  it('negative: a duplicate token is rejected (uniqueItems)', () => {
    const problems = errorsOn(manifest(['net.dns', 'net.dns']));
    const sawUniqueItems = problems.some((problem) => problem.keyword === 'uniqueItems');
    expect(sawUniqueItems, 'node-pack-manifest.schema.json: runtime.requires has uniqueItems:true').toBe(true);
  });
});
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Host-provided safe-fetch behavior — `host-capabilities.md` §host.http
|
|
3
|
+
* (`ctx.http.safeFetch`) + RFC 0076 §B.
|
|
4
|
+
*
|
|
5
|
+
* Seam-gated behavioral scenarios for the pack-facing `ctx.http.safeFetch`. When
|
|
6
|
+
* a host advertises `capabilities.httpClient.safeFetch.supported`, the
|
|
7
|
+
* host-mediated fetch MUST apply the §host.http SSRF guard (resolve→pin→connect)
|
|
8
|
+
* so a pack can do outbound HTTP without reaching for `node:dns` / raw sockets:
|
|
9
|
+
*
|
|
10
|
+
* 1. SSRF block — a loopback / RFC 1918 / cloud-metadata target ⇒
|
|
11
|
+
* `{ outcome: "blocked", blocked: "ssrf" }`; the host MUST NOT connect.
|
|
12
|
+
* 2. DNS-rebinding — a public name re-resolving to a blocked address
|
|
13
|
+
* (`simulateRebindTo`) ⇒ also blocked (the resolved IP is pinned).
|
|
14
|
+
* 3. Connection-upgrade refusal — `Connection: upgrade` ⇒
|
|
15
|
+
* `{ outcome: "blocked", blocked: "upgrade" }` (no 101 socket-hijack escape).
|
|
16
|
+
* 4. Audit-when-both — when `toolHooks.prePostEvents` is also advertised, a
|
|
17
|
+
* fetched call emits the `agent.toolCalled` / `agent.toolReturned` pair
|
|
18
|
+
* (`transport: "http"`).
|
|
19
|
+
*
|
|
20
|
+
* All drive `POST /v1/host/sample/http/safe-fetch` and soft-skip when the host
|
|
21
|
+
* doesn't advertise `safeFetch` or doesn't wire the seam (404). Behavior grade
|
|
22
|
+
* is `host-pending` until a `safeFetch` host lights it up. The SSRF *guarantee*
|
|
23
|
+
* reuses the `http-client-ssrf-guard` SECURITY invariant — no new invariant.
|
|
24
|
+
*
|
|
25
|
+
* @see spec/v1/host-capabilities.md §host.http
|
|
26
|
+
* @see spec/v1/host-sample-test-seams.md §"Open seams"
|
|
27
|
+
* @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
|
|
28
|
+
* @see SECURITY/invariants.yaml id: http-client-ssrf-guard
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect } from 'vitest';
|
|
32
|
+
import { driver } from '../lib/driver.js';
|
|
33
|
+
import { isSafeFetchSupported, isToolHookAuditOn, safeFetch } from '../lib/safeFetch.js';
|
|
34
|
+
|
|
35
|
+
describe('safefetch-behavior (RFC 0076 §B / §host.http)', () => {
  // Each case soft-skips (returns early, test passes) when the host does not
  // advertise safeFetch, or when the seam call yields null.

  it('blocks a metadata-endpoint target (SSRF guard)', async () => {
    const supported = await isSafeFetchSupported();
    if (!supported) return; // capability absent — soft-skip

    const result = await safeFetch({ url: 'http://169.254.169.254/latest/meta-data/' });
    if (result === null) return; // seam absent — soft-skip

    const outcomeWhy = driver.describe(
      'host-capabilities.md §host.http',
      'safeFetch MUST NOT connect to a cloud-metadata address',
    );
    expect(result.outcome, outcomeWhy).toBe('blocked');

    const reasonWhy = driver.describe(
      'host-capabilities.md §host.http',
      'a blocked SSRF target reports blocked:"ssrf" (http-client-ssrf-guard invariant)',
    );
    expect(result.blocked, reasonWhy).toBe('ssrf');
  });

  it('blocks a loopback target (SSRF guard)', async () => {
    const supported = await isSafeFetchSupported();
    if (!supported) return;

    const result = await safeFetch({ url: 'http://127.0.0.1:6379/' });
    if (result === null) return;

    const outcomeWhy = driver.describe('host-capabilities.md §host.http', 'safeFetch MUST NOT connect to loopback');
    expect(result.outcome, outcomeWhy).toBe('blocked');
  });

  it('blocks DNS-rebinding (resolved IP is pinned for the connection)', async () => {
    const supported = await isSafeFetchSupported();
    if (!supported) return;

    const result = await safeFetch({ url: 'http://example.com/', simulateRebindTo: '169.254.169.254' });
    if (result === null) return;

    const outcomeWhy = driver.describe(
      'host-capabilities.md §host.http',
      'a public name that re-resolves to a blocked address MUST be blocked (rebinding defeat)',
    );
    expect(result.outcome, outcomeWhy).toBe('blocked');
  });

  it('refuses a Connection: upgrade request (no 101 socket-hijack escape)', async () => {
    const supported = await isSafeFetchSupported();
    if (!supported) return;

    const result = await safeFetch({ url: 'https://example.com/', init: { headers: { Connection: 'upgrade' } } });
    if (result === null) return;

    const outcomeWhy = driver.describe(
      'host-capabilities.md §host.http',
      'safeFetch MUST refuse a connection-upgrade attempt',
    );
    expect(result.outcome, outcomeWhy).toBe('blocked');

    const reasonWhy = driver.describe('host-capabilities.md §host.http', 'a refused upgrade reports blocked:"upgrade"');
    expect(result.blocked, reasonWhy).toBe('upgrade');
  });

  it('emits the tool-hooks audit pair when prePostEvents is also advertised', async () => {
    const supported = await isSafeFetchSupported();
    if (!supported) return;
    const auditOn = await isToolHookAuditOn();
    if (!auditOn) return; // audit MUST applies only when both advertised

    const result = await safeFetch({ url: 'https://example.com/' });
    if (result === null) return;
    if (result.outcome !== 'fetched') return; // only a completed call carries the pair

    const bothPresent = !(result.toolCalled === undefined || result.toolReturned === undefined);
    const pairWhy = driver.describe(
      'host-capabilities.md §host.http',
      'when toolHooks.prePostEvents + safeFetch are both advertised, a safeFetch call MUST emit the agent.toolCalled/agent.toolReturned pair',
    );
    expect(bothPresent, pairWhy).toBe(true);

    const transport = (result.toolCalled as { transport?: string } | undefined)?.transport;
    const transportWhy = driver.describe('host-capabilities.md §host.http', 'the audit pair carries transport:"http"');
    expect(transport, transportWhy).toBe('http');
  });
});
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live-run safe-fetch audit emission — `host-capabilities.md` §host.http
|
|
3
|
+
* (`ctx.http.safeFetch`) + RFC 0076 §B + RFC 0064 §B.
|
|
4
|
+
*
|
|
5
|
+
* Closes the seam-vs-production gap left by `safefetch-behavior.test.ts`. That
|
|
6
|
+
* scenario drives `POST /v1/host/sample/http/safe-fetch` and reads the audit
|
|
7
|
+
* pair the SEAM returns INLINE — it never proves the *production* per-ctx
|
|
8
|
+
* `ctx.http.safeFetch` (the client injected into a real run) emits anything. A
|
|
9
|
+
* host can co-advertise `toolHooks.prePostEvents` + `httpClient.safeFetch`,
|
|
10
|
+
* pass the seam, and still ship a production `createSafeFetch()` with no audit
|
|
11
|
+
* hooks — the "quiet bypass" §host.http line "centralizing egress in the host
|
|
12
|
+
* must increase auditability, not become a quiet bypass" forbids.
|
|
13
|
+
*
|
|
14
|
+
* The normative MUST (host-capabilities.md §host.http; RFC 0076 §B):
|
|
15
|
+
* When `toolHooks.prePostEvents: true` AND `httpClient.safeFetch.supported:
|
|
16
|
+
* true` are BOTH advertised, the host MUST emit the `agent.toolCalled` /
|
|
17
|
+
* `agent.toolReturned` pair (`transport: "http"`) **for every `safeFetch`
|
|
18
|
+
* invocation** — including a *refused* one (a blocked egress attempt is
|
|
19
|
+
* exactly the security-relevant event the audit log must capture).
|
|
20
|
+
*
|
|
21
|
+
* This scenario verifies that MUST against the DURABLE run event log, not the
|
|
22
|
+
* seam's inline echo, and does so **without depending on outbound egress** so
|
|
23
|
+
* the bar can never pass vacuously:
|
|
24
|
+
* 1. EGRESS-FREE FLOOR (required): drive one `ctx.http.safeFetch` to a
|
|
25
|
+
* guaranteed-blocked link-local / cloud-metadata URL inside a REAL run via
|
|
26
|
+
* `POST /v1/host/sample/http/safe-fetch-run`. A conformant SSRF guard
|
|
27
|
+
* refuses it on every host with zero connectivity, yet the production
|
|
28
|
+
* injection + auditHooks path is still exercised, so the durable pair MUST
|
|
29
|
+
* be present. This removes the "no public egress ⇒ green-but-proves-nothing"
|
|
30
|
+
* hole that a `fetched`-only assertion left.
|
|
31
|
+
* 2. SUCCESS-PATH COVERAGE (best-effort): drive a public URL; when it actually
|
|
32
|
+
* `fetched`, assert the same durable pair (catches a host that audits only
|
|
33
|
+
* the reject path). Skipped — not failed — where the environment has no
|
|
34
|
+
* public egress; the floor already proved emission.
|
|
35
|
+
* 3. Read each run's persisted events via the test event-log seam
|
|
36
|
+
* (`GET /v1/host/sample/test/runs/:runId/events`) and assert a `callId`-
|
|
37
|
+
* paired `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned`.
|
|
38
|
+
*
|
|
39
|
+
* Gating: `behaviorGate('openwop-safefetch-live-audit', <both flags>)` — NOT an
|
|
40
|
+
* inline soft-skip. So it skips-with-reason in default mode but FAILS under
|
|
41
|
+
* `OPENWOP_REQUIRE_BEHAVIOR=true` when a host advertises both flags yet does not
|
|
42
|
+
* emit. This is the RFC 0076 §B → Accepted bar a non-steward host validates
|
|
43
|
+
* against. The run seam itself (`safe-fetch-run`) is host-pending: a 404 from a
|
|
44
|
+
* not-yet-wired seam soft-skips even in strict mode (the seam is test-only
|
|
45
|
+
* infrastructure, distinct from the advertised production capability).
|
|
46
|
+
* The SSRF guarantee reuses the existing `http-client-ssrf-guard` invariant —
|
|
47
|
+
* no new SECURITY invariant; the audit MUST is RFC 0064's existing posture.
|
|
48
|
+
*
|
|
49
|
+
* @see spec/v1/host-capabilities.md §host.http
|
|
50
|
+
* @see spec/v1/host-sample-test-seams.md §"Open seams" (safe-fetch-run)
|
|
51
|
+
* @see RFCS/0076-pack-runtime-requirements-and-host-safe-fetch.md §B
|
|
52
|
+
* @see RFCS/0064-tool-invocation-hooks-and-authorization.md §B
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
import { describe, it, expect } from 'vitest';
|
|
56
|
+
import { driver } from '../lib/driver.js';
|
|
57
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
58
|
+
import { isSafeFetchLiveAuditAdvertised, safeFetchViaRun } from '../lib/safeFetch.js';
|
|
59
|
+
import { queryTestEvents } from '../lib/event-log-query.js';
|
|
60
|
+
|
|
61
|
+
const PROFILE = 'openwop-safefetch-live-audit';
|
|
62
|
+
const CITE = 'host-capabilities.md §host.http';
|
|
63
|
+
|
|
64
|
+
// A link-local / cloud-metadata URL the SSRF guard MUST refuse — exercisable on
|
|
65
|
+
// EVERY host regardless of outbound egress, so the durable-pair assertion never
|
|
66
|
+
// passes vacuously. Per §host.http the audit MUST is per-invocation: a *blocked*
|
|
67
|
+
// safeFetch still emits the agent.toolCalled/agent.toolReturned pair (the
|
|
68
|
+
// toolReturned carries the forbidden status). cf. `http-client-ssrf-guard`.
|
|
69
|
+
const BLOCKED_URL = 'http://169.254.169.254/latest/meta-data/';
|
|
70
|
+
// A public URL the guard SHOULD allow — best-effort coverage of the *success*
|
|
71
|
+
// path; skipped (not failed) where the environment has no public egress.
|
|
72
|
+
const FETCH_URL = 'https://example.com/';
|
|
73
|
+
|
|
74
|
+
/**
 * Read the durable run event log for `runId` and assert that a `callId`-paired
 * `agent.toolCalled` (`transport:"http"`) / `agent.toolReturned` exists. When
 * the paired `agent.toolReturned` surfaces a string `causationId`, it is
 * additionally required to equal the `eventId` of the matched
 * `agent.toolCalled` (RFC 0002 §B); hosts that omit `causationId` are tolerated.
 *
 * Event payloads are read null-safely: an event that carries no payload is
 * treated as "not the HTTP call" / "not the pair" so the outcome is a
 * spec-cited assertion failure rather than a TypeError inside the matcher.
 *
 * @param runId - Identifier of the run whose persisted events are queried.
 * @param label - Short tag (e.g. `blocked`, `fetched`) prefixed to assertion messages.
 * @returns `false` only when either event-log query reports `ok` as falsy
 *   (caller treats this as a host-pending soft-skip); otherwise the assertions
 *   run and `true` is returned.
 */
async function assertDurableHttpPair(runId: string, label: string): Promise<boolean> {
  const calledQ = await queryTestEvents(runId, { type: 'agent.toolCalled' });
  const returnedQ = await queryTestEvents(runId, { type: 'agent.toolReturned' });
  if (!calledQ.ok || !returnedQ.ok) {
    // eslint-disable-next-line no-console
    console.warn(`[${PROFILE}] event-log query seam unavailable; host-pending — skipping`);
    return false;
  }

  // The HTTP-transport tool call: a durable agent.toolCalled with transport:"http".
  const httpCall = calledQ.events.find(
    (e) => (e.payload as { transport?: string } | null | undefined)?.transport === 'http',
  );
  expect(
    httpCall !== undefined,
    driver.describe(
      CITE,
      `(${label}) when toolHooks.prePostEvents + safeFetch are both advertised, a production ctx.http.safeFetch call MUST persist an agent.toolCalled with transport:"http" to the durable run event log (not just the seam echo), for EVERY invocation incl. blocked ones`,
    ),
  ).toBe(true);
  // The expect above fails the test when nothing matched; this guard narrows
  // the type for the code below.
  if (!httpCall) return true;

  const callId = (httpCall.payload as { callId?: string } | null | undefined)?.callId;
  expect(
    typeof callId === 'string' && callId.length > 0,
    driver.describe(CITE, `(${label}) the persisted agent.toolCalled MUST carry the required callId (run-event-payloads.schema.json §agentToolCalled)`),
  ).toBe(true);

  // The paired agent.toolReturned — matched by the required callId (RFC 0002 §B pairing).
  const paired = returnedQ.events.find(
    (e) => (e.payload as { callId?: string } | null | undefined)?.callId === callId,
  );
  expect(
    paired !== undefined,
    driver.describe(CITE, `(${label}) the agent.toolCalled MUST be followed by a callId-paired agent.toolReturned in the durable log (no quiet bypass)`),
  ).toBe(true);

  // Stricter, when the host surfaces causation: RFC 0002 §B says
  // toolReturned.causationId === the paired toolCalled.eventId. Tolerate
  // hosts that omit causationId (callId pairing already proven above).
  if (paired && typeof paired.causationId === 'string') {
    expect(
      paired.causationId,
      driver.describe('RFC 0002 §B', 'agent.toolReturned.causationId MUST equal the paired agent.toolCalled.eventId when surfaced'),
    ).toBe(httpCall.eventId);
  }
  return true;
}
|
|
125
|
+
|
|
126
|
+
describe('safefetch-live-audit (RFC 0076 §B / RFC 0064 §B — production path, durable log)', () => {
  it('a BLOCKED real-run safeFetch emits the durable agent.toolCalled/agent.toolReturned pair (transport:"http") — egress-free floor', async () => {
    const advertised = await isSafeFetchLiveAuditAdvertised();
    const proceed = behaviorGate(PROFILE, advertised);
    if (!proceed) return; // default-skip; strict-fail when both flags advertised

    // The run seam is test-only infrastructure: a null result means it is not
    // wired yet, which soft-skips (with a warning) regardless of the gate.
    const run = await safeFetchViaRun({ url: BLOCKED_URL });
    if (run === null) {
      // eslint-disable-next-line no-console
      console.warn(`[${PROFILE}] safe-fetch-run seam unwired (404); host-pending — skipping`);
      return;
    }

    // The outcome of the fetch itself is not asserted here; only that the seam
    // reported a real run id and that the run's durable log holds the pair.
    const runId = run.runId;
    const hasRunId = typeof runId === 'string' && runId.length > 0;
    expect(
      hasRunId,
      driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the safeFetch in'),
    ).toBe(true);
    await assertDurableHttpPair(runId as string, 'blocked');
  });

  it('a FETCHED real-run safeFetch also emits the durable pair (success-path coverage — skipped without public egress)', async () => {
    const advertised = await isSafeFetchLiveAuditAdvertised();
    const proceed = behaviorGate(PROFILE, advertised);
    if (!proceed) return;

    const run = await safeFetchViaRun({ url: FETCH_URL });
    if (run === null) return; // seam unwired — already warned by the floor test

    const fetched = run.outcome === 'fetched';
    if (!fetched) {
      // Anything other than a completed fetch is treated as "no public egress
      // here": success-path coverage is skipped with a warning, not failed.
      // eslint-disable-next-line no-console
      console.warn(
        `[${PROFILE}] ${FETCH_URL} did not fetch (outcome=${run.outcome ?? 'n/a'}); no public egress — success-path coverage skipped (the blocked floor covers emission)`,
      );
      return;
    }

    const runId = run.runId;
    const hasRunId = typeof runId === 'string' && runId.length > 0;
    expect(
      hasRunId,
      driver.describe(CITE, 'the safe-fetch-run seam MUST return the runId of the real run it executed the fetch in'),
    ).toBe(true);
    await assertDurableHttpPair(runId as string, 'fetched');
  });
});
|
|
@@ -384,16 +384,32 @@ function extractReadmeDocumentIndex(readme: string): string {
|
|
|
384
384
|
return readme.slice(start, end);
|
|
385
385
|
}
|
|
386
386
|
|
|
387
|
-
function listMarkdownFilesRecursive(dir: string): string[] {
|
|
387
|
+
function listMarkdownFilesRecursive(dir: string, repoRoot: string = dir): string[] {
|
|
388
388
|
const ignoredDirs = new Set(['.git', 'node_modules', 'dist']);
|
|
389
|
+
// Repo-relative directory paths to prune. These are subtrees whose
|
|
390
|
+
// content shouldn't be link-checked because either (a) they're
|
|
391
|
+
// generated build output (`site/out`) or (b) they're a vendored
|
|
392
|
+
// mirror of a canonical source whose READMEs use links relative to
|
|
393
|
+
// the canonical path, not the vendored path:
|
|
394
|
+
//
|
|
395
|
+
// - `apps/workflow-engine/packs/` mirrors repo-root `packs/`, synced
|
|
396
|
+
// via `apps/workflow-engine/scripts/sync-packs.sh` so the Cloud
|
|
397
|
+
// Run image's `apps/workflow-engine/` build context can ship them.
|
|
398
|
+
// Pack READMEs use `../../RFCS/...` / `../../spec/v1/...` links
|
|
399
|
+
// that resolve from the canonical location (which this walker
|
|
400
|
+
// DOES check) but break from the deeper vendored path. The
|
|
401
|
+
// canonical copies are authoritative; the vendored copies are
|
|
402
|
+
// byte-for-byte identical via cp -R.
|
|
403
|
+
const prunedRepoRelative = new Set(['site/out', 'apps/workflow-engine/packs']);
|
|
389
404
|
const files: string[] = [];
|
|
390
405
|
|
|
391
406
|
for (const entry of readdirSync(dir, { withFileTypes: true })) {
|
|
392
407
|
if (entry.isDirectory()) {
|
|
393
408
|
if (ignoredDirs.has(entry.name)) continue;
|
|
394
409
|
const child = join(dir, entry.name);
|
|
395
|
-
|
|
396
|
-
|
|
410
|
+
const repoRelChild = relative(repoRoot, child);
|
|
411
|
+
if (prunedRepoRelative.has(repoRelChild)) continue;
|
|
412
|
+
files.push(...listMarkdownFilesRecursive(child, repoRoot));
|
|
397
413
|
continue;
|
|
398
414
|
}
|
|
399
415
|
if (entry.isFile() && entry.name.endsWith('.md')) {
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable tool catalog — descriptor + capability + session-event shapes (RFC 0078).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free schema-shape probe. Verifies that:
|
|
5
|
+
* - `tool-descriptor.schema.json` compiles and round-trips a conforming
|
|
6
|
+
* `ToolDescriptor`, and rejects a descriptor missing the REQUIRED
|
|
7
|
+
* `safetyTier`.
|
|
8
|
+
* - the §C-1 / §F-4 cross-field MUST is enforced: a `safetyTier: "exec"`
|
|
9
|
+
* descriptor MUST carry `source: "host-extension"` (RFC 0069 — exec is never
|
|
10
|
+
* protocol-tier); an `exec` + `node-pack` descriptor is rejected, an `exec`
|
|
11
|
+
* + `host-extension` descriptor is accepted.
|
|
12
|
+
* - `capabilities.toolCatalog` is declared with its `supported` / `sources` /
|
|
13
|
+
* `sessionLifecycle` sub-flags.
|
|
14
|
+
* - the `tool.session.opened` / `tool.session.closed` payload $defs validate
|
|
15
|
+
* conforming content-free records and reject malformed ones (a `closed`
|
|
16
|
+
* missing `outcome`; an out-of-enum `outcome`), and both event names appear
|
|
17
|
+
* in the RunEventType enum.
|
|
18
|
+
*
|
|
19
|
+
* Behavioral assertions (a live `GET /v1/tools` returning authorization-scoped
|
|
20
|
+
* descriptors, the `404` non-disclosure, the `tool.session.*` bracket ordering)
|
|
21
|
+
* are gated on `capabilities.toolCatalog.supported` and land in
|
|
22
|
+
* `tool-catalog-projection.test.ts` + `tool-session-lifecycle.test.ts` (deferred
|
|
23
|
+
* per RFC 0078 §Conformance — reference host deferred). This scenario asserts the
|
|
24
|
+
* wire contract, not host behavior.
|
|
25
|
+
*
|
|
26
|
+
* Spec references:
|
|
27
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md
|
|
28
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
29
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md (exec ⇒ host-extension)
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { describe, it, expect } from 'vitest';
|
|
33
|
+
import { readFileSync } from 'node:fs';
|
|
34
|
+
import { join } from 'node:path';
|
|
35
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
36
|
+
import addFormats from 'ajv-formats';
|
|
37
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
38
|
+
|
|
39
|
+
/** Build an assertion message of the form `<specRef> — <requirement>`. */
const why = (specRef: string, requirement: string): string => {
  return [specRef, requirement].join(' — ');
};
|
|
40
|
+
|
|
41
|
+
/**
 * Reads a schema file from `SCHEMAS_DIR` and parses it as JSON.
 *
 * @param name - Schema file name, resolved against `SCHEMAS_DIR`.
 * @returns The parsed document as a plain object.
 * @throws Error when the file cannot be read, is not valid JSON, or its
 *   top-level value is not a JSON object.
 */
function loadSchema(name: string): Record<string, unknown> {
  const parsed: unknown = JSON.parse(readFileSync(join(SCHEMAS_DIR, name), 'utf8'));
  // Callers index into the result (`properties`, `$defs`), so a non-object
  // document would otherwise surface later as an opaque `undefined` access.
  // Fail here instead, with the offending file named.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`schema file ${name} must contain a JSON object at the top level`);
  }
  return parsed as Record<string, unknown>;
}
|
|
44
|
+
|
|
45
|
+
// Compiles tool-descriptor.schema.json once and checks literal descriptor
// objects against it: one accepted shape, plus the rejection cases below.
describe('tool-descriptor-shape: ToolDescriptor (RFC 0078 §C, server-free)', () => {
  const ajv = addFormats(new Ajv2020({ strict: false }));
  const descriptorSchema = loadSchema('tool-descriptor.schema.json');
  const validate = ajv.compile(descriptorSchema);

  it('a conforming descriptor validates', () => {
    const conformingDescriptor = {
      toolId: 'mcp:fs.read',
      source: 'mcp',
      title: 'Read file',
      inputSchema: { type: 'object' },
      auth: { scopes: ['tools:fs:read'] },
      egress: 'none',
      approval: 'never',
      replayPolicy: 'idempotent',
      safetyTier: 'read',
      costHint: 'low',
      latencyHint: 'low',
    };
    const accepted = validate(conformingDescriptor);
    expect(accepted, why('tool-catalog.md §C', 'a conforming ToolDescriptor MUST validate')).toBe(true);
  });

  it('a descriptor missing the REQUIRED safetyTier is rejected', () => {
    // Same minimal shape as elsewhere in this suite, with safetyTier left out.
    const withoutSafetyTier = { toolId: 'x', source: 'mcp' };
    const accepted = validate(withoutSafetyTier);
    expect(accepted, why('tool-catalog.md §C', 'safetyTier is REQUIRED')).toBe(false);
  });

  it('enforces exec ⇒ host-extension (RFC 0069; §C-1/§F-4)', () => {
    const execFromHostExtension = {
      toolId: 'x-host-acme-shell',
      source: 'host-extension',
      safetyTier: 'exec',
      approval: 'always',
      egress: 'host-owned',
    };
    const execFromNodePack = {
      toolId: 'openwop:run-shell',
      source: 'node-pack',
      safetyTier: 'exec',
    };

    const hostExtensionAccepted = validate(execFromHostExtension);
    expect(
      hostExtensionAccepted,
      why('tool-catalog.md §C-1', 'an exec tool sourced from host-extension MUST validate'),
    ).toBe(true);

    const nodePackAccepted = validate(execFromNodePack);
    expect(
      nodePackAccepted,
      why('tool-catalog.md §C-1 / RFC 0069', 'an exec tool MUST NOT be protocol-tier (node-pack)'),
    ).toBe(false);
  });

  it('rejects an unknown property (additionalProperties:false)', () => {
    // `danger` is the extra key this case expects the schema to refuse.
    const withUnknownKey = { toolId: 'x', source: 'mcp', safetyTier: 'read', danger: true };
    const accepted = validate(withUnknownKey);
    expect(accepted, why('tool-catalog.md §C', 'ToolDescriptor MUST be additionalProperties:false')).toBe(false);
  });
});
|
|
86
|
+
|
|
87
|
+
// Inspects capabilities.schema.json structurally (no validator involved):
// the `toolCatalog` property and its three sub-flags must each be declared.
describe('tool-descriptor-shape: capability advertisement (RFC 0078 §A, server-free)', () => {
  it('capabilities.toolCatalog is declared with its sub-flags', () => {
    type SchemaNode = { properties?: Record<string, unknown> };

    const capabilitiesSchema = loadSchema('capabilities.schema.json');
    const declaredProperties = capabilitiesSchema.properties as Record<string, SchemaNode>;
    const toolCatalog = declaredProperties.toolCatalog;

    expect(
      toolCatalog,
      why('capabilities.md §toolCatalog', 'capabilities.toolCatalog MUST be declared'),
    ).toBeDefined();

    const requiredFlags = ['supported', 'sources', 'sessionLifecycle'];
    requiredFlags.forEach((flag) => {
      const declaration = toolCatalog?.properties?.[flag];
      expect(
        declaration,
        why('tool-catalog.md §A', `capabilities.toolCatalog.${flag} MUST be declared`),
      ).toBeDefined();
    });
  });
});
|
|
103
|
+
|
|
104
|
+
// Validates literal `tool.session.*` payloads against the matching `$defs`
// entries of run-event-payloads.schema.json, then checks that both event
// names are listed in the RunEventType enum of run-event.schema.json.
describe('tool-descriptor-shape: session lifecycle events (RFC 0078 §D, server-free)', () => {
  const payloads = loadSchema('run-event-payloads.schema.json');
  const ajv = addFormats(new Ajv2020({ strict: false }));
  const payloadDefs = (payloads as { $defs: Record<string, unknown> }).$defs;

  // Wraps the shared `$defs` in a throwaway root schema whose `$ref` selects
  // a single definition, and compiles that into a validator.
  function compileDef(defName: string) {
    const rootSchema = {
      $schema: 'https://json-schema.org/draft/2020-12/schema',
      $defs: payloadDefs,
      $ref: `#/$defs/${defName}`,
    } as Record<string, unknown>;
    return ajv.compile(rootSchema);
  }

  it('tool.session.opened validates a content-free record', () => {
    const validateOpened = compileDef('toolSessionOpened');

    const complete = { sessionId: 's1', toolId: 'mcp:fs.read' };
    expect(validateOpened(complete), why('tool-catalog.md §D', 'opened MUST validate')).toBe(true);

    const withoutSessionId = { toolId: 'mcp:fs.read' };
    expect(validateOpened(withoutSessionId), why('tool-catalog.md §D', 'opened requires sessionId')).toBe(false);
  });

  it('tool.session.closed validates + enforces the closed outcome enum', () => {
    const validateClosed = compileDef('toolSessionClosed');
    const session = { sessionId: 's1', toolId: 'mcp:fs.read' };

    const completed = { ...session, outcome: 'completed' };
    expect(validateClosed(completed), why('tool-catalog.md §D', 'closed MUST validate')).toBe(true);

    const withoutOutcome = { ...session };
    expect(validateClosed(withoutOutcome), why('tool-catalog.md §D', 'closed requires outcome')).toBe(false);

    // 'exploded' stands in for any value outside the outcome enum.
    const unknownOutcome = { ...session, outcome: 'exploded' };
    expect(validateClosed(unknownOutcome), why('tool-catalog.md §D', 'outcome is a closed enum')).toBe(false);
  });

  it('both session event names appear in the RunEventType enum', () => {
    const runEventSchema = loadSchema('run-event.schema.json');
    const defs = runEventSchema.$defs as Record<string, { enum?: string[] }>;
    // A missing RunEventType or enum becomes an empty list, so it shows up as
    // a failed assertion below rather than as a thrown TypeError.
    const eventTypes = defs.RunEventType?.enum ?? [];

    const sessionEvents = ['tool.session.opened', 'tool.session.closed'];
    sessionEvents.forEach((name) => {
      const listed = eventTypes.includes(name);
      expect(listed, why('run-event.schema.json', `${name} MUST be in the RunEventType enum`)).toBe(true);
    });
  });
});
|