@openwop/openwop-conformance 1.13.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +2 -2
- package/api/openapi.yaml +60 -0
- package/coverage.md +15 -4
- package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
- package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
- package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
- package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
- package/package.json +1 -1
- package/src/lib/discovery-capabilities.ts +18 -19
- package/src/lib/egressPolicy.ts +76 -0
- package/src/lib/otel-collector.ts +72 -0
- package/src/lib/profiles.ts +15 -0
- package/src/lib/sandbox-timeout-worker.mjs +31 -0
- package/src/lib/toolCatalog.ts +81 -0
- package/src/lib/wasm-sandbox-probe.ts +168 -0
- package/src/scenarios/core-standard-profile.test.ts +75 -0
- package/src/scenarios/egress-audience-binding.test.ts +81 -0
- package/src/scenarios/egress-decision-content-free.test.ts +57 -0
- package/src/scenarios/memory-degraded-projection.test.ts +121 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
- package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
- package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
- package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
- package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
- package/src/scenarios/tool-catalog-projection.test.ts +120 -0
- package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
- package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Shared root
|
|
2
|
+
* Shared document-root reader for `/.well-known/openwop` capability families.
|
|
3
3
|
*
|
|
4
4
|
* Per `spec/v1/capabilities.md` §"Document-root layout" (RFC 0073), every
|
|
5
5
|
* capability family (`agents`, `secrets`, `aiProviders`, `auth`, `memory`,
|
|
6
6
|
* `multiAgent`, `authorization`, …) is a property of the discovery document
|
|
7
7
|
* ROOT — `capabilities.schema.json` defines no `capabilities` wrapper property.
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
8
|
+
* Root is the normative MUST, so the suite reads the ROOT ONLY: a host that
|
|
9
|
+
* serves families exclusively under a deprecated top-level `capabilities`
|
|
10
|
+
* wrapper is already non-conformant, and the suite grades it as such rather
|
|
11
|
+
* than tolerating the legacy shape. RFC 0073 Phase 4 dropped the
|
|
12
|
+
* wrapper-fallback the accessor previously carried through the v1.x migration
|
|
13
|
+
* window; the host-side mirror + the schema's `additionalProperties` tolerance
|
|
14
|
+
* retire together at v2.0 (they serve laggard *clients* reading discovery, not
|
|
15
|
+
* the host emission the suite grades). Standardizes the read so every helper
|
|
16
|
+
* reads the same way.
|
|
14
17
|
*
|
|
15
18
|
* @see spec/v1/capabilities.md §"Document-root layout"
|
|
16
19
|
* @see RFCS/0073-capability-document-root-layout.md
|
|
@@ -18,9 +21,10 @@
|
|
|
18
21
|
import { driver } from './driver.js';
|
|
19
22
|
|
|
20
23
|
/**
|
|
21
|
-
* Read one capability family from an already-fetched discovery doc
|
|
22
|
-
* root
|
|
23
|
-
* `
|
|
24
|
+
* Read one capability family from an already-fetched discovery doc at the
|
|
25
|
+
* document root. Returns `undefined` when the family is not a root property —
|
|
26
|
+
* a deprecated top-level `capabilities` wrapper is NOT consulted (root is the
|
|
27
|
+
* RFC 0073 MUST).
|
|
24
28
|
*/
|
|
25
29
|
export function capabilityFamily<T = Record<string, unknown>>(
|
|
26
30
|
doc: unknown,
|
|
@@ -28,18 +32,13 @@ export function capabilityFamily<T = Record<string, unknown>>(
|
|
|
28
32
|
): T | undefined {
|
|
29
33
|
if (!doc || typeof doc !== 'object') return undefined;
|
|
30
34
|
const root = (doc as Record<string, unknown>)[name];
|
|
31
|
-
|
|
32
|
-
const wrapper = (doc as { capabilities?: unknown }).capabilities;
|
|
33
|
-
if (wrapper && typeof wrapper === 'object') {
|
|
34
|
-
return (wrapper as Record<string, unknown>)[name] as T | undefined;
|
|
35
|
-
}
|
|
36
|
-
return undefined;
|
|
35
|
+
return root === undefined ? undefined : (root as T);
|
|
37
36
|
}
|
|
38
37
|
|
|
39
38
|
/**
|
|
40
|
-
* Fetch `/.well-known/openwop` and return one capability family
|
|
41
|
-
*
|
|
42
|
-
* advertise the family.
|
|
39
|
+
* Fetch `/.well-known/openwop` and return one capability family from the
|
|
40
|
+
* document root. `undefined` when the host returns non-200 or doesn't
|
|
41
|
+
* advertise the family at the root.
|
|
43
42
|
*/
|
|
44
43
|
export async function readCapabilityFamily<T = Record<string, unknown>>(
|
|
45
44
|
name: string,
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0079 `httpClient.egressPolicy` conformance
|
|
3
|
+
* scenarios. Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/egressPolicy.js`.
|
|
5
|
+
*
|
|
6
|
+
* Egress policy is a BEHAVIOR layered over the RFC 0076 `safeFetch` — there is
|
|
7
|
+
* no new normative read endpoint. The behavior is driven through the
|
|
8
|
+
* host-sample egress-decision seam (`POST /v1/host/sample/egress/decide`): a
|
|
9
|
+
* host-issued credential carries `audiences[]` (RFC 0079 §A provenance), and an
|
|
10
|
+
* egress whose destination is OUTSIDE those audiences MUST emit
|
|
11
|
+
* `egress.decided { decision: "denied"|"downgraded", reason: "out-of-audience" }`
|
|
12
|
+
* and MUST NOT attach the credential (the §C confused-deputy MUST, backing the
|
|
13
|
+
* `egress-credential-audience-bound` invariant). A provenance-unevaluable egress
|
|
14
|
+
* MUST be `denied { reason: "provenance-unevaluable" }` — fail-closed. The seam
|
|
15
|
+
* is OPTIONAL — scenarios soft-skip on 404/405.
|
|
16
|
+
*
|
|
17
|
+
* Gating uses the `httpClient.egressPolicy.supported` capability flag from the
|
|
18
|
+
* live discovery doc (root-only per RFC 0073).
|
|
19
|
+
*
|
|
20
|
+
* @see RFCS/0079-credential-provenance-and-egress-policy.md
|
|
21
|
+
* @see spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
|
|
22
|
+
*/
|
|
23
|
+
import { driver } from './driver.js';
|
|
24
|
+
import { readCapabilityFamily } from './discovery-capabilities.js';
|
|
25
|
+
|
|
26
|
+
/** Reads `httpClient.egressPolicy` from the discovery document root (root-only per RFC 0073);
|
|
27
|
+
* null when unadvertised. */
|
|
28
|
+
export async function readEgressPolicyCap(): Promise<Record<string, unknown> | null> {
|
|
29
|
+
const http = await readCapabilityFamily<{ egressPolicy?: unknown }>('httpClient');
|
|
30
|
+
const ep = http?.egressPolicy;
|
|
31
|
+
return ep && typeof ep === 'object' ? (ep as Record<string, unknown>) : null;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export interface EgressDecision {
|
|
35
|
+
decision?: string;
|
|
36
|
+
reason?: string;
|
|
37
|
+
destination?: string;
|
|
38
|
+
/** Whether the host-issued credential was attached to the egress (§C — MUST
|
|
39
|
+
* be false for an out-of-audience / unevaluable decision). */
|
|
40
|
+
credentialAttached?: boolean;
|
|
41
|
+
/** Set when the seam ran a canary credential and the canary leaked into any
|
|
42
|
+
* observable surface (the SR-1 negative — MUST stay false/absent). */
|
|
43
|
+
canaryLeaked?: boolean;
|
|
44
|
+
runId?: string;
|
|
45
|
+
[k: string]: unknown;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Drive one egress decision through the host-sample seam (RFC 0079 §C).
|
|
50
|
+
* `scenario`:
|
|
51
|
+
* - `out-of-audience` — credential bound to audience A, egress to B;
|
|
52
|
+
* MUST deny/downgrade + NOT attach the credential.
|
|
53
|
+
* - `provenance-unevaluable` — egress whose provenance can't be evaluated;
|
|
54
|
+
* MUST deny fail-closed.
|
|
55
|
+
* - `in-audience` — control: egress within audience; MAY allow.
|
|
56
|
+
* - `canary` — seed a credential whose value is a known canary
|
|
57
|
+
* and assert it never appears on the wire (SR-1).
|
|
58
|
+
* Returns null when the seam is unwired (404/405).
|
|
59
|
+
*/
|
|
60
|
+
export async function driveEgress(
|
|
61
|
+
body: { scenario: 'out-of-audience' | 'provenance-unevaluable' | 'in-audience' | 'canary' },
|
|
62
|
+
): Promise<EgressDecision | null> {
|
|
63
|
+
const res = await driver.post('/v1/host/sample/egress/decide', body);
|
|
64
|
+
if (res.status === 404 || res.status === 405) return null;
|
|
65
|
+
return (res.json as EgressDecision | undefined) ?? {};
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/** The closed egress-decision vocabulary (RFC 0079 §B). */
|
|
69
|
+
export const EGRESS_DECISIONS = ['allowed', 'denied', 'downgraded', 'approval-required'];
|
|
70
|
+
/** The closed egress-reason vocabulary (RFC 0079 §B — a CLOSED enum so a host
|
|
71
|
+
* cannot spill a blocked URL/host/header into a free-form reason). */
|
|
72
|
+
export const EGRESS_REASONS = ['ok', 'out-of-audience', 'expired', 'ssrf-blocked', 'provenance-unevaluable', 'scope-denied', 'policy-denied'];
|
|
73
|
+
/** Content keys an `egress.decided` payload / provenance descriptor MUST NEVER
|
|
74
|
+
* carry (SR-1 / `egress-decision-no-secret-leak`): no secret value, no blocked
|
|
75
|
+
* URL/header spill. */
|
|
76
|
+
export const EGRESS_CONTENT_FORBIDDEN = ['secret', 'credential', 'credentials', 'token', 'apiKey', 'password', 'url', 'header', 'headers', 'body'];
|
|
@@ -83,6 +83,23 @@ export interface CapturedMetric {
|
|
|
83
83
|
};
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
/**
|
|
87
|
+
* One place where a canary string was found inside the captured OTLP
|
|
88
|
+
* export. Returned by `OtelCollector.findCanaryLeakage()` so a leak
|
|
89
|
+
* assertion can name the offending surface (which span, which attribute)
|
|
90
|
+
* rather than just `true`.
|
|
91
|
+
*/
|
|
92
|
+
export interface CanaryLeak {
|
|
93
|
+
/** Which captured surface leaked: a span field or a metric data point. */
|
|
94
|
+
readonly surface: 'span.name' | 'span.attribute' | 'span.resourceAttribute' | 'metric.attribute';
|
|
95
|
+
/** The span/metric name the leak was found under. */
|
|
96
|
+
readonly emitterName: string;
|
|
97
|
+
/** Attribute key when `surface` is an attribute; `undefined` for `span.name`. */
|
|
98
|
+
readonly key: string | undefined;
|
|
99
|
+
/** Stringified value (or the span name / attribute key itself) that contained the canary. */
|
|
100
|
+
readonly value: string;
|
|
101
|
+
}
|
|
102
|
+
|
|
86
103
|
/**
|
|
87
104
|
* Decode an OTLP attribute-value object into a primitive. Returns `null`
|
|
88
105
|
* when the value shape is unrecognized.
|
|
@@ -235,6 +252,61 @@ export class OtelCollector {
|
|
|
235
252
|
return this._spans.filter((s) => s.name === name);
|
|
236
253
|
}
|
|
237
254
|
|
|
255
|
+
/**
|
|
256
|
+
* Scan every captured span (name + attribute keys/values + resource
|
|
257
|
+
* attribute keys/values) and metric data-point attribute for the given
|
|
258
|
+
* canary substring, returning one `CanaryLeak` per hit.
|
|
259
|
+
*
|
|
260
|
+
* This is the collector-side complement to the host's
|
|
261
|
+
* `GET /v1/host/sample/test/otel/spans` scrape seam: the scrape seam
|
|
262
|
+
* reports what the host *says* it emitted; this method inspects what
|
|
263
|
+
* the host's OTLP exporter *actually shipped over the wire* to the
|
|
264
|
+
* collector. A host could redact in its scrape seam yet still leak on
|
|
265
|
+
* the real export — only collector-side inspection catches that, which
|
|
266
|
+
* is the gap `docs/KNOWN-LIMITS.md` tracked for
|
|
267
|
+
* `secret-leakage-otel-attribute` / `-debug-bundle-otel`.
|
|
268
|
+
*
|
|
269
|
+
* The match is a plain substring test (case-sensitive) so an attribute
|
|
270
|
+
* value that merely *embeds* the canary (e.g. inside a JSON blob or an
|
|
271
|
+
* error message) is still caught. Empty/whitespace canaries return no
|
|
272
|
+
* hits — a guard against vacuous "everything leaks" assertions.
|
|
273
|
+
*
|
|
274
|
+
* @see SECURITY/invariants.yaml secret-leakage-otel-attribute
|
|
275
|
+
* @see SECURITY/threat-model-secret-leakage.md
|
|
276
|
+
*/
|
|
277
|
+
findCanaryLeakage(canary: string): readonly CanaryLeak[] {
|
|
278
|
+
const hits: CanaryLeak[] = [];
|
|
279
|
+
if (canary.trim() === '') return hits;
|
|
280
|
+
const contains = (v: unknown): string | null => {
|
|
281
|
+
const s = typeof v === 'string' ? v : JSON.stringify(v);
|
|
282
|
+
return s !== undefined && s.includes(canary) ? s : null;
|
|
283
|
+
};
|
|
284
|
+
for (const sp of this._spans) {
|
|
285
|
+
if (sp.name.includes(canary)) {
|
|
286
|
+
hits.push({ surface: 'span.name', emitterName: sp.name, key: undefined, value: sp.name });
|
|
287
|
+
}
|
|
288
|
+
for (const [key, val] of sp.attributes) {
|
|
289
|
+
const m = contains(val) ?? (key.includes(canary) ? key : null);
|
|
290
|
+
if (m !== null) hits.push({ surface: 'span.attribute', emitterName: sp.name, key, value: m });
|
|
291
|
+
}
|
|
292
|
+
for (const [key, val] of sp.resourceAttributes) {
|
|
293
|
+
const m = contains(val) ?? (key.includes(canary) ? key : null);
|
|
294
|
+
if (m !== null) {
|
|
295
|
+
hits.push({ surface: 'span.resourceAttribute', emitterName: sp.name, key, value: m });
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
for (const metric of this._metrics) {
|
|
300
|
+
for (const [key, val] of metric.dataPoint.attributes) {
|
|
301
|
+
const m = contains(val) ?? (key.includes(canary) ? key : null);
|
|
302
|
+
if (m !== null) {
|
|
303
|
+
hits.push({ surface: 'metric.attribute', emitterName: metric.name, key, value: m });
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
return hits;
|
|
308
|
+
}
|
|
309
|
+
|
|
238
310
|
metrics(): readonly CapturedMetric[] {
|
|
239
311
|
return this._metrics;
|
|
240
312
|
}
|
package/src/lib/profiles.ts
CHANGED
|
@@ -362,6 +362,21 @@ export function agentPlatformSatisfiedTerms(c: DiscoveryPayload): readonly strin
|
|
|
362
362
|
return checks.filter(([, ok]) => ok).map(([id]) => id);
|
|
363
363
|
}
|
|
364
364
|
|
|
365
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
366
|
+
// `openwop-core-standard` operational-annex predicate (RFC 0088). Like the
|
|
367
|
+
// agent-platform annex above, this is NOT a closed-catalog profile (so it is
|
|
368
|
+
// absent from deriveProfiles) — it is an operational ANNEX whose claim is backed
|
|
369
|
+
// by the §C floor scenarios passing black-box. This helper computes only the §B
|
|
370
|
+
// discovery predicate (the floor of MUSTs with black-box production-path proof).
|
|
371
|
+
//
|
|
372
|
+
// @see spec/v1/core-standard-profile.md
|
|
373
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
374
|
+
|
|
375
|
+
/** The `openwop-core-standard` floor discovery predicate — RFC 0088 §B. */
|
|
376
|
+
export function isCoreStandard(c: DiscoveryPayload): boolean {
|
|
377
|
+
return isCore(c) && isInterrupts(c) && (isStreamSse(c) || isStreamPoll(c));
|
|
378
|
+
}
|
|
379
|
+
|
|
365
380
|
/**
|
|
366
381
|
* Derive the full profile set from a discovery payload.
|
|
367
382
|
*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// Suite-local worker for the RFC 0035 §B wall-clock-timeout conformance probe.
|
|
2
|
+
//
|
|
3
|
+
// Instantiates ONE WASM module on a dedicated worker thread and runs its entry.
|
|
4
|
+
// The main thread (see `probeTimeout` in wasm-sandbox-probe.ts) races a
|
|
5
|
+
// kill-timer against this worker: a non-terminating module (the
|
|
6
|
+
// `misbehaving-timeout` fixture) never posts and is terminated at the wall-clock
|
|
7
|
+
// cap → `sandbox_timeout`; a well-behaved module posts its result first. Mirrors
|
|
8
|
+
// the reference host's `examples/hosts/wasm-sandbox/src/sandbox-worker.mjs`; the
|
|
9
|
+
// suite carries its own copy so the published conformance package is
|
|
10
|
+
// self-contained (no dependency on the reference host).
|
|
11
|
+
import { workerData, parentPort } from 'node:worker_threads';
|
|
12
|
+
|
|
13
|
+
const { wasmBytes, entry, arg, memoryMaxPages } = workerData;
|
|
14
|
+
|
|
15
|
+
try {
|
|
16
|
+
const memory = new WebAssembly.Memory({ initial: 1, maximum: memoryMaxPages });
|
|
17
|
+
const instance = new WebAssembly.Instance(new WebAssembly.Module(wasmBytes), { env: { memory } });
|
|
18
|
+
const fn = instance.exports[entry];
|
|
19
|
+
if (typeof fn !== 'function') {
|
|
20
|
+
parentPort.postMessage({ ok: false, code: 'sandbox_invocation_error' });
|
|
21
|
+
} else {
|
|
22
|
+
const result = fn(arg); // a non-terminating module never returns — the host kill-timer fires
|
|
23
|
+
parentPort.postMessage({ ok: true, result: Number(result) });
|
|
24
|
+
}
|
|
25
|
+
} catch (err) {
|
|
26
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
27
|
+
const code = /out of bounds memory access|memory access out of bounds/i.test(message)
|
|
28
|
+
? 'sandbox_memory_exceeded'
|
|
29
|
+
: 'sandbox_invocation_error';
|
|
30
|
+
parentPort.postMessage({ ok: false, code });
|
|
31
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared helpers for the RFC 0078 `toolCatalog` conformance scenarios.
|
|
3
|
+
* Lives in lib/ (not a `*.test.ts`) so scenarios import it via
|
|
4
|
+
* `../lib/toolCatalog.js`.
|
|
5
|
+
*
|
|
6
|
+
* Two surfaces:
|
|
7
|
+
* - the NORMATIVE reads (`GET /v1/tools` + `GET /v1/tools/{toolId}`, RFC 0078
|
|
8
|
+
* §B), exercised black-box; and
|
|
9
|
+
* - the host-sample tool-session seam (`POST /v1/host/sample/tools/session-run`),
|
|
10
|
+
* used to drive the §D `tool.session.{opened,closed}` bracket over the RFC
|
|
11
|
+
* 0064 call events so the ordering + content-free guarantees can be asserted
|
|
12
|
+
* against the test event-log seam. The seam is OPTIONAL — scenarios soft-skip
|
|
13
|
+
* on 404/405 (the reference session lifecycle is deferred per RFC 0078
|
|
14
|
+
* §Conformance).
|
|
15
|
+
*
|
|
16
|
+
* Gating uses the `toolCatalog.supported` (and `toolCatalog.sessionLifecycle`)
|
|
17
|
+
* capability flags from the live discovery doc (root-only per RFC 0073).
|
|
18
|
+
*
|
|
19
|
+
* @see RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
|
|
20
|
+
* @see spec/v1/tool-catalog.md
|
|
21
|
+
*/
|
|
22
|
+
import { driver } from './driver.js';
|
|
23
|
+
import { readCapabilityFamily } from './discovery-capabilities.js';
|
|
24
|
+
|
|
25
|
+
/** Reads `toolCatalog` from the discovery document root (root-only per RFC 0073); null when
|
|
26
|
+
* unadvertised. */
|
|
27
|
+
export async function readToolCatalogCap(): Promise<Record<string, unknown> | null> {
|
|
28
|
+
const tc = await readCapabilityFamily<Record<string, unknown>>('toolCatalog');
|
|
29
|
+
return tc && typeof tc === 'object' ? tc : null;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export interface ToolDescriptor {
|
|
33
|
+
toolId?: string;
|
|
34
|
+
source?: string;
|
|
35
|
+
safetyTier?: string;
|
|
36
|
+
[k: string]: unknown;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/** GET the NORMATIVE tool catalog (RFC 0078 §B `GET /v1/tools`); null when the
|
|
40
|
+
* host doesn't serve it (404/405/501). */
|
|
41
|
+
export async function listTools(): Promise<ToolDescriptor[] | null> {
|
|
42
|
+
const res = await driver.get('/v1/tools');
|
|
43
|
+
if (res.status === 404 || res.status === 405 || res.status === 501) return null;
|
|
44
|
+
return (res.json as ToolDescriptor[] | undefined) ?? [];
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** GET one tool by id (RFC 0078 §B `GET /v1/tools/{toolId}`); returns
|
|
48
|
+
* `{ status, descriptor }` so a caller can distinguish a 404 (absent /
|
|
49
|
+
* unauthorized / unadvertised) from a served descriptor. */
|
|
50
|
+
export async function getTool(
|
|
51
|
+
toolId: string,
|
|
52
|
+
): Promise<{ status: number; descriptor: ToolDescriptor | undefined }> {
|
|
53
|
+
const res = await driver.get(`/v1/tools/${encodeURIComponent(toolId)}`);
|
|
54
|
+
return { status: res.status, descriptor: res.json as ToolDescriptor | undefined };
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export interface ToolSessionResult {
|
|
58
|
+
runId?: string;
|
|
59
|
+
sessionId?: string;
|
|
60
|
+
toolId?: string;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/** Drive one tool-session interaction through the host-sample seam (RFC 0078
|
|
64
|
+
* §D). Persists `tool.session.opened` → RFC 0064 call events → `tool.session.closed`
|
|
65
|
+
* to the durable run-event log (read back via the run event-log read seam).
|
|
66
|
+
* Returns null when the seam is unwired (404/405). */
|
|
67
|
+
export async function driveToolSession(
|
|
68
|
+
body: { toolId?: string } = {},
|
|
69
|
+
): Promise<ToolSessionResult | null> {
|
|
70
|
+
const res = await driver.post('/v1/host/sample/tools/session-run', body);
|
|
71
|
+
if (res.status === 404 || res.status === 405) return null;
|
|
72
|
+
return (res.json as ToolSessionResult | undefined) ?? {};
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/** The closed tool-source vocabulary (RFC 0078 §C). */
|
|
76
|
+
export const TOOL_SOURCES = ['node-pack', 'workflow', 'mcp', 'connector', 'host-extension'];
|
|
77
|
+
/** The closed safety-tier vocabulary (RFC 0078 §C). */
|
|
78
|
+
export const SAFETY_TIERS = ['pure', 'read', 'write', 'exec'];
|
|
79
|
+
/** Content keys a `ToolDescriptor` / `tool.session.*` MUST NEVER carry (SR-1):
|
|
80
|
+
* no credential/secret material. */
|
|
81
|
+
export const TOOL_CONTENT_FORBIDDEN = ['secret', 'credential', 'credentials', 'token', 'apiKey', 'password'];
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Portable WASM-sandbox probe — the suite-local reference for the RFC 0035 §B
|
|
3
|
+
* isolation invariants.
|
|
4
|
+
*
|
|
5
|
+
* The conformance suite is a standalone package, so it carries its own compact,
|
|
6
|
+
* server-free probe (no host; no worker except in `probeTimeout`) rather than importing a reference
|
|
7
|
+
* host's executor. It proves the invariants that hold *by construction* in any
|
|
8
|
+
* WebAssembly sandbox:
|
|
9
|
+
*
|
|
10
|
+
* - escape attempts (fs / env / network / process) and the capability gate are
|
|
11
|
+
* proven by STATIC inspection of `WebAssembly.Module.imports()` — a WASM
|
|
12
|
+
* module has no ambient host access, so a forbidden operation can only be a
|
|
13
|
+
* declared import; a sandbox refuses any import it did not grant, failing
|
|
14
|
+
* closed BEFORE instantiation.
|
|
15
|
+
* - the memory bound is proven by instantiating with a capped host memory and
|
|
16
|
+
* observing the engine trap on an access past the bound.
|
|
17
|
+
* - isolated-context is proven by instantiating the same module twice and
|
|
18
|
+
* observing no shared mutable state.
|
|
19
|
+
*
|
|
20
|
+
* The `timeout` invariant requires thread preemption (a worker kill-timer) and is
|
|
21
|
+
* proven at reference-impl tier by the WASM host's `test/sandbox.test.ts`; it is
|
|
22
|
+
* intentionally NOT exercised by `probeSandboxed` (an in-process infinite loop cannot be
|
|
23
|
+
* interrupted server-free); the worker-based `probeTimeout` covers it instead.
|
|
24
|
+
*
|
|
25
|
+
* @see RFCS/0035-sandbox-execution-contract.md §B
|
|
26
|
+
* @see examples/hosts/wasm-sandbox/ (the reference host this mirrors)
|
|
27
|
+
*/
|
|
28
|
+
import { Worker } from 'node:worker_threads';
|
|
29
|
+
import { fileURLToPath } from 'node:url';
|
|
30
|
+
|
|
31
|
+
export type SandboxErrorCode =
|
|
32
|
+
| 'sandbox_memory_exceeded'
|
|
33
|
+
| 'sandbox_timeout'
|
|
34
|
+
| 'sandbox_capability_denied'
|
|
35
|
+
| 'sandbox_escape_attempt'
|
|
36
|
+
| 'sandbox_invocation_error';
|
|
37
|
+
|
|
38
|
+
export type EscapeKind = 'host-fs-escape' | 'host-env-leak' | 'network-escape' | 'host-process-escape';
|
|
39
|
+
|
|
40
|
+
export interface ProbeResult {
|
|
41
|
+
readonly ok: boolean;
|
|
42
|
+
readonly result?: number;
|
|
43
|
+
readonly code?: SandboxErrorCode;
|
|
44
|
+
readonly escapeKind?: EscapeKind;
|
|
45
|
+
readonly requestedCapability?: string;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
const WASM_PAGE_BYTES = 65536;
|
|
49
|
+
|
|
50
|
+
// Minimal local types for the Node-global `WebAssembly` value. The full
|
|
51
|
+
// `WebAssembly.*` namespace types live in lib.dom (not @types/node); rather than
|
|
52
|
+
// widen the suite's global lib (which would pull in conflicting DOM `fetch`/
|
|
53
|
+
// `BodyInit` types), we declare exactly what this probe uses and read the global.
|
|
54
|
+
interface WAModule {
|
|
55
|
+
readonly __wasmModule?: never;
|
|
56
|
+
}
|
|
57
|
+
interface WAInstance {
|
|
58
|
+
readonly exports: Record<string, unknown>;
|
|
59
|
+
}
|
|
60
|
+
interface WAImportDescriptor {
|
|
61
|
+
readonly module: string;
|
|
62
|
+
readonly name: string;
|
|
63
|
+
}
|
|
64
|
+
const WA = (globalThis as unknown as {
|
|
65
|
+
WebAssembly: {
|
|
66
|
+
Module: { new (bytes: Uint8Array): WAModule; imports(m: WAModule): readonly WAImportDescriptor[] };
|
|
67
|
+
Instance: { new (m: WAModule, imports: Record<string, Record<string, unknown>>): WAInstance };
|
|
68
|
+
Memory: { new (descriptor: { initial: number; maximum: number }): unknown };
|
|
69
|
+
};
|
|
70
|
+
}).WebAssembly;
|
|
71
|
+
|
|
72
|
+
function escapeKindFor(name: string): EscapeKind {
|
|
73
|
+
if (/^fd_|^path_/.test(name)) return 'host-fs-escape';
|
|
74
|
+
if (/^environ_/.test(name)) return 'host-env-leak';
|
|
75
|
+
if (/^sock_/.test(name)) return 'network-escape';
|
|
76
|
+
return 'host-process-escape';
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/** Static capability gate — the first un-granted import, or `null` if all are host-provided. */
|
|
80
|
+
function gateImports(module: WAModule, allowedHostCalls: readonly string[]): ProbeResult | null {
|
|
81
|
+
const allowed = new Set(allowedHostCalls);
|
|
82
|
+
for (const imp of WA.Module.imports(module)) {
|
|
83
|
+
if (imp.module === 'env' && imp.name === 'memory') continue;
|
|
84
|
+
if (imp.module === 'openwop') {
|
|
85
|
+
if (allowed.has(imp.name)) continue;
|
|
86
|
+
return { ok: false, code: 'sandbox_capability_denied', requestedCapability: imp.name };
|
|
87
|
+
}
|
|
88
|
+
return { ok: false, code: 'sandbox_escape_attempt', escapeKind: escapeKindFor(imp.name) };
|
|
89
|
+
}
|
|
90
|
+
return null;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Probe one WASM-compiled typeId under the RFC 0035 sandbox contract, server-free.
|
|
95
|
+
* Statically gates imports; for a fully-granted module, instantiates with a
|
|
96
|
+
* capped host memory and runs the entry, classifying any trap. Does NOT spawn a
|
|
97
|
+
* worker — callers MUST NOT pass a non-terminating module (see `timeout` note).
|
|
98
|
+
*/
|
|
99
|
+
export function probeSandboxed(
|
|
100
|
+
wasmBytes: Uint8Array,
|
|
101
|
+
config: { readonly allowedHostCalls: readonly string[]; readonly memoryLimitBytes: number },
|
|
102
|
+
entry = 'invoke',
|
|
103
|
+
arg = 0,
|
|
104
|
+
): ProbeResult {
|
|
105
|
+
let module: WAModule;
|
|
106
|
+
try {
|
|
107
|
+
module = new WA.Module(wasmBytes);
|
|
108
|
+
} catch {
|
|
109
|
+
return { ok: false, code: 'sandbox_invocation_error' };
|
|
110
|
+
}
|
|
111
|
+
const gate = gateImports(module, config.allowedHostCalls);
|
|
112
|
+
if (gate) return gate;
|
|
113
|
+
|
|
114
|
+
const memoryMaxPages = Math.max(1, Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES));
|
|
115
|
+
try {
|
|
116
|
+
const memory = new WA.Memory({ initial: 1, maximum: memoryMaxPages });
|
|
117
|
+
const openwop: Record<string, (x: number) => number> = {};
|
|
118
|
+
for (const name of config.allowedHostCalls) openwop[name] = (x: number): number => x;
|
|
119
|
+
const instance = new WA.Instance(module, { env: { memory }, openwop });
|
|
120
|
+
const fn = instance.exports[entry];
|
|
121
|
+
if (typeof fn !== 'function') return { ok: false, code: 'sandbox_invocation_error' };
|
|
122
|
+
return { ok: true, result: Number((fn as (a: number) => number)(arg)) };
|
|
123
|
+
} catch (e) {
|
|
124
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
125
|
+
if (/out of bounds memory access|memory access out of bounds/i.test(message)) {
|
|
126
|
+
return { ok: false, code: 'sandbox_memory_exceeded' };
|
|
127
|
+
}
|
|
128
|
+
return { ok: false, code: 'sandbox_invocation_error' };
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const timeoutWorkerPath = fileURLToPath(new URL('./sandbox-timeout-worker.mjs', import.meta.url));
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Worker-based timeout probe — RFC 0035 §B invariant 6 (`node-pack-sandbox-timeout`).
|
|
136
|
+
* A wall-clock cap can only be enforced by THREAD PREEMPTION: a same-thread timer
|
|
137
|
+
* cannot interrupt a synchronous WASM loop. So this spawns a worker thread running
|
|
138
|
+
* the module and races a main-thread kill-timer. A non-terminating module →
|
|
139
|
+
* `sandbox_timeout` (the worker is terminated at `wallClockLimitMs`); a module that
|
|
140
|
+
* completes within the budget posts its result first. This is the worker-driven
|
|
141
|
+
* counterpart to the server-free `probeSandboxed` (which deliberately cannot run a
|
|
142
|
+
* non-terminating module).
|
|
143
|
+
*/
|
|
144
|
+
export function probeTimeout(
|
|
145
|
+
wasmBytes: Uint8Array,
|
|
146
|
+
config: { readonly memoryLimitBytes: number; readonly wallClockLimitMs: number },
|
|
147
|
+
entry = 'invoke',
|
|
148
|
+
arg = 0,
|
|
149
|
+
): Promise<ProbeResult> {
|
|
150
|
+
const memoryMaxPages = Math.max(1, Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES));
|
|
151
|
+
return new Promise((resolve) => {
|
|
152
|
+
const worker = new Worker(timeoutWorkerPath, { workerData: { wasmBytes, entry, arg, memoryMaxPages } });
|
|
153
|
+
let settled = false;
|
|
154
|
+
const finish = (r: ProbeResult): void => {
|
|
155
|
+
if (settled) return;
|
|
156
|
+
settled = true;
|
|
157
|
+
clearTimeout(timer);
|
|
158
|
+
void worker.terminate();
|
|
159
|
+
resolve(r);
|
|
160
|
+
};
|
|
161
|
+
const timer = setTimeout(() => finish({ ok: false, code: 'sandbox_timeout' }), config.wallClockLimitMs);
|
|
162
|
+
worker.on('message', (m: { ok: boolean; result?: number; code?: SandboxErrorCode }) => {
|
|
163
|
+
if (m.ok) finish(m.result === undefined ? { ok: true } : { ok: true, result: m.result });
|
|
164
|
+
else finish({ ok: false, code: m.code ?? 'sandbox_invocation_error' });
|
|
165
|
+
});
|
|
166
|
+
worker.on('error', () => finish({ ok: false, code: 'sandbox_invocation_error' }));
|
|
167
|
+
});
|
|
168
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* openwop-core-standard — operational-annex predicate derivation (RFC 0088).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free derivation probe. Verifies that `isCoreStandard`
|
|
5
|
+
* derives the Core Standard Profile floor correctly from representative
|
|
6
|
+
* discovery payloads (RFC 0088 §B / core-standard-profile.md §B):
|
|
7
|
+
* - a host meeting openwop-core + openwop-interrupts + a transport is core-standard;
|
|
8
|
+
* - a bare openwop-core host (no interrupts) is NOT core-standard — the floor is
|
|
9
|
+
* deliberately stricter than the v1 minimum;
|
|
10
|
+
* - a host with no event transport (supportedTransports: []) fails the floor;
|
|
11
|
+
* - the floor is the AND of three existing closed-catalog predicates (it composes,
|
|
12
|
+
* it does not redefine — so it is absent from deriveProfiles()).
|
|
13
|
+
*
|
|
14
|
+
* The LIVE aggregate-evidence assertion (does every §C floor scenario actually
|
|
15
|
+
* pass against a host claiming the profile?) is the `Active → Accepted` step per
|
|
16
|
+
* RFC 0088 §C — already satisfied by MyndHyve + all reference hosts, asserted via
|
|
17
|
+
* each constituent scenario, and deferred here. This scenario asserts the
|
|
18
|
+
* discovery-predicate derivation only.
|
|
19
|
+
*
|
|
20
|
+
* Spec references:
|
|
21
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/core-standard-profile.md
|
|
22
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0088-core-standard-profile.md
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from 'vitest';
|
|
26
|
+
import { isCoreStandard, isCore, deriveProfiles } from '../lib/profiles.js';
|
|
27
|
+
|
|
28
|
+
/** Formats an assertion message as "<spec reference> — <requirement>". */
const why = (specRef: string, requirement: string): string => {
  return specRef + ' — ' + requirement;
};
|
|
29
|
+
|
|
30
|
+
// Baseline discovery payload shared by the scenarios below; each test spreads it
// and overrides only the field under examination.
const CORE = {
  protocolVersion: '1.0',
  supportedEnvelopes: ['clarification.request'],
  schemaVersions: {},
  limits: {
    clarificationRounds: 1,
    schemaRounds: 1,
    envelopesPerTurn: 1,
  },
};
|
|
36
|
+
|
|
37
|
+
describe('core-standard-profile: floor predicate (RFC 0088 §B, server-free)', () => {
  // Spec anchors cited by the assertion messages in this suite.
  const floorRef = 'core-standard-profile.md §B';
  const coreRef = 'profiles.md §openwop-core';

  it('a host meeting core + interrupts + a default transport is core-standard', () => {
    // supportedTransports is left out on purpose: per profiles.md both stream
    // predicates then default to true.
    const host = { ...CORE };
    const verdict = isCoreStandard(host);
    expect(verdict, why(floorRef, 'core + interrupts + transport ⇒ core-standard')).toBe(true);
  });

  it('a bare openwop-core host without interrupts is NOT core-standard', () => {
    // Meets the openwop-core minimum, but advertises no clarification.request
    // envelope, so the openwop-interrupts term fails.
    const host = { ...CORE, supportedEnvelopes: ['schema.request'] };
    const coreVerdict = isCore(host);
    expect(coreVerdict, why(coreRef, 'still a valid openwop-core host')).toBe(true);
    const floorVerdict = isCoreStandard(host);
    expect(floorVerdict, why(floorRef, 'the floor is stricter than the v1 minimum')).toBe(false);
  });

  it('a host advertising no event transport fails the floor', () => {
    // An explicitly empty transport list, as opposed to an omitted one.
    const host = { ...CORE, supportedTransports: [] as string[] };
    const verdict = isCoreStandard(host);
    expect(verdict, why(floorRef, 'at least one event transport is required')).toBe(false);
  });

  it('a host advertising the rest transport satisfies the transport term', () => {
    const host = { ...CORE, supportedTransports: ['rest'] };
    const verdict = isCoreStandard(host);
    expect(verdict, why(floorRef, 'rest transport ⇒ stream term satisfied')).toBe(true);
  });

  it('a non-1.x host is not core-standard', () => {
    const host = { ...CORE, protocolVersion: '2.0' };
    const verdict = isCoreStandard(host);
    expect(verdict, why(coreRef, 'core-standard implies openwop-core (1.x)')).toBe(false);
  });
});
|
|
66
|
+
|
|
67
|
+
describe('core-standard-profile: composes, does not redefine (RFC 0088 §A, server-free)', () => {
  it('openwop-core-standard is an annex, NOT a closed-catalog profile (absent from deriveProfiles)', () => {
    // Derive the closed-catalog profiles for the baseline payload and check that
    // the annex name is not among them.
    const host = { ...CORE };
    const derived = deriveProfiles(host) as readonly string[];
    const listsAnnex = derived.includes('openwop-core-standard');
    expect(listsAnnex, why('core-standard-profile.md §A', 'the annex is not a closed-catalog predicate')).toBe(false);
  });
});
|