@openwop/openwop-conformance 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +2 -2
  3. package/api/openapi.yaml +60 -0
  4. package/coverage.md +15 -4
  5. package/fixtures/wasm-sandbox/isolation-global.wasm +0 -0
  6. package/fixtures/wasm-sandbox/isolation-global.wat +6 -0
  7. package/fixtures/wasm-sandbox/misbehaving-capability-gate.wasm +0 -0
  8. package/fixtures/wasm-sandbox/misbehaving-capability-gate.wat +4 -0
  9. package/fixtures/wasm-sandbox/misbehaving-env.wasm +0 -0
  10. package/fixtures/wasm-sandbox/misbehaving-env.wat +4 -0
  11. package/fixtures/wasm-sandbox/misbehaving-fs.wasm +0 -0
  12. package/fixtures/wasm-sandbox/misbehaving-fs.wat +4 -0
  13. package/fixtures/wasm-sandbox/misbehaving-memory.wasm +0 -0
  14. package/fixtures/wasm-sandbox/misbehaving-memory.wat +5 -0
  15. package/fixtures/wasm-sandbox/misbehaving-network.wasm +0 -0
  16. package/fixtures/wasm-sandbox/misbehaving-network.wat +4 -0
  17. package/fixtures/wasm-sandbox/misbehaving-process.wasm +0 -0
  18. package/fixtures/wasm-sandbox/misbehaving-process.wat +4 -0
  19. package/fixtures/wasm-sandbox/misbehaving-timeout.wasm +0 -0
  20. package/fixtures/wasm-sandbox/misbehaving-timeout.wat +4 -0
  21. package/fixtures/wasm-sandbox/well-behaved-echo.wasm +0 -0
  22. package/fixtures/wasm-sandbox/well-behaved-echo.wat +2 -0
  23. package/fixtures/wasm-sandbox/well-behaved-host-fetch.wasm +0 -0
  24. package/fixtures/wasm-sandbox/well-behaved-host-fetch.wat +3 -0
  25. package/package.json +1 -1
  26. package/src/lib/discovery-capabilities.ts +18 -19
  27. package/src/lib/egressPolicy.ts +76 -0
  28. package/src/lib/otel-collector.ts +72 -0
  29. package/src/lib/profiles.ts +15 -0
  30. package/src/lib/sandbox-timeout-worker.mjs +31 -0
  31. package/src/lib/toolCatalog.ts +81 -0
  32. package/src/lib/wasm-sandbox-probe.ts +168 -0
  33. package/src/scenarios/core-standard-profile.test.ts +75 -0
  34. package/src/scenarios/egress-audience-binding.test.ts +81 -0
  35. package/src/scenarios/egress-decision-content-free.test.ts +57 -0
  36. package/src/scenarios/memory-degraded-projection.test.ts +121 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +12 -7
  38. package/src/scenarios/otel-collector-canary-inspection.test.ts +211 -0
  39. package/src/scenarios/prompt-resolution-chain-event.test.ts +113 -0
  40. package/src/scenarios/replay-observable-sequence-determinism.test.ts +192 -75
  41. package/src/scenarios/sandbox-wasm-isolation.test.ts +98 -0
  42. package/src/scenarios/sandbox-wasm-timeout.test.ts +40 -0
  43. package/src/scenarios/secret-leakage-otel-attribute.test.ts +52 -0
  44. package/src/scenarios/tool-catalog-projection.test.ts +120 -0
  45. package/src/scenarios/tool-session-lifecycle.test.ts +105 -0
  46. package/src/scenarios/workspace-cross-tenant-isolation-blackbox.test.ts +89 -0
@@ -1,16 +1,19 @@
1
1
  /**
2
- * Shared root-first reader for `/.well-known/openwop` capability families.
2
+ * Shared document-root reader for `/.well-known/openwop` capability families.
3
3
  *
4
4
  * Per `spec/v1/capabilities.md` §"Document-root layout" (RFC 0073), every
5
5
  * capability family (`agents`, `secrets`, `aiProviders`, `auth`, `memory`,
6
6
  * `multiAgent`, `authorization`, …) is a property of the discovery document
7
7
  * ROOT — `capabilities.schema.json` defines no `capabilities` wrapper property.
8
- * A top-level `capabilities` object is a deprecated legacy shape; the suite
9
- * reads the root first and falls back to a `capabilities.*` wrapper only
10
- * through the v1.x migration window. Standardizes the ad-hoc dual-read that
11
- * already existed (e.g. `aiEnvelope.capBreached.test.ts`,
12
- * `ai-envelope-shape.test.ts`) into one accessor so every helper reads the
13
- * same way.
8
+ * Root is the normative MUST, so the suite reads the ROOT ONLY: a host that
9
+ * serves families exclusively under a deprecated top-level `capabilities`
10
+ * wrapper is already non-conformant, and the suite grades it as such rather
11
+ * than tolerating the legacy shape. RFC 0073 Phase 4 dropped the
12
+ * wrapper-fallback the accessor previously carried through the v1.x migration
13
+ * window; the host-side mirror + the schema's `additionalProperties` tolerance
14
+ * retire together at v2.0 (they serve laggard *clients* reading discovery, not
15
+ * the host emission the suite grades). Standardizes the read so every helper
16
+ * reads the same way.
14
17
  *
15
18
  * @see spec/v1/capabilities.md §"Document-root layout"
16
19
  * @see RFCS/0073-capability-document-root-layout.md
@@ -18,9 +21,10 @@
18
21
  import { driver } from './driver.js';
19
22
 
20
23
  /**
21
- * Read one capability family from an already-fetched discovery doc: document
22
- * root first, deprecated `capabilities.*` wrapper as a fallback. Returns
23
- * `undefined` when the family is advertised in neither location.
24
+ * Read one capability family from an already-fetched discovery doc at the
25
+ * document root. Returns `undefined` when the family is not a root property —
26
+ * a deprecated top-level `capabilities` wrapper is NOT consulted (root is the
27
+ * RFC 0073 MUST).
24
28
  */
25
29
  export function capabilityFamily<T = Record<string, unknown>>(
26
30
  doc: unknown,
@@ -28,18 +32,13 @@ export function capabilityFamily<T = Record<string, unknown>>(
28
32
  ): T | undefined {
29
33
  if (!doc || typeof doc !== 'object') return undefined;
30
34
  const root = (doc as Record<string, unknown>)[name];
31
- if (root !== undefined) return root as T;
32
- const wrapper = (doc as { capabilities?: unknown }).capabilities;
33
- if (wrapper && typeof wrapper === 'object') {
34
- return (wrapper as Record<string, unknown>)[name] as T | undefined;
35
- }
36
- return undefined;
35
+ return root === undefined ? undefined : (root as T);
37
36
  }
38
37
 
39
38
  /**
40
- * Fetch `/.well-known/openwop` and return one capability family (root-first,
41
- * wrapper-fallback). `undefined` when the host returns non-200 or doesn't
42
- * advertise the family.
39
+ * Fetch `/.well-known/openwop` and return one capability family from the
40
+ * document root. `undefined` when the host returns non-200 or doesn't
41
+ * advertise the family at the root.
43
42
  */
44
43
  export async function readCapabilityFamily<T = Record<string, unknown>>(
45
44
  name: string,
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Shared helpers for the RFC 0079 `httpClient.egressPolicy` conformance
3
+ * scenarios. Lives in lib/ (not a `*.test.ts`) so scenarios import it via
4
+ * `../lib/egressPolicy.js`.
5
+ *
6
+ * Egress policy is a BEHAVIOR layered over the RFC 0076 `safeFetch` — there is
7
+ * no new normative read endpoint. The behavior is driven through the
8
+ * host-sample egress-decision seam (`POST /v1/host/sample/egress/decide`): a
9
+ * host-issued credential carries `audiences[]` (RFC 0079 §A provenance), and an
10
+ * egress whose destination is OUTSIDE those audiences MUST emit
11
+ * `egress.decided { decision: "denied"|"downgraded", reason: "out-of-audience" }`
12
+ * and MUST NOT attach the credential (the §C confused-deputy MUST, backing the
13
+ * `egress-credential-audience-bound` invariant). A provenance-unevaluable egress
14
+ * MUST be `denied { reason: "provenance-unevaluable" }` — fail-closed. The seam
15
+ * is OPTIONAL — scenarios soft-skip on 404/405.
16
+ *
17
+ * Gating uses the `httpClient.egressPolicy.supported` capability flag from the
18
+ * live discovery doc (read at the document root per RFC 0073; no legacy-wrapper fallback).
19
+ *
20
+ * @see RFCS/0079-credential-provenance-and-egress-policy.md
21
+ * @see spec/v1/host-capabilities.md (§"Credential provenance + egress policy")
22
+ */
23
+ import { driver } from './driver.js';
24
+ import { readCapabilityFamily } from './discovery-capabilities.js';
25
+
26
/**
 * Look up the `httpClient.egressPolicy` capability in the live discovery
 * document through the shared `readCapabilityFamily` accessor.
 *
 * @returns the `egressPolicy` object, or `null` when the `httpClient` family
 *   is not returned or its `egressPolicy` member is missing, falsy, or not an
 *   object.
 */
export async function readEgressPolicyCap(): Promise<Record<string, unknown> | null> {
  const httpClient = await readCapabilityFamily<{ egressPolicy?: unknown }>('httpClient');
  const policy = httpClient?.egressPolicy;
  if (!policy || typeof policy !== 'object') return null;
  return policy as Record<string, unknown>;
}
33
+
34
/**
 * Response body of the host-sample egress-decision seam. Every field is
 * optional and unknown extra keys are tolerated, so scenarios can assert on
 * whatever the host actually returned.
 */
export interface EgressDecision {
  /** Decision verdict — presumably one of `EGRESS_DECISIONS`; verify against the host. */
  decision?: string;
  /** Decision reason — presumably one of `EGRESS_REASONS`; verify against the host. */
  reason?: string;
  destination?: string;
  /**
   * Whether the host-issued credential was attached to the egress (§C — MUST
   * be false for an out-of-audience / unevaluable decision).
   */
  credentialAttached?: boolean;
  /**
   * Set when the seam ran a canary credential and the canary leaked into any
   * observable surface (the SR-1 negative — MUST stay false/absent).
   */
  canaryLeaked?: boolean;
  runId?: string;
  /** Any additional host-specific keys. */
  [k: string]: unknown;
}
47
+
48
/**
 * POST one egress scenario to the host-sample decision seam
 * (`/v1/host/sample/egress/decide`, RFC 0079 §C) and hand back the decision.
 *
 * Supported `scenario` values:
 *  - `out-of-audience`        — credential bound to audience A, egress to B;
 *                               MUST deny/downgrade + NOT attach the credential.
 *  - `provenance-unevaluable` — egress whose provenance can't be evaluated;
 *                               MUST deny fail-closed.
 *  - `in-audience`            — control: egress within audience; MAY allow.
 *  - `canary`                 — seed a credential whose value is a known canary
 *                               and assert it never appears on the wire (SR-1).
 *
 * @param body request payload naming the scenario to drive.
 * @returns `null` when the seam is unwired (HTTP 404/405); otherwise the
 *   response JSON, or an empty object when the response carried no JSON.
 */
export async function driveEgress(
  body: { scenario: 'out-of-audience' | 'provenance-unevaluable' | 'in-audience' | 'canary' },
): Promise<EgressDecision | null> {
  const response = await driver.post('/v1/host/sample/egress/decide', body);
  const seamUnwired = response.status === 404 || response.status === 405;
  if (seamUnwired) return null;
  const decision = response.json as EgressDecision | undefined;
  return decision ?? {};
}
67
+
68
/** Every value `egress.decided.decision` may take — the closed vocabulary of RFC 0079 §B. */
export const EGRESS_DECISIONS = [
  'allowed',
  'denied',
  'downgraded',
  'approval-required',
];

/**
 * Every value `egress.decided.reason` may take (RFC 0079 §B). The enum is
 * CLOSED so a host cannot spill a blocked URL/host/header into a free-form
 * reason.
 */
export const EGRESS_REASONS = [
  'ok',
  'out-of-audience',
  'expired',
  'ssrf-blocked',
  'provenance-unevaluable',
  'scope-denied',
  'policy-denied',
];

/**
 * Keys that an `egress.decided` payload / provenance descriptor MUST NEVER
 * carry (SR-1 / `egress-decision-no-secret-leak`): no secret value and no
 * blocked URL/header spill.
 */
export const EGRESS_CONTENT_FORBIDDEN = [
  'secret',
  'credential',
  'credentials',
  'token',
  'apiKey',
  'password',
  'url',
  'header',
  'headers',
  'body',
];
@@ -83,6 +83,23 @@ export interface CapturedMetric {
83
83
  };
84
84
  }
85
85
 
86
/**
 * A single location in the captured OTLP export where a canary string
 * showed up. `OtelCollector.findCanaryLeakage()` returns these so a failing
 * leak assertion can point at the exact surface (which span, which
 * attribute) instead of a bare boolean.
 */
export interface CanaryLeak {
  /** The kind of captured surface that leaked: a span field or a metric data-point attribute. */
  readonly surface: 'span.name' | 'span.attribute' | 'span.resourceAttribute' | 'metric.attribute';
  /** Name of the span or metric under which the leak was found. */
  readonly emitterName: string;
  /** Attribute key for attribute surfaces; `undefined` when `surface` is `span.name`. */
  readonly key: string | undefined;
  /** The text that contained the canary: the stringified value, or the name/key itself. */
  readonly value: string;
}
102
+
86
103
  /**
87
104
  * Decode an OTLP attribute-value object into a primitive. Returns `null`
88
105
  * when the value shape is unrecognized.
@@ -235,6 +252,61 @@ export class OtelCollector {
235
252
  return this._spans.filter((s) => s.name === name);
236
253
  }
237
254
 
255
/**
 * Search everything the collector captured for `canary` and report one
 * `CanaryLeak` per hit. Covered surfaces: span names, span attributes,
 * span resource attributes, and metric data-point attributes (keys as
 * well as values).
 *
 * This is the collector-side complement to the host's
 * `GET /v1/host/sample/test/otel/spans` scrape seam: the scrape seam shows
 * what the host *says* it emitted, while this inspects what its OTLP
 * exporter *actually shipped over the wire*. A host could redact in its
 * scrape seam yet still leak on the real export — the gap
 * `docs/KNOWN-LIMITS.md` tracked for `secret-leakage-otel-attribute` /
 * `-debug-bundle-otel`.
 *
 * Matching is a case-sensitive substring test, so a value that merely
 * *embeds* the canary (e.g. inside a JSON blob or an error message) is
 * still caught. Non-string values are matched against their
 * `JSON.stringify` form. An empty or whitespace-only canary yields no
 * hits, which guards against vacuous "everything leaks" assertions.
 *
 * @param canary the secret marker to look for.
 * @returns the leaks found: all span hits in capture order, then metric hits.
 *
 * @see SECURITY/invariants.yaml secret-leakage-otel-attribute
 * @see SECURITY/threat-model-secret-leakage.md
 */
findCanaryLeakage(canary: string): readonly CanaryLeak[] {
  const leaks: CanaryLeak[] = [];
  if (canary.trim() === '') return leaks;

  // For one key/value pair, return the text holding the canary: the
  // stringified value takes precedence, then the key; `null` when neither matches.
  const matchIn = (key: string, raw: unknown): string | null => {
    const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
    if (text !== undefined && text.includes(canary)) return text;
    return key.includes(canary) ? key : null;
  };

  for (const span of this._spans) {
    const emitterName = span.name;
    if (emitterName.includes(canary)) {
      leaks.push({ surface: 'span.name', emitterName, key: undefined, value: emitterName });
    }
    for (const [key, raw] of span.attributes) {
      const value = matchIn(key, raw);
      if (value !== null) leaks.push({ surface: 'span.attribute', emitterName, key, value });
    }
    for (const [key, raw] of span.resourceAttributes) {
      const value = matchIn(key, raw);
      if (value !== null) leaks.push({ surface: 'span.resourceAttribute', emitterName, key, value });
    }
  }

  for (const metric of this._metrics) {
    for (const [key, raw] of metric.dataPoint.attributes) {
      const value = matchIn(key, raw);
      if (value !== null) {
        leaks.push({ surface: 'metric.attribute', emitterName: metric.name, key, value });
      }
    }
  }

  return leaks;
}
309
+
238
310
  metrics(): readonly CapturedMetric[] {
239
311
  return this._metrics;
240
312
  }
@@ -362,6 +362,21 @@ export function agentPlatformSatisfiedTerms(c: DiscoveryPayload): readonly strin
362
362
  return checks.filter(([, ok]) => ok).map(([id]) => id);
363
363
  }
364
364
 
365
+ // ─────────────────────────────────────────────────────────────────────────────
366
+ // `openwop-core-standard` operational-annex predicate (RFC 0088). Like the
367
+ // agent-platform annex above, this is NOT a closed-catalog profile (so it is
368
+ // absent from deriveProfiles) — it is an operational ANNEX whose claim is backed
369
+ // by the §C floor scenarios passing black-box. This helper computes only the §B
370
+ // discovery predicate (the floor of MUSTs with black-box production-path proof).
371
+ //
372
+ // @see spec/v1/core-standard-profile.md
373
+ // ─────────────────────────────────────────────────────────────────────────────
374
+
375
/**
 * The `openwop-core-standard` floor discovery predicate — RFC 0088 §B.
 *
 * @param c discovery payload to evaluate.
 * @returns `true` only when `isCore` and `isInterrupts` both hold and at
 *   least one of `isStreamSse` / `isStreamPoll` holds.
 */
export function isCoreStandard(c: DiscoveryPayload): boolean {
  if (!isCore(c)) return false;
  if (!isInterrupts(c)) return false;
  return isStreamSse(c) || isStreamPoll(c);
}
379
+
365
380
  /**
366
381
  * Derive the full profile set from a discovery payload.
367
382
  *
@@ -0,0 +1,31 @@
1
+ // Suite-local worker for the RFC 0035 §B wall-clock-timeout conformance probe.
2
+ //
3
+ // Instantiates ONE WASM module on a dedicated worker thread and runs its entry.
4
+ // The main thread (see `probeTimeout` in wasm-sandbox-probe.ts) races a
5
+ // kill-timer against this worker: a non-terminating module (the
6
+ // `misbehaving-timeout` fixture) never posts and is terminated at the wall-clock
7
+ // cap → `sandbox_timeout`; a well-behaved module posts its result first. Mirrors
8
+ // the reference host's `examples/hosts/wasm-sandbox/src/sandbox-worker.mjs`; the
9
+ // suite carries its own copy so the published conformance package is
10
+ // self-contained (no dependency on the reference host).
11
+ import { workerData, parentPort } from 'node:worker_threads';
12
+
13
const { wasmBytes, entry, arg, memoryMaxPages } = workerData;

// Engine trap wording for an access past the end of linear memory.
const MEMORY_TRAP = /out of bounds memory access|memory access out of bounds/i;

/**
 * Instantiate the module against a capped `env.memory` and call its entry.
 * Returns the message to post to the parent; throws on any compile,
 * instantiate, or runtime trap.
 */
function runEntry() {
  const memory = new WebAssembly.Memory({ initial: 1, maximum: memoryMaxPages });
  const compiled = new WebAssembly.Module(wasmBytes);
  const instance = new WebAssembly.Instance(compiled, { env: { memory } });
  const entryFn = instance.exports[entry];
  if (typeof entryFn !== 'function') {
    return { ok: false, code: 'sandbox_invocation_error' };
  }
  // A non-terminating module never returns from this call — the parent's
  // kill-timer terminates the worker instead.
  const result = entryFn(arg);
  return { ok: true, result: Number(result) };
}

let outcome;
try {
  outcome = runEntry();
} catch (err) {
  const message = err instanceof Error ? err.message : String(err);
  outcome = {
    ok: false,
    code: MEMORY_TRAP.test(message) ? 'sandbox_memory_exceeded' : 'sandbox_invocation_error',
  };
}
parentPort.postMessage(outcome);
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Shared helpers for the RFC 0078 `toolCatalog` conformance scenarios.
3
+ * Lives in lib/ (not a `*.test.ts`) so scenarios import it via
4
+ * `../lib/toolCatalog.js`.
5
+ *
6
+ * Two surfaces:
7
+ * - the NORMATIVE reads (`GET /v1/tools` + `GET /v1/tools/{toolId}`, RFC 0078
8
+ * §B), exercised black-box; and
9
+ * - the host-sample tool-session seam (`POST /v1/host/sample/tools/session-run`),
10
+ * used to drive the §D `tool.session.{opened,closed}` bracket over the RFC
11
+ * 0064 call events so the ordering + content-free guarantees can be asserted
12
+ * against the test event-log seam. The seam is OPTIONAL — scenarios soft-skip
13
+ * on 404/405 (the reference session lifecycle is deferred per RFC 0078
14
+ * §Conformance).
15
+ *
16
+ * Gating uses the `toolCatalog.supported` (and `toolCatalog.sessionLifecycle`)
17
+ * capability flags from the live discovery doc (read at the document root per RFC 0073; no legacy-wrapper fallback).
18
+ *
19
+ * @see RFCS/0078-portable-tool-catalog-and-tool-session-contract.md
20
+ * @see spec/v1/tool-catalog.md
21
+ */
22
+ import { driver } from './driver.js';
23
+ import { readCapabilityFamily } from './discovery-capabilities.js';
24
+
25
/**
 * Look up the `toolCatalog` capability family in the live discovery document
 * through the shared `readCapabilityFamily` accessor.
 *
 * @returns the family object, or `null` when it is missing, falsy, or not an
 *   object.
 */
export async function readToolCatalogCap(): Promise<Record<string, unknown> | null> {
  const family = await readCapabilityFamily<Record<string, unknown>>('toolCatalog');
  if (!family || typeof family !== 'object') return null;
  return family;
}
31
+
32
/**
 * One entry of the tool catalog as returned by `GET /v1/tools` /
 * `GET /v1/tools/{toolId}`. All named fields are optional and extra keys are
 * tolerated so scenarios can validate what the host really sent.
 */
export interface ToolDescriptor {
  toolId?: string;
  /** Presumably one of `TOOL_SOURCES` — verify against the host response. */
  source?: string;
  /** Presumably one of `SAFETY_TIERS` — verify against the host response. */
  safetyTier?: string;
  /** Any additional descriptor keys. */
  [k: string]: unknown;
}
38
+
39
/**
 * Fetch the NORMATIVE tool catalog (`GET /v1/tools`, RFC 0078 §B).
 *
 * @returns `null` when the host does not serve the endpoint (HTTP
 *   404/405/501); otherwise the response JSON cast to a descriptor list, or
 *   an empty list when the response carried no JSON. The JSON shape is NOT
 *   validated here.
 */
export async function listTools(): Promise<ToolDescriptor[] | null> {
  const response = await driver.get('/v1/tools');
  const unserved = [404, 405, 501].includes(response.status);
  if (unserved) return null;
  const catalog = response.json as ToolDescriptor[] | undefined;
  return catalog ?? [];
}
46
+
47
/**
 * Fetch a single tool descriptor (`GET /v1/tools/{toolId}`, RFC 0078 §B).
 *
 * The HTTP status is returned alongside the body so a caller can tell a 404
 * (absent / unauthorized / unadvertised) apart from a served descriptor.
 *
 * @param toolId tool identifier; URI-encoded before it is placed in the path.
 * @returns the response status plus its JSON body (unvalidated; `undefined`
 *   when there was none).
 */
export async function getTool(
  toolId: string,
): Promise<{ status: number; descriptor: ToolDescriptor | undefined }> {
  const path = `/v1/tools/${encodeURIComponent(toolId)}`;
  const response = await driver.get(path);
  const descriptor = response.json as ToolDescriptor | undefined;
  return { status: response.status, descriptor };
}
56
+
57
/**
 * Response body of the host-sample tool-session seam
 * (`POST /v1/host/sample/tools/session-run`). Every field is optional.
 */
export interface ToolSessionResult {
  runId?: string;
  sessionId?: string;
  toolId?: string;
}
62
+
63
/**
 * Run one tool-session interaction through the host-sample seam
 * (`POST /v1/host/sample/tools/session-run`, RFC 0078 §D). Per the seam's
 * contract the host persists `tool.session.opened` → RFC 0064 call events →
 * `tool.session.closed` to the durable run-event log, which scenarios read
 * back via the run event-log read seam.
 *
 * @param body optional request payload; defaults to `{}`.
 * @returns `null` when the seam is unwired (HTTP 404/405); otherwise the
 *   response JSON, or an empty object when the response carried no JSON.
 */
export async function driveToolSession(
  body: { toolId?: string } = {},
): Promise<ToolSessionResult | null> {
  const response = await driver.post('/v1/host/sample/tools/session-run', body);
  const seamUnwired = response.status === 404 || response.status === 405;
  if (seamUnwired) return null;
  const session = response.json as ToolSessionResult | undefined;
  return session ?? {};
}
74
+
75
/** Every value a tool's `source` may take — the closed vocabulary of RFC 0078 §C. */
export const TOOL_SOURCES = [
  'node-pack',
  'workflow',
  'mcp',
  'connector',
  'host-extension',
];

/** Every value a tool's `safetyTier` may take — the closed vocabulary of RFC 0078 §C. */
export const SAFETY_TIERS = ['pure', 'read', 'write', 'exec'];

/**
 * Keys that a `ToolDescriptor` / `tool.session.*` MUST NEVER carry (SR-1):
 * no credential/secret material.
 */
export const TOOL_CONTENT_FORBIDDEN = [
  'secret',
  'credential',
  'credentials',
  'token',
  'apiKey',
  'password',
];
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Portable WASM-sandbox probe — the suite-local reference for the RFC 0035 §B
3
+ * isolation invariants.
4
+ *
5
+ * The conformance suite is a standalone package, so it carries its own compact,
6
+ * server-free probe (no host; a worker is used only by `probeTimeout`) rather than importing a reference
7
+ * host's executor. It proves the invariants that hold *by construction* in any
8
+ * WebAssembly sandbox:
9
+ *
10
+ * - escape attempts (fs / env / network / process) and the capability gate are
11
+ * proven by STATIC inspection of `WebAssembly.Module.imports()` — a WASM
12
+ * module has no ambient host access, so a forbidden operation can only be a
13
+ * declared import; a sandbox refuses any import it did not grant, failing
14
+ * closed BEFORE instantiation.
15
+ * - the memory bound is proven by instantiating with a capped host memory and
16
+ * observing the engine trap on an access past the bound.
17
+ * - isolated-context is proven by instantiating the same module twice and
18
+ * observing no shared mutable state.
19
+ *
20
+ * The `timeout` invariant requires thread preemption (a worker kill-timer): an
21
+ * in-process infinite loop cannot be interrupted server-free, so the
22
+ * server-free `probeSandboxed` never runs a non-terminating module. The
23
+ * invariant is exercised separately by the worker-based `probeTimeout` below.
24
+ *
25
+ * @see RFCS/0035-sandbox-execution-contract.md §B
26
+ * @see examples/hosts/wasm-sandbox/ (the reference host this mirrors)
27
+ */
28
+ import { Worker } from 'node:worker_threads';
29
+ import { fileURLToPath } from 'node:url';
30
+
31
/** Error codes a probe can report when a module is refused or fails. */
export type SandboxErrorCode =
  | 'sandbox_memory_exceeded'
  | 'sandbox_timeout'
  | 'sandbox_capability_denied'
  | 'sandbox_escape_attempt'
  | 'sandbox_invocation_error';

/** Which class of host access a refused import was reaching for. */
export type EscapeKind =
  | 'host-fs-escape'
  | 'host-env-leak'
  | 'network-escape'
  | 'host-process-escape';

/** Outcome of probing one module. */
export interface ProbeResult {
  /** `true` when the entry ran to completion. */
  readonly ok: boolean;
  /** The entry's return value coerced to a number (success only). */
  readonly result?: number;
  /** Failure classification (failure only). */
  readonly code?: SandboxErrorCode;
  /** Set alongside `sandbox_escape_attempt`. */
  readonly escapeKind?: EscapeKind;
  /** Name of the un-granted `openwop` import; set alongside `sandbox_capability_denied`. */
  readonly requestedCapability?: string;
}
47
+
48
/** Size of one WebAssembly linear-memory page in bytes (64 KiB). */
const WASM_PAGE_BYTES = 65536;

// Hand-written typings for the parts of the Node-global `WebAssembly` value
// this probe touches. The complete `WebAssembly.*` namespace types ship in
// lib.dom rather than @types/node; widening the suite's global lib to get
// them would drag in conflicting DOM `fetch`/`BodyInit` types, so only the
// members used here are declared and the global is read through a cast.

/** Opaque handle for a compiled module. */
interface WAModule {
  readonly __wasmModule?: never;
}

/** An instantiated module; only its exports are read. */
interface WAInstance {
  readonly exports: Record<string, unknown>;
}

/** One entry of `WebAssembly.Module.imports()`; only the two names are read. */
interface WAImportDescriptor {
  readonly module: string;
  readonly name: string;
}

/** The subset of the `WebAssembly` global this probe relies on. */
interface WebAssemblySubset {
  Module: { new (bytes: Uint8Array): WAModule; imports(m: WAModule): readonly WAImportDescriptor[] };
  Instance: { new (m: WAModule, imports: Record<string, Record<string, unknown>>): WAInstance };
  Memory: { new (descriptor: { initial: number; maximum: number }): unknown };
}

const WA = (globalThis as unknown as { WebAssembly: WebAssemblySubset }).WebAssembly;
71
+
72
/**
 * Classify a refused import by the prefix of its name: `fd_` / `path_` →
 * filesystem, `environ_` → environment, `sock_` → network. Any other name is
 * reported as a process escape.
 */
function escapeKindFor(name: string): EscapeKind {
  const startsWithAny = (...prefixes: string[]): boolean =>
    prefixes.some((prefix) => name.startsWith(prefix));

  if (startsWithAny('fd_', 'path_')) return 'host-fs-escape';
  if (startsWithAny('environ_')) return 'host-env-leak';
  if (startsWithAny('sock_')) return 'network-escape';
  return 'host-process-escape';
}
78
+
79
/**
 * Static capability gate over a compiled module's declared imports.
 *
 * Imports are checked in declaration order and the first un-granted one
 * decides the outcome:
 *  - `env.memory` is always accepted (the host supplies it);
 *  - an `openwop.*` import not in `allowedHostCalls` → `sandbox_capability_denied`;
 *  - an import from any other namespace → `sandbox_escape_attempt`.
 *
 * @returns the refusal, or `null` if every import is host-provided.
 */
function gateImports(module: WAModule, allowedHostCalls: readonly string[]): ProbeResult | null {
  const granted = new Set(allowedHostCalls);
  for (const { module: namespace, name } of WA.Module.imports(module)) {
    const isHostMemory = namespace === 'env' && name === 'memory';
    if (isHostMemory) continue;
    if (namespace !== 'openwop') {
      return { ok: false, code: 'sandbox_escape_attempt', escapeKind: escapeKindFor(name) };
    }
    if (!granted.has(name)) {
      return { ok: false, code: 'sandbox_capability_denied', requestedCapability: name };
    }
  }
  return null;
}
92
+
93
/**
 * Server-free probe of one WASM-compiled typeId under the RFC 0035 sandbox
 * contract.
 *
 * Steps: compile the bytes; statically gate the declared imports; if all are
 * granted, instantiate against a host memory capped at `memoryLimitBytes`
 * (rounded up to whole pages, minimum one) with an identity stub for each
 * allowed `openwop` host call; then invoke `entry(arg)`.
 *
 * No worker is spawned, so callers MUST NOT pass a non-terminating module
 * (see the `timeout` note in the file header).
 *
 * @param wasmBytes the module binary.
 * @param config granted host calls and the memory cap in bytes.
 * @param entry name of the exported function to call (default `invoke`).
 * @param arg numeric argument passed to the entry (default `0`).
 * @returns success with the numeric result, the import-gate refusal,
 *   `sandbox_memory_exceeded` for an out-of-bounds memory trap, or
 *   `sandbox_invocation_error` for any other compile/instantiate/run failure
 *   (including a missing or non-function entry).
 */
export function probeSandboxed(
  wasmBytes: Uint8Array,
  config: { readonly allowedHostCalls: readonly string[]; readonly memoryLimitBytes: number },
  entry = 'invoke',
  arg = 0,
): ProbeResult {
  const failure = (code: SandboxErrorCode): ProbeResult => ({ ok: false, code });

  let compiled: WAModule;
  try {
    compiled = new WA.Module(wasmBytes);
  } catch {
    return failure('sandbox_invocation_error');
  }

  // Fail closed on any un-granted import BEFORE instantiating.
  const refusal = gateImports(compiled, config.allowedHostCalls);
  if (refusal) return refusal;

  const pagesForLimit = Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES);
  const memoryMaxPages = Math.max(1, pagesForLimit);
  try {
    const memory = new WA.Memory({ initial: 1, maximum: memoryMaxPages });
    // Each granted host call is stubbed as the identity function.
    const openwop: Record<string, (x: number) => number> = {};
    for (const hostCall of config.allowedHostCalls) {
      openwop[hostCall] = (x: number): number => x;
    }
    const instance = new WA.Instance(compiled, { env: { memory }, openwop });
    const entryFn = instance.exports[entry];
    if (typeof entryFn !== 'function') return failure('sandbox_invocation_error');
    const value = (entryFn as (a: number) => number)(arg);
    return { ok: true, result: Number(value) };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    const memoryTrap = /out of bounds memory access|memory access out of bounds/i.test(message);
    return failure(memoryTrap ? 'sandbox_memory_exceeded' : 'sandbox_invocation_error');
  }
}
131
+
132
/** Filesystem path of the sibling worker script, resolved relative to this module. */
const timeoutWorkerPath = fileURLToPath(new URL('./sandbox-timeout-worker.mjs', import.meta.url));

/**
 * Worker-based timeout probe — RFC 0035 §B invariant 6 (`node-pack-sandbox-timeout`).
 *
 * A wall-clock cap can only be enforced by THREAD PREEMPTION: a same-thread
 * timer cannot interrupt a synchronous WASM loop. So this spawns a worker
 * thread that runs the module and races a main-thread kill-timer against it.
 * Whichever settles first wins; the worker is always terminated and the timer
 * cleared on settle.
 *
 * Outcomes:
 *  - the worker posts a result within the budget → that result is returned;
 *  - the kill-timer fires first → `sandbox_timeout`;
 *  - the worker raises `'error'`, or exits without ever reporting →
 *    `sandbox_invocation_error`.
 *
 * This is the worker-driven counterpart to the server-free `probeSandboxed`
 * (which deliberately cannot run a non-terminating module).
 *
 * @param wasmBytes the module binary (passed to the worker via `workerData`).
 * @param config memory cap in bytes and the wall-clock budget in milliseconds.
 * @param entry name of the exported function to call (default `invoke`).
 * @param arg numeric argument passed to the entry (default `0`).
 * @returns a promise that always resolves with the classified outcome once
 *   the worker has been constructed.
 */
export function probeTimeout(
  wasmBytes: Uint8Array,
  config: { readonly memoryLimitBytes: number; readonly wallClockLimitMs: number },
  entry = 'invoke',
  arg = 0,
): Promise<ProbeResult> {
  const memoryMaxPages = Math.max(1, Math.ceil(config.memoryLimitBytes / WASM_PAGE_BYTES));
  return new Promise((resolve) => {
    const worker = new Worker(timeoutWorkerPath, { workerData: { wasmBytes, entry, arg, memoryMaxPages } });
    let settled = false;
    // First caller wins; later calls (e.g. the 'exit' that follows our own
    // terminate()) are ignored.
    const finish = (r: ProbeResult): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      void worker.terminate();
      resolve(r);
    };
    const timer = setTimeout(() => finish({ ok: false, code: 'sandbox_timeout' }), config.wallClockLimitMs);
    worker.on('message', (m: { ok: boolean; result?: number; code?: SandboxErrorCode }) => {
      if (m.ok) finish(m.result === undefined ? { ok: true } : { ok: true, result: m.result });
      else finish({ ok: false, code: m.code ?? 'sandbox_invocation_error' });
    });
    worker.on('error', () => finish({ ok: false, code: 'sandbox_invocation_error' }));
    // A worker that stops without posting a message or raising 'error' would
    // otherwise sit pending until the kill-timer and be misreported as
    // `sandbox_timeout`. Report it as an invocation failure instead.
    worker.on('exit', () => finish({ ok: false, code: 'sandbox_invocation_error' }));
  });
}
@@ -0,0 +1,75 @@
1
+ /**
2
+ * openwop-core-standard — operational-annex predicate derivation (RFC 0088).
3
+ *
4
+ * Always-on, server-free derivation probe. Verifies that `isCoreStandard`
5
+ * derives the Core Standard Profile floor correctly from representative
6
+ * discovery payloads (RFC 0088 §B / core-standard-profile.md §B):
7
+ * - a host meeting openwop-core + openwop-interrupts + a transport is core-standard;
8
+ * - a bare openwop-core host (no interrupts) is NOT core-standard — the floor is
9
+ * deliberately stricter than the v1 minimum;
10
+ * - a host with no event transport (supportedTransports: []) fails the floor;
11
+ * - the floor is the AND of three existing closed-catalog predicates (it composes,
12
+ * it does not redefine — so it is absent from deriveProfiles()).
13
+ *
14
+ * The LIVE aggregate-evidence assertion (does every §C floor scenario actually
15
+ * pass against a host claiming the profile?) is the `Active → Accepted` step per
16
+ * RFC 0088 §C — already satisfied by MyndHyve + all reference hosts, asserted via
17
+ * each constituent scenario, and deferred here. This scenario asserts the
18
+ * discovery-predicate derivation only.
19
+ *
20
+ * Spec references:
21
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/core-standard-profile.md
22
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0088-core-standard-profile.md
23
+ */
24
+
25
+ import { describe, it, expect } from 'vitest';
26
+ import { isCoreStandard, isCore, deriveProfiles } from '../lib/profiles.js';
27
+
28
/** Build an assertion message of the form `<specRef> — <requirement>`. */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');

/** Baseline discovery payload that every scenario below spreads and tweaks. */
const CORE = {
  protocolVersion: '1.0',
  supportedEnvelopes: ['clarification.request'],
  schemaVersions: {},
  limits: {
    clarificationRounds: 1,
    schemaRounds: 1,
    envelopesPerTurn: 1,
  },
};
36
+
37
// Floor predicate: each case spreads CORE, changes one term, and checks the verdict.
describe('core-standard-profile: floor predicate (RFC 0088 §B, server-free)', () => {
  it('a host meeting core + interrupts + a default transport is core-standard', () => {
    // No supportedTransports ⇒ both stream predicates default-true (profiles.md).
    const payload = { ...CORE };
    const message = why('core-standard-profile.md §B', 'core + interrupts + transport ⇒ core-standard');
    expect(isCoreStandard(payload), message).toBe(true);
  });

  it('a bare openwop-core host without interrupts is NOT core-standard', () => {
    // openwop-core minimum, but no clarification.request ⇒ fails openwop-interrupts.
    const payload = { ...CORE, supportedEnvelopes: ['schema.request'] };
    expect(
      isCore(payload),
      why('profiles.md §openwop-core', 'still a valid openwop-core host'),
    ).toBe(true);
    expect(
      isCoreStandard(payload),
      why('core-standard-profile.md §B', 'the floor is stricter than the v1 minimum'),
    ).toBe(false);
  });

  it('a host advertising no event transport fails the floor', () => {
    const payload = { ...CORE, supportedTransports: [] as string[] };
    const message = why('core-standard-profile.md §B', 'at least one event transport is required');
    expect(isCoreStandard(payload), message).toBe(false);
  });

  it('a host advertising the rest transport satisfies the transport term', () => {
    const payload = { ...CORE, supportedTransports: ['rest'] };
    const message = why('core-standard-profile.md §B', 'rest transport ⇒ stream term satisfied');
    expect(isCoreStandard(payload), message).toBe(true);
  });

  it('a non-1.x host is not core-standard', () => {
    const payload = { ...CORE, protocolVersion: '2.0' };
    const message = why('profiles.md §openwop-core', 'core-standard implies openwop-core (1.x)');
    expect(isCoreStandard(payload), message).toBe(false);
  });
});
66
+
67
// The annex composes existing predicates, so deriveProfiles() must not list it.
describe('core-standard-profile: composes, does not redefine (RFC 0088 §A, server-free)', () => {
  it('openwop-core-standard is an annex, NOT a closed-catalog profile (absent from deriveProfiles)', () => {
    const payload = { ...CORE };
    const derived = deriveProfiles(payload) as readonly string[];
    const listsAnnex = derived.includes('openwop-core-standard');
    expect(
      listsAnnex,
      why('core-standard-profile.md §A', 'the annex is not a closed-catalog predicate'),
    ).toBe(false);
  });
});