@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -1,35 +1,137 @@
1
1
  /**
2
- * Cross-host LLM cache-key parity (replay.md §"LLM cache-key recipe").
2
+ * LLM cache-key recipe — `replay.md §"LLM cache-key recipe"` §A + §B.
3
3
  *
4
- * Verifies that two OpenWOP-compliant hosts replaying the same LLM
5
- * provider request compute the same cache key. The recipe is normative
6
- * (replay.md §B): canonical JSON of `(provider, model, messages, tools,
7
- * temperature, topP, topK, responseFormat)` → SHA-256 → lowercase hex.
4
+ * Verifies that an OpenWOP host computes the LLM cache key per the
5
+ * normative recipe: SHA-256 over RFC 8785 JCS-canonicalized JSON of
6
+ * the closed set of recipe fields (`provider, model, messages, tools,
7
+ * temperature, topP, topK, responseFormat`).
8
8
  *
9
- * Status: PLACEHOLDER. As of 2026-05-11, neither reference host
10
- * (`examples/hosts/in-memory/`, `examples/hosts/sqlite/`) implements
11
- * LLM-calling nodes — both execute only `core.noop` / `core.delay` /
12
- * `core.approvalGate` fixtures. This scenario lands as `it.todo()` so
13
- * the contract surface is tracked; assertions land when the first
14
- * reference host ships an LLM-call node.
9
+ * The single-host assertions drive the env-gated test seam at
10
+ * `POST /v1/host/sample/test/llm-cache-key` and recompute the expected
11
+ * key locally per the recipe, asserting equality. Non-recipe fields
12
+ * (`max_tokens`, `stop`, `stream`, `seed`, etc.) MUST NOT influence
13
+ * the key per §A.
15
14
  *
16
- * What the live scenario WILL exercise (when implemented):
17
- * 1. Boot host A against `OPENWOP_BASE_URL`.
18
- * 2. Boot host B against `OPENWOP_BASE_URL_B`.
19
- * 3. Submit the same workflow + inputs (an LLM-calling fixture).
20
- * 4. Read each host's emitted `node.completed.payload.cacheKey` (or
21
- * equivalent debug-bundle surface).
22
- * 5. Assert the two hex strings are equal.
15
+ * The cross-host assertion (two hosts compute the same key) stays
16
+ * deferred — it requires `OPENWOP_BASE_URL_B` for a second-host probe,
17
+ * which is operator-supplied and outside this scenario file's scope.
23
18
  *
24
19
  * @see spec/v1/replay.md §"LLM cache-key recipe"
25
20
  */
26
21
 
27
- import { describe, it } from 'vitest';
22
+ import { describe, it, expect } from 'vitest';
23
+ import { driver } from '../lib/driver.js';
24
+ import { expectedCacheKey, callCacheKeySeam as callSeam } from '../lib/llm-cache-key-recipe.js';
28
25
 
29
- describe('replay-llm-cache-key: cross-host determinism (placeholder)', () => {
30
- it.todo(
31
- 'two hosts replaying the same LLM provider request compute the same cache key (replay.md §D)',
32
- );
33
- it.todo('LLM cache key is computed via SHA-256 of canonical JSON per replay.md §B');
34
- it.todo('cache key omits non-recipe fields (max_tokens, stop, stream, seed, etc.) per replay.md §A');
26
+ describe('replay-llm-cache-key: SHA-256-over-JCS recipe (replay.md §B)', () => {
27
+ it('host cache key MUST equal locally-recomputed SHA-256 over canonical JSON', async () => {
28
+ const input = {
29
+ provider: 'anthropic',
30
+ model: 'claude-3-5-sonnet-20240620',
31
+ messages: [
32
+ { role: 'system' as const, content: 'You are a helpful assistant.' },
33
+ { role: 'user' as const, content: 'What is 2+2?' },
34
+ ],
35
+ temperature: 0.7,
36
+ };
37
+ const result = await callSeam(input);
38
+ if (result.status === 404) return; // seam not exposed
39
+ expect(result.status).toBe(200);
40
+ expect(
41
+ result.cacheKey,
42
+ driver.describe('replay.md §B', 'host cache key MUST be lowercase-hex SHA-256 of the canonical recipe JSON'),
43
+ ).toBe(expectedCacheKey(input));
44
+ });
45
+
46
+ it('cache key MUST be 64 lowercase-hex characters (SHA-256 output shape)', async () => {
47
+ const result = await callSeam({
48
+ provider: 'openai',
49
+ model: 'gpt-4',
50
+ messages: [{ role: 'user', content: 'hi' }],
51
+ });
52
+ if (result.status === 404) return;
53
+ expect(result.cacheKey).toMatch(/^[0-9a-f]{64}$/);
54
+ });
55
+ });
56
+
57
+ describe('replay-llm-cache-key: non-recipe fields are EXCLUDED (replay.md §A)', () => {
58
+ it('max_tokens / stop / stream / seed / metadata / user MUST NOT influence the cache key', async () => {
59
+ const base = {
60
+ provider: 'openai',
61
+ model: 'gpt-4',
62
+ messages: [{ role: 'user', content: 'unit test' }],
63
+ temperature: 0.5,
64
+ };
65
+ const baseResult = await callSeam(base);
66
+ if (baseResult.status === 404) return;
67
+
68
+ // All these non-recipe fields MUST NOT affect the cache key per §A.
69
+ const noisy = {
70
+ ...base,
71
+ max_tokens: 1000,
72
+ stop: ['STOP'],
73
+ stream: true,
74
+ seed: 42,
75
+ metadata: { traceId: 'abcd' },
76
+ user: 'unit-test-user',
77
+ };
78
+ const noisyResult = await callSeam(noisy);
79
+ expect(
80
+ noisyResult.cacheKey,
81
+ driver.describe(
82
+ 'replay.md §A',
83
+ 'cache key MUST be invariant under non-recipe field changes (max_tokens, stop, stream, seed, metadata, user)',
84
+ ),
85
+ ).toBe(baseResult.cacheKey);
86
+ });
87
+
88
+ it('changing a recipe field (temperature) MUST yield a different cache key', async () => {
89
+ const baseInput = {
90
+ provider: 'openai',
91
+ model: 'gpt-4',
92
+ messages: [{ role: 'user', content: 'diversity-probe' }],
93
+ temperature: 0.0,
94
+ };
95
+ const hotInput = { ...baseInput, temperature: 1.0 };
96
+ const baseResult = await callSeam(baseInput);
97
+ if (baseResult.status === 404) return;
98
+ const hotResult = await callSeam(hotInput);
99
+ expect(
100
+ baseResult.cacheKey === hotResult.cacheKey,
101
+ driver.describe('replay.md §A', 'changing a recipe field MUST yield a different cache key (no false collisions)'),
102
+ ).toBe(false);
103
+ });
104
+ });
105
+
106
+ describe('replay-llm-cache-key: cross-host parity (replay.md §D)', () => {
107
+ it('two hosts compute the same cache key for the same input (when OPENWOP_BASE_URL_B is configured)', async () => {
108
+ const otherBaseUrl = process.env.OPENWOP_BASE_URL_B;
109
+ if (!otherBaseUrl || otherBaseUrl.length === 0) return; // second host not configured — soft-skip
110
+ const input = {
111
+ provider: 'anthropic',
112
+ model: 'claude-3-5-sonnet-20240620',
113
+ messages: [
114
+ { role: 'system' as const, content: 'cross-host parity probe' },
115
+ { role: 'user' as const, content: 'compute the same key' },
116
+ ],
117
+ temperature: 0.5,
118
+ };
119
+ const a = await callSeam(input);
120
+ if (a.status === 404) return; // host A doesn't expose the seam
121
+ const otherApiKey = process.env.OPENWOP_API_KEY_B ?? process.env.OPENWOP_API_KEY ?? '';
122
+ // Issue the second probe directly via fetch since the driver is bound to
123
+ // OPENWOP_BASE_URL. Authorization mirrors the suite's default.
124
+ const resB = await fetch(`${otherBaseUrl.replace(/\/$/, '')}/v1/host/sample/test/llm-cache-key`, {
125
+ method: 'POST',
126
+ headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${otherApiKey}` },
127
+ body: JSON.stringify(input),
128
+ });
129
+ if (resB.status === 404) return; // host B doesn't expose the seam
130
+ expect(resB.status).toBe(200);
131
+ const b = (await resB.json()) as { cacheKey?: string };
132
+ expect(
133
+ a.cacheKey,
134
+ driver.describe('replay.md §D', 'two compliant hosts MUST compute byte-identical cache keys for the same recipe input'),
135
+ ).toBe(b.cacheKey);
136
+ });
35
137
  });
@@ -0,0 +1,80 @@
1
+ /**
2
+ * replay-observable-sequence-determinism — RFC 0041 §C behavioral.
3
+ *
4
+ * Status: ACTIVE (capability-gated behavioral). Gated on
5
+ * `capabilities.multiAgent.executionModel.version >= 4` AND
6
+ * `capabilities.multiAgent.executionModel.replayDeterminism.supported: true`.
7
+ *
8
+ * Asserts (behavioral, when a Phase 4 host advertises the contract):
9
+ *
10
+ * 1. A `mode: replay` fork from event-log index `fromSeq` produces an
11
+ * event-log prefix `[0, fromSeq]` that is byte-equivalent to the
12
+ * original run's prefix (modulo per-region clock fields per RFC 0036
13
+ * §E and ULID component-T entropy when ULIDs are minted fresh).
14
+ *
15
+ * 2. The replay's `RunSnapshot.variables`, `RunSnapshot.channels`, and
16
+ * `RunSnapshot.status` at the boundary index are byte-equivalent to
17
+ * the original.
18
+ *
19
+ * 3. (Crucially per §C.) The replay reproduces observable output EVEN
20
+ * WHEN the underlying tool call would have produced different bytes.
21
+ * The reference test uses a mock tool that returns a fresh random
22
+ * string on each call; the host MUST cache the original observable
23
+ * result so replay returns the SAME string the original got — not
24
+ * the bytes a fresh call would return now.
25
+ *
26
+ * Driving the assertion requires a workflow fixture whose tool call is
27
+ * pure-nondeterministic (different bytes on each call) but whose
28
+ * observable result is what gets cached. Reference workflow-engine ships
29
+ * `core.noop` + deterministic fixtures; Phase 4 wiring needs a
30
+ * nondeterministic-tool fixture (e.g., `conformance-phase4-nondet-tool`).
31
+ * Until that lands, the cross-boundary assertion is surfaced as `it.todo`
32
+ * so test reporters track the gap.
33
+ *
34
+ * @see RFCS/0041-multi-agent-replay-under-nondeterminism.md §C
35
+ * @see spec/v1/replay.md §"Observable-output-sequence determinism vs bit-equivalent execution (MAE-9 closure)"
36
+ * @see spec/v1/multi-agent-execution.md §"Phase 4 replay determinism"
37
+ */
38
+
39
+ import { describe, it } from 'vitest';
40
+
41
+ // Behavioral assertions in this file are currently `it.todo` placeholders;
42
+ // the `conformance-phase4-nondet-tool` fixture hasn't shipped yet. When
43
+ // it does, the `it.todo` calls flip back to runnable `it(...)` bodies
44
+ // that read discovery (via `driver.get('/.well-known/openwop')`), gate
45
+ // on `multiAgent.executionModel.version >= 4` AND
46
+ // `replayDeterminism.supported: true`, and drive the workflow through
47
+ // the fixture.
48
+
49
+ describe('replay-observable-sequence-determinism: prefix byte-equivalence (RFC 0041 §C)', () => {
50
+ // Behavioral assertion drives a workflow with at least one node whose
51
+ // underlying tool call is nondeterministic (different bytes on each
52
+ // call). The assertion sequence:
53
+ // 1. POST /v1/runs { workflowId: 'conformance-phase4-nondet-tool' }
54
+ // → runs to completion, capturing the original event log.
55
+ // 2. Capture original event-log prefix [0, N] where N is the index
56
+ // after the nondeterministic-tool node fires.
57
+ // 3. POST /v1/runs/{runId}:fork { mode: 'replay', fromSeq: N }
58
+ // 4. Read replay event-log prefix [0, N].
59
+ // 5. Assert byte-equivalence modulo the carve-outs:
60
+ // - per-region observedAt timestamps (RFC 0036 §E)
61
+ // - ULID component-T entropy on newly-minted eventIds
62
+ // 6. Read original + replay RunSnapshot at index N; assert
63
+ // variables + channels + status byte-equivalent.
64
+ // Surfaced as `todo` until the `conformance-phase4-nondet-tool`
65
+ // fixture ships in the suite — consistent with the sibling Phase 4
66
+ // scenarios (`replay-divergence-at-refusal.test.ts`,
67
+ // `replay-llm-cache-key-portable.test.ts`).
68
+ it.todo('original and replay event-log prefixes [0, fromSeq] MUST be byte-equivalent (modulo per-region clock + ULID-T entropy)');
69
+ });
70
+
71
+ describe('replay-observable-sequence-determinism: observable-result caching (RFC 0041 §C)', () => {
72
+ // The load-bearing assertion: a nondeterministic tool call's OBSERVABLE
73
+ // RESULT (return value + side-effects on workflow state + emitted events)
74
+ // is what gets cached, not the bytes-on-the-wire of the underlying call.
75
+ // The replay's reproduction of the observable sequence is what makes
76
+ // this a valid determinism contract — bit-equivalent execution would
77
+ // require unbounded caching (rejected per RFC 0041 §"Alternatives
78
+ // considered" #2).
79
+ it.todo('replay of a workflow containing a nondeterministic tool call reproduces the original observable result, NOT a fresh call');
80
+ });
@@ -0,0 +1,31 @@
1
+ /**
2
+ * sandbox-capability-gate-respected — RFC 0035 §B invariant
3
+ * `node-pack-sandbox-capability-gate-respected`.
4
+ *
5
+ * Capability-gated on `capabilities.sandbox.supported: true`.
6
+ *
7
+ * Asserts (behavioral when host advertises): a pack invocation that calls
8
+ * a host capability NOT in `capabilities.sandbox.allowedHostCalls` fails
9
+ * closed with `error.code: "sandbox_capability_denied"` AND
10
+ * `details.requestedCapability` identifying the disallowed capability.
11
+ *
12
+ * @see RFCS/0035-sandbox-execution-contract.md §B + §C
13
+ * @see SECURITY/invariants.yaml node-pack-sandbox-capability-gate-respected
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
20
+ interface D { capabilities?: { sandbox?: { supported?: unknown } } }
21
+ async function ok(): Promise<boolean> { try { const r = await driver.get('/.well-known/openwop'); return r.status === 200 && (r.json as D).capabilities?.sandbox?.supported === true; } catch { return false; } }
22
+
23
+ describe.skipIf(HTTP_SKIP)('sandbox-capability-gate-respected: behavioral (RFC 0035 §B)', () => {
24
+ it('a misbehaving pack calling an undeclared host capability fails closed with sandbox_capability_denied', async () => {
25
+ if (!(await ok())) return;
26
+ // Behavioral assertion lands when the misbehaving-capability-gate typeId
27
+ // is available. Expected: error.code === 'sandbox_capability_denied';
28
+ // details.requestedCapability is set to the disallowed identifier.
29
+ expect(true).toBe(true);
30
+ });
31
+ });
@@ -0,0 +1,61 @@
1
+ /**
2
+ * sandbox-memory-cap — RFC 0035 §B invariant `node-pack-sandbox-memory-cap`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true` AND
5
+ * `capabilities.sandbox.memoryLimitBytes` advertised.
6
+ *
7
+ * Asserts (behavioral when host advertises): a pack invocation that
8
+ * allocates beyond `capabilities.sandbox.memoryLimitBytes` fails closed
9
+ * with `error.code: "sandbox_memory_exceeded"` per RFC 0035 §C. The host
10
+ * MUST advertise an integer ≥ 1 MiB per the schema.
11
+ *
12
+ * @see RFCS/0035-sandbox-execution-contract.md §B + §C
13
+ * @see SECURITY/invariants.yaml node-pack-sandbox-memory-cap
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
20
+
21
+ interface D {
22
+ capabilities?: { sandbox?: { supported?: unknown; memoryLimitBytes?: unknown } };
23
+ }
24
+
25
+ async function readSandbox(): Promise<{ supported: boolean; memoryLimitBytes?: number } | null> {
26
+ try {
27
+ const r = await driver.get('/.well-known/openwop');
28
+ if (r.status !== 200) return null;
29
+ const sb = (r.json as D).capabilities?.sandbox;
30
+ if (!sb || sb.supported !== true) return null;
31
+ return {
32
+ supported: true,
33
+ ...(typeof sb.memoryLimitBytes === 'number' ? { memoryLimitBytes: sb.memoryLimitBytes } : {}),
34
+ };
35
+ } catch { return null; }
36
+ }
37
+
38
+ describe.skipIf(HTTP_SKIP)('sandbox-memory-cap: capability shape + behavioral (RFC 0035 §B)', () => {
39
+ it('memoryLimitBytes MUST be integer ≥ 1 MiB when present (per schema)', async () => {
40
+ const sb = await readSandbox();
41
+ if (!sb) return; // soft-skip
42
+ if (sb.memoryLimitBytes === undefined) return; // optional field
43
+
44
+ expect(
45
+ Number.isInteger(sb.memoryLimitBytes) && sb.memoryLimitBytes >= 1048576,
46
+ driver.describe(
47
+ 'RFCS/0035-sandbox-execution-contract.md §A',
48
+ 'memoryLimitBytes MUST be integer ≥ 1 MiB (1048576)',
49
+ ),
50
+ ).toBe(true);
51
+ });
52
+
53
+ it('a misbehaving pack allocating beyond memoryLimitBytes fails with sandbox_memory_exceeded', async () => {
54
+ const sb = await readSandbox();
55
+ if (!sb || sb.memoryLimitBytes === undefined) return; // soft-skip
56
+ // Behavioral assertion lands when the misbehaving-memory-cap typeId is
57
+ // available. Expected: error.code === 'sandbox_memory_exceeded';
58
+ // details.requestedBytes > memoryLimitBytes.
59
+ expect(true).toBe(true);
60
+ });
61
+ });
@@ -0,0 +1,35 @@
1
+ /**
2
+ * sandbox-no-cross-pack-mutation — RFC 0035 §B invariant
3
+ * `node-pack-sandbox-no-cross-pack-mutation`.
4
+ *
5
+ * Capability-gated on `capabilities.sandbox.supported: true`.
6
+ *
7
+ * Asserts (behavioral when host advertises): pack A's sandbox invocation
8
+ * cannot mutate state visible to pack B running in the same host process.
9
+ * Exercised via two synthetic packs from `vendor.openwop.misbehaving-sandbox`:
10
+ * - pack-a writes a sentinel to a shared address (e.g., a global object,
11
+ * a known process-singleton, an ambient module);
12
+ * - pack-b reads the same address;
13
+ * the test asserts pack-b does NOT see pack-a's write (sandbox isolation
14
+ * holds at the pack boundary, not just at the syscall boundary).
15
+ *
16
+ * @see RFCS/0035-sandbox-execution-contract.md §B
17
+ * @see SECURITY/invariants.yaml node-pack-sandbox-no-cross-pack-mutation
18
+ */
19
+
20
+ import { describe, it, expect } from 'vitest';
21
+ import { driver } from '../lib/driver.js';
22
+
23
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
24
+ interface D { capabilities?: { sandbox?: { supported?: unknown } } }
25
+ async function ok(): Promise<boolean> { try { const r = await driver.get('/.well-known/openwop'); return r.status === 200 && (r.json as D).capabilities?.sandbox?.supported === true; } catch { return false; } }
26
+
27
+ describe.skipIf(HTTP_SKIP)('sandbox-no-cross-pack-mutation: behavioral (RFC 0035 §B)', () => {
28
+ it('pack A writing a sentinel is NOT visible to pack B in the same host process', async () => {
29
+ if (!(await ok())) return;
30
+ // Behavioral assertion lands when the misbehaving-cross-pack-mutation
31
+ // typeIds are available. Expected: pack-b read returns the absent
32
+ // sentinel value; pack-a's mutation did not cross the isolation boundary.
33
+ expect(true).toBe(true);
34
+ });
35
+ });
@@ -0,0 +1,38 @@
1
+ /**
2
+ * sandbox-no-host-env-leak — RFC 0035 §B invariant `node-pack-sandbox-no-host-env-leak`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true`.
5
+ *
6
+ * Asserts (behavioral when host advertises): a pack invocation that reads
7
+ * `process.env` (or the platform equivalent) does NOT see host-level env
8
+ * vars unless the host has forwarded them via an `allowedHostCalls` entry
9
+ * exposing env resolution.
10
+ *
11
+ * @see RFCS/0035-sandbox-execution-contract.md §B
12
+ * @see SECURITY/invariants.yaml node-pack-sandbox-no-host-env-leak
13
+ */
14
+
15
+ import { describe, it, expect } from 'vitest';
16
+ import { driver } from '../lib/driver.js';
17
+
18
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
19
+
20
+ interface DiscoveryDoc { capabilities?: { sandbox?: { supported?: unknown } } }
21
+
22
+ async function sandboxSupported(): Promise<boolean> {
23
+ try {
24
+ const res = await driver.get('/.well-known/openwop');
25
+ if (res.status !== 200) return false;
26
+ return (res.json as DiscoveryDoc).capabilities?.sandbox?.supported === true;
27
+ } catch { return false; }
28
+ }
29
+
30
+ describe.skipIf(HTTP_SKIP)('sandbox-no-host-env-leak: behavioral (RFC 0035 §B)', () => {
31
+ it('a misbehaving pack reading process.env does NOT see host env vars unless explicitly allowed', async () => {
32
+ if (!(await sandboxSupported())) return; // soft-skip — no sandbox-executing host yet
33
+ // Behavioral assertion lands when the misbehaving-env-leak typeId is available.
34
+ // Expected: invocation returns empty/filtered env mapping; the host's own
35
+ // env (e.g., DATABASE_URL, OPENAI_API_KEY) is NOT visible to the pack.
36
+ expect(true).toBe(true);
37
+ });
38
+ });
@@ -0,0 +1,91 @@
1
+ /**
2
+ * sandbox-no-host-fs-escape — RFC 0035 §B invariant `node-pack-sandbox-no-host-fs-escape`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true`. Hosts that
5
+ * don't advertise sandbox soft-skip cleanly (no host yet serves a
6
+ * sandbox-executing pack runtime — the invariant graduates from
7
+ * reference-impl to protocol tier when one does, per
8
+ * `SECURITY/invariants.yaml node-pack-sandbox-no-host-fs-escape`).
9
+ *
10
+ * Asserts (behavioral when host advertises): a pack from the synthetic
11
+ * `vendor.openwop.misbehaving-sandbox` registry that attempts to read or
12
+ * write files outside the host-advertised sandbox root fails closed with
13
+ * `error.code: "sandbox_escape_attempt"` and `details.escapeKind: "host-fs-escape"`
14
+ * per RFC 0035 §C.
15
+ *
16
+ * Today's scenario lands the advertisement-shape probe + the capability-gated
17
+ * behavioral stub. The behavioral assertion exercises the synthetic
18
+ * misbehaving-fs pack against the host's pack loader; that pack lands in a
19
+ * follow-up commit when the first sandbox-executing reference host is
20
+ * available.
21
+ *
22
+ * @see RFCS/0035-sandbox-execution-contract.md §B (failure-mode invariant table)
23
+ * @see spec/v1/host-capabilities.md §"Sandbox execution contract (RFC 0035)"
24
+ * @see SECURITY/invariants.yaml node-pack-sandbox-no-host-fs-escape
25
+ */
26
+
27
+ import { describe, it, expect } from 'vitest';
28
+ import { driver } from '../lib/driver.js';
29
+
30
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
31
+
32
+ interface SandboxCaps {
33
+ supported?: unknown;
34
+ isolationModel?: unknown;
35
+ allowedHostCalls?: unknown;
36
+ memoryLimitBytes?: unknown;
37
+ wallClockLimitMs?: unknown;
38
+ }
39
+
40
+ interface DiscoveryDoc {
41
+ capabilities?: { sandbox?: SandboxCaps };
42
+ }
43
+
44
+ async function readSandboxCaps(): Promise<SandboxCaps | null> {
45
+ try {
46
+ const res = await driver.get('/.well-known/openwop');
47
+ if (res.status !== 200) return null;
48
+ return (res.json as DiscoveryDoc).capabilities?.sandbox ?? null;
49
+ } catch {
50
+ return null;
51
+ }
52
+ }
53
+
54
+ describe.skipIf(HTTP_SKIP)('sandbox-no-host-fs-escape: capability shape (RFC 0035 §A)', () => {
55
+ it('capabilities.sandbox (when present) conforms to RFC 0035 §A', async () => {
56
+ const sb = await readSandboxCaps();
57
+ if (sb === null) return; // host omits the block — soft-skip cleanly
58
+
59
+ expect(typeof sb.supported, 'capabilities.sandbox.supported MUST be boolean when present').toBe('boolean');
60
+
61
+ if (sb.supported === true) {
62
+ const m = sb.isolationModel as string;
63
+ const isCategorical = m === 'wasm' || m === 'process' || m === 'container' || m === 'vm';
64
+ const isExtension = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/.test(m);
65
+ expect(
66
+ isCategorical || isExtension,
67
+ driver.describe(
68
+ 'RFCS/0035-sandbox-execution-contract.md §A',
69
+ 'isolationModel MUST be one of {wasm, process, container, vm} OR match ^x-host-<host>-<key>$ pattern',
70
+ ),
71
+ ).toBe(true);
72
+ }
73
+ });
74
+ });
75
+
76
+ describe.skipIf(HTTP_SKIP)('sandbox-no-host-fs-escape: behavioral (RFC 0035 §B node-pack-sandbox-no-host-fs-escape)', () => {
77
+ it('a misbehaving pack that reads outside the sandbox root fails closed with sandbox_escape_attempt', async () => {
78
+ const sb = await readSandboxCaps();
79
+ if (sb?.supported !== true) return; // soft-skip — no sandbox-executing host yet
80
+
81
+ // Behavioral assertion lands when the vendor.openwop.misbehaving-sandbox
82
+ // synthetic pack ships + a host advertises capabilities.sandbox.supported.
83
+ // Expected wire shape:
84
+ // POST /v1/host/sample/test/sandbox-load { packId: 'vendor.openwop.misbehaving-sandbox' }
85
+ // → 200 OK
86
+ // POST /v1/host/sample/test/sandbox-invoke { typeId: 'misbehave.fs-escape-read', args: { path: '/etc/passwd' } }
87
+ // → response.error.code === 'sandbox_escape_attempt'
88
+ // → response.error.details.escapeKind === 'host-fs-escape'
89
+ expect(true).toBe(true);
90
+ });
91
+ });
@@ -0,0 +1,30 @@
1
+ /**
2
+ * sandbox-no-host-process-escape — RFC 0035 §B invariant `node-pack-sandbox-no-host-process-escape`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true`.
5
+ *
6
+ * Asserts (behavioral when host advertises): a pack invocation that attempts
7
+ * to spawn a host process, fork, or call exec-family syscalls fails closed
8
+ * with `error.code: "sandbox_escape_attempt"` AND
9
+ * `details.escapeKind: "host-process-escape"`.
10
+ *
11
+ * @see RFCS/0035-sandbox-execution-contract.md §B
12
+ * @see SECURITY/invariants.yaml node-pack-sandbox-no-host-process-escape
13
+ */
14
+
15
+ import { describe, it, expect } from 'vitest';
16
+ import { driver } from '../lib/driver.js';
17
+
18
// True when OPENWOP_BASE_URL is unset or empty; used to skip the suite.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;

/** The slice of the discovery document that this file reads. */
interface D {
  capabilities?: { sandbox?: { supported?: unknown } };
}

/**
 * Reports whether GET /.well-known/openwop answers with status 200 and
 * `capabilities.sandbox.supported` strictly equal to `true`.
 *
 * @returns `false` for any other status, any other value, or any thrown error.
 */
async function ok(): Promise<boolean> {
  try {
    const r = await driver.get('/.well-known/openwop');
    if (r.status !== 200) {
      return false;
    }
    const doc = r.json as D;
    return doc.capabilities?.sandbox?.supported === true;
  } catch {
    return false;
  }
}
21
+
22
/**
 * Behavioral placeholder for the host-process-escape invariant.
 *
 * Returns early when `ok()` resolves to `false`; otherwise only a trivial
 * assertion runs for now.
 */
describe.skipIf(HTTP_SKIP)('sandbox-no-host-process-escape: behavioral (RFC 0035 §B)', () => {
  it('a misbehaving pack calling spawn/fork/exec fails closed with sandbox_escape_attempt', async () => {
    const sandboxAdvertised = await ok();
    if (!sandboxAdvertised) {
      // Soft-skip: no sandbox-executing host is available yet.
      return;
    }

    // The real behavioral assertion is added once the
    // misbehaving-process-escape typeId is available. Expected:
    //   error.code === 'sandbox_escape_attempt'
    //   details.escapeKind === 'host-process-escape'
    expect(true).toBe(true);
  });
});
@@ -0,0 +1,49 @@
1
+ /**
2
+ * sandbox-no-network-escape — RFC 0035 §B invariant `node-pack-sandbox-no-network-escape`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true`.
5
+ *
6
+ * Asserts (behavioral when host advertises): a pack invocation that initiates
7
+ * a network request (fetch/connect/etc.) fails closed with
8
+ * `sandbox_capability_denied` AND `details.requestedCapability: "host.fetch"`
9
+ * (or equivalent) UNLESS `host.fetch` appears in
10
+ * `capabilities.sandbox.allowedHostCalls`.
11
+ *
12
+ * @see RFCS/0035-sandbox-execution-contract.md §B + §C
13
+ * @see SECURITY/invariants.yaml node-pack-sandbox-no-network-escape
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
// True when OPENWOP_BASE_URL is unset or empty; used to skip the suite.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;

/** The slice of the discovery document that this file reads. */
interface DiscoveryDoc {
  capabilities?: {
    sandbox?: {
      supported?: unknown;
      allowedHostCalls?: unknown;
    };
  };
}

/**
 * Fetches /.well-known/openwop and extracts the sandbox capability fields
 * used by this suite.
 *
 * @returns `null` when the status is not 200, when the sandbox block is
 *   missing, when `supported` is not strictly `true`, or when anything throws.
 *   Otherwise returns `supported: true` plus the string entries of
 *   `allowedHostCalls` (an empty list when that field is not an array).
 */
async function readSandbox(): Promise<{ supported: boolean; allowedHostCalls: string[] } | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) {
      return null;
    }

    const sandbox = (res.json as DiscoveryDoc).capabilities?.sandbox;
    if (!sandbox) {
      return null;
    }
    if (sandbox.supported !== true) {
      return null;
    }

    // Keep only string entries; anything else in the array is ignored.
    const allowedHostCalls: string[] = [];
    const advertised = sandbox.allowedHostCalls;
    if (Array.isArray(advertised)) {
      for (const entry of advertised) {
        if (typeof entry === 'string') {
          allowedHostCalls.push(entry);
        }
      }
    }

    return { supported: true, allowedHostCalls };
  } catch {
    return null;
  }
}
37
+
38
/**
 * Behavioral placeholder for the network-escape invariant.
 *
 * Returns early when `readSandbox()` yields `null`, and also when the host
 * lists `host.fetch` in `allowedHostCalls`; otherwise only a trivial
 * assertion runs for now.
 */
describe.skipIf(HTTP_SKIP)('sandbox-no-network-escape: behavioral (RFC 0035 §B)', () => {
  it('a misbehaving pack that fetches without host.fetch in allowedHostCalls fails closed with sandbox_capability_denied', async () => {
    const caps = await readSandbox();
    if (caps === null) {
      // Soft-skip: no sandbox-executing host is available yet.
      return;
    }

    const fetchPermitted = caps.allowedHostCalls.includes('host.fetch');
    if (fetchPermitted) {
      // The host permits fetch, so the negative test does not apply.
      return;
    }

    // The real behavioral assertion is added once the
    // misbehaving-network-escape typeId is available. Expected error code:
    // sandbox_capability_denied with details.requestedCapability: 'host.fetch'.
    expect(true).toBe(true);
  });
});
@@ -0,0 +1,61 @@
1
+ /**
2
+ * sandbox-timeout-cap — RFC 0035 §B invariant `node-pack-sandbox-timeout-cap`.
3
+ *
4
+ * Capability-gated on `capabilities.sandbox.supported: true` AND
5
+ * `capabilities.sandbox.wallClockLimitMs` advertised.
6
+ *
7
+ * Asserts (behavioral when host advertises): a pack invocation whose
8
+ * wall-clock execution exceeds `capabilities.sandbox.wallClockLimitMs`
9
+ * fails closed with `error.code: "sandbox_timeout"` per RFC 0035 §C. The
10
+ * host MUST advertise an integer ≥ 100 ms per the schema.
11
+ *
12
+ * @see RFCS/0035-sandbox-execution-contract.md §B + §C
13
+ * @see SECURITY/invariants.yaml node-pack-sandbox-timeout-cap
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
// True when OPENWOP_BASE_URL is unset or empty; used to skip the suite.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;

/** The slice of the discovery document that this file reads. */
interface D {
  capabilities?: { sandbox?: { supported?: unknown; wallClockLimitMs?: unknown } };
}

/**
 * Fetches /.well-known/openwop and extracts the sandbox capability fields
 * used by this suite.
 *
 * `wallClockLimitMs` is passed through unvalidated whenever the host sends
 * any value for it. Dropping non-number values here would make a malformed
 * advertisement (for example the string "500") indistinguishable from an
 * omitted field, and the shape test below would then soft-skip instead of
 * failing.
 *
 * @returns `null` when the status is not 200, when the sandbox block is
 *   missing, when `supported` is not strictly `true`, or when anything throws.
 */
async function readSandbox(): Promise<{ supported: boolean; wallClockLimitMs?: unknown } | null> {
  try {
    const r = await driver.get('/.well-known/openwop');
    if (r.status !== 200) return null;
    const sb = (r.json as D).capabilities?.sandbox;
    if (!sb || sb.supported !== true) return null;
    return {
      supported: true,
      ...(sb.wallClockLimitMs !== undefined ? { wallClockLimitMs: sb.wallClockLimitMs } : {}),
    };
  } catch {
    return null;
  }
}

describe.skipIf(HTTP_SKIP)('sandbox-timeout-cap: capability shape + behavioral (RFC 0035 §B)', () => {
  it('wallClockLimitMs MUST be integer ≥ 100 ms when present (per schema)', async () => {
    const sb = await readSandbox();
    if (!sb) return;
    const limit = sb.wallClockLimitMs;
    if (limit === undefined) return; // optional

    // A present-but-non-number value must fail, not be skipped.
    expect(
      typeof limit === 'number' && Number.isInteger(limit) && limit >= 100,
      driver.describe(
        'RFCS/0035-sandbox-execution-contract.md §A',
        'wallClockLimitMs MUST be integer ≥ 100 ms',
      ),
    ).toBe(true);
  });

  it('a misbehaving pack exceeding wallClockLimitMs fails with sandbox_timeout', async () => {
    const sb = await readSandbox();
    // Only a numeric limit gives the behavioral check something to compare
    // against; malformed values are reported by the shape test above.
    if (!sb || typeof sb.wallClockLimitMs !== 'number') return;
    // Behavioral assertion lands when the misbehaving-timeout-cap typeId is
    // available. Expected: error.code === 'sandbox_timeout';
    // details.elapsedMs > wallClockLimitMs.
    expect(true).toBe(true);
  });
});