@openwop/openwop-conformance 1.6.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +74 -1
- package/api/openapi.yaml +316 -0
- package/coverage.md +16 -0
- package/fixtures/conformance-run-duration-breach.json +33 -0
- package/fixtures.md +19 -0
- package/package.json +1 -1
- package/schemas/README.md +12 -0
- package/schemas/agent-inventory-response.schema.json +90 -0
- package/schemas/ai-envelope.schema.json +28 -0
- package/schemas/annotation-create.schema.json +37 -0
- package/schemas/annotation.schema.json +56 -0
- package/schemas/artifact-type-pack-manifest.schema.json +160 -0
- package/schemas/capabilities.schema.json +195 -4
- package/schemas/chat-card-pack-manifest.schema.json +158 -0
- package/schemas/envelopes/media.audio.schema.json +38 -0
- package/schemas/envelopes/media.file.schema.json +37 -0
- package/schemas/envelopes/media.image.schema.json +33 -0
- package/schemas/heartbeat-evaluated.schema.json +14 -0
- package/schemas/heartbeat-state-changed.schema.json +14 -0
- package/schemas/node-pack-manifest.schema.json +16 -1
- package/schemas/run-event-payloads.schema.json +96 -5
- package/schemas/run-event.schema.json +4 -0
- package/schemas/workflow-definition.schema.json +5 -0
- package/schemas/workspace-file-create.schema.json +20 -0
- package/schemas/workspace-file.schema.json +39 -0
- package/src/lib/agentLoop.ts +44 -0
- package/src/lib/agentRuntime.ts +45 -0
- package/src/lib/artifactTypes.ts +96 -0
- package/src/lib/cardPacks.ts +52 -0
- package/src/lib/discovery-capabilities.ts +50 -0
- package/src/lib/distillation.ts +38 -0
- package/src/lib/feedback.ts +31 -0
- package/src/lib/heartbeat.ts +31 -0
- package/src/lib/memoryAttribution.ts +48 -0
- package/src/lib/subRunAttestation.ts +35 -0
- package/src/lib/toolHooks.ts +33 -0
- package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
- package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
- package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
- package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
- package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
- package/src/scenarios/ai-envelope-shape.test.ts +14 -18
- package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
- package/src/scenarios/approval-gate-flow.test.ts +4 -6
- package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
- package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
- package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
- package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
- package/src/scenarios/audit-log-integrity.test.ts +3 -2
- package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
- package/src/scenarios/auth-mtls.test.ts +2 -1
- package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
- package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
- package/src/scenarios/auth-saml-profile.test.ts +2 -1
- package/src/scenarios/auth-scim-profile.test.ts +2 -1
- package/src/scenarios/authorization-fail-closed.test.ts +2 -1
- package/src/scenarios/authorization-roles-shape.test.ts +2 -1
- package/src/scenarios/byok-auth-modes.test.ts +141 -0
- package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
- package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
- package/src/scenarios/commitment-fired.test.ts +83 -0
- package/src/scenarios/credential-payload-redaction.test.ts +2 -1
- package/src/scenarios/credentials-capability-shape.test.ts +2 -1
- package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
- package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
- package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
- package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
- package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
- package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
- package/src/scenarios/distillation-shape.test.ts +41 -0
- package/src/scenarios/distillation-stable-archive.test.ts +37 -0
- package/src/scenarios/distillation-token-budget.test.ts +45 -0
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
- package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
- package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
- package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
- package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
- package/src/scenarios/experimental-tier-shape.test.ts +5 -4
- package/src/scenarios/feedback-capability-shape.test.ts +35 -0
- package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
- package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
- package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
- package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
- package/src/scenarios/feedback-record-and-list.test.ts +32 -0
- package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
- package/src/scenarios/fs-path-traversal.test.ts +2 -1
- package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
- package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
- package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
- package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
- package/src/scenarios/http-client-ssrf.test.ts +10 -13
- package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
- package/src/scenarios/media-url-inline-cap.test.ts +167 -0
- package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
- package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
- package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
- package/src/scenarios/memory-attribution-shape.test.ts +28 -0
- package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
- package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
- package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
- package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
- package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
- package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
- package/src/scenarios/model-capability-substituted.test.ts +2 -1
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
- package/src/scenarios/multi-region-idempotency.test.ts +10 -10
- package/src/scenarios/oauth-capability-shape.test.ts +2 -1
- package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
- package/src/scenarios/pause-resume.test.ts +3 -3
- package/src/scenarios/production-backpressure.test.ts +2 -2
- package/src/scenarios/production-retention-expiry.test.ts +2 -2
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
- package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
- package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
- package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
- package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
- package/src/scenarios/prompt-pack-install.test.ts +2 -1
- package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
- package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
- package/src/scenarios/prompt-template-shape.test.ts +2 -1
- package/src/scenarios/provider-usage.test.ts +2 -1
- package/src/scenarios/redaction.test.ts +4 -1
- package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
- package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
- package/src/scenarios/replayDeterminism.test.ts +3 -1
- package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
- package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
- package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
- package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
- package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
- package/src/scenarios/spec-corpus-validity.test.ts +4 -1
- package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
- package/src/scenarios/subrun-approval-gate.test.ts +35 -0
- package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
- package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
- package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
- package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
- package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
- package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
- package/src/scenarios/tool-hooks-shape.test.ts +34 -0
- package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
- package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
- package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
- package/src/scenarios/wasm-pack-load.test.ts +2 -2
- package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
- package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
- package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
- package/src/scenarios/workspace-behavior.test.ts +134 -0
- package/src/scenarios/workspace-capability-shape.test.ts +73 -0
- package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* distillation-stable-archive — RFC 0062 §B(4). The distilled archive is an
|
|
3
|
+
* immutable, addressable artifact: the same source set + budget MUST yield a
|
|
4
|
+
* byte-stable archive checksum (reproducible + auditable).
|
|
5
|
+
*
|
|
6
|
+
* Gated on `capabilities.memory.distillation.supported` + the host memory-
|
|
7
|
+
* distillation seam; soft-skips when either is absent.
|
|
8
|
+
*
|
|
9
|
+
* @see RFCS/0062-scheduled-memory-distillation.md §B
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, it, expect } from 'vitest';
|
|
13
|
+
import { driver } from '../lib/driver.js';
|
|
14
|
+
import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
|
|
15
|
+
|
|
16
|
+
describe('distillation-stable-archive (RFC 0062 §B)', () => {
  // Issues the same distillation request twice and compares the archive
  // checksums the two responses carry. Returns early (soft-skip) when the
  // capability is not advertised or when invokeDistill yields null.
  it('identical sources + budget produce an identical archive checksum', async () => {
    const capability = await readDistillationCap();
    if (capability?.supported !== true) return;

    const request = {
      memoryRef: 'conformance-distill',
      tokenBudget: 8000,
      sources: ['s1', 's2', 's3'],
    };

    // The two runs are deliberately sequential: the second must see the same
    // request only after the first has fully completed.
    const firstRun = await invokeDistill(request);
    if (firstRun === null) return; // seam absent — soft-skip
    const secondRun = await invokeDistill(request);
    if (secondRun === null) return;

    const firstChecksum = firstRun.body.archiveChecksum;
    const hasChecksum = typeof firstChecksum === 'string' && firstChecksum.length > 0;
    expect(
      hasChecksum,
      driver.describe('RFC 0062 §B', 'a distillation run MUST produce a non-empty archive checksum'),
    ).toBe(true);

    expect(
      secondRun.body.archiveChecksum,
      driver.describe('RFC 0062 §B', 'the same source set + budget MUST yield a byte-stable archive'),
    ).toBe(firstChecksum);
  });
});
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* distillation-token-budget — RFC 0062 §B. A distillation run stays within its
|
|
3
|
+
* token budget (`memory.compacted.distillation.tokensUsed ≤ tokenBudget`); an
|
|
4
|
+
* un-meetable budget fails with `token_budget_exceeded` and writes no partial
|
|
5
|
+
* archive (atomic).
|
|
6
|
+
*
|
|
7
|
+
* Gated on `capabilities.memory.distillation.supported` + the host memory-
|
|
8
|
+
* distillation seam; soft-skips when either is absent.
|
|
9
|
+
*
|
|
10
|
+
* @see RFCS/0062-scheduled-memory-distillation.md §B
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect } from 'vitest';
|
|
14
|
+
import { driver } from '../lib/driver.js';
|
|
15
|
+
import { readDistillationCap, invokeDistill } from '../lib/distillation.js';
|
|
16
|
+
|
|
17
|
+
describe('distillation-token-budget (RFC 0062 §B)', () => {
  // Two phases in one test: a run with a generous budget must report usage at
  // or below that budget, then a run with a budget of 1 must be rejected with
  // `token_budget_exceeded` and carry no archive checksum. Returns early
  // (soft-skip) when the capability is not advertised or invokeDistill yields null.
  it('within budget tokensUsed ≤ tokenBudget; an un-meetable budget fails atomically', async () => {
    const capability = await readDistillationCap();
    if (capability?.supported !== true) return;

    const budgeted = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 8000 });
    if (budgeted === null) return; // seam absent — soft-skip
    const distillation = budgeted.body.event?.distillation ?? {};

    const carriesBudgetFields =
      typeof distillation.tokenBudget === 'number' && typeof distillation.tokensUsed === 'number';
    expect(
      carriesBudgetFields,
      driver.describe('RFC 0062 §B', 'memory.compacted MUST carry distillation.tokenBudget + tokensUsed on a budgeted run'),
    ).toBe(true);

    const withinBudget = (distillation.tokensUsed as number) <= (distillation.tokenBudget as number);
    expect(
      withinBudget,
      driver.describe('RFC 0062 §B', 'a successful distillation MUST consume ≤ its tokenBudget'),
    ).toBe(true);

    // A budget too small to distill the corpus MUST fail closed, no partial archive.
    const starved = await invokeDistill({ memoryRef: 'conformance-distill', tokenBudget: 1 });
    if (starved === null) return;

    const failedClosed = starved.status >= 400 && starved.body.error === 'token_budget_exceeded';
    expect(
      failedClosed,
      driver.describe('RFC 0062 §B', 'an un-meetable budget MUST fail with token_budget_exceeded'),
    ).toBe(true);

    expect(
      starved.body.archiveChecksum,
      driver.describe('RFC 0062 §B', 'a token_budget_exceeded run MUST write no partial archive (atomic)'),
    ).toBeUndefined();
  });
});
|
|
@@ -31,6 +31,7 @@ import { describe, it, expect } from 'vitest';
|
|
|
31
31
|
import { driver } from '../lib/driver.js';
|
|
32
32
|
import { pollUntilTerminal } from '../lib/polling.js';
|
|
33
33
|
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
34
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
34
35
|
|
|
35
36
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
36
37
|
const NODE_ID = 'structured-call';
|
|
@@ -91,7 +92,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: advert
|
|
|
91
92
|
it('capabilities.envelopes.reliability.completion (when present) conforms to RFC 0033 §E', async () => {
|
|
92
93
|
const d = await readDiscovery();
|
|
93
94
|
if (d === null) return;
|
|
94
|
-
const completion = d
|
|
95
|
+
const completion = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion;
|
|
95
96
|
if (completion === undefined) return;
|
|
96
97
|
expect(
|
|
97
98
|
typeof completion.distinguishesTruncation,
|
|
@@ -114,7 +115,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
|
|
|
114
115
|
it('truncation: emits envelope.truncated + envelope.retry.attempted with reason: "truncation"', async () => {
|
|
115
116
|
if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
|
|
116
117
|
const d = await readDiscovery();
|
|
117
|
-
if (d
|
|
118
|
+
if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
|
|
118
119
|
const seed = await programMock([
|
|
119
120
|
{ stopReason: 'max_tokens', content: '{"partial' },
|
|
120
121
|
{ stopReason: 'end_turn', content: '{"valid":true}' },
|
|
@@ -139,7 +140,7 @@ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: trunca
|
|
|
139
140
|
it('truncation: retry budget strictly greater than initial (RFC 0033 §B truncationBudgetMultiplier)', async () => {
|
|
140
141
|
if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return;
|
|
141
142
|
const d = await readDiscovery();
|
|
142
|
-
if (d
|
|
143
|
+
if (capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability?.completion?.distinguishesTruncation !== true) return;
|
|
143
144
|
const seed = await programMock([
|
|
144
145
|
{ stopReason: 'max_tokens', content: '{"partial' },
|
|
145
146
|
{ stopReason: 'end_turn', content: '{"valid":true}' },
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
|
|
36
36
|
import { describe, it, expect } from 'vitest';
|
|
37
37
|
import { driver } from '../lib/driver.js';
|
|
38
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
38
39
|
|
|
39
40
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
40
41
|
|
|
@@ -97,8 +98,8 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: advertisement s
|
|
|
97
98
|
it('hosts advertising envelope reasoning + BYOK honor SR-1 carry-forward for the reasoning field', async () => {
|
|
98
99
|
const d = await readDiscovery();
|
|
99
100
|
if (d === null) return;
|
|
100
|
-
const reasoning = d
|
|
101
|
-
const secrets = d
|
|
101
|
+
const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning?.supported;
|
|
102
|
+
const secrets = capabilityFamily<{ supported?: unknown }>(d, 'secrets')?.supported;
|
|
102
103
|
if (reasoning !== true || secrets !== true) return; // soft-skip when either is absent
|
|
103
104
|
// The contract is invariant-based, not capability-flag-based — the
|
|
104
105
|
// advertisement-shape check here just confirms both surfaces are claimed.
|
|
@@ -257,7 +258,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
|
|
|
257
258
|
// RFC 0034 §B: gate on capabilities.observability.testSeams.otelScrape.
|
|
258
259
|
// Hosts that don't advertise it soft-skip; hosts that DO advertise MUST serve a valid response.
|
|
259
260
|
const d = await readDiscovery();
|
|
260
|
-
const otelScrapeAdvertised = d
|
|
261
|
+
const otelScrapeAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.otelScrape === true;
|
|
261
262
|
if (!otelScrapeAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
|
|
262
263
|
|
|
263
264
|
const r = await acceptForRun(
|
|
@@ -291,7 +292,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-proj
|
|
|
291
292
|
it("debug-bundle export MUST NOT include plaintext `secret:`-prefixed substrings from envelope.reasoning", async () => {
|
|
292
293
|
// RFC 0034 §B: gate on capabilities.observability.testSeams.debugBundleExport.
|
|
293
294
|
const d = await readDiscovery();
|
|
294
|
-
const debugBundleAdvertised = d
|
|
295
|
+
const debugBundleAdvertised = capabilityFamily<{ testSeams?: Record<string, unknown> }>(d, 'observability')?.testSeams?.debugBundleExport === true;
|
|
295
296
|
if (!debugBundleAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A
|
|
296
297
|
|
|
297
298
|
const r = await acceptForRun(
|
|
@@ -32,6 +32,7 @@ import { readFileSync } from 'node:fs';
|
|
|
32
32
|
import { join } from 'node:path';
|
|
33
33
|
import { driver } from '../lib/driver.js';
|
|
34
34
|
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
35
36
|
|
|
36
37
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
37
38
|
|
|
@@ -163,7 +164,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
|
|
|
163
164
|
it('capabilities.envelopes.reasoning (when present) conforms to RFC 0030 §C', async () => {
|
|
164
165
|
const d = await readDiscovery();
|
|
165
166
|
if (d === null) return;
|
|
166
|
-
const reasoning = d
|
|
167
|
+
const reasoning = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reasoning;
|
|
167
168
|
if (reasoning === undefined) return; // optional block; host MAY omit
|
|
168
169
|
expect(
|
|
169
170
|
typeof reasoning.supported,
|
|
@@ -180,7 +181,7 @@ describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes adv
|
|
|
180
181
|
it('capabilities.envelopes.tierOneSubsetCompliance (when present) conforms to RFC 0030 §B', async () => {
|
|
181
182
|
const d = await readDiscovery();
|
|
182
183
|
if (d === null) return;
|
|
183
|
-
const compliance = d
|
|
184
|
+
const compliance = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.tierOneSubsetCompliance;
|
|
184
185
|
if (compliance === undefined) return; // optional; host MAY omit
|
|
185
186
|
expect(
|
|
186
187
|
['strict', 'warn', 'off'],
|
|
@@ -64,7 +64,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: seam emission (RFC 0032 §B.
|
|
|
64
64
|
it('accepts a well-formed `envelope.refusal` payload + writes it to the test event log', async () => {
|
|
65
65
|
const d = await readDiscovery();
|
|
66
66
|
if (d === null) return;
|
|
67
|
-
const reliability = d
|
|
67
|
+
const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
|
|
68
68
|
if (!reliability || reliability.supported !== true) return;
|
|
69
69
|
if (!Array.isArray(reliability.events) || !(reliability.events as unknown[]).includes('envelope.refusal')) return;
|
|
70
70
|
|
|
@@ -154,7 +154,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
|
|
|
154
154
|
it('capabilities.envelopes.reliability (when supported: true with non-empty events[]) MUST list both MUST-tier events', async () => {
|
|
155
155
|
const d = await readDiscovery();
|
|
156
156
|
if (d === null) return;
|
|
157
|
-
const reliability = d
|
|
157
|
+
const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
|
|
158
158
|
if (!reliability || reliability.supported !== true) return;
|
|
159
159
|
// Hosts running the legacy undifferentiated retry loop advertise
|
|
160
160
|
// `events: []` (per the OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=false
|
|
@@ -190,6 +190,7 @@ describe.skipIf(HTTP_SKIP)('envelope-refusal-shape: advertisement contract (RFC
|
|
|
190
190
|
|
|
191
191
|
import { pollUntilTerminal } from '../lib/polling.js';
|
|
192
192
|
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
193
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
193
194
|
|
|
194
195
|
const E2E_FIXTURE = 'conformance-envelope-refusal';
|
|
195
196
|
const E2E_NODE_ID = 'structured-call';
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* envelope-rendering-hint — RFC 0055 §B `meta.rendering` shape conformance.
|
|
3
|
+
*
|
|
4
|
+
* Server-free schema assertions that the optional rendering hint is exactly
|
|
5
|
+
* that — optional and additive:
|
|
6
|
+
* 1. An envelope WITH a well-formed `meta.rendering` validates.
|
|
7
|
+
* 2. An envelope WITHOUT `meta.rendering` still validates (proves the
|
|
8
|
+
* property is optional — existing envelopes are unaffected).
|
|
9
|
+
* 3. An unknown `display` value is rejected by the closed enum (the
|
|
10
|
+
* vocabulary is fixed; consumers fall back, producers don't invent).
|
|
11
|
+
* 4. An unknown property under `rendering` is rejected
|
|
12
|
+
* (additionalProperties:false on the hint object).
|
|
13
|
+
*
|
|
14
|
+
* Always runs (pure on-disk Ajv2020 validation).
|
|
15
|
+
*
|
|
16
|
+
* @see RFCS/0055-multimodal-envelope-variants-and-rendering-hints.md §B
|
|
17
|
+
* @see spec/v1/ai-envelope.md §"Rendering hints"
|
|
18
|
+
* @see schemas/ai-envelope.schema.json ($defs.EnvelopeMeta.rendering)
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest';
|
|
22
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
23
|
+
import addFormats from 'ajv-formats';
|
|
24
|
+
import { readFileSync } from 'node:fs';
|
|
25
|
+
import { join } from 'node:path';
|
|
26
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
27
|
+
|
|
28
|
+
/**
 * Compile the on-disk `ai-envelope.schema.json` into an Ajv2020 validate
 * function.
 *
 * The Ajv instance runs with `strict: false` and `allErrors: true`, and has
 * the `ajv-formats` plugin registered before compilation.
 *
 * @returns the compiled validator for the AI envelope schema.
 */
function compileEnvelope(): ReturnType<Ajv2020['compile']> {
  const schemaPath = join(SCHEMAS_DIR, 'ai-envelope.schema.json');
  const schema = JSON.parse(readFileSync(schemaPath, 'utf8')) as Record<string, unknown>;

  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  return ajv.compile(schema);
}
|
|
36
|
+
|
|
37
|
+
// Baseline `error` envelope with no `meta.rendering`. The suite validates it
// as-is for the "optional" case and spreads it, adding a `rendering` object
// under `meta`, for every other case.
const baseEnvelope = {
  type: 'error',
  schemaVersion: 1,
  envelopeId: 'env-rendering-1',
  correlationId: 'run-1:node-2:turn-0:abc123',
  payload: { code: 'x', message: 'y' },
  meta: {
    source: 'ai-generation' as const,
    ts: '2026-05-25T10:00:00Z',
  },
};
|
|
45
|
+
|
|
46
|
+
describe('envelope-rendering-hint: meta.rendering shape (RFC 0055 §B)', () => {
  // One compiled validator is shared by every case in this suite.
  const validate = compileEnvelope();

  // Common prefix of every assertion message in this suite.
  const SPEC_REF = 'ai-envelope.md §"Rendering hints": ';

  /** Copy of the base envelope with `rendering` attached under `meta`. */
  const withRendering = (rendering: Record<string, unknown>) => ({
    ...baseEnvelope,
    meta: { ...baseEnvelope.meta, rendering },
  });

  it('accepts an envelope carrying a well-formed meta.rendering hint', () => {
    const accepted = validate(
      withRendering({ display: 'image', mimeType: 'image/png', alt: 'Q3 revenue chart', title: 'Revenue' }),
    );
    expect(
      accepted,
      `${SPEC_REF}meta.rendering MUST validate; errors: ${JSON.stringify(validate.errors)}`,
    ).toBe(true);
  });

  it('accepts an envelope with NO meta.rendering (proves the property is optional)', () => {
    const accepted = validate(baseEnvelope);
    expect(
      accepted,
      `${SPEC_REF}meta.rendering MUST be optional — envelopes omitting it still validate`,
    ).toBe(true);
  });

  it('rejects an unknown display value (closed enum)', () => {
    const accepted = validate(withRendering({ display: 'hologram' }));
    expect(
      accepted,
      `${SPEC_REF}display is a closed enum — unknown families MUST be rejected`,
    ).toBe(false);
  });

  it('rejects an unknown property under rendering (additionalProperties:false)', () => {
    const accepted = validate(withRendering({ display: 'markdown', wat: true }));
    expect(
      accepted,
      `${SPEC_REF}rendering is additionalProperties:false`,
    ).toBe(false);
  });
});
|
|
@@ -59,7 +59,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
|
|
|
59
59
|
it('capabilities.envelopes.reliability (when present) conforms to RFC 0032 §C', async () => {
|
|
60
60
|
const d = await readDiscovery();
|
|
61
61
|
if (d === null) return;
|
|
62
|
-
const reliability = d
|
|
62
|
+
const reliability = capabilityFamily<{ reasoning?: Record<string, unknown>; tierOneSubsetCompliance?: unknown; reliability?: { completion?: Record<string, unknown> } & Record<string, unknown> }>(d, 'envelopes')?.reliability;
|
|
63
63
|
if (reliability === undefined) return;
|
|
64
64
|
expect(typeof reliability.supported, 'reliability.supported MUST be boolean').toBe('boolean');
|
|
65
65
|
if (reliability.events !== undefined) {
|
|
@@ -114,6 +114,7 @@ describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0
|
|
|
114
114
|
|
|
115
115
|
import { pollUntilTerminal } from '../lib/polling.js';
|
|
116
116
|
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
117
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
117
118
|
|
|
118
119
|
const FIXTURE = 'conformance-envelope-retry-attempted';
|
|
119
120
|
const NODE_ID = 'structured-call';
|
|
@@ -34,6 +34,7 @@ import { readFileSync, existsSync } from 'node:fs';
|
|
|
34
34
|
import { join } from 'node:path';
|
|
35
35
|
import { driver } from '../lib/driver.js';
|
|
36
36
|
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
37
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
37
38
|
|
|
38
39
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
39
40
|
|
|
@@ -179,9 +180,9 @@ describe.skipIf(HTTP_SKIP)('envelope-tier-one-subset-static (RFC 0030 §B)', ()
|
|
|
179
180
|
it('hosts advertising tierOneSubsetCompliance: "strict" have payload schemas that satisfy the Tier-1 intersection', async () => {
|
|
180
181
|
const d = await readDiscovery();
|
|
181
182
|
if (d === null) return; // host unreachable; soft-skip
|
|
182
|
-
const compliance = d
|
|
183
|
+
const compliance = capabilityFamily(d, 'envelopes')?.tierOneSubsetCompliance;
|
|
183
184
|
if (compliance !== 'strict') return; // gated on "strict" only
|
|
184
|
-
const advertised = (d
|
|
185
|
+
const advertised = (capabilityFamily(d, 'supportedEnvelopes') ?? []) as string[];
|
|
185
186
|
if (advertised.length === 0) return;
|
|
186
187
|
|
|
187
188
|
const violationsByKind: Record<string, Violation[]> = {};
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* exec-class tools MUST NOT be protocol-tier (RFC 0069, `Draft`).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free structural assertion over the spec corpus. Verifies
|
|
5
|
+
* the SECURITY invariant `exec-must-not-be-protocol-tier`: the protocol
|
|
6
|
+
* defines NO arbitrary-command (`exec`-class) primitive under a
|
|
7
|
+
* protocol-owned namespace (`core.*` / `openwop.*`), NO exec capability
|
|
8
|
+
* flag in `capabilities.schema.json`, and NO exec-class entry in the
|
|
9
|
+
* canonical RunEventType vocabulary.
|
|
10
|
+
*
|
|
11
|
+
* This guards against an independent implementer reading the protocol's
|
|
12
|
+
* silence as permission to ship a `core.exec` RCE primitive other hosts
|
|
13
|
+
* would treat as canonical. The assertion is against the protocol's OWN
|
|
14
|
+
* surface — it must hold for every release of the corpus regardless of
|
|
15
|
+
* which host runs it. A `vendor.acme.exec` / `x-host-acme-exec` identifier
|
|
16
|
+
* is allowed (host-extension namespace); the check fires only on
|
|
17
|
+
* protocol-owned namespaces.
|
|
18
|
+
*
|
|
19
|
+
* Spec references:
|
|
20
|
+
* - https://github.com/openwop/openwop/blob/main/spec/v1/host-extensions.md §"exec-class tools"
|
|
21
|
+
* - https://github.com/openwop/openwop/blob/main/SECURITY/threat-model-prompt-injection.md §"exec tools"
|
|
22
|
+
* - https://github.com/openwop/openwop/blob/main/RFCS/0069-exec-class-tool-host-extension-safety-contract.md
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from 'vitest';
|
|
26
|
+
import { readFileSync, readdirSync } from 'node:fs';
|
|
27
|
+
import { join } from 'node:path';
|
|
28
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
29
|
+
|
|
30
|
+
/**
 * Builds an assertion message of the form "<spec reference> — <requirement>".
 *
 * Server-free stand-in for driver.describe's "spec — requirement" shape, usable
 * without OPENWOP_BASE_URL being set.
 *
 * @param specRef     Where the requirement is stated (document + section).
 * @param requirement The normative statement being asserted.
 * @returns The two parts joined by an em dash surrounded by single spaces.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
|
|
32
|
+
|
|
33
|
+
/**
 * Closed denylist of exec-class identifier tokens.
 *
 * Membership is tested against a WHOLE segment (the final dot-separated
 * part of a protocol-owned id, or a whole property / enum name), never as
 * a substring: `execution` in `multi-agent-execution` is not flagged, and
 * neither is `subprocess` embedded in a longer, unrelated word.
 *
 * Entries are stored lower-cased with no hyphens, matching the
 * normalization callers apply before the lookup.
 */
const EXEC_SEGMENTS = new Set(
  ['exec', 'shell', 'spawn', 'runcommand', 'runscript', 'subprocess', 'systemcall', 'eval'],
);
|
|
49
|
+
|
|
50
|
+
/**
 * Identifier prefixes reserved for the protocol itself, per
 * host-extensions.md §"Canonical prefixes". Each entry keeps its trailing
 * dot so a `startsWith` test only matches a complete namespace segment.
 */
const PROTOCOL_PREFIXES = ['core', 'openwop'].map((namespace) => `${namespace}.`);
|
|
52
|
+
|
|
53
|
+
/**
 * Extracts every quoted `core.*` / `openwop.*` identifier from a corpus file.
 *
 * An identifier is recognised when it is wrapped in `"`, `'` or a backtick,
 * starts with `core.` or `openwop.`, and continues with letters, digits,
 * `_`, `.` or `-`.
 *
 * @param text Raw file contents to scan.
 * @returns The identifiers in order of appearance, with the surrounding
 *          quote characters removed; duplicates are kept.
 */
function protocolOwnedIds(text: string): string[] {
  // With the `g` flag, `match` yields every full match (or null when there are none).
  const quotedIds = text.match(/["'`](core|openwop)\.[a-zA-Z0-9_.-]+["'`]/g) ?? [];
  // Drop the one-character opening and closing quote from each hit.
  return quotedIds.map((quoted) => quoted.slice(1, -1));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Reports whether `id` is a protocol-owned identifier whose final
 * dot-separated segment is an exec-class token.
 *
 * Identifiers that do not start with one of PROTOCOL_PREFIXES are never
 * flagged, so host-extension ids such as `vendor.acme.exec` pass. Only the
 * LAST segment is inspected, and it must equal a denylist entry exactly —
 * substrings do not count.
 *
 * @param id Candidate identifier, e.g. `core.exec` or `openwop.run-command`.
 * @returns `true` when the id is protocol-owned and exec-class.
 */
function isExecClass(id: string): boolean {
  if (!PROTOCOL_PREFIXES.some((prefix) => id.startsWith(prefix))) return false;
  // Fold case and strip BOTH `-` and `_` so kebab-case, snake_case and
  // camelCase spellings (`run-command`, `run_command`, `runCommand`) all
  // collapse to the single denylist token `runcommand`.
  const lastSegment = id.split('.').pop()?.toLowerCase().replace(/[-_]/g, '') ?? '';
  return EXEC_SEGMENTS.has(lastSegment);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Server-free structural checks that the protocol's own schema corpus
 * defines no exec-class primitive: no `core.*` / `openwop.*` identifier,
 * no capabilities property name, and no RunEventType enum value.
 */
describe('exec-not-protocol-tier: no exec-class primitive in the protocol corpus (RFC 0069, server-free)', () => {
  /**
   * Lower-cases a name and strips `-` / `_` separators so `run-command`,
   * `run_command` and `runCommand` all compare equal to the denylist token
   * `runcommand`.
   */
  const normalizeSegment = (segment: string): string => segment.toLowerCase().replace(/[-_]/g, '');

  /**
   * Lists every `*.schema.json` entry under `dir`, descending into
   * sub-directories. Returned paths are relative to the directory the
   * walk started from.
   */
  const listSchemaFiles = (dir: string, prefix = ''): string[] => {
    const found: string[] = [];
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const relPath = prefix === '' ? entry.name : join(prefix, entry.name);
      if (entry.isDirectory()) {
        found.push(...listSchemaFiles(join(dir, entry.name), relPath));
      } else if (entry.name.endsWith('.schema.json')) {
        found.push(relPath);
      }
    }
    return found;
  };

  it('no protocol-owned (core.* / openwop.*) identifier denotes arbitrary command execution', () => {
    // Walk recursively: the corpus keeps some schemas in sub-directories
    // (e.g. `envelopes/`), which a flat directory listing would never scan.
    // NOTE(review): assumes SCHEMAS_DIR is the package's `schemas/` root — confirm in ../lib/paths.
    const offenders: string[] = [];
    for (const f of listSchemaFiles(SCHEMAS_DIR)) {
      const text = readFileSync(join(SCHEMAS_DIR, f), 'utf8');
      for (const id of protocolOwnedIds(text)) {
        if (isExecClass(id)) offenders.push(`${f}: ${id}`);
      }
    }
    expect(
      offenders,
      why(
        'host-extensions.md §exec-class tools',
        'the protocol corpus MUST NOT define a core.*/openwop.* exec-class identifier',
      ),
    ).toEqual([]);
  });

  it('no capabilities.schema.json property name denotes arbitrary command execution', () => {
    const caps = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'capabilities.schema.json'), 'utf8')) as Record<string, unknown>;
    const offenders: string[] = [];
    // Visits a schema node's `properties` map, flags exec-class key names,
    // then recurses into each property's own schema.
    // NOTE(review): only `properties` is followed; sub-schemas reachable
    // solely through `items`, `$defs`, `oneOf`, etc. are not visited.
    const walkProps = (node: unknown, path: string): void => {
      if (!node || typeof node !== 'object') return;
      const props = (node as Record<string, unknown>).properties as Record<string, unknown> | undefined;
      if (!props) return;
      for (const key of Object.keys(props)) {
        const childPath = `${path}.${key}`;
        if (EXEC_SEGMENTS.has(normalizeSegment(key))) offenders.push(childPath);
        walkProps(props[key], childPath);
      }
    };
    walkProps(caps, 'capabilities');
    expect(
      offenders,
      why('host-extensions.md §exec-class tools', 'capabilities.schema.json MUST NOT declare an exec-class capability flag'),
    ).toEqual([]);
  });

  it('the canonical RunEventType vocabulary contains no exec-class event', () => {
    const runEvent = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'run-event.schema.json'), 'utf8')) as {
      $defs?: { RunEventType?: { enum?: string[] } };
    };
    // NOTE(review): a missing `$defs.RunEventType.enum` yields an empty list
    // and the assertion below passes vacuously — confirm the schema shape.
    const enumVals = runEvent.$defs?.RunEventType?.enum ?? [];
    // Only the final dot-separated segment of each event type is compared.
    const offenders = enumVals.filter((v) => EXEC_SEGMENTS.has(normalizeSegment(v.split('.').pop() ?? '')));
    expect(
      offenders,
      why('host-extensions.md §exec-class tools', 'a RunEventType MUST NOT denote arbitrary command execution'),
    ).toEqual([]);
  });

  it('positive control: a vendor / x-host exec identifier is allowed (host-extension namespace)', () => {
    expect(isExecClass('vendor.acme.exec')).toBe(false);
    expect(isExecClass('x-host-acme-exec')).toBe(false);
    expect(isExecClass('private.host.shell')).toBe(false);
    // And the denylist actually fires on a protocol-owned id:
    expect(isExecClass('core.exec')).toBe(true);
    expect(isExecClass('openwop.shell')).toBe(true);
    // Negative control: a benign substring is not flagged.
    expect(isExecClass('core.workflowChain.event')).toBe(false);
  });
});
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
import { describe, it, expect } from 'vitest';
|
|
30
30
|
import { driver } from '../lib/driver.js';
|
|
31
31
|
import { experimentalGate } from '../lib/behavior-gate.js';
|
|
32
|
+
import { capabilityFamily } from '../lib/discovery-capabilities.js';
|
|
32
33
|
|
|
33
34
|
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
34
35
|
|
|
@@ -57,7 +58,7 @@ async function readDiscovery(): Promise<DiscoveryDoc | null> {
|
|
|
57
58
|
describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC 0042 §A)', () => {
|
|
58
59
|
it('multiAgent.executionModel.tier (when present) MUST be one of {stable, experimental}', async (ctx) => {
|
|
59
60
|
const d = await readDiscovery();
|
|
60
|
-
const em = d
|
|
61
|
+
const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
|
|
61
62
|
if (em === undefined) {
|
|
62
63
|
ctx.skip();
|
|
63
64
|
return;
|
|
@@ -77,7 +78,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
|
|
|
77
78
|
|
|
78
79
|
it('when tier === "experimental", experimentalUntil MUST be present + valid date', async (ctx) => {
|
|
79
80
|
const d = await readDiscovery();
|
|
80
|
-
const em = d
|
|
81
|
+
const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
|
|
81
82
|
if (em === undefined || em.tier !== 'experimental') {
|
|
82
83
|
ctx.skip();
|
|
83
84
|
return;
|
|
@@ -112,7 +113,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
|
|
|
112
113
|
|
|
113
114
|
it('experimentalUntil MUST be ≤ 365 days in the future (sunset bound)', async (ctx) => {
|
|
114
115
|
const d = await readDiscovery();
|
|
115
|
-
const em = d
|
|
116
|
+
const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
|
|
116
117
|
if (em === undefined || em.tier !== 'experimental') {
|
|
117
118
|
ctx.skip();
|
|
118
119
|
return;
|
|
@@ -135,7 +136,7 @@ describe.skipIf(HTTP_SKIP)('experimental-tier-shape: §A schema discipline (RFC
|
|
|
135
136
|
|
|
136
137
|
it('sunset detection: experimentalUntil in the past is non-conformant', async (ctx) => {
|
|
137
138
|
const d = await readDiscovery();
|
|
138
|
-
const em = d
|
|
139
|
+
const em = capabilityFamily<{ executionModel?: { [k: string]: unknown; crossHostCausation?: Record<string, unknown>; replayDeterminism?: Record<string, unknown> } }>(d, 'multiAgent')?.executionModel;
|
|
139
140
|
if (em === undefined || em.tier !== 'experimental') {
|
|
140
141
|
ctx.skip();
|
|
141
142
|
return;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-capability-shape — RFC 0056 §A. The `capabilities.feedback`
|
|
3
|
+
* advertisement block is either absent or a well-formed object.
|
|
4
|
+
*
|
|
5
|
+
* Status: ACTIVE (advertisement-shape; always runs). Behavioral coverage
|
|
6
|
+
* lives in the sibling `feedback-*.test.ts` scenarios, gated on
|
|
7
|
+
* `capabilities.feedback.supported`.
|
|
8
|
+
*
|
|
9
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §A
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, it, expect } from 'vitest';
|
|
13
|
+
import { driver } from '../lib/driver.js';
|
|
14
|
+
import { readFeedbackCap } from '../lib/feedback.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Shape check for the `capabilities.feedback` advertisement: the block may
 * be missing entirely; when it is there, `supported` must be a boolean and
 * any listed targets / signals must come from the known vocabularies.
 */
describe('feedback-capability-shape: advertisement (RFC 0056 §A)', () => {
  it('capabilities.feedback is absent or a well-formed object', async () => {
    const feedback = await readFeedbackCap();
    // A null capability means the host does not advertise feedback, which is valid.
    if (feedback === null) return;

    const supportedType = typeof feedback.supported;
    expect(
      supportedType,
      driver.describe('capabilities.schema.json §feedback', 'capabilities.feedback.supported MUST be a boolean when present'),
    ).toBe('boolean');

    // `targets` and `signals` are only validated when they are arrays.
    const advertisedTargets = Array.isArray(feedback.targets) ? feedback.targets : [];
    for (const target of advertisedTargets) {
      expect(['run', 'event', 'node']).toContain(target);
    }

    const advertisedSignals = Array.isArray(feedback.signals) ? feedback.signals : [];
    for (const signal of advertisedSignals) {
      expect(['rating', 'correction', 'label', 'flag']).toContain(signal);
    }
  });
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-correction-redaction — RFC 0056 §E + SECURITY/invariants.yaml
|
|
3
|
+
* `annotation-content-redaction`. `signal.correction` and `note` are
|
|
4
|
+
* untrusted user content; secret-shaped material MUST be redacted under
|
|
5
|
+
* SR-1 before persistence/listing/export.
|
|
6
|
+
*
|
|
7
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §E
|
|
8
|
+
* @see SECURITY/invariants.yaml — annotation-content-redaction
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { describe, it, expect } from 'vitest';
|
|
12
|
+
import { driver } from '../lib/driver.js';
|
|
13
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
14
|
+
|
|
15
|
+
/** Secret-shaped marker string; it must never appear verbatim in a listing. */
const CANARY = 'sk-canary-rfc0056-do-not-leak-abc123';

/**
 * Posts an annotation whose `correction` and `note` both carry the canary,
 * then verifies the run's annotation listing does not contain it.
 */
describe('feedback-correction-redaction (RFC 0056 §E)', () => {
  it('secret-shaped material in correction/note is redacted in the annotation list', async () => {
    // Only exercised when the host advertises feedback support.
    const feedback = await readFeedbackCap();
    const advertised = feedback?.supported === true;
    if (!advertised) return;

    const runId = await seedRun('feedback-redact');
    if (!runId) return;

    const created = await driver.post(`/v1/runs/${runId}/annotations`, {
      signal: { kind: 'correction', correction: `please use ${CANARY}` },
      note: CANARY,
    });
    // 404 / 501 are treated as "endpoint not implemented": return without asserting.
    const notImplemented = created.status === 501 || created.status === 404;
    if (notImplemented) return;
    expect(created.status).toBe(201);

    const listed = await driver.get(`/v1/runs/${runId}/annotations`);
    // Search the serialized body so the canary is caught in any field.
    // NOTE(review): a response without a JSON body serializes to "{}" and passes.
    const serialized = JSON.stringify(listed.json ?? {});
    expect(
      serialized.includes(CANARY),
      driver.describe('RFC 0056 §E', 'secret-shaped material MUST be redacted before persistence/listing (SR-1)'),
    ).toBe(false);
  });
});
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-cross-tenant-isolation — RFC 0056 §E + SECURITY/invariants.yaml
|
|
3
|
+
* `annotation-cross-tenant-isolation`. A run's annotation list MUST contain
|
|
4
|
+
* only that run's annotations (mirrors CTI-1).
|
|
5
|
+
*
|
|
6
|
+
* The run-scoped check runs against any feedback host. The full cross-tenant
|
|
7
|
+
* proof (tenant B cannot read tenant A's run) needs a multi-tenant auth seam
|
|
8
|
+
* not yet standardized for this surface — that half soft-skips, mirroring
|
|
9
|
+
* `kv-cross-tenant-isolation`'s seam gate.
|
|
10
|
+
*
|
|
11
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §E
|
|
12
|
+
* @see SECURITY/invariants.yaml — annotation-cross-tenant-isolation
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { describe, it, expect } from 'vitest';
|
|
16
|
+
import { driver } from '../lib/driver.js';
|
|
17
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Run-scoped isolation check: after posting one annotation to a freshly
 * seeded run, every entry in that run's annotation list must target the
 * same run id.
 */
describe('feedback-cross-tenant-isolation (RFC 0056 §E)', () => {
  it("a run's annotation list contains only that run's annotations", async () => {
    // Only exercised when the host advertises feedback support.
    const feedback = await readFeedbackCap();
    const advertised = feedback?.supported === true;
    if (!advertised) return;

    const runId = await seedRun('feedback-cti');
    if (!runId) return;

    const created = await driver.post(`/v1/runs/${runId}/annotations`, {
      signal: { kind: 'label', label: 'cti-probe' },
    });
    // 404 / 501 are treated as "endpoint not implemented": return without asserting.
    const notImplemented = created.status === 501 || created.status === 404;
    if (notImplemented) return;
    expect(created.status).toBe(201);

    const listed = await driver.get(`/v1/runs/${runId}/annotations`);
    const body = listed.json as { annotations?: Array<{ target?: { runId?: string } }> } | undefined;
    // NOTE(review): a missing `annotations` array is treated as empty, so the loop asserts nothing.
    const annotations = body?.annotations ?? [];
    annotations.forEach((annotation) => {
      expect(
        annotation.target?.runId,
        driver.describe('RFC 0056 §E', "an annotation list MUST contain only this run's annotations (CTI-1)"),
      ).toBe(runId);
    });
  });
});
|