@openwop/openwop-conformance 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +2 -2
- package/api/asyncapi.yaml +25 -4
- package/api/openapi.yaml +371 -0
- package/coverage.md +31 -4
- package/fixtures/conformance-phase4-nondet-tool.json +53 -0
- package/fixtures/conformance-phase4-replay-divergence.json +40 -0
- package/fixtures.md +5 -3
- package/package.json +1 -1
- package/schemas/README.md +4 -0
- package/schemas/annotation-create.schema.json +37 -0
- package/schemas/annotation.schema.json +56 -0
- package/schemas/capabilities.schema.json +191 -3
- package/schemas/credential-reference.schema.json +21 -0
- package/schemas/node-pack-manifest.schema.json +112 -1
- package/schemas/run-diff-response.schema.json +64 -0
- package/schemas/run-event-payloads.schema.json +104 -2
- package/schemas/run-event.schema.json +8 -1
- package/schemas/run-snapshot.schema.json +11 -0
- package/src/lib/behavior-gate.ts +51 -0
- package/src/lib/driver.ts +13 -1
- package/src/lib/feedback.ts +31 -0
- package/src/lib/saml-idp.ts +179 -0
- package/src/scenarios/approval-gate-events.test.ts +61 -0
- package/src/scenarios/approval-gate-flow.test.ts +68 -0
- package/src/scenarios/auth-saml-profile.test.ts +119 -0
- package/src/scenarios/auth-scim-profile.test.ts +65 -0
- package/src/scenarios/authorization-fail-closed.test.ts +80 -0
- package/src/scenarios/authorization-roles-shape.test.ts +83 -0
- package/src/scenarios/connector-manifest-validity.test.ts +142 -0
- package/src/scenarios/credential-payload-redaction.test.ts +93 -0
- package/src/scenarios/credentials-capability-shape.test.ts +90 -0
- package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
- package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
- package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
- package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
- package/src/scenarios/experimental-tier-shape.test.ts +192 -0
- package/src/scenarios/feedback-capability-shape.test.ts +35 -0
- package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
- package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
- package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
- package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
- package/src/scenarios/feedback-record-and-list.test.ts +32 -0
- package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
- package/src/scenarios/identity-owner-shape.test.ts +64 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
- package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
- package/src/scenarios/oauth-capability-shape.test.ts +97 -0
- package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
- package/src/scenarios/pack-registry-isolation.test.ts +108 -0
- package/src/scenarios/pack-registry-publish.test.ts +1 -1
- package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
- package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
- package/src/scenarios/redaction.test.ts +4 -1
- package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
- package/src/scenarios/run-diff.test.ts +143 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
- package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
- package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
- package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
- package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
- package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
- package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
- package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
- package/src/scenarios/spec-corpus-validity.test.ts +6 -3
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-correction-redaction — RFC 0056 §E + SECURITY/invariants.yaml
|
|
3
|
+
* `annotation-content-redaction`. `signal.correction` and `note` are
|
|
4
|
+
* untrusted user content; secret-shaped material MUST be redacted under
|
|
5
|
+
* SR-1 before persistence/listing/export.
|
|
6
|
+
*
|
|
7
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §E
|
|
8
|
+
* @see SECURITY/invariants.yaml — annotation-content-redaction
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { describe, it, expect } from 'vitest';
|
|
12
|
+
import { driver } from '../lib/driver.js';
|
|
13
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
14
|
+
|
|
15
|
+
/** Secret-shaped canary value; it must never appear verbatim in an annotation listing. */
const CANARY = 'sk-canary-rfc0056-do-not-leak-abc123';

describe('feedback-correction-redaction (RFC 0056 §E)', () => {
  /**
   * Posts an annotation whose `signal.correction` and `note` both carry the
   * canary, then lists the run's annotations and asserts the canary does not
   * appear anywhere in the serialized listing.
   *
   * Soft-skips (returns without asserting) when feedback is not advertised,
   * when no run could be seeded, or when the POST answers 501/404.
   */
  it('secret-shaped material in correction/note is redacted in the annotation list', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;

    const runId = await seedRun('feedback-redact');
    if (!runId) return;

    const post = await driver.post(`/v1/runs/${runId}/annotations`, {
      signal: { kind: 'correction', correction: `please use ${CANARY}` },
      note: CANARY,
    });
    if (post.status === 501 || post.status === 404) return;
    expect(post.status).toBe(201);

    const list = await driver.get(`/v1/runs/${runId}/annotations`);
    // Without this guard a failed GET (no body) would serialize to "{}" and
    // the canary check below would pass without proving any redaction.
    expect(list.status).toBe(200);

    const serialized = JSON.stringify(list.json ?? {});
    expect(
      serialized.includes(CANARY),
      driver.describe('RFC 0056 §E', 'secret-shaped material MUST be redacted before persistence/listing (SR-1)'),
    ).toBe(false);
  });
});
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-cross-tenant-isolation — RFC 0056 §E + SECURITY/invariants.yaml
|
|
3
|
+
* `annotation-cross-tenant-isolation`. A run's annotation list MUST contain
|
|
4
|
+
* only that run's annotations (mirrors CTI-1).
|
|
5
|
+
*
|
|
6
|
+
* The run-scoped check runs against any feedback host. The full cross-tenant
|
|
7
|
+
* proof (tenant B cannot read tenant A's run) needs a multi-tenant auth seam
|
|
8
|
+
* not yet standardized for this surface — that half soft-skips, mirroring
|
|
9
|
+
* `kv-cross-tenant-isolation`'s seam gate.
|
|
10
|
+
*
|
|
11
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §E
|
|
12
|
+
* @see SECURITY/invariants.yaml — annotation-cross-tenant-isolation
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { describe, it, expect } from 'vitest';
|
|
16
|
+
import { driver } from '../lib/driver.js';
|
|
17
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
18
|
+
|
|
19
|
+
describe('feedback-cross-tenant-isolation (RFC 0056 §E)', () => {
  /**
   * Records one annotation on a freshly seeded run, lists the run's
   * annotations, and asserts every listed annotation targets that same run.
   *
   * Soft-skips (returns without asserting) when feedback is not advertised,
   * when no run could be seeded, or when the POST answers 501/404.
   */
  it('a run\'s annotation list contains only that run\'s annotations', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;

    const runId = await seedRun('feedback-cti');
    if (!runId) return;

    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'label', label: 'cti-probe' } });
    if (post.status === 501 || post.status === 404) return;
    expect(post.status).toBe(201);

    const list = await driver.get(`/v1/runs/${runId}/annotations`);
    // A failed GET would leave the list empty and the loop below would never
    // run, so the scenario would pass without checking anything.
    expect(list.status).toBe(200);

    const ann = (list.json as { annotations?: Array<{ target?: { runId?: string } }> } | undefined)?.annotations ?? [];
    // The POST above returned 201, so at least that annotation must be listed.
    expect(
      ann.length,
      'the annotation just recorded on this run must be listed; an empty list makes the isolation check vacuous',
    ).toBeGreaterThan(0);

    for (const a of ann) {
      expect(
        a.target?.runId,
        driver.describe('RFC 0056 §E', 'an annotation list MUST contain only this run\'s annotations (CTI-1)'),
      ).toBe(runId);
    }
  });
});
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-fork-not-copied — RFC 0056 §D. Annotations are a per-run
|
|
3
|
+
* side-store, NOT replayable event-log entries — so a fork of an annotated
|
|
4
|
+
* run starts with ZERO annotations. Gated on feedback + fork; soft-skips
|
|
5
|
+
* when either is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §D
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect } from 'vitest';
|
|
11
|
+
import { driver } from '../lib/driver.js';
|
|
12
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
13
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
14
|
+
|
|
15
|
+
describe('feedback-fork-not-copied (RFC 0056 §D)', () => {
  /**
   * Annotates a seeded run, waits for it to reach a terminal state, forks it,
   * and asserts the fork's annotation list is empty.
   *
   * Soft-skips (returns without asserting) when feedback is not advertised,
   * no run could be seeded, the POST answers 501/404, the run does not reach
   * a terminal state within the timeout, or the fork request does not succeed.
   */
  it('a fork of an annotated run starts with zero annotations', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;

    const runId = await seedRun('feedback-fork');
    if (!runId) return;

    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'flag' } });
    if (post.status === 501 || post.status === 404) return;
    expect(post.status).toBe(201);

    try {
      await pollUntilTerminal(runId, { timeoutMs: 10_000 });
    } catch {
      return; // run didn't reach terminal in time — soft-skip
    }

    const fork = await driver.post(`/v1/runs/${runId}:fork`, { fromSeq: 0, mode: 'branch' });
    if (fork.status !== 200 && fork.status !== 201) return; // fork unsupported — soft-skip
    const forkId = (fork.json as { runId?: string } | undefined)?.runId;
    if (!forkId) return;

    const list = await driver.get(`/v1/runs/${forkId}/annotations`);
    // An error response has no `annotations` array; without this guard it
    // would be counted as "zero annotations" and pass vacuously.
    expect(list.status).toBe(200);

    const ann = (list.json as { annotations?: unknown[] } | undefined)?.annotations ?? [];
    expect(
      ann.length,
      driver.describe('RFC 0056 §D', 'annotations are a side-store and MUST NOT be copied into a fork'),
    ).toBe(0);
  });
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-on-terminal-run — RFC 0056 §C. An annotation on a COMPLETED run
|
|
3
|
+
* is accepted (proves feedback is non-blocking and post-hoc). Gated on
|
|
4
|
+
* `capabilities.feedback.supported`; soft-skips when a run can't be seeded.
|
|
5
|
+
*
|
|
6
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §C
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect } from 'vitest';
|
|
10
|
+
import { driver } from '../lib/driver.js';
|
|
11
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
12
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
13
|
+
|
|
14
|
+
describe('feedback-on-terminal-run (RFC 0056 §C)', () => {
  /**
   * Seeds a run, waits until it is terminal, then posts an annotation and
   * expects the host to answer 201. Returns early without asserting when any
   * precondition (capability, seed, terminal state, route) is not met.
   */
  it('annotating a terminal run is accepted', async () => {
    const feedbackCap = await readFeedbackCap();
    if (feedbackCap?.supported !== true) return;

    const seededRunId = await seedRun('feedback-terminal');
    if (!seededRunId) return;

    let reachedTerminal = true;
    try {
      await pollUntilTerminal(seededRunId, { timeoutMs: 10_000 });
    } catch {
      // run didn't reach terminal in time — soft-skip
      reachedTerminal = false;
    }
    if (!reachedTerminal) return;

    const annotationBody = { signal: { kind: 'flag' }, note: 'post-hoc review' };
    const response = await driver.post(`/v1/runs/${seededRunId}/annotations`, annotationBody);
    const routeUnavailable = response.status === 501 || response.status === 404;
    if (routeUnavailable) return;

    expect(
      response.status,
      driver.describe('RFC 0056 §C', 'a host MUST accept an annotation on a terminal run'),
    ).toBe(201);
  });
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-record-and-list — RFC 0056 §C. POST an annotation, then GET
|
|
3
|
+
* lists it back. Gated on `capabilities.feedback.supported` + the
|
|
4
|
+
* `conformance-a` seed fixture; soft-skips otherwise.
|
|
5
|
+
*
|
|
6
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §C
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect } from 'vitest';
|
|
10
|
+
import { driver } from '../lib/driver.js';
|
|
11
|
+
import { readFeedbackCap, seedRun } from '../lib/feedback.js';
|
|
12
|
+
|
|
13
|
+
describe('feedback-record-and-list (RFC 0056 §C)', () => {
  /**
   * Posts a rating annotation on a seeded run, expects 201 with a string
   * `annotationId`, then lists the run's annotations and expects the created
   * id to be present.
   *
   * Soft-skips (returns without asserting) when feedback is not advertised,
   * when no run could be seeded, or when the POST answers 501/404.
   */
  it('POST an annotation then GET returns it', async () => {
    const cap = await readFeedbackCap();
    if (cap?.supported !== true) return;

    const runId = await seedRun('feedback-rl');
    if (!runId) return;

    const post = await driver.post(`/v1/runs/${runId}/annotations`, { signal: { kind: 'rating', rating: 5 } });
    if (post.status === 501 || post.status === 404) return;
    expect(
      post.status,
      driver.describe('RFC 0056 §C', 'POST annotation returns 201 with the persisted annotation'),
    ).toBe(201);

    // The body may be absent; read it defensively so a missing body fails the
    // assertion below instead of throwing a TypeError.
    const created = post.json as { annotationId?: string } | undefined;
    const createdId = created?.annotationId;
    expect(typeof createdId).toBe('string');

    const list = await driver.get(`/v1/runs/${runId}/annotations`);
    expect(list.status).toBe(200);
    const ann = (list.json as { annotations?: Array<{ annotationId?: string }> } | undefined)?.annotations ?? [];
    expect(ann.some((a) => a.annotationId === createdId)).toBe(true);
  });
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feedback-unsupported-501 — RFC 0056 §C. A host that does NOT advertise
|
|
3
|
+
* `capabilities.feedback.supported` MUST return `501 capability_not_provided`
|
|
4
|
+
* on the annotation endpoints (the honest signal, per `capabilities.md`) —
|
|
5
|
+
* not silently 404 the route.
|
|
6
|
+
*
|
|
7
|
+
* Soft-skips when the host advertises feedback (501 is N/A) or when the
|
|
8
|
+
* route is entirely absent (404/405 — host predates RFC 0056).
|
|
9
|
+
*
|
|
10
|
+
* @see RFCS/0056-run-feedback-and-annotation-event.md §C
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect } from 'vitest';
|
|
14
|
+
import { driver } from '../lib/driver.js';
|
|
15
|
+
import { readFeedbackCap } from '../lib/feedback.js';
|
|
16
|
+
|
|
17
|
+
describe('feedback-unsupported-501 (RFC 0056 §C)', () => {
  /**
   * When the host does not advertise feedback, a POST to the annotation
   * endpoint is expected to answer 501 with error code
   * `capability_not_provided`. Returns early without asserting when feedback
   * is advertised or the route answers 404/405.
   */
  it('POST annotations returns 501 capability_not_provided when feedback is unadvertised', async () => {
    const feedbackCap = await readFeedbackCap();
    const advertised = feedbackCap?.supported === true;
    if (advertised) return; // host supports feedback — 501 N/A

    const probeBody = { signal: { kind: 'flag' } };
    const response = await driver.post('/v1/runs/probe-run-rfc0056/annotations', probeBody);

    // route absent — host predates RFC 0056
    if (response.status === 404) return;
    if (response.status === 405) return;

    expect(
      response.status,
      driver.describe('rest-endpoints.md / RFC 0056 §C', 'unadvertised feedback MUST return 501, not 404'),
    ).toBe(501);

    const errorBody = response.json as { error?: string } | undefined;
    expect(errorBody?.error).toBe('capability_not_provided');
  });
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* identity-owner-shape — RFC 0048 §C verification.
|
|
3
|
+
*
|
|
4
|
+
* Status: DRAFT. RFC 0048 (tenant·workspace·principal identity model) is
|
|
5
|
+
* `Draft`. The optional `RunSnapshot.owner` triple has landed in
|
|
6
|
+
* `schemas/run-snapshot.schema.json`.
|
|
7
|
+
*
|
|
8
|
+
* Server-free schema validation of the owner triple:
|
|
9
|
+
* - Positive: `{ tenant }` and `{ tenant, workspace, principal }` validate.
|
|
10
|
+
* - Negative: missing `tenant` (required), or an unknown property, is rejected.
|
|
11
|
+
*
|
|
12
|
+
* The owner subschema is self-contained (no external $ref), so it compiles
|
|
13
|
+
* standalone via ajv.
|
|
14
|
+
*
|
|
15
|
+
* @see RFCS/0048-tenant-workspace-principal-identity-model.md
|
|
16
|
+
* @see schemas/run-snapshot.schema.json properties.owner
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, it, expect } from 'vitest';
|
|
20
|
+
import { readFileSync } from 'node:fs';
|
|
21
|
+
import { join } from 'node:path';
|
|
22
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
23
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
24
|
+
|
|
25
|
+
/**
 * Minimal view of `run-snapshot.schema.json`: only the fields this scenario
 * reads (the schema dialect and the optional `owner` property definition).
 */
interface SnapshotSchema {
  $schema: string;
  properties: { owner?: Record<string, unknown> };
}

// Load and parse the run-snapshot schema once, at module evaluation time.
const snapshotSchemaPath = join(SCHEMAS_DIR, 'run-snapshot.schema.json');
const snapshot = JSON.parse(readFileSync(snapshotSchemaPath, 'utf8')) as SnapshotSchema;
|
|
33
|
+
|
|
34
|
+
describe('category: identity owner-triple shape (RFC 0048 §C)', () => {
  const ownerDefinition = snapshot.properties.owner;

  it('run-snapshot.schema.json defines an optional owner triple', () => {
    expect(
      ownerDefinition,
      'RFC 0048 §C: RunSnapshot MUST define an optional `owner` object',
    ).toBeDefined();
  });

  // Compile the owner subschema on its own, carrying over the parent
  // schema's `$schema` dialect declaration.
  const validateOwner = new Ajv2020({ allErrors: true, strict: false }).compile({
    $schema: snapshot.$schema,
    ...(ownerDefinition as Record<string, unknown>),
  });

  it('positive: tenant-only owner validates', () => {
    const accepted = validateOwner({ tenant: 'acme' });
    expect(accepted, JSON.stringify(validateOwner.errors)).toBe(true);
  });

  it('positive: full triple validates', () => {
    const accepted = validateOwner({ tenant: 'acme', workspace: 'ws-eng', principal: 'user_42' });
    expect(accepted, JSON.stringify(validateOwner.errors)).toBe(true);
  });

  it('negative: owner missing tenant is rejected (tenant is required)', () => {
    const accepted = validateOwner({ workspace: 'ws-eng' });
    expect(accepted).toBe(false);
  });

  it('negative: unknown owner property is rejected (additionalProperties:false)', () => {
    const accepted = validateOwner({ tenant: 'acme', role: 'admin' });
    expect(accepted).toBe(false);
  });
});
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* multi-agent-confidence-escalation — RFC 0039 §A behavioral.
|
|
3
3
|
*
|
|
4
|
-
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0039
|
|
5
|
-
*
|
|
6
|
-
* this scenario.
|
|
4
|
+
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0039
|
|
5
|
+
* (multi-agent execution model `version: 2`) filed Draft → graduated
|
|
6
|
+
* Active 2026-05-22 in the same commit chain as this scenario.
|
|
7
|
+
* Capability-gated on
|
|
7
8
|
* `capabilities.multiAgent.executionModel.supported: true` AND
|
|
8
9
|
* `capabilities.multiAgent.executionModel.version >= 2` AND fixture
|
|
9
|
-
* availability. Hosts that advertise only
|
|
10
|
-
* cleanly — the confidence-floor MUST applies only at version >= 2
|
|
10
|
+
* availability. Hosts that advertise only `version: 1` soft-skip
|
|
11
|
+
* cleanly — the confidence-floor MUST applies only at `version >= 2`.
|
|
11
12
|
*
|
|
12
|
-
* Asserts (behavioral when host advertises
|
|
13
|
+
* Asserts (behavioral when host advertises `version >= 2`):
|
|
13
14
|
*
|
|
14
15
|
* 1. Advertisement shape: confidenceEscalationFloor (when present) MUST be
|
|
15
16
|
* a number in [0.5, 1.0]; floor < 0.5 is non-conformant per RFC 0039 §A.
|
|
@@ -37,11 +38,11 @@
|
|
|
37
38
|
* interrupt fires AND BEFORE any `core.workflowChain.event` with
|
|
38
39
|
* `phase: 'dispatch.began'` for the escalated decision's intended
|
|
39
40
|
* next-worker"). This is the load-bearing test that distinguishes
|
|
40
|
-
*
|
|
41
|
-
* hosts gate on confidence.
|
|
41
|
+
* `version: 2` from `version: 1`: `version: 1` hosts dispatch
|
|
42
|
+
* unconditionally; `version: 2` hosts gate on confidence.
|
|
42
43
|
*
|
|
43
44
|
* @see RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §A
|
|
44
|
-
* @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039
|
|
45
|
+
* @see spec/v1/multi-agent-execution.md §"Confidence escalation (RFC 0039)"
|
|
45
46
|
* @see schemas/run-event-payloads.schema.json §coreWorkflowChainConfidenceEscalated
|
|
46
47
|
*/
|
|
47
48
|
|
|
@@ -103,14 +104,14 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
|
|
|
103
104
|
const supported = d?.capabilities?.multiAgent?.executionModel?.supported === true;
|
|
104
105
|
const versionRaw = d?.capabilities?.multiAgent?.executionModel?.version;
|
|
105
106
|
const version = typeof versionRaw === 'number' ? versionRaw : 0;
|
|
106
|
-
if (!supported || version < 2) return; // soft-skip —
|
|
107
|
+
if (!supported || version < 2) return; // soft-skip — `version: 1` hosts pass via this absence
|
|
107
108
|
|
|
108
109
|
const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
|
|
109
110
|
expect(create.status).toBe(201);
|
|
110
111
|
const runId = (create.json as { runId: string }).runId;
|
|
111
112
|
|
|
112
113
|
const terminal = await pollUntilTerminal(runId);
|
|
113
|
-
//
|
|
114
|
+
// RFC 0039 escalation suspends the parent — NOT a terminal `completed`.
|
|
114
115
|
// The conformance pollUntilTerminal returns when the run reaches any
|
|
115
116
|
// settled status. RFC 0039 §A gives hosts a choice: clarify-kind
|
|
116
117
|
// escalation (→ waiting-clarification) OR escalate-kind approval
|
|
@@ -188,7 +189,7 @@ describe.skipIf(BEHAVIORAL_SKIP)('multi-agent-confidence-escalation: behavioral
|
|
|
188
189
|
'confidence-escalated causationId MUST point at the runOrchestrator.decided that surfaced the low-confidence decision',
|
|
189
190
|
).toBe('runOrchestrator.decided');
|
|
190
191
|
|
|
191
|
-
// Load-bearing: NO dispatch event fired.
|
|
192
|
+
// Load-bearing: NO dispatch event fired. RFC 0039 gates BEFORE the loop.
|
|
192
193
|
const chainEvents = events.filter((e) => e.type === 'core.workflowChain.event');
|
|
193
194
|
expect(
|
|
194
195
|
chainEvents.length,
|
|
@@ -108,17 +108,92 @@ describe.skipIf(HTTP_SKIP)('multi-agent-memory-lifecycle: behavioral (RFC 0039
|
|
|
108
108
|
// Until a memory-advertising Phase 2 host wires the seam, the contract
|
|
109
109
|
// is documentation-only — surfaced as `todo` so test reporters track
|
|
110
110
|
// the gap rather than reporting a vacuous PASS.
|
|
111
|
-
|
|
111
|
+
// MAE-2 is still out of stable profile via RFC 0042 §B (experimental
|
|
112
|
+
// tier): RFC 0039 §B Half B (MAE-2 + MAE-3) landed on MyndHyve
|
|
113
|
+
// 2026-05-23 via commit `a51f7bbd` (`snapshotAtSeq()` +
|
|
114
|
+
// `crossChildMemoryConcurrency: 'strict'`). The MAE-2 cross-run-ttl-
|
|
115
|
+
// roundtrip seam (POST /v1/host/sample/test/memory/cross-run-ttl-
|
|
116
|
+
// roundtrip) is still open per host-sample-test-seams.md §"Open seams"
|
|
117
|
+
// — no host has wired the seam endpoint yet, so the behavioral
|
|
118
|
+
// assertion stays `it.skip`. Hosts that implement Half B SHOULD
|
|
119
|
+
// advertise `multiAgent.executionModel.tier: 'experimental'` per
|
|
120
|
+
// RFC 0042 §A until the seam contract is wired.
|
|
121
|
+
it.skip('MAE-2 cross-run TTL: child write expiresAt MUST be anchored at child write time, not parent start — out of stable profile via RFC 0042');
|
|
112
122
|
|
|
113
|
-
//
|
|
114
|
-
//
|
|
115
|
-
//
|
|
116
|
-
//
|
|
117
|
-
//
|
|
118
|
-
//
|
|
119
|
-
//
|
|
120
|
-
//
|
|
121
|
-
//
|
|
122
|
-
|
|
123
|
-
|
|
123
|
+
// MAE-3 flipped to behavioral 2026-05-25 — MyndHyve workflow-runtime
|
|
124
|
+
// revision `00206-tdh` advertises Phase 2 + memory and honors the
|
|
125
|
+
// POST /v1/runs/{runId}:fork mode:replay contract per
|
|
126
|
+
// host-sample-test-seams.md §"Canonical-endpoint conformance hooks"
|
|
127
|
+
// §9. The seam reuses the canonical fork endpoint plus the
|
|
128
|
+
// OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID env-var convention (parallel
|
|
129
|
+
// naming to OPENWOP_TEST_EXPIRED_RUN_ID used by
|
|
130
|
+
// production-retention-expiry). Soft-skips on Phase 1 hosts, Phase 2
|
|
131
|
+
// hosts without memory, and hosts that have not seeded the env var.
|
|
132
|
+
it('MAE-3 replay snapshot refusal: fork mode:replay against a past-retention runId MUST return 422 replay_memory_snapshot_unavailable with documented envelope; silent substitution is non-conformant', async (ctx) => {
  // Skip unless discovery is readable, the host advertises execution-model
  // version >= 2 with memory support, and the expired-run env var is seeded.
  const discovery = await readDiscovery();
  if (discovery === null) {
    ctx.skip();
    return;
  }
  const advertisedVersion = discovery.capabilities?.multiAgent?.executionModel?.version;
  const memorySupported = discovery.capabilities?.memory?.supported;
  const phase2OrLater = typeof advertisedVersion === 'number' && advertisedVersion >= 2;
  const expiredRunId = process.env.OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID;
  const prerequisitesMet = phase2OrLater && memorySupported === true && Boolean(expiredRunId);
  if (!prerequisitesMet || !expiredRunId) {
    ctx.skip();
    return;
  }

  const fromSeq = 0;
  const forkPath = `/v1/runs/${encodeURIComponent(expiredRunId)}:fork`;
  const response = await driver.post(forkPath, { mode: 'replay', fromSeq });

  expect(
    response.status,
    driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
      'fork mode:replay against a past-retention runId MUST refuse with 422; silent substitution of current memory is non-conformant',
    ),
  ).toBe(422);

  // Refusal envelope: error code plus details echoing the request.
  const envelope = response.json as {
    error?: unknown;
    details?: { fromSeq?: unknown; sourceRunId?: unknown; reason?: unknown };
  } | null;
  const details = envelope?.details;

  expect(
    envelope?.error,
    driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
      'refusal envelope error code MUST be "replay_memory_snapshot_unavailable" (distinct from the pre-flight invalid_from_seq gate)',
    ),
  ).toBe('replay_memory_snapshot_unavailable');

  expect(
    details?.fromSeq,
    driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
      'refusal envelope details.fromSeq MUST echo the requested fromSeq',
    ),
  ).toBe(fromSeq);

  expect(
    details?.sourceRunId,
    driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
      'refusal envelope details.sourceRunId MUST echo the runId from the URL',
    ),
  ).toBe(expiredRunId);

  const permittedReasons: unknown[] = ['retention_expired', 'event_log_unavailable'];
  const reasonIsPermitted = permittedReasons.includes(details?.reason);
  expect(
    reasonIsPermitted,
    driver.describe(
      'RFCS/0039-multi-agent-confidence-and-memory-lifecycle.md §B MAE-3',
      'refusal envelope details.reason MUST be one of {"retention_expired", "event_log_unavailable"}',
    ),
  ).toBe(true);
});
|
|
124
199
|
});
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* multi-region-idempotency-behavior — RFC 0036 §C convergence-rule behavioral probe.
|
|
3
|
+
*
|
|
4
|
+
* Companion to `multi-region-idempotency.test.ts` which carries the
|
|
5
|
+
* advertisement-shape probes. This file exercises the canonical convergence
|
|
6
|
+
* algorithm specified by `spec/v1/idempotency.md` §"Multi-region idempotency
|
|
7
|
+
* annex" via the host-extension test seam at:
|
|
8
|
+
*
|
|
9
|
+
* POST /v1/host/sample/test/multi-region/simulate-partition
|
|
10
|
+
*
|
|
11
|
+
* The seam is conformance-only (host-extension namespace), gated on the
|
|
12
|
+
* host's `OPENWOP_TEST_MULTI_REGION_SIMULATOR=true` env var. The seam itself
|
|
13
|
+
* is OPTIONAL — hosts that don't expose it soft-skip; hosts that DO expose
|
|
14
|
+
* it MUST honor the annex's convergence rule:
|
|
15
|
+
*
|
|
16
|
+
* 1. Given ≥2 conflicting `ConflictClaim` records sharing
|
|
17
|
+
* `(tenantId, endpoint, key)`, the host's resolver MUST return the
|
|
18
|
+
* lex-min `runId` as the winner.
|
|
19
|
+
* 2. Every region (including the winner's) gets a cache redirect entry
|
|
20
|
+
* pointing at the winner's runId.
|
|
21
|
+
* 3. The loser's cancel reason MUST be the canonical string
|
|
22
|
+
* `cross_region_dedup_loss`.
|
|
23
|
+
* 4. The resolver MUST be order-invariant — shuffling the input claims
|
|
24
|
+
* MUST produce the same winner.
|
|
25
|
+
* 5. Cross-region partition simulation: same idempotency-key submitted
|
|
26
|
+
* to 2+ regions simultaneously converges to ONE survivor per the
|
|
27
|
+
* lex-min rule, with no coordination required.
|
|
28
|
+
*
|
|
29
|
+
* @see RFCS/0036-multi-region-and-cross-engine-guarantees.md §C
|
|
30
|
+
* @see spec/v1/idempotency.md §"Multi-region idempotency annex"
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { driver } from '../lib/driver.js';
|
|
35
|
+
|
|
36
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
37
|
+
|
|
38
|
+
/**
 * One claim submitted to the simulate-partition seam. Claims conflict when
 * they share the same `(tenantId, endpoint, key)`.
 */
interface ConflictClaim {
  /** Run id of the claim. */
  runId: string;
  /** Tenant the claim belongs to. */
  tenantId: string;
  /** Endpoint the idempotency key was submitted to, e.g. `POST /v1/runs`. */
  endpoint: string;
  /** Idempotency key. */
  key: string;
  /** Region that accepted the claim. */
  region: string;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Response body of the simulate-partition seam. Every field is optional
 * because the body is read from an unvalidated JSON response.
 */
interface ConvergenceResult {
  /** Claim selected as the survivor. */
  winner?: ConflictClaim;
  /** Claims that lost the conflict. */
  losers?: ConflictClaim[];
  /** Per-region cache entries redirecting to a run id. */
  cacheRedirects?: { region: string; cacheKey: string; redirectToRunId: string }[];
  /** Cancel reason reported for the losing claims. */
  loserCancelReason?: string;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Posts the given claims to the multi-region simulate-partition test seam.
 *
 * @param claims - Conflicting claims to submit.
 * @returns The HTTP status and the response body, or an empty object when
 *   the response carried no JSON body.
 */
async function simulatePartition(claims: ConflictClaim[]): Promise<{ status: number; body: ConvergenceResult }> {
  const response = await driver.post('/v1/host/sample/test/multi-region/simulate-partition', { claims });
  const parsed = response.json as ConvergenceResult | null | undefined;
  const body: ConvergenceResult = parsed ?? {};
  return { status: response.status, body };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Conformance scenarios for the multi-region idempotency convergence rule.
 *
 * The whole suite is skipped when `HTTP_SKIP` is truthy. Each scenario also
 * skips itself when the simulator seam answers 404, i.e. the host does not
 * expose it. Every assertion carries a `driver.describe(...)` spec citation
 * so a failure names the requirement that was violated.
 */
describe.skipIf(HTTP_SKIP)('multi-region-idempotency-behavior: convergence rule (RFC 0036 §C)', () => {
  const CONVERGENCE_RULE = 'idempotency.md §"Convergence rule"';

  /** Builds a claim against `POST /v1/runs`; the tenant defaults to `t1`. */
  const claim = (runId: string, key: string, region: string, tenantId = 't1'): ConflictClaim => ({
    runId,
    tenantId,
    endpoint: 'POST /v1/runs',
    key,
    region,
  });

  /** Asserts the seam accepted the submitted claims, citing the annex on failure. */
  const expectSeamAccepted = (status: number): void => {
    expect(
      status,
      driver.describe(
        'idempotency.md §"Multi-region idempotency annex"',
        'simulate-partition seam MUST return 200 when ≥2 conflicting claims are submitted',
      ),
    ).toBe(200);
  };

  it('two-region conflict resolves to the lex-min runId per annex §"Convergence rule"', async (ctx) => {
    const probe = await simulatePartition([
      claim('run-b-east', 'idem-1', 'us-east-1'),
      claim('run-a-west', 'idem-1', 'eu-west-1'),
    ]);
    if (probe.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expectSeamAccepted(probe.status);
    expect(
      probe.body.winner?.runId,
      driver.describe(CONVERGENCE_RULE, 'winner MUST be the lex-min runId (run-a-west < run-b-east)'),
    ).toBe('run-a-west');
  });

  it('three-region partition resolves to a single winner', async (ctx) => {
    const probe = await simulatePartition([
      claim('zzz-3', 'idem-2', 'r1'),
      claim('aaa-1', 'idem-2', 'r2'),
      claim('mmm-2', 'idem-2', 'r3'),
    ]);
    if (probe.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expectSeamAccepted(probe.status);
    expect(
      probe.body.winner?.runId,
      driver.describe(CONVERGENCE_RULE, 'winner MUST be the lex-min runId across all conflicting claims'),
    ).toBe('aaa-1');
    expect(
      probe.body.losers?.length,
      driver.describe(CONVERGENCE_RULE, 'losers array MUST contain N-1 entries when N claims conflict'),
    ).toBe(2);
  });

  it('every region gets a cache redirect entry pointing at the winner', async (ctx) => {
    const probe = await simulatePartition([
      claim('run-x', 'idem-3', 'r1'),
      claim('run-a', 'idem-3', 'r2'),
    ]);
    if (probe.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expectSeamAccepted(probe.status);
    // A missing array is treated as empty so the length assertion reports it.
    const redirects = probe.body.cacheRedirects ?? [];
    expect(
      redirects.length,
      driver.describe(CONVERGENCE_RULE, 'cacheRedirects MUST contain one entry per claim (including the winner)'),
    ).toBe(2);
    for (const redirect of redirects) {
      expect(
        redirect.redirectToRunId,
        driver.describe(CONVERGENCE_RULE, 'every cache redirect MUST point at the winner runId'),
      ).toBe('run-a');
    }
  });

  it('loser cancel reason MUST be the canonical `cross_region_dedup_loss` string', async (ctx) => {
    const probe = await simulatePartition([
      claim('run-b', 'idem-4', 'r1'),
      claim('run-a', 'idem-4', 'r2'),
    ]);
    if (probe.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expectSeamAccepted(probe.status);
    expect(
      probe.body.loserCancelReason,
      driver.describe(
        CONVERGENCE_RULE,
        'loserCancelReason MUST be the canonical `cross_region_dedup_loss` string',
      ),
    ).toBe('cross_region_dedup_loss');
  });

  it('resolver is order-invariant — shuffled inputs produce the same winner', async (ctx) => {
    // Typed as a tuple so the permutations below need no non-null assertions.
    const claims: [ConflictClaim, ConflictClaim, ConflictClaim] = [
      claim('c', 'idem-5', 'r1'),
      claim('a', 'idem-5', 'r2'),
      claim('b', 'idem-5', 'r3'),
    ];
    const [first, second, third] = claims;

    const asGiven = await simulatePartition(claims);
    if (asGiven.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expectSeamAccepted(asGiven.status);
    const rotated = await simulatePartition([third, first, second]);
    expectSeamAccepted(rotated.status);
    const reversed = await simulatePartition([...claims].reverse());
    expectSeamAccepted(reversed.status);

    const orderInvariance = driver.describe(
      'idempotency.md §"Convergence rule" — determinism',
      'resolver MUST be order-invariant; all permutations MUST produce the same lex-min winner',
    );
    for (const outcome of [asGiven, rotated, reversed]) {
      expect(outcome.body.winner?.runId, orderInvariance).toBe('a');
    }
  });

  it('mismatched tuple rejects with 400 validation_error', async (ctx) => {
    // The second claim differs in tenantId, so the claims do not share one tuple.
    const probe = await simulatePartition([
      claim('r1', 'idem-6', 'r1'),
      claim('r2', 'idem-6', 'r2', 't2'),
    ]);
    if (probe.status === 404) {
      ctx.skip(); // host doesn't expose the simulator seam
      return;
    }
    expect(
      probe.status,
      driver.describe(
        CONVERGENCE_RULE,
        'claims with non-matching (tenantId, endpoint, key) MUST be rejected — it would be a programming error in the caller',
      ),
    ).toBe(400);
  });
});
|