@openwop/openwop-conformance 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +2 -2
  3. package/coverage.md +26 -14
  4. package/fixtures/conformance-agent-low-confidence.json +7 -4
  5. package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
  6. package/fixtures/conformance-agent-reasoning.json +23 -4
  7. package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
  8. package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
  9. package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
  10. package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
  11. package/fixtures/conformance-dispatch-input-mapping.json +49 -0
  12. package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
  13. package/fixtures/conformance-dispatch-output-mapping.json +49 -0
  14. package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
  15. package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
  16. package/fixtures.md +12 -2
  17. package/package.json +1 -1
  18. package/schemas/README.md +7 -0
  19. package/schemas/agent-ref.schema.json +1 -1
  20. package/schemas/ai-envelope.schema.json +106 -0
  21. package/schemas/capabilities.schema.json +248 -0
  22. package/schemas/core-conformance-mock-agent-config.schema.json +147 -0
  23. package/schemas/dispatch-config.schema.json +26 -0
  24. package/schemas/envelopes/clarification.request.schema.json +43 -0
  25. package/schemas/envelopes/error.schema.json +26 -0
  26. package/schemas/envelopes/schema.request.schema.json +22 -0
  27. package/schemas/envelopes/schema.response.schema.json +22 -0
  28. package/schemas/node-pack-manifest.schema.json +5 -0
  29. package/schemas/pack-lockfile.schema.json +16 -0
  30. package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
  31. package/src/lib/webhook-receiver.ts +137 -0
  32. package/src/lib/workflow-chain-expansion.ts +213 -0
  33. package/src/scenarios/agentPackCatalog.test.ts +216 -0
  34. package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
  35. package/src/scenarios/agentReasoningEvents.test.ts +58 -7
  36. package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
  37. package/src/scenarios/ai-envelope-shape.test.ts +362 -0
  38. package/src/scenarios/aiEnvelope.capBreached.test.ts +173 -0
  39. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +150 -0
  40. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +69 -0
  41. package/src/scenarios/aiEnvelope.redaction.test.ts +73 -0
  42. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +87 -0
  43. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +143 -0
  44. package/src/scenarios/aiEnvelope.universalKinds.test.ts +176 -0
  45. package/src/scenarios/append-ordering.test.ts +44 -0
  46. package/src/scenarios/artifact-auth.test.ts +58 -0
  47. package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
  48. package/src/scenarios/blob-presign-expiry.test.ts +66 -0
  49. package/src/scenarios/blob-roundtrip.test.ts +48 -0
  50. package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
  51. package/src/scenarios/cache-ttl-expiry.test.ts +47 -0
  52. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +98 -0
  53. package/src/scenarios/dispatch-input-mapping.test.ts +94 -0
  54. package/src/scenarios/dispatch-output-mapping.test.ts +65 -0
  55. package/src/scenarios/fs-path-traversal.test.ts +124 -0
  56. package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
  57. package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
  58. package/src/scenarios/kv-atomic-increment.test.ts +74 -0
  59. package/src/scenarios/kv-cas.test.ts +75 -0
  60. package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
  61. package/src/scenarios/kv-ttl-expiry.test.ts +47 -0
  62. package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
  63. package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
  64. package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
  65. package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
  66. package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
  67. package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
  68. package/src/scenarios/pause-resume.test.ts +43 -0
  69. package/src/scenarios/queue-ack-nack-dlq.test.ts +67 -0
  70. package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
  71. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +48 -0
  72. package/src/scenarios/search-bm25-roundtrip.test.ts +47 -0
  73. package/src/scenarios/spec-corpus-validity.test.ts +17 -1
  74. package/src/scenarios/sql-injection-rejection.test.ts +84 -0
  75. package/src/scenarios/sql-transaction-atomicity.test.ts +66 -0
  76. package/src/scenarios/stream-subscribe-from-beginning.test.ts +66 -0
  77. package/src/scenarios/subworkflow-input-mapping.test.ts +100 -0
  78. package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
  79. package/src/scenarios/table-cursor-pagination.test.ts +47 -0
  80. package/src/scenarios/table-schema-enforcement.test.ts +47 -0
  81. package/src/scenarios/vector-knn-roundtrip.test.ts +48 -0
  82. package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
  83. package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
  84. package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
  85. package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
  86. package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Reference webhook receiver for the conformance suite — implements
3
+ * the verification contract per `spec/v1/webhooks.md` §"Signature
4
+ * recipe" + §"Replay-attack resistance" so adversarial-input scenarios
5
+ * can verify that a properly-implemented receiver rejects the
6
+ * documented failure modes.
7
+ *
8
+ * Mirrors the SDK's verifyWebhookSignature helper (sdk/typescript/src/
9
+ * webhook-helpers.ts) but inlined here so the conformance suite stays
10
+ * dependency-free vs. the SDK. The two MUST produce identical
11
+ * outcomes for the same inputs.
12
+ *
13
+ * @see spec/v1/webhooks.md §"Signature recipe"
14
+ * @see sdk/typescript/src/webhook-helpers.ts (canonical SDK
15
+ * implementation; this file is a conformance-suite mirror)
16
+ */
17
+
18
+ import { createHmac, timingSafeEqual } from 'node:crypto';
19
+
20
/** Default timestamp freshness window, in seconds (five minutes). */
export const DEFAULT_FRESHNESS_WINDOW_SECONDS = 5 * 60;
21
+
22
/** Machine-readable reason a webhook delivery was refused. */
export type WebhookRejectionReason =
  // Header-shape failures.
  | 'wrong_algorithm'
  | 'malformed_signature_header'
  | 'malformed_timestamp_header'
  // Replay / freshness failures.
  | 'duplicate_signature'
  | 'timestamp_expired'
  | 'timestamp_too_far_in_future'
  // Cryptographic failure.
  | 'signature_mismatch';
30
+
31
/** Outcome of verifying one delivery: accepted, or refused with a reason. */
export type WebhookVerifyResult =
  | { accepted: false; reason: WebhookRejectionReason }
  | { accepted: true };
34
+
35
/** Mutable per-receiver state carried across verification calls. */
export interface WebhookReceiverState {
  /**
   * Signature values this receiver has already accepted; consulted to
   * refuse a replay of an earlier delivery.
   */
  acceptedSignatures: Set<string>;
}
39
+
40
/** Build a fresh receiver state whose replay set starts out empty. */
export function createReceiverState(): WebhookReceiverState {
  const acceptedSignatures = new Set<string>();
  return { acceptedSignatures };
}
43
+
44
/** Optional knobs for a single verification call. */
export interface VerifyOptions {
  /**
   * Current time in unix seconds. Supply it to make freshness checks
   * deterministic in tests; when omitted the wall clock is used.
   */
  nowSeconds?: number;
  /**
   * Allowed distance, in seconds, between `now` and the delivery
   * timestamp. Defaults to 5 minutes per spec; 0 disables the freshness
   * check.
   */
  freshnessWindowSeconds?: number;
}
50
+
51
/**
 * Verify a single webhook delivery against the canonical recipe.
 *
 * Checks run in this order: algorithm gating, signature-header shape,
 * replay detection, timestamp shape and freshness, then HMAC-SHA256
 * recomputation over `"<timestamp>.<body>"` with a constant-time compare.
 * On acceptance the signature is recorded in `state.acceptedSignatures`
 * so a later replay of the same delivery is refused.
 *
 * Receivers MUST pass the **exact** request body bytes — parsed-and-
 * reserialized JSON will fail verification.
 *
 * @param secret Shared HMAC secret.
 * @param signatureHeader Signature header value, expected as `v1=<hex>`.
 * @param algorithmHeader Optional explicit algorithm header; only `v1`
 *   (or absence) is accepted.
 * @param timestampHeader Unix-seconds timestamp as plain decimal digits.
 * @param rawBody Request body exactly as received.
 * @param state Replay-detection state; mutated on acceptance only.
 * @param options Freshness window and clock overrides.
 * @returns `{ accepted: true }` on success, otherwise
 *   `{ accepted: false, reason }`.
 */
export function verifyWebhookDelivery(
  secret: string,
  signatureHeader: string,
  algorithmHeader: string | undefined,
  timestampHeader: string,
  rawBody: string | Buffer,
  state: WebhookReceiverState,
  options: VerifyOptions = {},
): WebhookVerifyResult {
  // 1. Algorithm gating. Hosts MAY include an explicit
  //    X-openwop-Signature-Algorithm header; receivers MUST refuse
  //    anything other than `v1` per webhooks.md §"Signature algorithm
  //    versioning". Absence is treated as the v1 default.
  if (algorithmHeader !== undefined && algorithmHeader !== 'v1') {
    return { accepted: false, reason: 'wrong_algorithm' };
  }

  // 2. Signature header parse.
  if (!signatureHeader.startsWith('v1=')) {
    return { accepted: false, reason: 'malformed_signature_header' };
  }
  const providedHex = signatureHeader.slice(3);
  if (!/^[0-9a-f]+$/i.test(providedHex)) {
    return { accepted: false, reason: 'malformed_signature_header' };
  }

  // Hex digits are accepted in either case, so the replay key must be
  // case-normalized: otherwise a captured signature resent in a different
  // case decodes to the same bytes and slips past the duplicate check.
  const canonicalSignature = `v1=${providedHex.toLowerCase()}`;

  // 3. Anti-replay: receivers MUST refuse a signature value seen
  //    before, even if the timestamp would otherwise be fresh
  //    (defense-in-depth against an attacker resending a captured
  //    delivery before the original's timestamp window expires).
  //    The raw header is checked too, so state seeded with
  //    non-canonical values keeps working.
  if (
    state.acceptedSignatures.has(canonicalSignature) ||
    state.acceptedSignatures.has(signatureHeader)
  ) {
    return { accepted: false, reason: 'duplicate_signature' };
  }

  // 4. Timestamp parse + freshness window. Only plain decimal digits are
  //    accepted; `Number()` alone would also admit forms such as "1.7e9",
  //    "0x10" or padded strings.
  if (!/^[0-9]+$/.test(timestampHeader)) {
    return { accepted: false, reason: 'malformed_timestamp_header' };
  }
  const timestamp = Number(timestampHeader);
  if (!Number.isSafeInteger(timestamp) || timestamp <= 0) {
    return { accepted: false, reason: 'malformed_timestamp_header' };
  }
  const window = options.freshnessWindowSeconds ?? DEFAULT_FRESHNESS_WINDOW_SECONDS;
  if (window > 0) {
    const now = options.nowSeconds ?? Math.floor(Date.now() / 1000);
    const delta = now - timestamp;
    if (delta > window) return { accepted: false, reason: 'timestamp_expired' };
    if (delta < -window) return { accepted: false, reason: 'timestamp_too_far_in_future' };
  }

  // 5. HMAC recompute + constant-time compare.
  // NOTE(review): a Buffer body is decoded as UTF-8 before hashing, the
  // same way signPayload does; bytes that are not valid UTF-8 would not
  // round-trip — confirm bodies are always UTF-8 text.
  const bodyStr = typeof rawBody === 'string' ? rawBody : rawBody.toString('utf8');
  const expectedHex = createHmac('sha256', secret).update(`${timestamp}.${bodyStr}`, 'utf8').digest('hex');
  // Compare the hex lengths before decoding: Buffer.from(hex, 'hex')
  // silently drops a trailing odd nibble, so "<valid sig>" + one extra hex
  // digit would otherwise decode to the valid signature bytes.
  if (providedHex.length !== expectedHex.length) {
    return { accepted: false, reason: 'signature_mismatch' };
  }
  const providedBuf = Buffer.from(providedHex, 'hex');
  const expectedBuf = Buffer.from(expectedHex, 'hex');
  if (providedBuf.length !== expectedBuf.length || !timingSafeEqual(providedBuf, expectedBuf)) {
    return { accepted: false, reason: 'signature_mismatch' };
  }

  // 6. Accept + record (canonical form) for replay detection.
  state.acceptedSignatures.add(canonicalSignature);
  return { accepted: true };
}
120
+
121
/**
 * Produce the header triple a host would send for `rawBody` at
 * `timestamp`: an HMAC-SHA256 (hex) over `"<timestamp>.<body>"` keyed by
 * `secret`. Handy for building adversarial-input fixtures in scenarios.
 *
 * @param secret Shared HMAC secret.
 * @param timestamp Unix-seconds timestamp to sign and emit.
 * @param rawBody Body to sign; a Buffer is decoded as UTF-8 first.
 * @returns The signature, timestamp and algorithm header values.
 */
export function signPayload(
  secret: string,
  timestamp: number,
  rawBody: string | Buffer,
): { signatureHeader: string; timestampHeader: string; algorithmHeader: 'v1' } {
  let text: string;
  if (typeof rawBody === 'string') {
    text = rawBody;
  } else {
    text = rawBody.toString('utf8');
  }

  const mac = createHmac('sha256', secret);
  mac.update(`${timestamp}.${text}`, 'utf8');
  const digest = mac.digest('hex');

  return {
    signatureHeader: `v1=${digest}`,
    timestampHeader: String(timestamp),
    algorithmHeader: 'v1',
  };
}
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Workflow-chain pack expansion — reference implementation of the
3
+ * 9-step host-editor expansion semantics from
4
+ * `spec/v1/workflow-chain-packs.md` §"Expansion semantics (normative)".
5
+ *
6
+ * Pure function. Zero I/O, zero crypto. Hosts implementing chain
7
+ * expansion in their workflow editors MAY import this directly OR
8
+ * adapt the algorithm into their language of choice — the contract
9
+ * this code encodes is the spec, not the code itself.
10
+ *
11
+ * What this implements:
12
+ * - Step 3: validate referenced typeIds resolve (delegated to caller via
13
+ * `isTypeIdResolvable` predicate)
14
+ * - Step 5: `{{params.<name>}}` literal substitution (recursive into
15
+ * nested string fields inside `config` / `inputs`)
16
+ * - Step 6: per-expansion node-id rewrite with a chainId-derived prefix
17
+ * for collision-free splice into the parent workflow
18
+ * - Step 8: capability propagation (chain.capabilities[] → every
19
+ * expanded WorkflowNode.capabilities[])
20
+ * - Edge endpoint rewriting (`from`/`to` ids that reference fragment
21
+ * nodes get the same prefix)
22
+ *
23
+ * What this deliberately DOESN'T implement (host-specific concerns):
24
+ * - Step 1: registry resolution (network/storage path is host-specific)
25
+ * - Step 2: signature verification (use `node:crypto`'s Ed25519 path —
26
+ * see workflow-chain-pack-signature-verification.test.ts)
27
+ * - Step 4: parameter-form prompting (host-UI concern)
28
+ * - Step 7: splice into parent workflow (host-editor concern; this
29
+ * function returns the rewritten fragment ready to be appended)
30
+ * - Step 9: persistence (host-storage concern)
31
+ *
32
+ * @see spec/v1/workflow-chain-packs.md §"Expansion semantics (normative)"
33
+ * @see RFCS/0013-workflow-chain-packs.md
34
+ */
35
+
36
/** One workflow-chain entry, shaped as it appears in a pack manifest. */
export interface WorkflowChain {
  chainId: string;
  version: string;
  label: string;
  description: string;
  /** Parameters schema for the chain (validated by the caller, not here). */
  parameters: object;
  /** The fragment graph: nodes are required, edges may be omitted. */
  dag: {
    nodes: readonly FragmentNode[];
    edges?: readonly FragmentEdge[];
  };
  outputs?: Record<string, { type: string; description: string }>;
  capabilities?: readonly ('streamable' | 'cacheable' | 'side-effectful' | 'mcp-exportable')[];
}
47
+
48
/** A node inside a chain's fragment graph, before expansion. */
export interface FragmentNode {
  /** Fragment-local id; expansion prefixes it to avoid collisions. */
  id: string;
  typeId: string;
  name?: string;
  position?: {
    x: number;
    y: number;
  };
  /** May contain `{{params.<name>}}` placeholders in string fields. */
  config?: Record<string, unknown>;
  /** May contain `{{params.<name>}}` placeholders in string fields. */
  inputs?: Record<string, unknown>;
}
56
+
57
/** A directed edge between two endpoints of a chain's fragment graph. */
export interface FragmentEdge {
  /** Source endpoint ref. */
  from: string;
  /** Target endpoint ref. */
  to: string;
  /** Optional condition, carried through expansion unchanged. */
  condition?: string;
}
62
+
63
/** Everything the caller must supply for one expansion of a chain. */
export interface ExpansionContext {
  /**
   * Unique tag for this particular expansion (e.g. 4 random hex chars).
   * It is combined with the chainId slug to namespace expanded node ids,
   * so one chain can be expanded several times inside the same parent
   * workflow without id collisions.
   */
  expansionId: string;
  /**
   * Author-supplied parameter values that have ALREADY been validated
   * against the chain's `parameters` JSON Schema. Nothing is re-validated
   * here: the caller MUST ajv-compile `chain.parameters` and reject bad
   * input with `chain_parameter_invalid` BEFORE calling.
   */
  params: Record<string, unknown>;
  /**
   * Step-3 typeId resolver. Should return `true` when the typeId is
   * registered with the destination host (a reserved `core.*` id, or one
   * published via a known node pack).
   */
  isTypeIdResolvable: (typeId: string) => boolean;
}
80
+
81
/**
 * Output of an expansion: nodes and edges ready to be spliced into a
 * parent workflow's `nodes[]` / `edges[]`.
 */
export interface ExpandedFragment {
  nodes: readonly {
    id: string;
    typeId: string;
    name?: string;
    position?: { x: number; y: number };
    config?: Record<string, unknown>;
    inputs?: Record<string, unknown>;
    capabilities?: readonly string[];
  }[];
  edges: readonly {
    from: string;
    to: string;
    condition?: string;
  }[];
  /**
   * original-fragment-id → rewritten-id. Lets the caller wire the parent
   * workflow's adjacent edges into the expansion.
   */
  idMap: ReadonlyMap<string, string>;
}
98
+
99
/**
 * Raised when a chain references a typeId the destination host cannot
 * resolve. Exposes the offending `typeId` and the owning `chainId` for
 * diagnostics; both the `code` field and the message carry the wire-level
 * error code `chain_unresolvable_typeid` per `workflow-chain-packs.md`
 * §"Error codes".
 */
export class ChainUnresolvableTypeIdError extends Error {
  readonly typeId: string;
  readonly chainId: string;
  readonly code: 'chain_unresolvable_typeid';

  constructor(typeId: string, chainId: string) {
    super(`chain_unresolvable_typeid: '${typeId}' in chain '${chainId}'`);
    this.typeId = typeId;
    this.chainId = chainId;
    this.code = 'chain_unresolvable_typeid';
    this.name = 'ChainUnresolvableTypeIdError';
  }
}
111
+
112
/** Matches a `{{params.<name>}}` placeholder; group 1 is the parameter name. */
const PARAM_PATTERN = /\{\{params\.([a-zA-Z_][a-zA-Z0-9_]*)\}\}/g;

/**
 * Recursive literal substitution of `{{params.<name>}}` placeholders in
 * any string field. Non-string leaves pass through unchanged; nested
 * arrays and objects are walked and copied (the input is not mutated).
 *
 * @param value Any JSON-like value taken from a node's `config` / `inputs`.
 * @param params Parameter values keyed by name; only OWN properties count.
 * @returns A copy of `value` with every placeholder replaced by
 *   `String(param)`, or by the empty string when the parameter is absent.
 */
function substitute(value: unknown, params: Record<string, unknown>): unknown {
  if (typeof value === 'string') {
    return value.replace(PARAM_PATTERN, (_match, name: string) => {
      // Look at own properties only. A plain `params[name]` would resolve
      // inherited members such as `constructor` or `toString` and splice
      // their function source text into the workflow.
      const v = Object.prototype.hasOwnProperty.call(params, name) ? params[name] : undefined;
      // Per the spec, parameter values are validated against the chain's
      // parameters schema BEFORE expansion, so `v === undefined` here
      // means the chain author referenced an undeclared parameter — the
      // safest substitution is the empty string (matching the standard
      // {{...}} convention in n8n/Handlebars).
      return v === undefined ? '' : String(v);
    });
  }
  if (Array.isArray(value)) return value.map((v) => substitute(v, params));
  if (value !== null && typeof value === 'object') {
    // Object.fromEntries defines every key as an own data property, so a
    // JSON-supplied "__proto__" key is copied as data instead of silently
    // replacing the prototype of the result.
    return Object.fromEntries(
      Object.entries(value).map(([k, v]): [string, unknown] => [k, substitute(v, params)]),
    );
  }
  return value;
}
137
+
138
/**
 * Namespace one edge endpoint. An endpoint is written `<nodeId>` or
 * `<nodeId>.<portName>`; everything before the first `.` is the node id.
 * When that node id belongs to the fragment, the prefix is put in front
 * and any port suffix is kept verbatim. Any other ref is returned as-is,
 * so edges to/from parent-workflow nodes can be wired after the splice.
 */
function rewriteEdgeRef(
  ref: string,
  fragmentNodeIds: ReadonlySet<string>,
  prefix: string,
): string {
  const firstDot = ref.indexOf('.');
  const hasPort = firstDot !== -1;
  const owner = hasPort ? ref.substring(0, firstDot) : ref;

  if (!fragmentNodeIds.has(owner)) {
    return ref;
  }

  const portSuffix = hasPort ? ref.substring(firstDot) : '';
  return prefix + owner + portSuffix;
}
153
+
154
/**
 * Build the per-expansion node-id prefix: the chainId with each `.`
 * turned into `_`, then the expansionId, each followed by `_`. Dots are
 * avoided so the resulting ids remain valid in storage backends that
 * reserve `.` for hierarchical keys.
 */
function computePrefix(chainId: string, expansionId: string): string {
  const slug = chainId.split('.').join('_');
  return [slug, expansionId, ''].join('_');
}
161
+
162
/**
 * Turn a workflow-chain into a concrete fragment that can be spliced into
 * a parent workflow. Covers steps 3 + 5 + 6 + 8 of the normative
 * `workflow-chain-packs.md` §"Expansion semantics" flow: typeId
 * resolution, `{{params.*}}` substitution, node-id namespacing (edge
 * endpoints included) and capability propagation.
 *
 * @param chain The chain entry to expand; it is not mutated.
 * @param ctx Expansion id, pre-validated params and the typeId resolver.
 * @returns The rewritten nodes and edges plus the original-id →
 *   rewritten-id map.
 * @throws ChainUnresolvableTypeIdError when any `dag.nodes[].typeId`
 *   fails the caller's `isTypeIdResolvable` predicate.
 */
export function expandChain(chain: WorkflowChain, ctx: ExpansionContext): ExpandedFragment {
  const fragmentNodes = chain.dag.nodes;

  // Step 3: stop at the first typeId the host cannot resolve.
  const unresolved = fragmentNodes.find((candidate) => !ctx.isTypeIdResolvable(candidate.typeId));
  if (unresolved !== undefined) {
    throw new ChainUnresolvableTypeIdError(unresolved.typeId, chain.chainId);
  }

  const prefix = computePrefix(chain.chainId, ctx.expansionId);

  // original id → namespaced id, in first-occurrence order.
  const idMap = new Map<string, string>();
  for (const { id } of fragmentNodes) {
    idMap.set(id, `${prefix}${id}`);
  }
  const fragmentNodeIds = new Set(idMap.keys());

  const inherited = chain.capabilities;
  const propagate = Boolean(inherited && inherited.length > 0);

  // Steps 5 + 6 + 8: substitute placeholders, rewrite ids, propagate capabilities.
  const expandedNodes: ExpandedFragment['nodes'][number][] = [];
  for (const source of fragmentNodes) {
    const node: ExpandedFragment['nodes'][number] = {
      id: `${prefix}${source.id}`,
      typeId: source.typeId,
      ...(source.name !== undefined ? { name: source.name } : {}),
      ...(source.position !== undefined ? { position: source.position } : {}),
      ...(source.config !== undefined
        ? { config: substitute(source.config, ctx.params) as Record<string, unknown> }
        : {}),
      ...(source.inputs !== undefined
        ? { inputs: substitute(source.inputs, ctx.params) as Record<string, unknown> }
        : {}),
      // Each node gets its own copy of the capability list.
      ...(propagate && inherited ? { capabilities: [...inherited] } : {}),
    };
    expandedNodes.push(node);
  }

  const expandedEdges: ExpandedFragment['edges'][number][] = [];
  for (const edge of chain.dag.edges ?? []) {
    const rewritten: ExpandedFragment['edges'][number] = {
      from: rewriteEdgeRef(edge.from, fragmentNodeIds, prefix),
      to: rewriteEdgeRef(edge.to, fragmentNodeIds, prefix),
    };
    if (edge.condition !== undefined) {
      rewritten.condition = edge.condition;
    }
    expandedEdges.push(rewritten);
  }

  return { nodes: expandedNodes, edges: expandedEdges, idMap };
}
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Multi-Agent Shift — `core.openwop.agents.{deep-research, react, supervisor}`
3
+ * pack-catalog evidence.
4
+ *
5
+ * The three reference agent packs published 2026-05-17 are registry-signed
6
+ * (keyId `openwop-team-1`) but had no in-tree conformance scenarios
7
+ * proving their `agents[]` manifests are reachable via the host pack
8
+ * surface AND that each manifest's contents match the contract documented
9
+ * in `RFCS/0003-agent-packs.md` + `schemas/agent-manifest.schema.json`.
10
+ *
11
+ * This file closes that gap. Three test groups, one per pack. Each group:
12
+ * 1. Skips when the host doesn't advertise `capabilities.agents.supported`
13
+ * OR doesn't expose a pack-listing endpoint (`/v1/packs` returning
14
+ * 404/501 → soft-skip).
15
+ * 2. Locates the pack by name in the host's pack list.
16
+ * 3. Validates the pack's `agents[]` entry against the AgentManifest
17
+ * contract: required fields, agentId namespace pattern, modelClass
18
+ * enum, toolAllowlist format, handoff schema refs.
19
+ *
20
+ * Behavioral assertions (the agent actually researches / reacts / supervises)
21
+ * require an LLM + real agentRuntime host and live outside the public
22
+ * conformance suite. The advertisement-shape + manifest-validity coverage
23
+ * here is the wire-level guarantee a third-party host MUST satisfy to
24
+ * claim "I ship the reference agent packs."
25
+ *
26
+ * @see RFCS/0003-agent-packs.md
27
+ * @see schemas/agent-manifest.schema.json
28
+ * @see packs/core.openwop.agents.{deep-research,react,supervisor}/pack.json
29
+ */
30
+
31
+ import { describe, it, expect } from 'vitest';
32
+ import { driver } from '../lib/driver.js';
33
+ import { isAgentSupported } from '../lib/multi-agent-capabilities.js';
34
+
35
/**
 * Loose shape of the `/v1/packs` response body as read by this file.
 * Every field is optional because the body comes from the host under
 * test and is asserted on, not trusted.
 */
interface PackList {
  packs?: {
    name?: string;
    version?: string;
    agents?: {
      agentId?: string;
      persona?: string;
      modelClass?: string;
      systemPrompt?: string;
      systemPromptRef?: string;
      toolAllowlist?: string[];
      memoryShape?: Record<string, unknown>;
      handoff?: {
        taskSchemaRef?: string;
        returnSchemaRef?: string;
      };
    }[];
  }[];
}
51
+
52
// agentId pattern, per schemas/agent-manifest.schema.json: a namespace
// tier followed by at least two further dot-separated segments.
const AGENT_ID_PATTERN = /^(core|vendor|community|private|local)\.[a-z][a-z0-9_-]*(\.[a-z][a-zA-Z0-9_-]*)+$/;

// modelClass values this suite treats as recognized.
const VALID_MODEL_CLASSES = new Set([
  'reasoning',
  'tool-using',
  'chat',
  'code',
  'vision',
  'multimodal',
  'embedding',
  'classification',
  'retrieval',
  'research',
  'delegate',
]);

// Prefixes a toolAllowlist entry may start with.
const VALID_TOOL_SCOPES = [
  'openwop:',
  'mcp:',
  'vendor.',
  'community.',
  'private.',
  'local.',
  'host:',
];
59
+
60
/**
 * Look a pack up by name in the host's `/v1/packs` listing.
 *
 * @param name Exact pack name to find.
 * @returns The matching pack entry, or `null` when the listing is not
 *   available (any non-200 status, including the 404/501 a host returns
 *   when it has no pack-listing endpoint), when the body carries no
 *   `packs` array, or when no entry has that name.
 */
async function findPack(name: string): Promise<NonNullable<PackList['packs']>[number] | null> {
  const res = await driver.get('/v1/packs');
  // Callers treat "no listing" and "pack not listed" the same way, so
  // every non-200 status collapses to null.
  if (res.status !== 200) return null;
  // A 200 with an empty or non-object body must not throw here.
  const body = res.json as PackList | null | undefined;
  const packs = body?.packs;
  if (!Array.isArray(packs)) return null;
  return packs.find((p) => p.name === name) ?? null;
}
70
+
71
/**
 * Assert that one `agents[]` entry satisfies the AgentManifest checks this
 * suite enforces, plus any per-pack expectations.
 *
 * @param agent The manifest entry taken from the host's pack listing.
 * @param expectations Optional per-pack checks: a substring the agentId
 *   must contain, an exact modelClass, and a minimum toolAllowlist size.
 *   `minTools` is enforced even when the manifest has no `toolAllowlist`
 *   (an absent list counts as zero entries).
 */
function assertAgentManifestShape(
  agent: NonNullable<NonNullable<PackList['packs']>[number]['agents']>[number],
  expectations: { agentIdEndsWith?: string; modelClass?: string; minTools?: number },
): void {
  // Required: agentId, persona, modelClass.
  expect(typeof agent.agentId, 'AgentManifest.agentId MUST be a string').toBe('string');
  expect(typeof agent.persona, 'AgentManifest.persona MUST be a string').toBe('string');
  expect(typeof agent.modelClass, 'AgentManifest.modelClass MUST be a string').toBe('string');

  // agentId pattern (RFCS/0003 §A namespace tiers).
  expect(
    AGENT_ID_PATTERN.test(agent.agentId ?? ''),
    driver.describe(
      'schemas/agent-manifest.schema.json §agentId',
      `agentId "${agent.agentId}" MUST match the namespace-tier pattern`,
    ),
  ).toBe(true);

  // modelClass enum check (loose — the schema declares an enum but
  // hosts MAY extend with research/delegate per the reference packs).
  if (agent.modelClass !== undefined) {
    expect(
      VALID_MODEL_CLASSES.has(agent.modelClass),
      `AgentManifest.modelClass "${agent.modelClass}" SHOULD be a recognized class`,
    ).toBe(true);
  }

  // systemPrompt XOR systemPromptRef.
  const hasInline = typeof agent.systemPrompt === 'string' && agent.systemPrompt.length > 0;
  const hasRef = typeof agent.systemPromptRef === 'string' && agent.systemPromptRef.length > 0;
  expect(
    hasInline !== hasRef,
    'AgentManifest MUST have exactly one of systemPrompt | systemPromptRef',
  ).toBe(true);

  // toolAllowlist: optional, but when present each entry MUST start with a recognized scope.
  const tools: readonly unknown[] | undefined = Array.isArray(agent.toolAllowlist)
    ? agent.toolAllowlist
    : undefined;
  if (tools !== undefined) {
    for (const tool of tools) {
      // The list comes off the wire: a non-string entry must fail the
      // assertion rather than throw on `.startsWith`.
      expect(
        typeof tool === 'string' && VALID_TOOL_SCOPES.some((scope) => tool.startsWith(scope)),
        `toolAllowlist entry "${tool}" MUST start with a recognized scope`,
      ).toBe(true);
    }
  }
  // Enforced outside the presence check so a missing allowlist cannot
  // satisfy a minimum-size expectation.
  if (expectations.minTools !== undefined) {
    expect(
      tools?.length ?? 0,
      `agent's toolAllowlist MUST have at least ${expectations.minTools} entries`,
    ).toBeGreaterThanOrEqual(expectations.minTools);
  }

  // Per-pack expectations.
  if (expectations.agentIdEndsWith !== undefined) {
    expect(agent.agentId ?? '').toContain(expectations.agentIdEndsWith);
  }
  if (expectations.modelClass !== undefined) {
    expect(agent.modelClass).toBe(expectations.modelClass);
  }
}
130
+
131
+ const SKIP = !isAgentSupported(); // true when isAgentSupported() returns false; handed to describe.skipIf for each suite in this file
132
+
133
// deep-research: exactly one agent, modelClass `research`, at least one
// rag/retriever tool, and longTerm memory requested.
describe.skipIf(SKIP)('core.openwop.agents.deep-research — pack catalog evidence', () => {
  it('host pack-list includes deep-research with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.deep-research');
    // Nothing to assert when the host has no /v1/packs or lacks this pack.
    if (pack === null) {
      return;
    }

    expect(pack.version, 'pack version MUST be present').toBeDefined();

    const shipsExactlyOne = Array.isArray(pack.agents) && pack.agents.length === 1;
    expect(shipsExactlyOne, 'deep-research ships exactly one agent').toBe(true);

    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, {
      agentIdEndsWith: 'deep-research',
      modelClass: 'research',
      minTools: 1,
    });

    // Domain-specific: deep-research uses long-term memory + RAG retrievers.
    const allowlist = agent.toolAllowlist ?? [];
    const hasRetrievalTool = allowlist.some((entry) => entry.includes('rag') || entry.includes('retriever'));
    expect(
      hasRetrievalTool,
      'deep-research SHOULD allow at least one rag/retriever tool',
    ).toBe(true);
    expect(
      agent.memoryShape?.longTerm,
      'deep-research MUST request longTerm memory (it persists facts across runs)',
    ).toBe(true);
  });
});
156
+
157
// react: at least one agent, whose manifest carries both handoff schema refs.
describe.skipIf(SKIP)('core.openwop.agents.react — pack catalog evidence', () => {
  it('host pack-list includes react with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.react');
    if (pack === null) {
      return;
    }

    expect(pack.version).toBeDefined();

    const shipsAnAgent = Array.isArray(pack.agents) && pack.agents.length >= 1;
    expect(shipsAnAgent, 'react ships at least one agent').toBe(true);

    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, { agentIdEndsWith: 'react' });

    // ReAct pattern requires handoff schemas (task + return).
    const { handoff } = agent;
    expect(handoff, 'react AgentManifest MUST include a handoff block').toBeDefined();
    expect(typeof handoff?.taskSchemaRef, 'handoff.taskSchemaRef MUST be a string').toBe('string');
    expect(typeof handoff?.returnSchemaRef, 'handoff.returnSchemaRef MUST be a string').toBe('string');
  });
});
173
+
174
// supervisor: at least one agent, modelClass delegate|reasoning, and a handoff block.
describe.skipIf(SKIP)('core.openwop.agents.supervisor — pack catalog evidence', () => {
  it('host pack-list includes supervisor with a well-formed AgentManifest', async () => {
    const pack = await findPack('core.openwop.agents.supervisor');
    if (pack === null) {
      return;
    }

    expect(pack.version).toBeDefined();

    const shipsAnAgent = Array.isArray(pack.agents) && pack.agents.length >= 1;
    expect(shipsAnAgent, 'supervisor ships at least one agent').toBe(true);

    const agent = pack.agents![0]!;
    assertAgentManifestShape(agent, { agentIdEndsWith: 'supervisor' });

    // Supervisor pattern delegates to crew members; its modelClass should
    // be `delegate` or `reasoning` (it makes orchestration decisions).
    const mc = agent.modelClass;
    const isOrchestratorClass = mc === 'delegate' || mc === 'reasoning';
    expect(
      isOrchestratorClass,
      `supervisor SHOULD have modelClass=delegate|reasoning, got "${mc}"`,
    ).toBe(true);

    // Supervisor needs handoff schemas to dispatch work.
    expect(agent.handoff, 'supervisor MUST include handoff schemas').toBeDefined();
  });
});
194
+
195
// Batch coherence: the host lists either none of the reference packs or all three.
describe.skipIf(SKIP)('agent-pack catalog summary', () => {
  it('all three 2026-05-17 reference agent packs are catalog-reachable', async () => {
    const referencePacks = [
      'core.openwop.agents.deep-research',
      'core.openwop.agents.react',
      'core.openwop.agents.supervisor',
    ];

    // Look the packs up one at a time, in order, counting the hits.
    let reachable = 0;
    for (const packName of referencePacks) {
      const entry = await findPack(packName);
      if (entry !== null) {
        reachable += 1;
      }
    }

    // Either none are present (host doesn't ship these — skip) OR all are
    // present (host ships the full reference batch). Half-shipping is a
    // configuration error worth flagging.
    if (reachable === 0) {
      return;
    }
    expect(
      reachable,
      'host SHOULD ship the reference agent packs as a coherent batch (none, or all three)',
    ).toBe(referencePacks.length);
  });
});