@openwop/openwop-conformance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +241 -0
  3. package/api/asyncapi.yaml +481 -0
  4. package/api/openapi.yaml +830 -0
  5. package/api/redocly.yaml +8 -0
  6. package/coverage.md +80 -0
  7. package/dist/cli.js +161 -0
  8. package/fixtures/conformance-a2a-task-roundtrip.json +27 -0
  9. package/fixtures/conformance-agent-identity.json +27 -0
  10. package/fixtures/conformance-agent-low-confidence.json +29 -0
  11. package/fixtures/conformance-agent-memory-cross-tenant.json +28 -0
  12. package/fixtures/conformance-agent-memory-redaction.json +32 -0
  13. package/fixtures/conformance-agent-memory-roundtrip.json +32 -0
  14. package/fixtures/conformance-agent-memory-ttl.json +31 -0
  15. package/fixtures/conformance-agent-pack-export.json +26 -0
  16. package/fixtures/conformance-agent-pack-install.json +26 -0
  17. package/fixtures/conformance-agent-pack-provenance.json +31 -0
  18. package/fixtures/conformance-agent-reasoning.json +29 -0
  19. package/fixtures/conformance-approval.json +27 -0
  20. package/fixtures/conformance-cancellable.json +33 -0
  21. package/fixtures/conformance-cap-breach.json +27 -0
  22. package/fixtures/conformance-capability-missing.json +23 -0
  23. package/fixtures/conformance-channel-ttl.json +60 -0
  24. package/fixtures/conformance-clarification.json +30 -0
  25. package/fixtures/conformance-conversation-capability-negotiation.json +23 -0
  26. package/fixtures/conformance-conversation-lifecycle.json +32 -0
  27. package/fixtures/conformance-conversation-replay.json +33 -0
  28. package/fixtures/conformance-conversation-vs-clarification.json +26 -0
  29. package/fixtures/conformance-delay.json +33 -0
  30. package/fixtures/conformance-dispatch-loop.json +38 -0
  31. package/fixtures/conformance-failure.json +23 -0
  32. package/fixtures/conformance-idempotent.json +30 -0
  33. package/fixtures/conformance-identity.json +32 -0
  34. package/fixtures/conformance-interrupt-auth-required.json +28 -0
  35. package/fixtures/conformance-interrupt-external-event.json +33 -0
  36. package/fixtures/conformance-interrupt-parent-child-cancel-child.json +27 -0
  37. package/fixtures/conformance-interrupt-parent-child-cancel.json +26 -0
  38. package/fixtures/conformance-interrupt-quorum.json +30 -0
  39. package/fixtures/conformance-mcp-tool-roundtrip.json +32 -0
  40. package/fixtures/conformance-message-reducer.json +31 -0
  41. package/fixtures/conformance-multi-node.json +21 -0
  42. package/fixtures/conformance-noop.json +23 -0
  43. package/fixtures/conformance-orchestrator-dispatch.json +47 -0
  44. package/fixtures/conformance-orchestrator-low-confidence.json +41 -0
  45. package/fixtures/conformance-orchestrator-terminate.json +44 -0
  46. package/fixtures/conformance-stream-text.json +26 -0
  47. package/fixtures/conformance-subworkflow-child.json +21 -0
  48. package/fixtures/conformance-subworkflow-parent.json +49 -0
  49. package/fixtures/conformance-version-fold.json +23 -0
  50. package/fixtures/conformance-wasm-pack-roundtrip.json +25 -0
  51. package/fixtures/pack-manifests/pack-private-example.json +26 -0
  52. package/fixtures.md +404 -0
  53. package/package.json +48 -0
  54. package/schemas/README.md +75 -0
  55. package/schemas/agent-manifest.schema.json +107 -0
  56. package/schemas/agent-ref.schema.json +53 -0
  57. package/schemas/capabilities.schema.json +287 -0
  58. package/schemas/channel-written-payload.schema.json +55 -0
  59. package/schemas/conversation-event.schema.json +120 -0
  60. package/schemas/conversation-turn.schema.json +72 -0
  61. package/schemas/debug-bundle.schema.json +196 -0
  62. package/schemas/dispatch-config.schema.json +46 -0
  63. package/schemas/error-envelope.schema.json +25 -0
  64. package/schemas/memory-entry.schema.json +36 -0
  65. package/schemas/memory-list-options.schema.json +21 -0
  66. package/schemas/node-pack-manifest.schema.json +235 -0
  67. package/schemas/orchestrator-decision.schema.json +60 -0
  68. package/schemas/run-event-payloads.schema.json +663 -0
  69. package/schemas/run-event.schema.json +116 -0
  70. package/schemas/run-options.schema.json +81 -0
  71. package/schemas/run-orchestrator-decided-event.schema.json +20 -0
  72. package/schemas/run-snapshot.schema.json +121 -0
  73. package/schemas/suspend-request.schema.json +182 -0
  74. package/schemas/workflow-definition.schema.json +430 -0
  75. package/src/cli.ts +187 -0
  76. package/src/lib/a2a-fake-peer.ts +233 -0
  77. package/src/lib/canaries.ts +186 -0
  78. package/src/lib/driver.ts +96 -0
  79. package/src/lib/env.ts +49 -0
  80. package/src/lib/fixtures.ts +93 -0
  81. package/src/lib/mcp-fake-server.ts +185 -0
  82. package/src/lib/multi-agent-capabilities.ts +155 -0
  83. package/src/lib/multiProcess.ts +141 -0
  84. package/src/lib/otel-collector.ts +312 -0
  85. package/src/lib/paths.ts +198 -0
  86. package/src/lib/polling.ts +81 -0
  87. package/src/lib/profiles.ts +258 -0
  88. package/src/lib/sse.ts +172 -0
  89. package/src/scenarios/a2a-task-roundtrip.test.ts +149 -0
  90. package/src/scenarios/agentConfidenceEscalation.test.ts +61 -0
  91. package/src/scenarios/agentMemoryCrossTenantIsolation.test.ts +54 -0
  92. package/src/scenarios/agentMemoryRedactionContract.test.ts +46 -0
  93. package/src/scenarios/agentMemoryRoundTrip.test.ts +52 -0
  94. package/src/scenarios/agentMemoryTtlExpiry.test.ts +47 -0
  95. package/src/scenarios/agentMessageReducer.test.ts +57 -0
  96. package/src/scenarios/agentMetadata.test.ts +56 -0
  97. package/src/scenarios/agentPackExport.test.ts +45 -0
  98. package/src/scenarios/agentPackInstall.test.ts +50 -0
  99. package/src/scenarios/agentPackProvenance.test.ts +53 -0
  100. package/src/scenarios/agentReasoningEvents.test.ts +72 -0
  101. package/src/scenarios/append-ordering.test.ts +91 -0
  102. package/src/scenarios/approval-payload.test.ts +120 -0
  103. package/src/scenarios/audit-log-integrity.test.ts +106 -0
  104. package/src/scenarios/auth.test.ts +55 -0
  105. package/src/scenarios/byok-roundtrip.test.ts +166 -0
  106. package/src/scenarios/cancellation.test.ts +68 -0
  107. package/src/scenarios/cap-breach.test.ts +149 -0
  108. package/src/scenarios/channel-ttl.test.ts +70 -0
  109. package/src/scenarios/configurable-schema.test.ts +76 -0
  110. package/src/scenarios/conversationCapabilityNegotiation.test.ts +39 -0
  111. package/src/scenarios/conversationLifecycle.test.ts +64 -0
  112. package/src/scenarios/conversationReplayDeterminism.test.ts +52 -0
  113. package/src/scenarios/conversationVsLegacySuspend.test.ts +46 -0
  114. package/src/scenarios/cost-attribution.test.ts +207 -0
  115. package/src/scenarios/debugBundle.test.ts +222 -0
  116. package/src/scenarios/discovery.test.ts +147 -0
  117. package/src/scenarios/dispatchLoop.test.ts +52 -0
  118. package/src/scenarios/errors.test.ts +144 -0
  119. package/src/scenarios/eventOrdering.test.ts +144 -0
  120. package/src/scenarios/failure-path.test.ts +46 -0
  121. package/src/scenarios/fixtures-gating.test.ts +137 -0
  122. package/src/scenarios/fixtures-valid.test.ts +140 -0
  123. package/src/scenarios/highConcurrency.test.ts +263 -0
  124. package/src/scenarios/idempotency.test.ts +83 -0
  125. package/src/scenarios/idempotencyRetry.test.ts +130 -0
  126. package/src/scenarios/identity-passthrough.test.ts +54 -0
  127. package/src/scenarios/interrupt-approval.test.ts +97 -0
  128. package/src/scenarios/interrupt-auth-required-resume.test.ts +88 -0
  129. package/src/scenarios/interrupt-clarification.test.ts +45 -0
  130. package/src/scenarios/interrupt-external-event-correlation.test.ts +113 -0
  131. package/src/scenarios/interrupt-parent-child-cascade.test.ts +102 -0
  132. package/src/scenarios/interrupt-quorum-resolution.test.ts +97 -0
  133. package/src/scenarios/interruptRace.test.ts +176 -0
  134. package/src/scenarios/maliciousManifest.test.ts +154 -0
  135. package/src/scenarios/mcp-discoverability.test.ts +129 -0
  136. package/src/scenarios/mcp-tool-roundtrip.test.ts +149 -0
  137. package/src/scenarios/multi-node-ordering.test.ts +60 -0
  138. package/src/scenarios/multi-region-idempotency.test.ts +52 -0
  139. package/src/scenarios/orchestratorConservativePath.test.ts +63 -0
  140. package/src/scenarios/orchestratorDispatch.test.ts +66 -0
  141. package/src/scenarios/orchestratorTermination.test.ts +54 -0
  142. package/src/scenarios/otel-emission.test.ts +113 -0
  143. package/src/scenarios/otel-trace-propagation.test.ts +90 -0
  144. package/src/scenarios/pack-registry-publish.test.ts +93 -0
  145. package/src/scenarios/pack-registry.test.ts +328 -0
  146. package/src/scenarios/pause-resume.test.ts +109 -0
  147. package/src/scenarios/policies.test.ts +162 -0
  148. package/src/scenarios/profileDerivation.test.ts +335 -0
  149. package/src/scenarios/providerPolicyEnforcement.test.ts +132 -0
  150. package/src/scenarios/rate-limit-envelope.test.ts +97 -0
  151. package/src/scenarios/redaction.test.ts +254 -0
  152. package/src/scenarios/redactionAdversarial.test.ts +162 -0
  153. package/src/scenarios/replay-fork-arbitrary.test.ts +347 -0
  154. package/src/scenarios/replay-fork.test.ts +216 -0
  155. package/src/scenarios/replayDeterminism.test.ts +171 -0
  156. package/src/scenarios/route-coverage.test.ts +129 -0
  157. package/src/scenarios/runs-lifecycle.test.ts +65 -0
  158. package/src/scenarios/runtime-capabilities.test.ts +118 -0
  159. package/src/scenarios/spec-corpus-validity.test.ts +1257 -0
  160. package/src/scenarios/staleClaim.test.ts +223 -0
  161. package/src/scenarios/stream-modes-buffer.test.ts +148 -0
  162. package/src/scenarios/stream-modes-mixed.test.ts +149 -0
  163. package/src/scenarios/stream-modes.test.ts +139 -0
  164. package/src/scenarios/streamReconnect.test.ts +162 -0
  165. package/src/scenarios/subworkflow.test.ts +126 -0
  166. package/src/scenarios/version-negotiation.test.ts +157 -0
  167. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +47 -0
  168. package/src/scenarios/wasm-pack-invoke-completed.test.ts +69 -0
  169. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +74 -0
  170. package/src/scenarios/wasm-pack-load.test.ts +75 -0
  171. package/src/scenarios/wasm-pack-memory-cap.test.ts +43 -0
  172. package/src/scenarios/wasm-pack-replay-determinism.test.ts +61 -0
  173. package/src/scenarios/webhook-sig-algorithm.test.ts +61 -0
  174. package/src/setup.ts +173 -0
  175. package/vitest.config.ts +17 -0
@@ -0,0 +1,8 @@
1
+ extends:
2
+ - recommended
3
+
4
+ rules:
5
+ # Discovery endpoints (`/.well-known/openwop`, `/v1/openapi.json`) are unauthenticated
6
+ # and take no input — they have no meaningful 4XX response surface. 5XX is the
7
+ # only honest failure mode and is documented inline.
8
+ operation-4xx-response: off
package/coverage.md ADDED
@@ -0,0 +1,80 @@
1
+ # OpenWOP Conformance Coverage Map
2
+
3
+ > **Status: Living document. Updated 2026-05-10.** This map connects the current scenario files to the protocol surfaces they protect and records the remaining gaps from the protocol deep dive. Scenario names are source-of-truth file names under `conformance/src/scenarios/`.
4
+
5
+ ---
6
+
7
+ ## Coverage by protocol surface
8
+
9
+ | Surface | Scenario files | Current grade | Remaining gaps |
10
+ |---|---|---|---|
11
+ | Discovery and capability handshake | `discovery.test.ts`, `runtime-capabilities.test.ts`, `profileDerivation.test.ts`, `mcp-discoverability.test.ts` | A | `Capabilities-Etag` optional runtime shape is covered; scoped discovery and non-HTTP handoff remain host-advertised follow-ups. |
12
+ | Auth and errors | `auth.test.ts`, `errors.test.ts`, `policies.test.ts`, `providerPolicyEnforcement.test.ts` | B | OAuth2, API-key rotation, mTLS, richer scope matrix. |
13
+ | Run lifecycle | `runs-lifecycle.test.ts`, `failure-path.test.ts`, `cancellation.test.ts`, `eventOrdering.test.ts` | A- | Restart-during-run production scenario. |
14
+ | Idempotency and retry | `idempotency.test.ts`, `idempotencyRetry.test.ts`, `highConcurrency.test.ts` | A- | Long retention proof beyond the fast CI window. |
15
+ | Interrupts | `interrupt-approval.test.ts`, `interrupt-clarification.test.ts`, `approval-payload.test.ts`, `interruptRace.test.ts`, `interrupt-quorum-resolution.test.ts`, `interrupt-external-event-correlation.test.ts`, `interrupt-auth-required-resume.test.ts`, `interrupt-parent-child-cascade.test.ts` | A- | All four optional profile scenarios landed 2026-05-10. Remaining: positive end-to-end run against a host that advertises every profile. |
16
+ | Streaming | `stream-modes.test.ts`, `stream-modes-buffer.test.ts`, `stream-modes-mixed.test.ts`, `streamReconnect.test.ts` | A | Browser/proxy timeout matrix and long-running stream soak. |
17
+ | Replay and fork | `replay-fork.test.ts`, `replayDeterminism.test.ts`, `staleClaim.test.ts` | A- | Fork from arbitrary event types and retention-expiry behavior remain uncovered; retention/privacy/scoring semantics are now specified in `replay.md`. |
18
+ | Capabilities and limits | `cap-breach.test.ts`, `dispatchLoop.test.ts` | B+ | Clarification/schema/envelope cap-breach fixtures beyond node-execution cap. |
19
+ | State channels and reducers | `channel-ttl.test.ts` | B+ | Cross-adapter reducer consistency and conflict cases. |
20
+ | Sub-workflows and dispatch | `subworkflow.test.ts`, `multi-node-ordering.test.ts` | B+ | Parallel fan-out floors by scale tier, parent/child cancellation. |
21
+ | Node packs and registry | `pack-registry.test.ts`, `pack-registry-publish.test.ts`, `maliciousManifest.test.ts`, `wasm-pack-load.test.ts`, `wasm-pack-invoke-completed.test.ts`, `wasm-pack-invoke-suspended.test.ts`, `wasm-pack-replay-determinism.test.ts`, `wasm-pack-memory-cap.test.ts`, `wasm-pack-abi-version-rejection.test.ts` | A- | RFC 0008 WASM ABI scenarios landed 2026-05-10; gated on `capabilities.nodePackRuntimes.wasm.supported`. Remaining: hosted registry interoperability once `packs.openwop.dev` exists; deliberately-misbehaving pack for memory-cap + ABI-version-rejection positive paths. |
22
+ | Secrets and redaction | `redaction.test.ts`, `redactionAdversarial.test.ts`, `byok-roundtrip.test.ts` | A- | Cross-provider BYOK matrix and debug-bundle redaction under high volume. |
23
+ | Observability and diagnostics | `cost-attribution.test.ts`, `debugBundle.test.ts`, `otel-emission.test.ts`, `otel-trace-propagation.test.ts` | B+ | OTLP/HTTP-JSON receiver harness now wired; opt-in via `OPENWOP_OTEL_COLLECTOR=true`. Remaining: real-OTLP-protobuf path, metric-emission scenario, debug-bundle truncation. |
24
+ | Fixtures and corpus validity | `fixtures-valid.test.ts`, `fixtures-gating.test.ts`, `spec-corpus-validity.test.ts` | A | Keep fixture manifest synchronized as new optional profiles land. |
25
+ | Run control — pause/resume | `pause-resume.test.ts` | B | Lifecycle + 409-on-non-paused covered; remaining: pause-during-suspend race, immediate-vs-drain-current-node policy assertion. |
26
+ | Rate-limit envelope | `rate-limit-envelope.test.ts` | B- | Shape validation when 429 observed; remaining: deterministic 429-induction harness so the scenario reliably triggers under CI. |
27
+ | Per-workflow `configurableSchema` | `configurable-schema.test.ts` | C+ | Negative validation covered; remaining: positive accepted-overlay scenario + `GET /v1/workflows/{id}` schema surface assertion. |
28
+ | Append-reducer ordering | `append-ordering.test.ts` | B | Intra-engine sequence-order check; remaining: cross-engine ordering under a multi-engine fixture. |
29
+ | Webhook signature algorithms | `webhook-sig-algorithm.test.ts` | C+ | Discovery shape covered; remaining: end-to-end signed delivery exercising `X-openwop-Signature-Algorithm: v1`. |
30
+ | Audit-log integrity profile | `audit-log-integrity.test.ts` | C+ | Profile claim + `/v1/audit/verify` shape covered; remaining: tamper-detection scenario (requires admin access to host's audit store) + multi-checkpoint chain verification. |
31
+ | Multi-region idempotency capability | `multi-region-idempotency.test.ts` | C | Discovery enum coverage; remaining: cross-region partition simulation (requires multi-region harness). |
32
+
33
+ ---
34
+
35
+ ## Endpoint Coverage Manifest
36
+
37
+ Every OpenAPI operation should have:
38
+
39
+ 1. At least one positive scenario.
40
+ 2. At least one auth failure scenario where auth applies.
41
+ 3. At least one validation or conflict scenario where the operation accepts input.
42
+ 4. A cited spec section in each assertion message.
43
+
44
+ | Operation ID | Positive coverage | Negative / auth / validation coverage | Gap |
45
+ |---|---|---|---|
46
+ | `getCapabilities` | `discovery.test.ts`, `runtime-capabilities.test.ts`, `profileDerivation.test.ts`, `mcp-discoverability.test.ts` | `discovery.test.ts` covers optional `Capabilities-Etag`; `spec-corpus-validity.test.ts` validates schema shape | Add scoped discovery scenario when a host advertises it. |
47
+ | `getOpenApiSpec` | `discovery.test.ts` | `spec-corpus-validity.test.ts` validates OpenAPI refs | Add unavailable/transient error scenario only if host can simulate it. |
48
+ | `getWorkflow` | `route-coverage.test.ts`; fixture-dependent lifecycle tests indirectly require seeded workflow IDs | `route-coverage.test.ts` covers unknown workflow `404`/`403` envelope | Good. |
49
+ | `createRun` | `runs-lifecycle.test.ts`, `identity-passthrough.test.ts`, `failure-path.test.ts`, fixture scenarios | `auth.test.ts`, `errors.test.ts`, `idempotency.test.ts`, `idempotencyRetry.test.ts` | Strong baseline; add per-field validation matrix. |
50
+ | `getRun` | Lifecycle, cancellation, interrupt, replay, and subworkflow tests poll snapshots | `failure-path.test.ts`, `errors.test.ts` | Add explicit unknown-run `404` scenario if not already covered through helper assertions. |
51
+ | `streamRunEvents` | `stream-modes.test.ts`, `stream-modes-buffer.test.ts`, `stream-modes-mixed.test.ts`, `streamReconnect.test.ts` | Unsupported mode and invalid buffer assertions | Add long-running proxy timeout soak outside fast CI. |
52
+ | `pollRunEvents` | `multi-node-ordering.test.ts`, `version-negotiation.test.ts`, redaction tests | Past-end and validation assertions | Good. Add malformed `lastSequence` if missing. |
53
+ | `cancelRun` | `cancellation.test.ts` | Unknown/terminal idempotency cases partial | Add explicit already-terminal cancel behavior. |
54
+ | `pauseRun` | Lifecycle scenarios cover paused state via `runs-lifecycle.test.ts` (`run.paused` event projection) | None dedicated yet | Add explicit `pauseRun` route exerciser (running → paused, paused → resumed, error envelope on terminal target). |
55
+ | `resumeRun` | Lifecycle scenarios cover resumed state via `runs-lifecycle.test.ts` (`run.resumed` event projection) | None dedicated yet | Add explicit `resumeRun` route exerciser (paused → running, error envelope on running / terminal target). |
56
+ | `forkRun` | `replay-fork.test.ts`, `replayDeterminism.test.ts` | Negative `fromSeq`, past-end, unknown source, invalid overlay | Add arbitrary-event fork and retention-expired source. |
57
+ | `resolveInterruptByRun` | `interrupt-approval.test.ts`, `interrupt-clarification.test.ts`, `approval-payload.test.ts`, `interruptRace.test.ts` | Invalid action, unknown node, race cases | Add auth-required and quorum profile scenarios. |
58
+ | `inspectInterruptByToken` | Interrupt token coverage partial | Missing explicit token-inspect matrix | Add expired, malformed, and already-resolved token cases. |
59
+ | `resolveInterruptByToken` | Interrupt token coverage partial | Missing explicit token-resolve matrix | Add expired, malformed, wrong-action, and replayed-token cases. |
60
+ | `getArtifact` | Indirect through approval payload fixtures | `route-coverage.test.ts` covers unknown artifact `404`/`403` envelope | Add positive artifact read and explicit scope failure scenarios. |
61
+ | `registerWebhook` | Webhook spec exists | `route-coverage.test.ts` covers invalid URL validation envelope | Add positive registration with a test receiver when harness support exists. |
62
+ | `unregisterWebhook` | Webhook spec exists | `route-coverage.test.ts` covers unknown subscription behavior | Add full register-then-unregister roundtrip with a test receiver. |
63
+
64
+ ---
65
+
66
+ ## Gap closure plan
67
+
68
+ | Priority | Work item | Target docs |
69
+ |---|---|---|
70
+ | P0 | Add production-profile scenarios for backpressure envelope, retry durability, stale-claim recovery, and debug-bundle truncation. | `production-profile.md`, `scale-profiles.md`, `storage-adapters.md`, `debug-bundle.md` |
71
+ | P1 | Add auth-profile scenarios for API-key rotation and OAuth2 client-credentials where test issuer metadata is available. | `auth.md`, `auth-profiles.md` |
72
+ | ✅ done | Interrupt-profile scenarios for quorum, external-event, auth-required, and parent/child cascade — landed 2026-05-10. | `interrupt.md`, `interrupt-profiles.md` |
73
+ | P1 | Convert endpoint manifest into generated coverage evidence from `api/openapi.yaml` operation IDs. | `rest-endpoints.md` |
74
+ | ✅ done | MCP and A2A synthetic-peer roundtrip scenarios landed 2026-05-10 (`mcp-tool-roundtrip.test.ts`, `a2a-task-roundtrip.test.ts`); opt-in via `OPENWOP_MCP_FAKE_SERVER=true` / `OPENWOP_A2A_FAKE_PEER=true`. | `mcp-integration.md`, `a2a-integration.md` |
75
+ | P2 | Add replay retention and fork-from-arbitrary-event coverage. | `replay.md` |
76
+ | P1 | Deterministic 429-induction harness so `rate-limit-envelope.test.ts` triggers reliably under CI (currently observational). | `rest-endpoints.md` |
77
+ | P1 | Add tamper-detection scenario for `audit-log-integrity.test.ts` — requires admin write access to the host's audit store. | `auth-profiles.md` |
78
+ | P2 | Cross-engine append-ordering scenario (multi-engine fixture). | `channels-and-reducers.md` |
79
+ | P2 | End-to-end webhook signed-delivery test exercising `X-openwop-Signature-Algorithm: v1`. | `webhooks.md` |
80
+ | P2 | Conformance scenarios that cite normative RFC docs (not just schemas) for the multi-agent surfaces. | RFCS/0002–0007 |
package/dist/cli.js ADDED
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * `openwop-conformance` — operator-facing CLI for running the openwop
4
+ * conformance suite against a deployed server.
5
+ *
6
+ * Wraps `vitest` with friendlier args + structured exit codes so it
7
+ * works as the `npm test` entry for downstream packages.
8
+ *
9
+ * Usage:
10
+ * openwop-conformance --base-url https://api.example.com --api-key hk_test_123
11
+ * openwop-conformance --offline # server-free subset only
12
+ * openwop-conformance --filter discovery # category filter
13
+ * openwop-conformance --base-url ... --api-key ... --filter "interrupt|cancellation"
14
+ *
15
+ * Environment variables are read as fallbacks; when the matching flag is
16
+ * also given, the flag takes precedence:
17
+ * OPENWOP_BASE_URL, OPENWOP_API_KEY, OPENWOP_IMPLEMENTATION_NAME,
18
+ * OPENWOP_IMPLEMENTATION_VERSION, OPENWOP_LIFECYCLE_TIMEOUT_MS
19
+ *
20
+ * Exit codes:
21
+ * 0 all scenarios pass
22
+ * 1 one or more scenarios failed
23
+ * 2 suite couldn't start (missing required args, etc)
24
+ */
25
+ import { spawnSync } from 'node:child_process';
26
+ import { fileURLToPath } from 'node:url';
27
+ import { dirname, resolve as resolvePath } from 'node:path';
28
/**
 * Parse the CLI argument vector into the options record used by `main`.
 *
 * Recognised switches (no value): `-h` / `--help`, `--offline`.
 * Recognised value flags: `--base-url`, `--api-key`, `--filter`,
 * `--impl` / `--implementation-name`, `--impl-version` /
 * `--implementation-version`. A value may be given inline (`--flag=value`)
 * or as the following token, provided that token does not start with `-`.
 *
 * Anything else (unknown flags, stray positionals, switches written with an
 * `=` suffix) is silently dropped; it is NOT forwarded anywhere by this
 * function.
 *
 * @param {string[]} argv Arguments after the node binary and script path.
 * @returns {{baseUrl: (string|undefined), apiKey: (string|undefined),
 *            offline: boolean, filter: (string|undefined), help: boolean,
 *            impl: (string|undefined), implVersion: (string|undefined)}}
 */
function parseArgs(argv) {
    // Flag spelling -> key on the result record. A Map (rather than a plain
    // object) keeps tokens such as `constructor` from matching by accident.
    const valueFlags = new Map([
        ['--base-url', 'baseUrl'],
        ['--api-key', 'apiKey'],
        ['--filter', 'filter'],
        ['--impl', 'impl'],
        ['--implementation-name', 'impl'],
        ['--impl-version', 'implVersion'],
        ['--implementation-version', 'implVersion'],
    ]);
    const parsed = {
        baseUrl: undefined,
        apiKey: undefined,
        offline: false,
        filter: undefined,
        help: false,
        impl: undefined,
        implVersion: undefined,
    };
    let cursor = 0;
    while (cursor < argv.length) {
        const token = argv[cursor] ?? '';
        cursor += 1;
        // Boolean switches are matched on the whole token only.
        if (token === '-h' || token === '--help') {
            parsed.help = true;
            continue;
        }
        if (token === '--offline') {
            parsed.offline = true;
            continue;
        }
        // Split `--flag=value` at the FIRST `=` so values may contain `=`.
        const splitAt = token.indexOf('=');
        const flagName = splitAt < 0 ? token : token.slice(0, splitAt);
        const target = valueFlags.get(flagName);
        if (target === undefined) {
            // Unrecognised token: ignored.
            continue;
        }
        if (splitAt >= 0) {
            // Inline form; an empty value (`--flag=`) yields ''.
            parsed[target] = token.slice(splitAt + 1);
            continue;
        }
        // Separate-token form: consume the next token unless it looks like
        // another flag. A flag with no usable value resets the option to
        // undefined, so the last occurrence always wins.
        const candidate = argv[cursor];
        if (candidate !== undefined && !candidate.startsWith('-')) {
            parsed[target] = candidate;
            cursor += 1;
        }
        else {
            parsed[target] = undefined;
        }
    }
    return parsed;
}
85
+ const HELP_TEXT = `openwop-conformance — run the openwop conformance suite against a server
86
+
87
+ Usage:
88
+ openwop-conformance [options]
89
+
90
+ Required (unless --offline):
91
+ --base-url <url> openwop server base URL (or set OPENWOP_BASE_URL env var)
92
+ --api-key <key> Bearer-style API key (or set OPENWOP_API_KEY env var)
93
+
94
+ Filtering:
95
+ --offline Run only the server-free subset (fixtures + spec corpus)
96
+ --filter <pattern> Pass through to vitest --testNamePattern
97
+
98
+ Implementation labels (cosmetic — surface in failure messages):
99
+ --impl <name> Implementation name (env: OPENWOP_IMPLEMENTATION_NAME)
100
+ --impl-version <version> Implementation version (env: OPENWOP_IMPLEMENTATION_VERSION)
101
+
102
+ Other:
103
+ --help, -h Show this message
104
+
105
+ Examples:
106
+ openwop-conformance --offline
107
+ openwop-conformance --base-url https://api.example.com --api-key hk_test_abc
108
+ openwop-conformance --filter "discovery|errors"
109
+ `;
110
/**
 * CLI entry point: parse flags, prepare the child environment, and run
 * vitest synchronously from the package root.
 *
 * Process exit codes produced here:
 *   0  after printing help
 *   2  when the base URL / API key are missing (and `--offline` is not set),
 *      or when the vitest child process could not be spawned
 *   otherwise the child's exit status, or 1 if it reported no status
 */
function main() {
    const opts = parseArgs(process.argv.slice(2));
    if (opts.help) {
        process.stdout.write(HELP_TEXT);
        process.exit(0);
    }
    // Start from the inherited environment and layer supplied flags on top:
    // a flag that was given replaces the matching variable; when a flag is
    // absent the inherited environment value (if any) is kept.
    const childEnv = { ...process.env };
    const flagOverrides = [
        ['OPENWOP_BASE_URL', opts.baseUrl],
        ['OPENWOP_API_KEY', opts.apiKey],
        ['OPENWOP_IMPLEMENTATION_NAME', opts.impl],
        ['OPENWOP_IMPLEMENTATION_VERSION', opts.implVersion],
    ];
    for (const [variable, value] of flagOverrides) {
        if (value) {
            childEnv[variable] = value;
        }
    }
    // A live run needs both a target URL and a credential.
    const hasTarget = Boolean(childEnv.OPENWOP_BASE_URL) && Boolean(childEnv.OPENWOP_API_KEY);
    if (!opts.offline && !hasTarget) {
        process.stderr.write('openwop-conformance: --base-url and --api-key are required (or use --offline).\n' +
            'Run `openwop-conformance --help` for usage.\n');
        process.exit(2);
    }
    // The package root is the parent of this script's directory, so the CLI
    // does not depend on the caller's working directory.
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const packageRoot = resolvePath(scriptDir, '..');
    // `--config` is passed explicitly so vitest uses this package's config
    // instead of discovering one in an ancestor directory. `--offline`
    // narrows the run to the two server-free scenario files.
    const vitestArgs = [
        'run',
        '--config',
        resolvePath(packageRoot, 'vitest.config.ts'),
        ...(opts.offline
            ? ['src/scenarios/fixtures-valid.test.ts', 'src/scenarios/spec-corpus-validity.test.ts']
            : []),
        ...(opts.filter ? ['--testNamePattern', opts.filter] : []),
    ];
    const outcome = spawnSync('npx', ['vitest', ...vitestArgs], {
        cwd: packageRoot,
        env: childEnv,
        stdio: 'inherit',
    });
    if (outcome.error) {
        process.stderr.write(`openwop-conformance: failed to spawn vitest: ${String(outcome.error)}\n`);
        process.exit(2);
    }
    // No status reported by the child is treated as a failure.
    process.exit(outcome.status ?? 1);
}
161
+ main();
@@ -0,0 +1,27 @@
1
+ {
2
+ "id": "conformance-a2a-task-roundtrip",
3
+ "name": "Conformance: A2A Task Roundtrip",
4
+ "version": "1.0",
5
+ "description": "Track 6 fixture for `a2a-task-roundtrip.test.ts`. Workflow invokes a single A2A peer task; the conformance suite stands up a synthetic A2A peer at startup (`OPENWOP_A2A_FAKE_PEER=true`) and operators configure the host to consume the printed AgentCard URL. The fixture covers drift points #3 (`AUTH_REQUIRED` → `waiting-input`) and #4 (`REJECTED` → `failed`) per `a2a-integration.md` §State projection. The `driftScenario` input lets the test toggle which projection to exercise; hosts that don't implement A2A consumption can stub this fixture as `core.noop` and the host-mediated subtest skips when no task POST reaches the synthetic peer.",
6
+ "nodes": [
7
+ {
8
+ "id": "a2a-invoke",
9
+ "typeId": "core.a2a.invoke",
10
+ "name": "Invoke A2A peer",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "skill": "echo"
14
+ },
15
+ "inputs": {}
16
+ }
17
+ ],
18
+ "edges": [],
19
+ "triggers": [
20
+ { "id": "manual", "type": "manual", "enabled": true }
21
+ ],
22
+ "variables": [
23
+ { "name": "driftScenario", "type": "string", "defaultValue": "happy-path" }
24
+ ],
25
+ "metadata": { "tags": ["conformance", "a2a", "roundtrip", "state-projection"] },
26
+ "settings": { "timeout": 0 }
27
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "id": "conformance-agent-identity",
3
+ "name": "Conformance: Agent Identity",
4
+ "version": "1.0",
5
+ "description": "Phase 1. Single-node run with an `agent` pin on the node. Host MUST populate `RunSnapshot.agent` (or `runOrchestrator` for supervisor-bound fixtures) with an AgentRef-shaped value. See agentMetadata.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "echo",
9
+ "typeId": "core.identity",
10
+ "name": "Identity (agent-pinned)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {},
14
+ "agent": {
15
+ "agentId": "core.conformance.identity-agent",
16
+ "modelClass": "chat"
17
+ }
18
+ }
19
+ ],
20
+ "edges": [],
21
+ "triggers": [
22
+ { "id": "manual", "type": "manual", "enabled": true }
23
+ ],
24
+ "variables": [],
25
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-1", "identity"] },
26
+ "settings": { "timeout": 10000 }
27
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "id": "conformance-agent-low-confidence",
3
+ "name": "Conformance: Agent Low-Confidence Escalation",
4
+ "version": "1.0",
5
+ "description": "Phase 1 / CP-1 contract. Agent emits `agent.decided` with confidence below the default 0.7 escalation threshold (host mock confidence is 0.5). Host MUST suspend via `node.suspended { reason: 'low-confidence' }` and transition run to `'waiting-approval'`. See agentConfidenceEscalation.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "low-conf-decider",
9
+ "typeId": "core.identity",
10
+ "name": "Low-Confidence Decider",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "mockConfidence": 0.5
14
+ },
15
+ "inputs": {},
16
+ "agent": {
17
+ "agentId": "core.conformance.low-conf-decider",
18
+ "modelClass": "reasoning"
19
+ }
20
+ }
21
+ ],
22
+ "edges": [],
23
+ "triggers": [
24
+ { "id": "manual", "type": "manual", "enabled": true }
25
+ ],
26
+ "variables": [],
27
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-1", "low-confidence", "cp-1"] },
28
+ "settings": { "timeout": 15000 }
29
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "id": "conformance-agent-memory-cross-tenant",
3
+ "name": "Conformance: Agent Memory Cross-Tenant Isolation (CTI-1)",
4
+ "version": "1.0",
5
+ "description": "Phase 3 / CTI-1. Issues a deliberately-cross-tenant memoryRef probe (a path referencing tenant B from a run owned by tenant A). Host MUST return [] / null — no leak. Probe result lands in `crossTenantProbe` variable. See agentMemoryCrossTenantIsolation.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "cross-tenant-probe",
9
+ "typeId": "core.identity",
10
+ "name": "Cross-Tenant Memory Probe",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "memoryAction": "cross-tenant-probe",
14
+ "probeMemoryRef": "conformance/another-tenant/agent-memory"
15
+ },
16
+ "inputs": {}
17
+ }
18
+ ],
19
+ "edges": [],
20
+ "triggers": [
21
+ { "id": "manual", "type": "manual", "enabled": true }
22
+ ],
23
+ "variables": [
24
+ { "name": "crossTenantProbe", "type": "array" }
25
+ ],
26
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-3", "memory", "cti-1", "security"] },
27
+ "settings": { "timeout": 10000 }
28
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "id": "conformance-agent-memory-redaction",
3
+ "name": "Conformance: Agent Memory Redaction (SR-1)",
4
+ "version": "1.0",
5
+ "description": "Phase 3 / SR-1. Resolves a BYOK test-mode secret, writes a memory entry containing the plaintext, reads it back. The read-side `memoryReadback.content` MUST contain [REDACTED:<secretId>] in place of the plaintext. See agentMemoryRedactionContract.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "memory-redaction-probe",
9
+ "typeId": "core.identity",
10
+ "name": "Memory Redaction Probe",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "memoryAction": "redaction-probe",
14
+ "byokSecretId": "conformance-test-secret"
15
+ },
16
+ "inputs": {},
17
+ "agent": {
18
+ "agentId": "core.conformance.byok-agent",
19
+ "memoryRef": "conformance/agent-memory-redaction"
20
+ }
21
+ }
22
+ ],
23
+ "edges": [],
24
+ "triggers": [
25
+ { "id": "manual", "type": "manual", "enabled": true }
26
+ ],
27
+ "variables": [
28
+ { "name": "memoryReadback", "type": "object" }
29
+ ],
30
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-3", "memory", "sr-1", "byok"] },
31
+ "settings": { "timeout": 15000 }
32
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "id": "conformance-agent-memory-roundtrip",
3
+ "name": "Conformance: Agent Memory Round-Trip",
4
+ "version": "1.0",
5
+ "description": "Phase 3. Host writes a memory entry via its host-internal trigger surface, then reads it back through MemoryAdapter.list(). The read-back result MUST conform to schemas/memory-entry.schema.json and land in workflow variable `memoryReadback`. See agentMemoryRoundTrip.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "memory-roundtrip",
9
+ "typeId": "core.identity",
10
+ "name": "Memory Round-Trip",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "memoryAction": "write-then-read"
14
+ },
15
+ "inputs": {},
16
+ "agent": {
17
+ "agentId": "core.conformance.memory-agent",
18
+ "memoryRef": "conformance/agent-memory-roundtrip",
19
+ "modelClass": "chat"
20
+ }
21
+ }
22
+ ],
23
+ "edges": [],
24
+ "triggers": [
25
+ { "id": "manual", "type": "manual", "enabled": true }
26
+ ],
27
+ "variables": [
28
+ { "name": "memoryReadback", "type": "object" }
29
+ ],
30
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-3", "memory"] },
31
+ "settings": { "timeout": 15000 }
32
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "id": "conformance-agent-memory-ttl",
3
+ "name": "Conformance: Agent Memory TTL Expiry",
4
+ "version": "1.0",
5
+ "description": "Phase 3. Writes two memory entries — one with `expiresAt` in the past, one in the future — then lists. The result MUST contain only the future-dated entry. List result lands in `memoryList` variable. See agentMemoryTtlExpiry.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "memory-ttl-probe",
9
+ "typeId": "core.identity",
10
+ "name": "Memory TTL Probe",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "memoryAction": "ttl-probe"
14
+ },
15
+ "inputs": {},
16
+ "agent": {
17
+ "agentId": "core.conformance.ttl-agent",
18
+ "memoryRef": "conformance/agent-memory-ttl"
19
+ }
20
+ }
21
+ ],
22
+ "edges": [],
23
+ "triggers": [
24
+ { "id": "manual", "type": "manual", "enabled": true }
25
+ ],
26
+ "variables": [
27
+ { "name": "memoryList", "type": "array" }
28
+ ],
29
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-3", "memory", "ttl"] },
30
+ "settings": { "timeout": 10000 }
31
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "id": "conformance-agent-pack-export",
3
+ "name": "Conformance: Agent Pack Export",
4
+ "version": "1.0",
5
+ "description": "Phase 2. No-op workflow; the actual assertion is at GET /v1/packs/export — host MUST project workspace agents to AgentManifest shape with required fields. See agentPackExport.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "noop",
9
+ "typeId": "core.identity",
10
+ "name": "Noop (pack-export fixture)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {}
14
+ }
15
+ ],
16
+ "edges": [],
17
+ "triggers": [
18
+ { "id": "manual", "type": "manual", "enabled": true }
19
+ ],
20
+ "variables": [],
21
+ "metadata": {
22
+ "tags": ["conformance", "multi-agent", "phase-2", "pack-export"],
23
+ "requiresWorkspaceAgent": "core.conformance.exportable-agent"
24
+ },
25
+ "settings": { "timeout": 5000 }
26
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "id": "conformance-agent-pack-install",
3
+ "name": "Conformance: Agent Pack Install",
4
+ "version": "1.0",
5
+ "description": "Phase 2. Installs a pack carrying `agents[]` entries; verifies the host's pack registry surfaces them as AgentManifest-shaped objects. Fixture is a no-op workflow; the actual install/list assertion happens at the registry surface (GET /v1/packs). See agentPackInstall.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "noop",
9
+ "typeId": "core.identity",
10
+ "name": "Noop (pack-install fixture)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {}
14
+ }
15
+ ],
16
+ "edges": [],
17
+ "triggers": [
18
+ { "id": "manual", "type": "manual", "enabled": true }
19
+ ],
20
+ "variables": [],
21
+ "metadata": {
22
+ "tags": ["conformance", "multi-agent", "phase-2", "pack-install"],
23
+ "requiresInstalledPack": "core.conformance.agent-pack"
24
+ },
25
+ "settings": { "timeout": 5000 }
26
+ }
@@ -0,0 +1,31 @@
1
+ {
2
+ "id": "conformance-agent-pack-provenance",
3
+ "name": "Conformance: Agent Pack Provenance",
4
+ "version": "1.0",
5
+ "description": "Phase 2. Runs a node pinned to a pack-installed agent; verifies the runtime AgentRef carries `sourceManifestId` provenance back to the install source. See agentPackProvenance.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "pack-installed-node",
9
+ "typeId": "core.identity",
10
+ "name": "Pack-Installed Agent Node",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {},
14
+ "agent": {
15
+ "agentId": "core.conformance.pack-installed-agent",
16
+ "sourceManifestId": "core.conformance.agent-pack.installed-agent",
17
+ "modelClass": "chat"
18
+ }
19
+ }
20
+ ],
21
+ "edges": [],
22
+ "triggers": [
23
+ { "id": "manual", "type": "manual", "enabled": true }
24
+ ],
25
+ "variables": [],
26
+ "metadata": {
27
+ "tags": ["conformance", "multi-agent", "phase-2", "provenance"],
28
+ "requiresInstalledPack": "core.conformance.agent-pack"
29
+ },
30
+ "settings": { "timeout": 10000 }
31
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "id": "conformance-agent-reasoning",
3
+ "name": "Conformance: Agent Reasoning Events",
4
+ "version": "1.0",
5
+ "description": "Phase 1. Drives an agent through a reasoning trace + tool call + handoff sequence. Host's mock provider emits agent.reasoned / agent.toolCalled / agent.toolReturned / agent.handoff / agent.decided events with conformant payloads. See agentReasoningEvents.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "reasoner",
9
+ "typeId": "core.identity",
10
+ "name": "Reasoning Agent",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "emitReasoningTrace": true
14
+ },
15
+ "inputs": {},
16
+ "agent": {
17
+ "agentId": "core.conformance.reasoning-agent",
18
+ "modelClass": "reasoning"
19
+ }
20
+ }
21
+ ],
22
+ "edges": [],
23
+ "triggers": [
24
+ { "id": "manual", "type": "manual", "enabled": true }
25
+ ],
26
+ "variables": [],
27
+ "metadata": { "tags": ["conformance", "multi-agent", "phase-1", "reasoning"] },
28
+ "settings": { "timeout": 15000 }
29
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "id": "conformance-approval",
3
+ "name": "Conformance: Approval",
4
+ "version": "1.0",
5
+ "description": "Suspends on an approval interrupt. Resume schema: {action: 'accept'|'reject'}. Verifies HITL primitive end-to-end.",
6
+ "nodes": [
7
+ {
8
+ "id": "gate",
9
+ "typeId": "core.approvalGate",
10
+ "name": "Approval Gate",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "title": "Conformance approval",
14
+ "description": "Conformance suite — please accept to complete the run.",
15
+ "actions": ["accept", "reject"]
16
+ },
17
+ "inputs": {}
18
+ }
19
+ ],
20
+ "edges": [],
21
+ "triggers": [
22
+ { "id": "manual", "type": "manual", "enabled": true }
23
+ ],
24
+ "variables": [],
25
+ "metadata": { "tags": ["conformance", "hitl"] },
26
+ "settings": { "timeout": 0 }
27
+ }
@@ -0,0 +1,33 @@
1
+ {
2
+ "id": "conformance-cancellable",
3
+ "name": "Conformance: Cancellable",
4
+ "version": "1.0",
5
+ "description": "Long-running delay node. Conformance test starts a run, then issues :cancel; expects terminal `cancelled` within 5s.",
6
+ "nodes": [
7
+ {
8
+ "id": "wait",
9
+ "typeId": "core.delay",
10
+ "name": "Wait",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {
14
+ "delayMs": { "type": "variable", "variableName": "delayMs" }
15
+ }
16
+ }
17
+ ],
18
+ "edges": [],
19
+ "triggers": [
20
+ { "id": "manual", "type": "manual", "enabled": true }
21
+ ],
22
+ "variables": [
23
+ {
24
+ "name": "delayMs",
25
+ "type": "number",
26
+ "description": "Sleep duration (1..60000 ms). Conformance test sets this long enough to issue cancel mid-flight.",
27
+ "required": true,
28
+ "defaultValue": 30000
29
+ }
30
+ ],
31
+ "metadata": { "tags": ["conformance"] },
32
+ "settings": { "timeout": 120000 }
33
+ }