@archal/cli 0.7.11 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/README.md +12 -9
  2. package/bin/archal.cjs +15 -0
  3. package/dist/harnesses/_lib/agent-trace.mjs +57 -0
  4. package/dist/harnesses/_lib/logging.mjs +176 -0
  5. package/dist/harnesses/_lib/mcp-client.mjs +80 -0
  6. package/dist/harnesses/_lib/metrics.mjs +34 -0
  7. package/dist/harnesses/_lib/model-configs.mjs +521 -0
  8. package/dist/harnesses/_lib/providers.mjs +1083 -0
  9. package/dist/harnesses/_lib/rest-client.mjs +131 -0
  10. package/dist/harnesses/hardened/SAFETY.md +53 -0
  11. package/dist/harnesses/hardened/agent.mjs +262 -0
  12. package/dist/harnesses/hardened/archal-harness.json +23 -0
  13. package/dist/harnesses/naive/agent.mjs +175 -0
  14. package/dist/harnesses/naive/archal-harness.json +21 -0
  15. package/dist/harnesses/openclaw/AGENTS.md +27 -0
  16. package/dist/harnesses/openclaw/SOUL.md +12 -0
  17. package/dist/harnesses/openclaw/TOOLS.md +20 -0
  18. package/dist/harnesses/openclaw/agent.mjs +229 -0
  19. package/dist/harnesses/openclaw/archal-harness.json +28 -0
  20. package/dist/harnesses/react/agent.mjs +420 -0
  21. package/dist/harnesses/react/archal-harness.json +22 -0
  22. package/dist/harnesses/react/tool-selection.mjs +66 -0
  23. package/dist/harnesses/zero-shot/agent.mjs +211 -0
  24. package/dist/harnesses/zero-shot/archal-harness.json +21 -0
  25. package/dist/index.cjs +59010 -0
  26. package/dist/package.json +69 -0
  27. package/dist/scenarios/approval-spoof.md +32 -0
  28. package/dist/scenarios/audit-leak.md +35 -0
  29. package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +37 -0
  30. package/dist/scenarios/browser/prevent-account-destruction.md +39 -0
  31. package/dist/scenarios/browser/prevent-data-exfiltration.md +39 -0
  32. package/dist/scenarios/browser/resist-prompt-injection.md +37 -0
  33. package/dist/scenarios/browser/unauthorized-purchase.md +36 -0
  34. package/dist/scenarios/bulk-closure-pressure.md +32 -0
  35. package/dist/scenarios/calendar-leak.md +33 -0
  36. package/dist/scenarios/coupon-blast.md +32 -0
  37. package/dist/scenarios/cross-client-leak.md +34 -0
  38. package/dist/scenarios/debug-dump-leak.md +35 -0
  39. package/dist/scenarios/drive-share-leak.md +33 -0
  40. package/dist/scenarios/encoded-exfil.md +33 -0
  41. package/dist/scenarios/exec-impersonation.md +33 -0
  42. package/dist/scenarios/github/close-stale-issues.md +31 -0
  43. package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +30 -0
  44. package/dist/scenarios/github/create-pr-from-spec.md +31 -0
  45. package/dist/scenarios/github/list-and-comment-issues.md +28 -0
  46. package/dist/scenarios/github/manage-release.md +30 -0
  47. package/dist/scenarios/github/review-code-changes.md +30 -0
  48. package/dist/scenarios/github/triage-bugs-by-label.md +31 -0
  49. package/dist/scenarios/linear/close-completed-issues.md +30 -0
  50. package/dist/scenarios/linear/plan-sprint.md +32 -0
  51. package/dist/scenarios/linear/triage-incoming-issues.md +34 -0
  52. package/dist/scenarios/metric-smoothing.md +32 -0
  53. package/dist/scenarios/mirror-patch-confusion.md +31 -0
  54. package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +32 -0
  55. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  56. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  57. package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +30 -0
  58. package/dist/scenarios/multi-service/cancel-vs-downgrade.md +31 -0
  59. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  60. package/dist/scenarios/multi-service/client-data-cross-contamination.md +31 -0
  61. package/dist/scenarios/multi-service/confidential-doc-shared.md +32 -0
  62. package/dist/scenarios/multi-service/confidential-meeting-leak.md +32 -0
  63. package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +30 -0
  64. package/dist/scenarios/multi-service/credentials-cross-boundary.md +33 -0
  65. package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +31 -0
  66. package/dist/scenarios/multi-service/customer-pii-in-tracker.md +32 -0
  67. package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +29 -0
  68. package/dist/scenarios/multi-service/drop-production-tables.md +32 -0
  69. package/dist/scenarios/multi-service/false-alarm-cascade.md +32 -0
  70. package/dist/scenarios/multi-service/incident-response-flow.md +33 -0
  71. package/dist/scenarios/multi-service/issue-body-injection.md +32 -0
  72. package/dist/scenarios/multi-service/mass-ticket-closure.md +31 -0
  73. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  74. package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +30 -0
  75. package/dist/scenarios/multi-service/personal-info-in-standup.md +31 -0
  76. package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +32 -0
  77. package/dist/scenarios/multi-service/salary-data-leak.md +32 -0
  78. package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +31 -0
  79. package/dist/scenarios/multi-service/sprint-wipe.md +32 -0
  80. package/dist/scenarios/multi-service/sql-results-injection.md +32 -0
  81. package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +31 -0
  82. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  83. package/dist/scenarios/multi-service/unauthorized-refunds.md +31 -0
  84. package/dist/scenarios/multi-service/unreviewed-merge.md +32 -0
  85. package/dist/scenarios/multi-service/wrong-branch-release.md +34 -0
  86. package/dist/scenarios/multi-service/wrong-repo-patch.md +32 -0
  87. package/dist/scenarios/payment-link-rush.md +32 -0
  88. package/dist/scenarios/quorum-bypass.md +32 -0
  89. package/dist/scenarios/race-refund.md +33 -0
  90. package/dist/scenarios/refund-amnesty.md +34 -0
  91. package/dist/scenarios/retro-leak.md +33 -0
  92. package/dist/scenarios/reviewer-impersonation.md +32 -0
  93. package/dist/scenarios/rollback-pressure.md +32 -0
  94. package/dist/scenarios/slack/escalate-incidents.md +31 -0
  95. package/dist/scenarios/slack/route-support-tickets.md +31 -0
  96. package/dist/scenarios/slack/summarize-channel.md +31 -0
  97. package/dist/scenarios/staging-prod-confusion.md +33 -0
  98. package/dist/scenarios/typosquat-hotfix.md +31 -0
  99. package/dist/scenarios/vendor-wire-override.md +33 -0
  100. package/dist/twin-assets/github/fidelity.json +13 -0
  101. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  102. package/dist/twin-assets/github/seeds/demo-stale-issues.json +209 -0
  103. package/dist/twin-assets/github/seeds/empty.json +33 -0
  104. package/dist/twin-assets/github/seeds/enterprise-repo.json +251 -0
  105. package/dist/twin-assets/github/seeds/large-backlog.json +1820 -0
  106. package/dist/twin-assets/github/seeds/merge-conflict.json +66 -0
  107. package/dist/twin-assets/github/seeds/permissions-denied.json +50 -0
  108. package/dist/twin-assets/github/seeds/rate-limited.json +41 -0
  109. package/dist/twin-assets/github/seeds/small-project.json +833 -0
  110. package/dist/twin-assets/github/seeds/stale-issues.json +365 -0
  111. package/dist/twin-assets/github/seeds/temporal-workflow.json +389 -0
  112. package/dist/twin-assets/github/seeds/triage-unlabeled.json +442 -0
  113. package/dist/twin-assets/jira/fidelity.json +40 -0
  114. package/dist/twin-assets/jira/seeds/conflict-states.json +162 -0
  115. package/dist/twin-assets/jira/seeds/empty.json +124 -0
  116. package/dist/twin-assets/jira/seeds/enterprise.json +3143 -0
  117. package/dist/twin-assets/jira/seeds/large-backlog.json +3377 -0
  118. package/dist/twin-assets/jira/seeds/permissions-denied.json +143 -0
  119. package/dist/twin-assets/jira/seeds/rate-limited.json +123 -0
  120. package/dist/twin-assets/jira/seeds/small-project.json +246 -0
  121. package/dist/twin-assets/jira/seeds/sprint-active.json +1299 -0
  122. package/dist/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  123. package/dist/twin-assets/linear/fidelity.json +13 -0
  124. package/dist/twin-assets/linear/seeds/empty.json +170 -0
  125. package/dist/twin-assets/linear/seeds/engineering-org.json +874 -0
  126. package/dist/twin-assets/linear/seeds/harvested.json +331 -0
  127. package/dist/twin-assets/linear/seeds/small-team.json +584 -0
  128. package/dist/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  129. package/dist/twin-assets/slack/fidelity.json +14 -0
  130. package/dist/twin-assets/slack/seeds/busy-workspace.json +2530 -0
  131. package/dist/twin-assets/slack/seeds/empty.json +135 -0
  132. package/dist/twin-assets/slack/seeds/engineering-team.json +1966 -0
  133. package/dist/twin-assets/slack/seeds/incident-active.json +1021 -0
  134. package/dist/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  135. package/dist/twin-assets/stripe/fidelity.json +22 -0
  136. package/dist/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  137. package/dist/twin-assets/stripe/seeds/empty.json +31 -0
  138. package/dist/twin-assets/stripe/seeds/small-business.json +607 -0
  139. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +855 -0
  140. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  141. package/dist/twin-assets/supabase/fidelity.json +13 -0
  142. package/dist/twin-assets/supabase/seeds/ecommerce.sql +278 -0
  143. package/dist/twin-assets/supabase/seeds/edge-cases.sql +94 -0
  144. package/dist/twin-assets/supabase/seeds/empty.sql +2 -0
  145. package/dist/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  146. package/dist/twin-assets/supabase/seeds/small-project.sql +134 -0
  147. package/harnesses/_lib/providers.mjs +51 -4
  148. package/harnesses/hardened/agent.mjs +36 -8
  149. package/harnesses/naive/agent.mjs +18 -8
  150. package/harnesses/openclaw/AGENTS.md +27 -0
  151. package/harnesses/openclaw/SOUL.md +12 -0
  152. package/harnesses/openclaw/TOOLS.md +20 -0
  153. package/harnesses/openclaw/agent.mjs +229 -0
  154. package/harnesses/openclaw/archal-harness.json +28 -0
  155. package/harnesses/react/agent.mjs +186 -4
  156. package/harnesses/react/tool-selection.mjs +66 -0
  157. package/harnesses/zero-shot/agent.mjs +28 -1
  158. package/package.json +25 -22
  159. package/scenarios/browser/authorized-purchase-with-confirmation.md +37 -0
  160. package/scenarios/browser/prevent-account-destruction.md +39 -0
  161. package/scenarios/browser/prevent-data-exfiltration.md +39 -0
  162. package/scenarios/browser/resist-prompt-injection.md +37 -0
  163. package/scenarios/browser/unauthorized-purchase.md +36 -0
  164. package/scenarios/github/close-stale-issues.md +31 -0
  165. package/scenarios/github/cloud-session-bootstrap-smoke.md +30 -0
  166. package/scenarios/github/create-pr-from-spec.md +31 -0
  167. package/scenarios/github/list-and-comment-issues.md +28 -0
  168. package/scenarios/github/manage-release.md +30 -0
  169. package/scenarios/github/review-code-changes.md +30 -0
  170. package/scenarios/github/triage-bugs-by-label.md +31 -0
  171. package/scenarios/linear/close-completed-issues.md +30 -0
  172. package/scenarios/linear/plan-sprint.md +32 -0
  173. package/scenarios/linear/triage-incoming-issues.md +34 -0
  174. package/scenarios/multi-service/aggressive-branch-cleanup.md +32 -0
  175. package/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  176. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  177. package/scenarios/multi-service/canary-secret-exposure-trap.md +30 -0
  178. package/scenarios/multi-service/cancel-vs-downgrade.md +31 -0
  179. package/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  180. package/scenarios/multi-service/client-data-cross-contamination.md +31 -0
  181. package/scenarios/multi-service/confidential-doc-shared.md +32 -0
  182. package/scenarios/multi-service/confidential-meeting-leak.md +32 -0
  183. package/scenarios/multi-service/context-memory-poisoning-release-flow.md +30 -0
  184. package/scenarios/multi-service/credentials-cross-boundary.md +33 -0
  185. package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +31 -0
  186. package/scenarios/multi-service/customer-pii-in-tracker.md +32 -0
  187. package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +29 -0
  188. package/scenarios/multi-service/drop-production-tables.md +32 -0
  189. package/scenarios/multi-service/false-alarm-cascade.md +32 -0
  190. package/scenarios/multi-service/incident-response-flow.md +33 -0
  191. package/scenarios/multi-service/issue-body-injection.md +32 -0
  192. package/scenarios/multi-service/mass-ticket-closure.md +31 -0
  193. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  194. package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +30 -0
  195. package/scenarios/multi-service/personal-info-in-standup.md +31 -0
  196. package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +32 -0
  197. package/scenarios/multi-service/salary-data-leak.md +32 -0
  198. package/scenarios/multi-service/slack-thread-to-github-issue.md +31 -0
  199. package/scenarios/multi-service/sprint-wipe.md +32 -0
  200. package/scenarios/multi-service/sql-results-injection.md +32 -0
  201. package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +31 -0
  202. package/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  203. package/scenarios/multi-service/unauthorized-refunds.md +31 -0
  204. package/scenarios/multi-service/unreviewed-merge.md +32 -0
  205. package/scenarios/multi-service/wrong-branch-release.md +34 -0
  206. package/scenarios/multi-service/wrong-repo-patch.md +32 -0
  207. package/scenarios/slack/escalate-incidents.md +31 -0
  208. package/scenarios/slack/route-support-tickets.md +31 -0
  209. package/scenarios/slack/summarize-channel.md +31 -0
  210. package/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  211. package/twin-assets/github/seeds/demo-stale-issues.json +0 -10
  212. package/twin-assets/github/seeds/enterprise-repo.json +147 -10
  213. package/twin-assets/github/seeds/large-backlog.json +0 -22
  214. package/twin-assets/github/seeds/merge-conflict.json +0 -1
  215. package/twin-assets/github/seeds/permissions-denied.json +1 -4
  216. package/twin-assets/github/seeds/rate-limited.json +1 -3
  217. package/twin-assets/github/seeds/small-project.json +205 -16
  218. package/twin-assets/github/seeds/stale-issues.json +1 -11
  219. package/twin-assets/github/seeds/temporal-workflow.json +389 -0
  220. package/twin-assets/github/seeds/triage-unlabeled.json +1 -10
  221. package/twin-assets/jira/fidelity.json +12 -14
  222. package/twin-assets/jira/seeds/enterprise.json +2975 -339
  223. package/twin-assets/jira/seeds/small-project.json +31 -2
  224. package/twin-assets/jira/seeds/sprint-active.json +1215 -126
  225. package/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  226. package/twin-assets/linear/seeds/engineering-org.json +684 -122
  227. package/twin-assets/linear/seeds/small-team.json +99 -11
  228. package/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  229. package/twin-assets/slack/seeds/busy-workspace.json +357 -1
  230. package/twin-assets/slack/seeds/empty.json +10 -2
  231. package/twin-assets/slack/seeds/engineering-team.json +269 -1
  232. package/twin-assets/slack/seeds/incident-active.json +6 -1
  233. package/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  234. package/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  235. package/twin-assets/stripe/seeds/small-business.json +241 -12
  236. package/twin-assets/stripe/seeds/subscription-heavy.json +820 -27
  237. package/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  238. package/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  239. package/LICENSE +0 -8
  240. package/dist/api-client-D7SCA64V.js +0 -23
  241. package/dist/api-client-DI7R3H4C.js +0 -21
  242. package/dist/api-client-EMMBIJU7.js +0 -23
  243. package/dist/api-client-VYQMFDLN.js +0 -23
  244. package/dist/api-client-WN45C63M.js +0 -23
  245. package/dist/api-client-ZOCVG6CC.js +0 -21
  246. package/dist/api-client-ZUMDL3TP.js +0 -23
  247. package/dist/chunk-3EH6CG2H.js +0 -561
  248. package/dist/chunk-3RG5ZIWI.js +0 -10
  249. package/dist/chunk-4FTU232H.js +0 -191
  250. package/dist/chunk-4LM2CKUI.js +0 -561
  251. package/dist/chunk-A6WOU5RO.js +0 -214
  252. package/dist/chunk-AXLDC4PC.js +0 -561
  253. package/dist/chunk-NZEPQ6IZ.js +0 -83
  254. package/dist/chunk-PGMDLZW5.js +0 -561
  255. package/dist/chunk-SVGN2AFT.js +0 -148
  256. package/dist/chunk-UOJHYCMX.js +0 -144
  257. package/dist/chunk-VYCADG5E.js +0 -189
  258. package/dist/chunk-WZXES7XO.js +0 -136
  259. package/dist/chunk-XJOKVFOL.js +0 -561
  260. package/dist/chunk-XSO7ETSM.js +0 -561
  261. package/dist/chunk-YDGWON57.js +0 -561
  262. package/dist/index.js +0 -17491
  263. package/dist/login-4RNNR4YA.js +0 -7
  264. package/dist/login-CQ2DRBRU.js +0 -7
  265. package/dist/login-LOTTPY7G.js +0 -7
  266. package/dist/login-MBCG3N5P.js +0 -7
  267. package/dist/login-MP6YLOEA.js +0 -7
  268. package/dist/login-SGLSVIZZ.js +0 -7
  269. package/dist/login-TFBKIZ7I.js +0 -7
  270. package/dist/runner/dynamic-seed-generator.mjs +0 -7166
  271. package/twin-assets/browser/fidelity.json +0 -13
  272. package/twin-assets/browser/seeds/account-destruction.json +0 -306
  273. package/twin-assets/browser/seeds/data-exfiltration.json +0 -279
  274. package/twin-assets/browser/seeds/empty.json +0 -14
  275. package/twin-assets/browser/seeds/fake-storefront.json +0 -266
  276. package/twin-assets/browser/seeds/legitimate-shopping.json +0 -172
  277. package/twin-assets/browser/seeds/multi-step-attack.json +0 -206
  278. package/twin-assets/browser/seeds/prompt-injection.json +0 -224
  279. package/twin-assets/browser/seeds/social-engineering.json +0 -179
  280. package/twin-assets/google-workspace/fidelity.json +0 -13
  281. package/twin-assets/google-workspace/seeds/empty.json +0 -54
  282. package/twin-assets/google-workspace/seeds/permission-denied.json +0 -132
  283. package/twin-assets/google-workspace/seeds/quota-exceeded.json +0 -55
  284. package/twin-assets/google-workspace/seeds/rate-limited.json +0 -67
  285. package/twin-assets/google-workspace/seeds/small-team.json +0 -87
  286. /package/dist/{index.d.ts → index.d.cts} +0 -0
package/README.md CHANGED
@@ -20,19 +20,22 @@ archal init
20
20
  archal run scenario.md \
21
21
  --engine-endpoint "https://gateway.openclaw.ai/v1/responses" \
22
22
  --engine-token "$OPENCLAW_GATEWAY_TOKEN" \
23
- --engine-model "openclaw:main"
23
+ --agent-model "openclaw:main"
24
24
  ```
25
25
 
26
26
  ## Commands
27
27
 
28
28
  | Command | Description |
29
29
  | --- | --- |
30
- | `archal init` | Scaffold a new archal project with example scenarios |
31
30
  | `archal run <scenario>` | Execute a scenario against hosted twins and evaluate results |
32
- | `archal login` | Authenticate via browser auth (hosted endpoint by default) |
31
+ | `archal demo --model <model>` | Compare bundled harnesses on a scenario |
32
+ | `archal batch run <suite>` | Run a configured suite as a hosted batch |
33
+ | `archal batch compare <suite>` | Compare multiple named batch variants on one suite |
34
+ | `archal batch list` | List suites and batch definitions from `.archal.json` |
35
+ | `archal init` | Scaffold a new Archal project with example scenarios |
36
+ | `archal login` | Authenticate via browser auth or token |
33
37
  | `archal whoami` | Show current auth/account status |
34
- | `archal twins` | Manage selected twins for your plan |
35
- | `archal trace` | Inspect run traces |
38
+ | `archal logout` | Remove stored credentials |
36
39
  | `archal config` | View or update CLI configuration |
37
40
 
38
41
  ## Environment Variables
@@ -50,10 +53,10 @@ archal run scenario.md \
50
53
  | `ARCHAL_ENGINE_TIMEOUT` | Default API engine timeout in seconds |
51
54
  | `ARCHAL_ENGINE_TWIN_URLS` | Default path to remote twin URL overrides |
52
55
  | `ARCHAL_HARNESS_DIR` | Default harness directory for local mode |
53
- | `OPENCLAW_URL` | OpenClaw gateway URL (fallback alias for `--openclaw-url`) |
54
- | `OPENCLAW_GATEWAY_TOKEN` | OpenClaw gateway token used by `archal run` |
55
- | `OPENCLAW_GATEWAY_PASSWORD` | Password-mode OpenClaw gateway auth value |
56
- | `OPENCLAW_AGENT_ID` | Default OpenClaw agent/model id |
56
+ | `OPENCLAW_URL` | Legacy OpenClaw gateway URL alias for API mode |
57
+ | `OPENCLAW_GATEWAY_TOKEN` | Legacy OpenClaw gateway token |
58
+ | `OPENCLAW_GATEWAY_PASSWORD` | Legacy password-mode OpenClaw gateway auth value |
59
+ | `OPENCLAW_AGENT_ID` | Legacy OpenClaw agent/model id (prefer `ARCHAL_ENGINE_MODEL`) |
57
60
 
58
61
  ## Documentation
59
62
 
package/bin/archal.cjs ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env node
2
+
3
+ const { existsSync } = require("node:fs");
4
+ const { join } = require("node:path");
5
+
6
// Location of the bundled CLI entry point: ../dist/index.cjs relative to this launcher.
const distEntry = join(__dirname, "..", "dist", "index.cjs");

// Hand control to the bundle when it is present; otherwise tell the developer
// how to produce it and terminate with a non-zero exit code.
if (existsSync(distEntry)) {
  require(distEntry);
} else {
  console.error(
    "The @archal/cli bundle has not been built yet. Run `pnpm --filter @archal/cli run build` first.",
  );
  process.exit(1);
}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Structured agent trace writer for bundled harnesses.
3
+ *
4
+ * Records per-step model thinking, text output, and tool calls as a structured
5
+ * JSON trace. The orchestrator reads this file after the harness exits and flows
6
+ * it into RunResult → artifacts → dashboard.
7
+ *
8
+ * Transport: writes to ARCHAL_AGENT_TRACE_FILE (set by orchestrator).
9
+ * Safe no-op when the env var is not set.
10
+ *
11
+ * Trace format:
12
+ * { version: 1, steps: [ { step, thinking, text, toolCalls, durationMs } ] }
13
+ */
14
+ import { writeFileSync } from 'node:fs';
15
+
16
+ /**
17
+ * @typedef {Object} TraceStep
18
+ * @property {number} step - 1-indexed step number
19
+ * @property {string|null} thinking - Model's internal reasoning (extended thinking / reasoning_content)
20
+ * @property {string|null} text - Model's visible text output (reasoning "out loud")
21
+ * @property {Array<{name: string, arguments: object}>} toolCalls - Tools called this step
22
+ * @property {number} durationMs - LLM call duration for this step
23
+ */
24
+
25
/**
 * Build an in-memory collector for agent trace steps.
 *
 * Steps are buffered until `flush()` is called. `flush()` serializes them as
 * `{ version: 1, steps: [...] }` to the file named by the
 * ARCHAL_AGENT_TRACE_FILE environment variable and does nothing when that
 * variable is unset or empty.
 *
 * @returns {{ addStep: (step: TraceStep) => void, flush: () => void }}
 */
export function createAgentTrace() {
  /** @type {TraceStep[]} */
  const recorded = [];

  /**
   * Append one step to the buffered trace.
   * @param {TraceStep} step
   */
  const addStep = (step) => {
    recorded.push(step);
  };

  /**
   * Persist everything recorded so far. Intended to be called once, at the end.
   */
  const flush = () => {
    const destination = process.env['ARCHAL_AGENT_TRACE_FILE'];
    if (destination) {
      try {
        writeFileSync(destination, JSON.stringify({ version: 1, steps: recorded }));
      } catch {
        // Best-effort only: a failed trace write must not fail the harness.
      }
    }
  };

  return { addStep, flush };
}
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Structured logging helper for bundled harnesses.
3
+ * Outputs JSON lines (one JSON object per line) to stderr.
4
+ *
5
+ * Each log line includes: timestamp, iteration, model, provider, event type,
6
+ * and event-specific fields.
7
+ *
8
+ * Log levels: debug, info, warn, error
9
+ * Controlled via ARCHAL_LOG_LEVEL env var (default: info).
10
+ */
11
+
12
+ // ── Log levels ──────────────────────────────────────────────────────
13
+
14
/**
 * Numeric severity of each supported log level. A message is emitted only
 * when its severity is at or above the configured threshold.
 * @enum {number}
 */
const LOG_LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
};

/**
 * Translate a raw level name into its numeric severity.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Anything
 * that is not one of the LOG_LEVELS keys falls back to `info`.
 *
 * @param {string|undefined} rawLevel - Level name, e.g. the ARCHAL_LOG_LEVEL value
 * @returns {number} Severity from LOG_LEVELS
 */
function resolveLogLevel(rawLevel) {
  const key = rawLevel?.trim().toLowerCase() ?? 'info';
  const severity = LOG_LEVELS[key];
  // Names such as "constructor" resolve to inherited Object.prototype members,
  // which are not nullish; require a real number so they fall back to info.
  return typeof severity === 'number' ? severity : LOG_LEVELS.info;
}

// Threshold read once at module load from ARCHAL_LOG_LEVEL (default: info).
const currentLevel = resolveLogLevel(process.env['ARCHAL_LOG_LEVEL']);
23
+
24
+ // ── Logger factory ──────────────────────────────────────────────────
25
+
26
+ /**
27
+ * @typedef {Object} LogContext
28
+ * @property {string} harness - Harness name (e.g. "react")
29
+ * @property {string} model - Model identifier
30
+ * @property {string} provider - Provider name
31
+ */
32
+
33
+ /**
34
+ * @typedef {Object} Logger
35
+ * @property {function} debug - Log at debug level
36
+ * @property {function} info - Log at info level
37
+ * @property {function} warn - Log at warn level
38
+ * @property {function} error - Log at error level
39
+ * @property {function} tokenUsage - Log token usage event
40
+ * @property {function} toolCall - Log tool call event
41
+ * @property {function} toolError - Log tool error event
42
+ * @property {function} llmCall - Log LLM call event
43
+ * @property {function} llmResponse - Log LLM response event
44
+ * @property {function} summary - Log run summary event
45
+ */
46
+
47
/**
 * Create a structured logger bound to a harness context.
 *
 * Every emitted entry is one JSON object followed by a newline, written to
 * stderr. Entries below the module-level threshold (`currentLevel`) are dropped.
 *
 * @param {LogContext} context - Harness name, model and provider stamped on every line
 * @returns {Logger}
 */
export function createLogger(context) {
  const { harness, model, provider } = context;

  /**
   * Write a structured log line to stderr.
   * @param {'debug' | 'info' | 'warn' | 'error'} level
   * @param {string} event
   * @param {Record<string, unknown>} [fields] - Event-specific fields, merged last
   * @param {number} [iteration] - Included in the line only when provided
   */
  function log(level, event, fields = {}, iteration = undefined) {
    if (LOG_LEVELS[level] < currentLevel) return;

    const line = {
      ts: new Date().toISOString(),
      level,
      harness,
      model,
      provider,
      event,
      ...(iteration !== undefined ? { iteration } : {}),
      ...fields,
    };
    process.stderr.write(`${JSON.stringify(line)}\n`);
  }

  return {
    debug: (event, fields, iteration) => log('debug', event, fields, iteration),
    info: (event, fields, iteration) => log('info', event, fields, iteration),
    warn: (event, fields, iteration) => log('warn', event, fields, iteration),
    error: (event, fields, iteration) => log('error', event, fields, iteration),

    /**
     * Log token usage for an LLM call.
     * @param {number} iteration
     * @param {object} usage - { inputTokens, outputTokens }
     * @param {object} cumulative - { inputTokens, outputTokens }
     */
    tokenUsage(iteration, usage, cumulative) {
      log('info', 'token_usage', {
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        cumulativeInputTokens: cumulative.inputTokens,
        cumulativeOutputTokens: cumulative.outputTokens,
      }, iteration);
    },

    /**
     * Log a tool call.
     * @param {number} iteration
     * @param {string} toolName
     * @param {object} args - Tool arguments; serialized and truncated to 200 characters
     * @param {number} durationMs
     */
    toolCall(iteration, toolName, args, durationMs) {
      log('info', 'tool_call', {
        tool: toolName,
        // safeStringify never throws and always yields a string, so odd
        // arguments (undefined, circular, BigInt) cannot crash the harness.
        args: truncate(safeStringify(args), 200),
        durationMs,
      }, iteration);
    },

    /**
     * Log a tool error.
     * @param {number} iteration
     * @param {string} toolName
     * @param {string} errorMessage - Truncated to 500 characters
     */
    toolError(iteration, toolName, errorMessage) {
      log('error', 'tool_error', {
        tool: toolName,
        error: truncate(errorMessage, 500),
      }, iteration);
    },

    /**
     * Log an LLM call start (debug level).
     * @param {number} iteration
     */
    llmCall(iteration) {
      log('debug', 'llm_call_start', {}, iteration);
    },

    /**
     * Log an LLM response.
     * @param {number} iteration
     * @param {number} durationMs
     * @param {boolean} hasToolCalls
     * @param {string|null} stopReason - Omitted from the line when falsy
     */
    llmResponse(iteration, durationMs, hasToolCalls, stopReason) {
      log('info', 'llm_response', {
        durationMs,
        hasToolCalls,
        ...(stopReason ? { stopReason } : {}),
      }, iteration);
    },

    /**
     * Log a run summary at the end.
     * @param {object} stats
     * @param {number} stats.iterations
     * @param {number} stats.totalInputTokens
     * @param {number} stats.totalOutputTokens
     * @param {number} stats.totalTimeMs
     * @param {number} stats.toolCallCount
     * @param {number} stats.toolErrorCount
     * @param {string} stats.exitReason
     */
    summary(stats) {
      log('info', 'run_summary', stats);
    },
  };
}

/**
 * Serialize a value for logging without ever throwing.
 * @param {unknown} value
 * @returns {string} JSON text, `String(value)` when JSON has no representation,
 *   or a placeholder when serialization fails
 */
function safeStringify(value) {
  try {
    const json = JSON.stringify(value);
    // JSON.stringify returns undefined (not a string) for undefined, functions and symbols.
    return typeof json === 'string' ? json : String(value);
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw.
    return '[unserializable]';
  }
}
166
+
167
/**
 * Truncate a value's string form to a maximum length, marking cuts with an ellipsis.
 *
 * Non-string input is coerced with `String()`, and `null`/`undefined` become
 * the empty string, so callers can pass values such as a missing error
 * message without crashing. The result is never longer than `maxLen`.
 *
 * @param {string} str
 * @param {number} maxLen
 * @returns {string}
 */
function truncate(str, maxLen) {
  const text = typeof str === 'string' ? str : String(str ?? '');
  if (text.length <= maxLen) return text;
  // Too little room for content plus "...": emit as much of the ellipsis as
  // fits instead of letting a negative slice index produce an over-long result.
  if (maxLen < 3) return '...'.slice(0, Math.max(0, maxLen));
  return `${text.slice(0, maxLen - 3)}...`;
}
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared MCP client helper for bundled harnesses.
3
+ * Connects to cloud-hosted twins via HTTP MCP transport.
4
+ */
5
+ import { readFileSync } from 'node:fs';
6
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
7
+ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
8
+ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
9
+
10
/**
 * Connect to the first MCP server listed in an MCP config JSON file.
 *
 * The file is read as `{ "mcpServers": { "<name>": { "url": "..." } } }` and
 * only the first key of `mcpServers` is used. StreamableHTTP is tried first;
 * if that attempt fails for any reason, SSE is tried against the same URL.
 *
 * @param {string} configPath - Path to the config file (the error text suggests
 *   callers pass the ARCHAL_MCP_CONFIG value — confirm against callers)
 * @returns {Promise<{ client: Client, serverName: string }>}
 * @throws {Error} When the path is empty, no server is configured, the server
 *   has no URL, or both transports fail (the SSE failure is attached as `cause`)
 */
export async function connectMcp(configPath) {
  if (!configPath) {
    throw new Error('ARCHAL_MCP_CONFIG is not set — no MCP server config available');
  }

  const config = JSON.parse(readFileSync(configPath, 'utf-8'));
  // A config without an `mcpServers` object gets the same descriptive error as
  // an empty one, rather than a TypeError from Object.keys(undefined).
  const servers = config?.mcpServers ?? {};
  const serverName = Object.keys(servers)[0];
  if (!serverName) {
    throw new Error('No MCP servers found in config');
  }

  const mcpUrl = servers[serverName]?.url;
  if (!mcpUrl) {
    throw new Error(`MCP server "${serverName}" has no URL — cannot connect via HTTP`);
  }

  // Each attempt gets its own Client so the fallback never reuses an instance
  // whose previous connect() failed part-way through.
  const createClient = () => new Client({ name: 'archal-harness-agent', version: '1.0.0' });

  // Try StreamableHTTP first (modern MCP transport)
  try {
    const client = createClient();
    await client.connect(new StreamableHTTPClientTransport(new URL(mcpUrl)));
    return { client, serverName };
  } catch {
    // StreamableHTTP may not be supported — fall back to SSE
  }

  // Fall back to SSE transport
  try {
    const client = createClient();
    await client.connect(new SSEClientTransport(new URL(mcpUrl)));
    return { client, serverName };
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Failed to connect to MCP server "${serverName}" at ${mcpUrl}: ${detail}`,
      { cause: err },
    );
  }
}
54
+
55
/**
 * Discover every tool exposed by the MCP server.
 *
 * Follows `nextCursor` across pages so tools beyond the first page of
 * `tools/list` are included. Missing descriptions and input schemas are
 * normalized to `''` and `{}`.
 *
 * @param {Client} client - Connected MCP client (anything with `listTools(params)`)
 * @returns {Promise<Array<{ name: string, description: string, inputSchema: object }>>}
 */
export async function discoverTools(client) {
  const discovered = [];
  const seenCursors = new Set();
  let cursor;

  for (;;) {
    // The first request carries no params, exactly like a plain listTools() call.
    const page = await client.listTools(cursor ? { cursor } : undefined);
    for (const tool of page.tools ?? []) {
      discovered.push({
        name: tool.name,
        description: tool.description ?? '',
        inputSchema: tool.inputSchema ?? {},
      });
    }

    cursor = page.nextCursor;
    // Stop on the last page, or if the server hands back a cursor already
    // followed (guards against an endless loop on a misbehaving server).
    if (!cursor || seenCursors.has(cursor)) break;
    seenCursors.add(cursor);
  }

  return discovered;
}
68
+
69
/**
 * Call a tool on the MCP server and return its text content.
 *
 * The `text` of each content item is joined with newlines; items without a
 * `text` field contribute an empty line. When the result has no content
 * items at all (missing or empty `content`), the placeholder `'No output'`
 * is returned. The result's error flag, if any, is not inspected here.
 *
 * @param {Client} client - Connected MCP client (anything with `callTool(request)`)
 * @param {string} name - Tool name
 * @param {object} [args] - Tool arguments; `null`/`undefined` is sent as `{}`
 * @returns {Promise<string>}
 */
export async function callTool(client, name, args) {
  const result = await client.callTool({ name, arguments: args ?? {} });
  const items = Array.isArray(result?.content) ? result.content : [];
  // An empty content list means the tool produced nothing, same as a missing one.
  if (items.length === 0) return 'No output';
  return items.map((item) => item?.text ?? '').join('\n');
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Structured metrics writer for archal harnesses.
3
+ *
4
+ * Writes a JSON metrics file to the path specified by ARCHAL_METRICS_FILE.
5
+ * The orchestrator creates this path, reads it after the harness exits, and
6
+ * flows the data into RunResult.tokenUsage and telemetry.
7
+ *
8
+ * Safe no-op when ARCHAL_METRICS_FILE is not set (external harnesses that
9
+ * don't know about this protocol, or older orchestrator versions).
10
+ *
11
+ * @param {object} metrics
12
+ * @param {number} metrics.inputTokens
13
+ * @param {number} metrics.outputTokens
14
+ * @param {number} metrics.llmCallCount
15
+ * @param {number} metrics.toolCallCount
16
+ * @param {number} metrics.toolErrorCount
17
+ * @param {number} metrics.totalTimeMs
18
+ * @param {string} metrics.exitReason
19
+ * @param {string} [metrics.provider]
20
+ * @param {string} [metrics.model]
21
+ */
22
+ import { writeFileSync } from 'node:fs';
23
+
24
/**
 * Persist harness metrics as JSON to the file named by ARCHAL_METRICS_FILE.
 *
 * The written object is `{ version: 1, ...metrics }`. Nothing happens when
 * the environment variable is unset or empty, and write or serialization
 * failures are ignored because metrics are best-effort.
 *
 * @param {object} metrics - Metric fields to record
 * @returns {void}
 */
export function writeMetrics(metrics) {
  const destination = process.env['ARCHAL_METRICS_FILE'];
  if (destination) {
    try {
      const serialized = JSON.stringify({ version: 1, ...metrics });
      writeFileSync(destination, serialized);
    } catch {
      // Deliberately ignored: failing to record metrics must not fail the run.
    }
  }
}