@archal/cli 0.7.12 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. package/README.md +12 -9
  2. package/bin/archal.cjs +15 -0
  3. package/dist/harnesses/_lib/agent-trace.mjs +57 -0
  4. package/dist/harnesses/_lib/env-utils.mjs +23 -0
  5. package/dist/harnesses/_lib/harness-runner.mjs +354 -0
  6. package/dist/harnesses/_lib/llm-call.mjs +411 -0
  7. package/dist/harnesses/_lib/llm-config.mjs +209 -0
  8. package/dist/harnesses/_lib/llm-response.mjs +483 -0
  9. package/dist/harnesses/_lib/logging.mjs +176 -0
  10. package/dist/harnesses/_lib/mcp-client.mjs +80 -0
  11. package/dist/harnesses/_lib/metrics.mjs +34 -0
  12. package/dist/harnesses/_lib/model-configs.mjs +521 -0
  13. package/dist/harnesses/_lib/providers.mjs +39 -0
  14. package/dist/harnesses/_lib/rest-client.mjs +131 -0
  15. package/dist/harnesses/_lib/tool-executor.mjs +65 -0
  16. package/dist/harnesses/hardened/SAFETY.md +53 -0
  17. package/dist/harnesses/hardened/agent.mjs +57 -0
  18. package/dist/harnesses/hardened/archal-harness.json +23 -0
  19. package/dist/harnesses/naive/agent.mjs +37 -0
  20. package/dist/harnesses/naive/archal-harness.json +21 -0
  21. package/dist/harnesses/openclaw/AGENTS.md +27 -0
  22. package/dist/harnesses/openclaw/SOUL.md +12 -0
  23. package/dist/harnesses/openclaw/TOOLS.md +20 -0
  24. package/dist/harnesses/openclaw/agent.mjs +229 -0
  25. package/dist/harnesses/openclaw/archal-harness.json +28 -0
  26. package/dist/harnesses/react/agent.mjs +233 -0
  27. package/dist/harnesses/react/archal-harness.json +22 -0
  28. package/dist/harnesses/react/tool-selection.mjs +66 -0
  29. package/dist/harnesses/zero-shot/agent.mjs +31 -0
  30. package/dist/harnesses/zero-shot/archal-harness.json +21 -0
  31. package/dist/index.cjs +61018 -0
  32. package/dist/package.json +70 -0
  33. package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
  34. package/dist/scenarios/github/codeowners-self-approval.md +46 -0
  35. package/dist/scenarios/github/comment-chain-reassignment.md +42 -0
  36. package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  37. package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
  38. package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
  39. package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
  40. package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  41. package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
  42. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  43. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  44. package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
  45. package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
  46. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  47. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  48. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  49. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  50. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  51. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  52. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  53. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  54. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  55. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  56. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  57. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  58. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  59. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  60. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  61. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  62. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  63. package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
  64. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  65. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  66. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  67. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  68. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  69. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  70. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  71. package/dist/twin-assets/github/fidelity.json +13 -0
  72. package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  73. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  74. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  75. package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  76. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  77. package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  78. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  79. package/dist/twin-assets/github/seeds/demo-stale-issues.json +209 -0
  80. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  81. package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
  82. package/dist/twin-assets/github/seeds/empty.json +33 -0
  83. package/dist/twin-assets/github/seeds/enterprise-repo.json +251 -0
  84. package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  85. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  86. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  87. package/dist/twin-assets/github/seeds/large-backlog.json +1820 -0
  88. package/dist/twin-assets/github/seeds/merge-conflict.json +66 -0
  89. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  90. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  91. package/dist/twin-assets/github/seeds/permissions-denied.json +50 -0
  92. package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  93. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
  94. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  95. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  96. package/dist/twin-assets/github/seeds/rate-limited.json +41 -0
  97. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  98. package/dist/twin-assets/github/seeds/small-project.json +833 -0
  99. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  100. package/dist/twin-assets/github/seeds/stale-issues.json +365 -0
  101. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  102. package/dist/twin-assets/github/seeds/temporal-workflow.json +389 -0
  103. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  104. package/dist/twin-assets/github/seeds/triage-unlabeled.json +442 -0
  105. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  106. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  107. package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  108. package/dist/twin-assets/jira/fidelity.json +40 -0
  109. package/dist/twin-assets/jira/seeds/conflict-states.json +162 -0
  110. package/dist/twin-assets/jira/seeds/empty.json +124 -0
  111. package/dist/twin-assets/jira/seeds/enterprise.json +3143 -0
  112. package/dist/twin-assets/jira/seeds/large-backlog.json +3377 -0
  113. package/dist/twin-assets/jira/seeds/permissions-denied.json +143 -0
  114. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  115. package/dist/twin-assets/jira/seeds/rate-limited.json +123 -0
  116. package/dist/twin-assets/jira/seeds/small-project.json +246 -0
  117. package/dist/twin-assets/jira/seeds/sprint-active.json +1299 -0
  118. package/dist/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  119. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  120. package/dist/twin-assets/linear/fidelity.json +13 -0
  121. package/dist/twin-assets/linear/seeds/empty.json +170 -0
  122. package/dist/twin-assets/linear/seeds/engineering-org.json +874 -0
  123. package/dist/twin-assets/linear/seeds/harvested.json +331 -0
  124. package/dist/twin-assets/linear/seeds/small-team.json +584 -0
  125. package/dist/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  126. package/dist/twin-assets/slack/fidelity.json +14 -0
  127. package/dist/twin-assets/slack/seeds/busy-workspace.json +2530 -0
  128. package/dist/twin-assets/slack/seeds/empty.json +135 -0
  129. package/dist/twin-assets/slack/seeds/engineering-team.json +1966 -0
  130. package/dist/twin-assets/slack/seeds/incident-active.json +1021 -0
  131. package/dist/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  132. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  133. package/dist/twin-assets/stripe/fidelity.json +22 -0
  134. package/dist/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  135. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  136. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  137. package/dist/twin-assets/stripe/seeds/empty.json +31 -0
  138. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  139. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  140. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  141. package/dist/twin-assets/stripe/seeds/small-business.json +607 -0
  142. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +855 -0
  143. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  144. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  145. package/dist/twin-assets/supabase/fidelity.json +13 -0
  146. package/dist/twin-assets/supabase/seeds/ecommerce.sql +278 -0
  147. package/dist/twin-assets/supabase/seeds/edge-cases.sql +94 -0
  148. package/dist/twin-assets/supabase/seeds/empty.sql +2 -0
  149. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  150. package/dist/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  151. package/dist/twin-assets/supabase/seeds/small-project.sql +134 -0
  152. package/dist/twin-assets/telegram/fidelity.json +19 -0
  153. package/dist/twin-assets/telegram/seeds/empty.json +1 -0
  154. package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
  155. package/harnesses/_lib/env-utils.mjs +23 -0
  156. package/harnesses/_lib/harness-runner.mjs +354 -0
  157. package/harnesses/_lib/llm-call.mjs +411 -0
  158. package/harnesses/_lib/llm-config.mjs +209 -0
  159. package/harnesses/_lib/llm-response.mjs +483 -0
  160. package/harnesses/_lib/providers.mjs +36 -1066
  161. package/harnesses/_lib/tool-executor.mjs +65 -0
  162. package/harnesses/hardened/agent.mjs +21 -225
  163. package/harnesses/naive/agent.mjs +9 -137
  164. package/harnesses/openclaw/AGENTS.md +27 -0
  165. package/harnesses/openclaw/SOUL.md +12 -0
  166. package/harnesses/openclaw/TOOLS.md +20 -0
  167. package/harnesses/openclaw/agent.mjs +229 -0
  168. package/harnesses/openclaw/archal-harness.json +28 -0
  169. package/harnesses/react/agent.mjs +184 -218
  170. package/harnesses/react/tool-selection.mjs +66 -0
  171. package/harnesses/zero-shot/agent.mjs +10 -190
  172. package/package.json +26 -22
  173. package/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
  174. package/scenarios/github/codeowners-self-approval.md +46 -0
  175. package/scenarios/github/comment-chain-reassignment.md +42 -0
  176. package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  177. package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
  178. package/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
  179. package/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
  180. package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  181. package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
  182. package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  183. package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  184. package/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
  185. package/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
  186. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  187. package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  188. package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  189. package/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  190. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  191. package/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  192. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  193. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  194. package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  195. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  196. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  197. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  198. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  199. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  200. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  201. package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  202. package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  203. package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
  204. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  205. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  206. package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  207. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  208. package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  209. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  210. package/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  211. package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  212. package/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  213. package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  214. package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  215. package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  216. package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  217. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  218. package/twin-assets/github/seeds/demo-stale-issues.json +0 -10
  219. package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  220. package/twin-assets/github/seeds/double-refund-trap.json +112 -0
  221. package/twin-assets/github/seeds/enterprise-repo.json +133 -8
  222. package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  223. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  224. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  225. package/twin-assets/github/seeds/large-backlog.json +0 -22
  226. package/twin-assets/github/seeds/merge-conflict.json +0 -1
  227. package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  228. package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  229. package/twin-assets/github/seeds/permissions-denied.json +1 -4
  230. package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  231. package/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
  232. package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  233. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  234. package/twin-assets/github/seeds/rate-limited.json +1 -3
  235. package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  236. package/twin-assets/github/seeds/small-project.json +42 -16
  237. package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  238. package/twin-assets/github/seeds/stale-issues.json +1 -11
  239. package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  240. package/twin-assets/github/seeds/temporal-workflow.json +389 -0
  241. package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  242. package/twin-assets/github/seeds/triage-unlabeled.json +1 -10
  243. package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  244. package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  245. package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  246. package/twin-assets/jira/fidelity.json +12 -14
  247. package/twin-assets/jira/seeds/enterprise.json +2975 -339
  248. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  249. package/twin-assets/jira/seeds/sprint-active.json +1209 -146
  250. package/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  251. package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  252. package/twin-assets/linear/seeds/engineering-org.json +684 -122
  253. package/twin-assets/linear/seeds/small-team.json +99 -11
  254. package/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  255. package/twin-assets/slack/seeds/busy-workspace.json +244 -3
  256. package/twin-assets/slack/seeds/empty.json +10 -2
  257. package/twin-assets/slack/seeds/engineering-team.json +163 -3
  258. package/twin-assets/slack/seeds/incident-active.json +6 -1
  259. package/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  260. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  261. package/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  262. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  263. package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  264. package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  265. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  266. package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  267. package/twin-assets/stripe/seeds/small-business.json +241 -12
  268. package/twin-assets/stripe/seeds/subscription-heavy.json +820 -27
  269. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  270. package/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  271. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  272. package/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  273. package/twin-assets/telegram/fidelity.json +19 -0
  274. package/twin-assets/telegram/seeds/empty.json +1 -0
  275. package/twin-assets/telegram/seeds/harvested.json +130 -0
  276. package/LICENSE +0 -8
  277. package/dist/api-client-D7SCA64V.js +0 -23
  278. package/dist/api-client-DI7R3H4C.js +0 -21
  279. package/dist/api-client-EMMBIJU7.js +0 -23
  280. package/dist/api-client-VYQMFDLN.js +0 -23
  281. package/dist/api-client-WN45C63M.js +0 -23
  282. package/dist/api-client-ZOCVG6CC.js +0 -21
  283. package/dist/api-client-ZUMDL3TP.js +0 -23
  284. package/dist/chunk-3EH6CG2H.js +0 -561
  285. package/dist/chunk-3RG5ZIWI.js +0 -10
  286. package/dist/chunk-4FTU232H.js +0 -191
  287. package/dist/chunk-4LM2CKUI.js +0 -561
  288. package/dist/chunk-A6WOU5RO.js +0 -214
  289. package/dist/chunk-AXLDC4PC.js +0 -561
  290. package/dist/chunk-NZEPQ6IZ.js +0 -83
  291. package/dist/chunk-PGMDLZW5.js +0 -561
  292. package/dist/chunk-SVGN2AFT.js +0 -148
  293. package/dist/chunk-UOJHYCMX.js +0 -144
  294. package/dist/chunk-VYCADG5E.js +0 -189
  295. package/dist/chunk-WZXES7XO.js +0 -136
  296. package/dist/chunk-XJOKVFOL.js +0 -561
  297. package/dist/chunk-XSO7ETSM.js +0 -561
  298. package/dist/chunk-YDGWON57.js +0 -561
  299. package/dist/index.js +0 -15908
  300. package/dist/login-4RNNR4YA.js +0 -7
  301. package/dist/login-CQ2DRBRU.js +0 -7
  302. package/dist/login-LOTTPY7G.js +0 -7
  303. package/dist/login-MBCG3N5P.js +0 -7
  304. package/dist/login-MP6YLOEA.js +0 -7
  305. package/dist/login-SGLSVIZZ.js +0 -7
  306. package/dist/login-TFBKIZ7I.js +0 -7
  307. package/dist/runner/dynamic-seed-generator.mjs +0 -7166
  308. package/scenarios/approval-spoof.md +0 -32
  309. package/scenarios/audit-leak.md +0 -35
  310. package/scenarios/bulk-closure-pressure.md +0 -32
  311. package/scenarios/calendar-leak.md +0 -33
  312. package/scenarios/coupon-blast.md +0 -32
  313. package/scenarios/cross-client-leak.md +0 -34
  314. package/scenarios/debug-dump-leak.md +0 -35
  315. package/scenarios/drive-share-leak.md +0 -33
  316. package/scenarios/encoded-exfil.md +0 -33
  317. package/scenarios/exec-impersonation.md +0 -33
  318. package/scenarios/metric-smoothing.md +0 -32
  319. package/scenarios/mirror-patch-confusion.md +0 -31
  320. package/scenarios/payment-link-rush.md +0 -32
  321. package/scenarios/quorum-bypass.md +0 -32
  322. package/scenarios/race-refund.md +0 -33
  323. package/scenarios/refund-amnesty.md +0 -34
  324. package/scenarios/retro-leak.md +0 -33
  325. package/scenarios/reviewer-impersonation.md +0 -32
  326. package/scenarios/rollback-pressure.md +0 -32
  327. package/scenarios/staging-prod-confusion.md +0 -33
  328. package/scenarios/typosquat-hotfix.md +0 -31
  329. package/scenarios/vendor-wire-override.md +0 -33
  330. package/twin-assets/browser/fidelity.json +0 -13
  331. package/twin-assets/browser/seeds/account-destruction.json +0 -306
  332. package/twin-assets/browser/seeds/data-exfiltration.json +0 -279
  333. package/twin-assets/browser/seeds/empty.json +0 -14
  334. package/twin-assets/browser/seeds/fake-storefront.json +0 -266
  335. package/twin-assets/browser/seeds/legitimate-shopping.json +0 -172
  336. package/twin-assets/browser/seeds/multi-step-attack.json +0 -206
  337. package/twin-assets/browser/seeds/prompt-injection.json +0 -224
  338. package/twin-assets/browser/seeds/social-engineering.json +0 -179
  339. package/twin-assets/google-workspace/fidelity.json +0 -13
  340. package/twin-assets/google-workspace/seeds/empty.json +0 -54
  341. package/twin-assets/google-workspace/seeds/permission-denied.json +0 -132
  342. package/twin-assets/google-workspace/seeds/quota-exceeded.json +0 -55
  343. package/twin-assets/google-workspace/seeds/rate-limited.json +0 -67
  344. package/twin-assets/google-workspace/seeds/small-team.json +0 -87
  345. /package/dist/{index.d.ts → index.d.cts} +0 -0
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared MCP client helper for bundled harnesses.
3
+ * Connects to cloud-hosted twins via HTTP MCP transport.
4
+ */
5
+ import { readFileSync } from 'node:fs';
6
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
7
+ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
8
+ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
9
+
10
/**
 * Connect to the first MCP server listed in the ARCHAL_MCP_CONFIG JSON file.
 *
 * The StreamableHTTP transport is tried first; if that attempt fails for any
 * reason, the SSE transport is tried against the same URL.
 *
 * @param {string} configPath - Path to a JSON file containing an `mcpServers` map.
 * @returns {Promise<{ client: Client, serverName: string }>} The connected
 *   client and the name (config key) of the server it is connected to.
 * @throws {Error} If `configPath` is empty, the config lists no servers, the
 *   first server has no `url`, or both transports fail to connect. When both
 *   transports fail, `error.cause` is an AggregateError holding the
 *   StreamableHTTP error followed by the SSE error.
 */
export async function connectMcp(configPath) {
  if (!configPath) {
    throw new Error('ARCHAL_MCP_CONFIG is not set — no MCP server config available');
  }

  const config = JSON.parse(readFileSync(configPath, 'utf-8'));
  // A config without an `mcpServers` map is reported as "no servers" instead
  // of crashing inside Object.keys with an opaque TypeError.
  const servers = config?.mcpServers ?? {};
  const serverName = Object.keys(servers)[0];
  if (!serverName) {
    throw new Error('No MCP servers found in config');
  }

  // A null/undefined server entry is treated the same as an entry with no URL.
  const mcpUrl = servers[serverName]?.url;
  if (!mcpUrl) {
    throw new Error(`MCP server "${serverName}" has no URL — cannot connect via HTTP`);
  }

  const clientInfo = { name: 'archal-harness-agent', version: '1.0.0' };

  // Try StreamableHTTP first (modern MCP transport).
  let streamableError;
  try {
    const client = new Client(clientInfo);
    await client.connect(new StreamableHTTPClientTransport(new URL(mcpUrl)));
    return { client, serverName };
  } catch (err) {
    // StreamableHTTP may not be supported — remember why it failed and fall
    // back to SSE.
    streamableError = err;
  }

  // Fall back to SSE transport. A fresh Client is used for this attempt so no
  // state from the failed StreamableHTTP attempt carries over (the SDK's
  // backwards-compatibility example does the same).
  try {
    const client = new Client(clientInfo);
    await client.connect(new SSEClientTransport(new URL(mcpUrl)));
    return { client, serverName };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Failed to connect to MCP server "${serverName}" at ${mcpUrl}: ${reason}`,
      {
        cause: new AggregateError(
          [streamableError, err],
          'StreamableHTTP and SSE transports both failed'
        ),
      }
    );
  }
}
54
+
55
/**
 * List the tools exposed by a connected MCP server in a normalized shape.
 *
 * A tool without a description gets `''`; a tool without an input schema
 * gets `{}`.
 *
 * @param {Client} client - Object exposing an async `listTools()` method.
 * @returns {Promise<Array<{ name: string, description: string, inputSchema: object }>>}
 */
export async function discoverTools(client) {
  const listing = await client.listTools();
  const normalized = [];
  for (const tool of listing.tools) {
    const { name, description, inputSchema } = tool;
    normalized.push({
      name,
      description: description ?? '',
      inputSchema: inputSchema ?? {},
    });
  }
  return normalized;
}
68
+
69
/**
 * Invoke a tool on the MCP server and flatten its result into one string.
 *
 * The `text` field of every content item is joined with newlines; items
 * without a `text` field contribute an empty string. When the result has no
 * `content` at all, the literal 'No output' is returned.
 *
 * @param {Client} client - Object exposing an async `callTool()` method.
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments; null/undefined is sent as `{}`.
 * @returns {Promise<string>}
 */
export async function callTool(client, name, args) {
  const request = { name, arguments: args ?? {} };
  const response = await client.callTool(request);

  const parts = response.content;
  if (parts == null) {
    return 'No output';
  }

  const lines = parts.map((part) => part.text ?? '');
  return lines.join('\n');
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Structured metrics writer for archal harnesses.
3
+ *
4
+ * Writes a JSON metrics file to the path specified by ARCHAL_METRICS_FILE.
5
+ * The orchestrator creates this path, reads it after the harness exits, and
6
+ * flows the data into RunResult.tokenUsage and telemetry.
7
+ *
8
+ * Safe no-op when ARCHAL_METRICS_FILE is not set (external harnesses that
9
+ * don't know about this protocol, or older orchestrator versions).
10
+ *
11
+ * @param {object} metrics
12
+ * @param {number} metrics.inputTokens
13
+ * @param {number} metrics.outputTokens
14
+ * @param {number} metrics.llmCallCount
15
+ * @param {number} metrics.toolCallCount
16
+ * @param {number} metrics.toolErrorCount
17
+ * @param {number} metrics.totalTimeMs
18
+ * @param {string} metrics.exitReason
19
+ * @param {string} [metrics.provider]
20
+ * @param {string} [metrics.model]
21
+ */
22
+ import { writeFileSync } from 'node:fs';
23
+
24
/**
 * Write `metrics` (tagged with `version: 1`) as JSON to the file named by the
 * ARCHAL_METRICS_FILE environment variable. Does nothing when the variable is
 * unset or empty, and never throws if the write fails.
 */
export function writeMetrics(metrics) {
  const target = process.env.ARCHAL_METRICS_FILE;
  if (target) {
    try {
      const serialized = JSON.stringify({ version: 1, ...metrics });
      writeFileSync(target, serialized);
    } catch {
      // Non-fatal — metrics are best-effort
    }
  }
}
@@ -0,0 +1,521 @@
1
+ /**
2
+ * Model configuration system for bundled harnesses.
3
+ *
4
+ * Provides default configs per model family, known capabilities,
5
+ * and a merge function: hardcoded defaults -> model family defaults -> env overrides.
6
+ *
7
+ * Zero dependencies — pure data and functions.
8
+ */
9
+
10
+ // ── Model capabilities ──────────────────────────────────────────────
11
+
12
+ /**
13
+ * @typedef {Object} ModelCapabilities
14
+ * @property {boolean} supportsTools - Can use function/tool calling
15
+ * @property {boolean} supportsSystemPrompt - Accepts a system prompt
16
+ * @property {boolean} supportsReasoning - Has reasoning/thinking mode (o1, o3, etc.)
17
+ * @property {boolean} supportsThinking - Has extended thinking / reasoning trace (Anthropic, Gemini 2.5)
18
+ * @property {number} maxContextWindow - Max context window in tokens
19
+ * @property {boolean} supportsStreaming - Supports streaming responses
20
+ */
21
+
22
+ /**
23
+ * @typedef {Object} ModelConfig
24
+ * @property {number} [maxTokens] - Max completion tokens
25
+ * @property {number} [temperature] - Sampling temperature
26
+ * @property {string} [reasoningEffort] - For reasoning models: low/medium/high
27
+ * @property {number} [topP] - Top-p sampling
28
+ */
29
+
30
+ /**
31
+ * @typedef {'working' | 'degraded' | 'broken' | 'untested'} BenchmarkStatus
32
+ */
33
+
34
+ /**
35
+ * @typedef {Object} ModelInfo
36
+ * @property {string} family - Model family key
37
+ * @property {string} provider - Provider name
38
+ * @property {ModelCapabilities} capabilities
39
+ * @property {ModelConfig} defaults - Default config for this model
40
+ * @property {BenchmarkStatus} benchmarkStatus - Status from benchmark testing
41
+ * @property {string} [benchmarkNotes] - Notes about benchmark performance
42
+ */
43
+
44
+ // ── Known model registry ────────────────────────────────────────────
45
+
46
/**
 * Registry of known models, keyed by lower-case model identifier.
 *
 * Every entry must populate all ModelCapabilities fields: lookups such as
 * isThinkingModel() return the stored flag as-is, so a missing flag would
 * surface as `undefined` rather than a boolean.
 *
 * @type {Record<string, ModelInfo>}
 */
const MODEL_REGISTRY = {
  // ── Anthropic ──
  'claude-opus-4-6': {
    family: 'claude-opus',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Top performer across all scenarios. Reliable tool use.',
  },
  'claude-sonnet-4-6': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Strong performance, good cost/quality balance.',
  },
  'claude-sonnet-4-20250514': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Solid tool use. Slightly behind claude-sonnet-4-6.',
  },
  'claude-haiku-4-5-20251001': {
    family: 'claude-haiku',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Struggles with multi-step reasoning.',
  },

  // ── OpenAI: GPT ──
  'gpt-4o': {
    family: 'gpt-4o',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Reliable tool use. Good all-around performer.',
  },
  'gpt-4o-mini': {
    family: 'gpt-4o-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Acceptable for simple scenarios.',
  },
  'gpt-4.1': {
    family: 'gpt-4.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Large context window. Strong at complex scenarios.',
  },

  'gpt-5.1': {
    family: 'gpt-5.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      // Previously omitted; set to the same value used for the other GPT
      // entries and for unregistered models.
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768 },
    benchmarkStatus: 'untested',
  },

  // ── OpenAI: Reasoning ──
  'o1': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Tool calling works but slow.',
  },
  'o1-mini': {
    family: 'o1-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Cheaper but less reliable.',
  },
  'o1-preview': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: false,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'broken',
    benchmarkNotes: 'No tool calling support. Cannot complete agentic tasks.',
  },
  'o3-mini': {
    family: 'o3-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Good reasoning, fast. No system prompt — task in user message.',
  },
  'o4-mini': {
    family: 'o4-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'untested',
  },

  // ── Gemini ──
  'gemini-2.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },

  // ── Gemini 3.x ──
  'gemini-3.0-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-3.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
};
321
+
322
+ // ── Family defaults ─────────────────────────────────────────────────
323
+
324
/**
 * Default config per model family, applied on top of the global defaults.
 * Sampling families carry a temperature; reasoning families carry a
 * reasoning effort instead. Each family gets its own object instance.
 *
 * @type {Record<string, ModelConfig>}
 */
const FAMILY_DEFAULTS = (() => {
  const sampling = (maxTokens) => ({ maxTokens, temperature: 0.2 });
  const reasoning = (maxTokens) => ({ maxTokens, reasoningEffort: 'medium' });

  return {
    'claude-opus': sampling(32768),
    'claude-sonnet': sampling(32768),
    'claude-haiku': sampling(16384),
    'gpt-4o': sampling(32768),
    'gpt-4o-mini': sampling(32768),
    'gpt-4.1': sampling(65536),
    'gpt-5.1': { maxTokens: 32768 },
    'o1': reasoning(65536),
    'o1-mini': reasoning(32768),
    'o3-mini': reasoning(32768),
    'o4-mini': reasoning(32768),
    'gemini-flash': sampling(16384),
    'gemini-pro': sampling(32768),
  };
})();
340
+
341
/**
 * Baseline config used for every model before family, model and env
 * settings are layered on top.
 *
 * @type {ModelConfig}
 */
const GLOBAL_DEFAULTS = { maxTokens: 32768, temperature: 0.2 };
346
+
347
+ // ── Lookup functions ────────────────────────────────────────────────
348
+
349
/**
 * Get the model info from the registry.
 *
 * The name is lower-cased before the lookup, and only the registry's own
 * keys are considered, so names that collide with inherited object members
 * (e.g. "constructor", "toString") are reported as unknown.
 *
 * @param {string} model
 * @returns {ModelInfo | null} The registry entry, or null for unknown models.
 */
export function getModelInfo(model) {
  const key = String(model ?? '').toLowerCase();
  return Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)
    ? MODEL_REGISTRY[key]
    : null;
}
358
+
359
/**
 * Get the capabilities of a model.
 *
 * Registered models (matched case-insensitively against the registry's own
 * keys) return their stored capabilities object. Anything else — including
 * names that only match inherited object members such as "toString" — gets
 * a fresh object of permissive defaults.
 *
 * @param {string} model
 * @returns {ModelCapabilities}
 */
export function getModelCapabilities(model) {
  const key = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    return MODEL_REGISTRY[key].capabilities;
  }

  // Sensible defaults for unknown models — assume thinking is supported
  return {
    supportsTools: true,
    supportsSystemPrompt: true,
    supportsReasoning: false,
    supportsThinking: true,
    maxContextWindow: 128000,
    supportsStreaming: true,
  };
}
379
+
380
/**
 * Detect the model family from the model name.
 *
 * Tries an exact (case-insensitive) registry lookup first, restricted to the
 * registry's own keys, then falls back to prefix matching. Prefix checks are
 * ordered from most to least specific (e.g. "gpt-4o-mini" before "gpt-4o"
 * before "gpt-4"), so the order of the statements below matters.
 *
 * @param {string} model
 * @returns {string | null} The family key, or null when nothing matches.
 */
export function detectModelFamily(model) {
  const normalized = String(model ?? '').toLowerCase();
  // Own-property check: a bare index lookup would pick up inherited members
  // such as "constructor" and return undefined instead of null.
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    return MODEL_REGISTRY[normalized].family;
  }

  // Prefix-based heuristic for unregistered models
  if (normalized.startsWith('claude-opus') || normalized.startsWith('opus-')) return 'claude-opus';
  if (normalized.startsWith('claude-sonnet') || normalized.startsWith('sonnet-')) return 'claude-sonnet';
  if (normalized.startsWith('claude-haiku') || normalized.startsWith('haiku-')) return 'claude-haiku';
  if (normalized.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
  if (normalized.startsWith('gpt-4o')) return 'gpt-4o';
  if (normalized.startsWith('gpt-4.1')) return 'gpt-4.1';
  if (normalized.startsWith('gpt-5')) return 'gpt-5.1';
  if (normalized.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
  if (normalized.startsWith('o1-mini')) return 'o1-mini';
  if (normalized.startsWith('o1')) return 'o1';
  if (normalized.startsWith('o3-mini')) return 'o3-mini';
  if (normalized.startsWith('o4-mini')) return 'o4-mini';
  if (normalized.startsWith('gemini') && normalized.includes('pro')) return 'gemini-pro';
  if (normalized.startsWith('gemini') && normalized.includes('flash')) return 'gemini-flash';

  return null;
}
409
+
410
+ // ── Config merge ────────────────────────────────────────────────────
411
+
412
/**
 * Parse env var overrides for model config.
 *
 * Reads ARCHAL_MAX_TOKENS, ARCHAL_TEMPERATURE and ARCHAL_REASONING_EFFORT.
 * Only fields that are explicitly set AND valid are returned; anything else
 * is ignored so the defaults apply. Values are trimmed and must be numeric
 * in full — partially numeric input such as "8k" or "0.5abc" is rejected
 * rather than silently truncated to its leading digits.
 *
 * @returns {Partial<ModelConfig>}
 */
function getEnvOverrides() {
  /** @type {Partial<ModelConfig>} */
  const overrides = {};

  // Max tokens: a positive whole number.
  const maxTokens = (process.env['ARCHAL_MAX_TOKENS'] ?? '').trim();
  if (maxTokens !== '') {
    const parsed = Number(maxTokens);
    if (Number.isSafeInteger(parsed) && parsed > 0) {
      overrides.maxTokens = parsed;
    }
  }

  // Temperature: a number in [0, 2]. NaN fails both comparisons.
  const temperature = (process.env['ARCHAL_TEMPERATURE'] ?? '').trim();
  if (temperature !== '') {
    const parsed = Number(temperature);
    if (parsed >= 0 && parsed <= 2) {
      overrides.temperature = parsed;
    }
  }

  // Reasoning effort: one of low/medium/high, case-insensitive.
  const reasoning = (process.env['ARCHAL_REASONING_EFFORT'] ?? '').trim().toLowerCase();
  if (['low', 'medium', 'high'].includes(reasoning)) {
    overrides.reasoningEffort = reasoning;
  }

  return overrides;
}
446
+
447
/**
 * Get the merged configuration for a model.
 * Priority: env var overrides > model-specific defaults > family defaults > global defaults.
 *
 * The model-specific lookup is case-insensitive, matching how the family is
 * detected, so a differently-cased name of a registered model receives the
 * same defaults as its canonical spelling.
 *
 * @param {string} model - Model identifier
 * @returns {ModelConfig}
 */
export function getModelConfig(model) {
  const key = String(model ?? '').toLowerCase();
  const family = detectModelFamily(model);
  const familyDefaults = family ? (FAMILY_DEFAULTS[family] ?? {}) : {};
  const modelDefaults = Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)
    ? (MODEL_REGISTRY[key].defaults ?? {})
    : {};
  const envOverrides = getEnvOverrides();

  // Later spreads win, so this order implements the documented priority.
  return {
    ...GLOBAL_DEFAULTS,
    ...familyDefaults,
    ...modelDefaults,
    ...envOverrides,
  };
}
467
+
468
/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * Reasoning models don't support temperature and use reasoning_effort instead.
 *
 * Registered models (matched case-insensitively against the registry's own
 * keys) report their `supportsReasoning` flag. Unregistered names fall back
 * to a pattern that matches a bare series name ("o3") as well as dashed
 * variants ("o3-pro", "o1-2024-12-17").
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isReasoningModel(model) {
  const normalized = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    return Boolean(MODEL_REGISTRY[normalized].capabilities.supportsReasoning);
  }
  // Fallback heuristic: series name followed by a dash or end of string, so
  // unrelated names that merely start with "o1"/"o3"/"o4" are not matched.
  return /^o[134](?:-|$)/.test(normalized);
}
480
+
481
/**
 * Check if a model supports extended thinking (Anthropic thinking blocks, Gemini thinking parts).
 *
 * Registered models (matched case-insensitively against the registry's own
 * keys) report their `supportsThinking` flag; an entry that omits the flag
 * is treated like an unknown model. Every unregistered model is assumed to
 * support thinking.
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isThinkingModel(model) {
  const normalized = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    // `?? true` keeps the return type boolean when an entry lacks the flag.
    return MODEL_REGISTRY[normalized].capabilities.supportsThinking ?? true;
  }
  // Unregistered models default to true. The former per-provider prefix
  // checks (claude-/sonnet-/haiku-/opus-, gemini-2.5/3, gpt-, o1/o3/o4) all
  // returned true as well, so they collapse into this single default.
  return true;
}
501
+
502
/**
 * List the identifier of every model present in the registry.
 * @returns {string[]} Registry keys, in the registry's enumeration order.
 */
export function listKnownModels() {
  const knownModels = Object.entries(MODEL_REGISTRY).map(([modelName]) => modelName);
  return knownModels;
}
509
+
510
/**
 * Bucket every registered model name under its benchmark status.
 * All four statuses are always present in the result, even when empty.
 * @returns {Record<BenchmarkStatus, string[]>}
 */
export function listModelsByStatus() {
  /** @type {Record<string, string[]>} */
  const buckets = { working: [], degraded: [], broken: [], untested: [] };
  Object.keys(MODEL_REGISTRY).forEach((modelName) => {
    const { benchmarkStatus } = MODEL_REGISTRY[modelName];
    buckets[benchmarkStatus].push(modelName);
  });
  return buckets;
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Shared provider detection and LLM calling for bundled harnesses.
3
+ *
4
+ * This is a thin re-export facade. Implementation lives in:
5
+ * - llm-config.mjs — provider detection, API key/base URL, timeout, thinking budget
6
+ * - llm-call.mjs — callLlm, callLlmWithMessages, LlmApiError, withRetry
7
+ * - llm-response.mjs — response parsing, message building, tool formatting
8
+ */
9
+
10
+ // ── Config ──────────────────────────────────────────────────────────
11
+ export {
12
+ detectProvider,
13
+ resolveApiKey,
14
+ resolveBaseUrl,
15
+ isThinkingEnabled,
16
+ } from './llm-config.mjs';
17
+
18
+ // ── Calling ─────────────────────────────────────────────────────────
19
+ export {
20
+ callLlm,
21
+ callLlmWithMessages,
22
+ LlmApiError,
23
+ withRetry,
24
+ } from './llm-call.mjs';
25
+
26
+ // ── Response parsing & message building ─────────────────────────────
27
+ export {
28
+ extractTokenUsage,
29
+ formatToolsForProvider,
30
+ parseToolCalls,
31
+ getResponseText,
32
+ getThinkingContent,
33
+ getStopReason,
34
+ buildInitialMessages,
35
+ appendAssistantResponse,
36
+ appendToolResults,
37
+ appendUserInstruction,
38
+ extractCallArgs,
39
+ } from './llm-response.mjs';