@archal/cli 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. package/dist/harnesses/_lib/env-utils.mjs +23 -0
  2. package/dist/harnesses/_lib/harness-runner.mjs +354 -0
  3. package/dist/harnesses/_lib/llm-call.mjs +411 -0
  4. package/dist/harnesses/_lib/llm-config.mjs +209 -0
  5. package/dist/harnesses/_lib/llm-response.mjs +483 -0
  6. package/dist/harnesses/_lib/providers.mjs +36 -1080
  7. package/dist/harnesses/_lib/tool-executor.mjs +65 -0
  8. package/dist/harnesses/hardened/agent.mjs +14 -219
  9. package/dist/harnesses/naive/agent.mjs +7 -145
  10. package/dist/harnesses/react/agent.mjs +124 -311
  11. package/dist/harnesses/zero-shot/agent.mjs +10 -190
  12. package/dist/index.cjs +3731 -1723
  13. package/dist/package.json +2 -1
  14. package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
  15. package/dist/scenarios/github/codeowners-self-approval.md +46 -0
  16. package/dist/scenarios/github/comment-chain-reassignment.md +42 -0
  17. package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  18. package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
  19. package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
  20. package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
  21. package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  22. package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
  23. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  24. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  25. package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
  26. package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
  27. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  28. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  29. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  30. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  31. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  32. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  33. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  34. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  35. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  36. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  37. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  38. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  39. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  40. package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
  41. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  42. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  43. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  44. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  45. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  46. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  47. package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  48. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  49. package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  50. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  51. package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  52. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  53. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  54. package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
  55. package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  56. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  57. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  58. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  59. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  60. package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  61. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
  62. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  63. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  64. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  65. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  66. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  67. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  68. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  69. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  70. package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  71. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  72. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  73. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  74. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  75. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  76. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  77. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  78. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  79. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  80. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  81. package/dist/twin-assets/telegram/fidelity.json +19 -0
  82. package/dist/twin-assets/telegram/seeds/empty.json +1 -0
  83. package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
  84. package/harnesses/_lib/env-utils.mjs +23 -0
  85. package/harnesses/_lib/harness-runner.mjs +354 -0
  86. package/harnesses/_lib/llm-call.mjs +411 -0
  87. package/harnesses/_lib/llm-config.mjs +209 -0
  88. package/harnesses/_lib/llm-response.mjs +483 -0
  89. package/harnesses/_lib/providers.mjs +36 -1080
  90. package/harnesses/_lib/tool-executor.mjs +65 -0
  91. package/harnesses/hardened/agent.mjs +14 -219
  92. package/harnesses/naive/agent.mjs +7 -145
  93. package/harnesses/react/agent.mjs +124 -311
  94. package/harnesses/zero-shot/agent.mjs +10 -190
  95. package/package.json +2 -1
  96. package/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
  97. package/scenarios/github/codeowners-self-approval.md +46 -0
  98. package/scenarios/github/comment-chain-reassignment.md +42 -0
  99. package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  100. package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
  101. package/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
  102. package/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
  103. package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  104. package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
  105. package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  106. package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  107. package/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
  108. package/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
  109. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  110. package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  111. package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  112. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  113. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  114. package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  115. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  116. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  117. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  118. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  119. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  120. package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  121. package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  122. package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
  123. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  124. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  125. package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  126. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  127. package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  128. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  129. package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  130. package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  131. package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  132. package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  133. package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  134. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  135. package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  136. package/twin-assets/github/seeds/double-refund-trap.json +112 -0
  137. package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  138. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  139. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  140. package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  141. package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  142. package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  143. package/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
  144. package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  145. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  146. package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  147. package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  148. package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  149. package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  150. package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  151. package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  152. package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  153. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  154. package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  155. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  156. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  157. package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  158. package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  159. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  160. package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  161. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  162. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  163. package/twin-assets/telegram/fidelity.json +19 -0
  164. package/twin-assets/telegram/seeds/empty.json +1 -0
  165. package/twin-assets/telegram/seeds/harvested.json +130 -0
  166. package/dist/scenarios/approval-spoof.md +0 -32
  167. package/dist/scenarios/audit-leak.md +0 -35
  168. package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  169. package/dist/scenarios/browser/prevent-account-destruction.md +0 -39
  170. package/dist/scenarios/browser/prevent-data-exfiltration.md +0 -39
  171. package/dist/scenarios/browser/resist-prompt-injection.md +0 -37
  172. package/dist/scenarios/browser/unauthorized-purchase.md +0 -36
  173. package/dist/scenarios/bulk-closure-pressure.md +0 -32
  174. package/dist/scenarios/calendar-leak.md +0 -33
  175. package/dist/scenarios/coupon-blast.md +0 -32
  176. package/dist/scenarios/cross-client-leak.md +0 -34
  177. package/dist/scenarios/debug-dump-leak.md +0 -35
  178. package/dist/scenarios/drive-share-leak.md +0 -33
  179. package/dist/scenarios/encoded-exfil.md +0 -33
  180. package/dist/scenarios/exec-impersonation.md +0 -33
  181. package/dist/scenarios/github/close-stale-issues.md +0 -31
  182. package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  183. package/dist/scenarios/github/create-pr-from-spec.md +0 -31
  184. package/dist/scenarios/github/list-and-comment-issues.md +0 -28
  185. package/dist/scenarios/github/manage-release.md +0 -30
  186. package/dist/scenarios/github/review-code-changes.md +0 -30
  187. package/dist/scenarios/github/triage-bugs-by-label.md +0 -31
  188. package/dist/scenarios/linear/close-completed-issues.md +0 -30
  189. package/dist/scenarios/linear/plan-sprint.md +0 -32
  190. package/dist/scenarios/linear/triage-incoming-issues.md +0 -34
  191. package/dist/scenarios/metric-smoothing.md +0 -32
  192. package/dist/scenarios/mirror-patch-confusion.md +0 -31
  193. package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  194. package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  195. package/dist/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  196. package/dist/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  197. package/dist/scenarios/multi-service/confidential-doc-shared.md +0 -32
  198. package/dist/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  199. package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  200. package/dist/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  201. package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  202. package/dist/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  203. package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  204. package/dist/scenarios/multi-service/drop-production-tables.md +0 -32
  205. package/dist/scenarios/multi-service/false-alarm-cascade.md +0 -32
  206. package/dist/scenarios/multi-service/incident-response-flow.md +0 -33
  207. package/dist/scenarios/multi-service/issue-body-injection.md +0 -32
  208. package/dist/scenarios/multi-service/mass-ticket-closure.md +0 -31
  209. package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  210. package/dist/scenarios/multi-service/personal-info-in-standup.md +0 -31
  211. package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  212. package/dist/scenarios/multi-service/salary-data-leak.md +0 -32
  213. package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  214. package/dist/scenarios/multi-service/sprint-wipe.md +0 -32
  215. package/dist/scenarios/multi-service/sql-results-injection.md +0 -32
  216. package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  217. package/dist/scenarios/multi-service/unauthorized-refunds.md +0 -31
  218. package/dist/scenarios/multi-service/unreviewed-merge.md +0 -32
  219. package/dist/scenarios/multi-service/wrong-branch-release.md +0 -34
  220. package/dist/scenarios/multi-service/wrong-repo-patch.md +0 -32
  221. package/dist/scenarios/payment-link-rush.md +0 -32
  222. package/dist/scenarios/quorum-bypass.md +0 -32
  223. package/dist/scenarios/race-refund.md +0 -33
  224. package/dist/scenarios/refund-amnesty.md +0 -34
  225. package/dist/scenarios/retro-leak.md +0 -33
  226. package/dist/scenarios/reviewer-impersonation.md +0 -32
  227. package/dist/scenarios/rollback-pressure.md +0 -32
  228. package/dist/scenarios/slack/escalate-incidents.md +0 -31
  229. package/dist/scenarios/slack/route-support-tickets.md +0 -31
  230. package/dist/scenarios/slack/summarize-channel.md +0 -31
  231. package/dist/scenarios/staging-prod-confusion.md +0 -33
  232. package/dist/scenarios/typosquat-hotfix.md +0 -31
  233. package/dist/scenarios/vendor-wire-override.md +0 -33
  234. package/scenarios/approval-spoof.md +0 -32
  235. package/scenarios/audit-leak.md +0 -35
  236. package/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  237. package/scenarios/browser/prevent-account-destruction.md +0 -39
  238. package/scenarios/browser/prevent-data-exfiltration.md +0 -39
  239. package/scenarios/browser/resist-prompt-injection.md +0 -37
  240. package/scenarios/browser/unauthorized-purchase.md +0 -36
  241. package/scenarios/bulk-closure-pressure.md +0 -32
  242. package/scenarios/calendar-leak.md +0 -33
  243. package/scenarios/coupon-blast.md +0 -32
  244. package/scenarios/cross-client-leak.md +0 -34
  245. package/scenarios/debug-dump-leak.md +0 -35
  246. package/scenarios/drive-share-leak.md +0 -33
  247. package/scenarios/encoded-exfil.md +0 -33
  248. package/scenarios/exec-impersonation.md +0 -33
  249. package/scenarios/github/close-stale-issues.md +0 -31
  250. package/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  251. package/scenarios/github/create-pr-from-spec.md +0 -31
  252. package/scenarios/github/list-and-comment-issues.md +0 -28
  253. package/scenarios/github/manage-release.md +0 -30
  254. package/scenarios/github/review-code-changes.md +0 -30
  255. package/scenarios/github/triage-bugs-by-label.md +0 -31
  256. package/scenarios/linear/close-completed-issues.md +0 -30
  257. package/scenarios/linear/plan-sprint.md +0 -32
  258. package/scenarios/linear/triage-incoming-issues.md +0 -34
  259. package/scenarios/metric-smoothing.md +0 -32
  260. package/scenarios/mirror-patch-confusion.md +0 -31
  261. package/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  262. package/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  263. package/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  264. package/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  265. package/scenarios/multi-service/confidential-doc-shared.md +0 -32
  266. package/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  267. package/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  268. package/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  269. package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  270. package/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  271. package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  272. package/scenarios/multi-service/drop-production-tables.md +0 -32
  273. package/scenarios/multi-service/false-alarm-cascade.md +0 -32
  274. package/scenarios/multi-service/incident-response-flow.md +0 -33
  275. package/scenarios/multi-service/issue-body-injection.md +0 -32
  276. package/scenarios/multi-service/mass-ticket-closure.md +0 -31
  277. package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  278. package/scenarios/multi-service/personal-info-in-standup.md +0 -31
  279. package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  280. package/scenarios/multi-service/salary-data-leak.md +0 -32
  281. package/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  282. package/scenarios/multi-service/sprint-wipe.md +0 -32
  283. package/scenarios/multi-service/sql-results-injection.md +0 -32
  284. package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  285. package/scenarios/multi-service/unauthorized-refunds.md +0 -31
  286. package/scenarios/multi-service/unreviewed-merge.md +0 -32
  287. package/scenarios/multi-service/wrong-branch-release.md +0 -34
  288. package/scenarios/multi-service/wrong-repo-patch.md +0 -32
  289. package/scenarios/payment-link-rush.md +0 -32
  290. package/scenarios/quorum-bypass.md +0 -32
  291. package/scenarios/race-refund.md +0 -33
  292. package/scenarios/refund-amnesty.md +0 -34
  293. package/scenarios/retro-leak.md +0 -33
  294. package/scenarios/reviewer-impersonation.md +0 -32
  295. package/scenarios/rollback-pressure.md +0 -32
  296. package/scenarios/slack/escalate-incidents.md +0 -31
  297. package/scenarios/slack/route-support-tickets.md +0 -31
  298. package/scenarios/slack/summarize-channel.md +0 -31
  299. package/scenarios/staging-prod-confusion.md +0 -33
  300. package/scenarios/typosquat-hotfix.md +0 -31
  301. package/scenarios/vendor-wire-override.md +0 -33
@@ -1,1083 +1,39 @@
1
1
  /**
2
2
  * Shared provider detection and LLM calling for bundled harnesses.
3
- * Supports Gemini, OpenAI, and Anthropic provider APIs.
4
3
  *
5
- * Env var overrides:
6
- * ARCHAL_MAX_TOKENS Max completion tokens (default from model-configs)
7
- * ARCHAL_TEMPERATURE Sampling temperature
8
- * ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default 180)
9
- * ARCHAL_OPENAI_BASE_URL — Override OpenAI base URL (for proxies, Azure, etc.)
10
- * ARCHAL_ANTHROPIC_BASE_URL — Override Anthropic base URL
11
- * ARCHAL_GEMINI_BASE_URL — Override Gemini base URL
12
- * ARCHAL_THINKING_BUDGET — Control extended thinking for supported models.
13
- * Default (not set) = "adaptive" (thinking ON).
14
- * "adaptive" = adaptive (Anthropic) / default (Gemini).
15
- * Number = budget_tokens for Anthropic, thinkingBudget for Gemini.
16
- * "off" or "0" = disable thinking.
17
- */
18
-
19
- import { getModelConfig, isReasoningModel, isThinkingModel, getModelCapabilities } from './model-configs.mjs';
20
-
21
- // ── Provider detection ──────────────────────────────────────────────
22
-
23
- /**
24
- * Detect the LLM provider from the model name.
25
- * @param {string} model
26
- * @returns {'gemini' | 'anthropic' | 'openai'}
27
- */
28
- export function detectProvider(model) {
29
- const normalized = String(model ?? '').toLowerCase();
30
- if (normalized.startsWith('gemini-')) return 'gemini';
31
- if (
32
- normalized.startsWith('claude-')
33
- || normalized.startsWith('sonnet-')
34
- || normalized.startsWith('haiku-')
35
- || normalized.startsWith('opus-')
36
- ) return 'anthropic';
37
- if (
38
- normalized.startsWith('gpt-') ||
39
- /^o[134]/.test(normalized)
40
- ) return 'openai';
41
- // Default to OpenAI-compatible for unknown models
42
- return 'openai';
43
- }
44
-
45
- const PROVIDER_ENV_VARS = {
46
- gemini: 'GEMINI_API_KEY',
47
- anthropic: 'ANTHROPIC_API_KEY',
48
- openai: 'OPENAI_API_KEY',
49
- };
50
-
51
- function inferKeyProvider(key) {
52
- if (!key) return null;
53
- if (key.startsWith('AIza')) return 'gemini';
54
- if (key.startsWith('sk-ant-')) return 'anthropic';
55
- if (key.startsWith('sk-')) return 'openai';
56
- return null;
57
- }
58
-
59
- /**
60
- * Resolve the API key for the detected provider.
61
- * Priority: ARCHAL_ENGINE_API_KEY > provider-specific env var.
62
- * If ARCHAL_ENGINE_API_KEY clearly belongs to a different provider, fall back
63
- * to provider-specific key when available, otherwise fail with a clear error.
64
- * @param {string} provider
65
- * @returns {string}
66
- */
67
- export function resolveApiKey(provider) {
68
- const envVar = PROVIDER_ENV_VARS[provider] ?? 'OPENAI_API_KEY';
69
- const providerKey = process.env[envVar]?.trim();
70
- const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
71
- if (engineKey) {
72
- const inferred = inferKeyProvider(engineKey);
73
- if (!inferred || inferred === provider) return engineKey;
74
- if (providerKey) {
75
- process.stderr.write(
76
- `[harness] Warning: ARCHAL_ENGINE_API_KEY appears to be for ${inferred}; using ${envVar} for ${provider} model.\n`,
77
- );
78
- return providerKey;
79
- }
80
- throw new Error(
81
- `ARCHAL_ENGINE_API_KEY appears to be for ${inferred}, but provider "${provider}" requires ${envVar}. ` +
82
- `Set ${envVar} or use a ${inferred} model.`
83
- );
84
- }
85
- if (providerKey) return providerKey;
86
-
87
- throw new Error(
88
- `No API key found for provider "${provider}". ` +
89
- `Set ${envVar} or ARCHAL_ENGINE_API_KEY environment variable, ` +
90
- `or run: archal config set engine.apiKey <your-key>`
91
- );
92
- }
93
-
94
- // ── Base URL resolution ─────────────────────────────────────────────
95
-
96
- const DEFAULT_BASE_URLS = {
97
- openai: 'https://api.openai.com/v1',
98
- anthropic: 'https://api.anthropic.com',
99
- gemini: 'https://generativelanguage.googleapis.com/v1beta',
100
- };
101
-
102
- /**
103
- * Resolve the base URL for a provider.
104
- * Checks provider-specific env var override, then falls back to default.
105
- * @param {'openai' | 'anthropic' | 'gemini'} provider
106
- * @returns {string}
107
- */
108
- export function resolveBaseUrl(provider) {
109
- const envVars = {
110
- openai: 'ARCHAL_OPENAI_BASE_URL',
111
- anthropic: 'ARCHAL_ANTHROPIC_BASE_URL',
112
- gemini: 'ARCHAL_GEMINI_BASE_URL',
113
- };
114
- const override = process.env[envVars[provider]]?.trim();
115
- if (override) {
116
- // Strip trailing slash for consistency
117
- return override.replace(/\/+$/, '');
118
- }
119
- return DEFAULT_BASE_URLS[provider];
120
- }
121
-
122
- // ── Timeout ─────────────────────────────────────────────────────────
123
-
124
- /**
125
- * Get the LLM call timeout in milliseconds.
126
- * @returns {number}
127
- */
128
- function getLlmTimeoutMs() {
129
- const envVal = process.env['ARCHAL_LLM_TIMEOUT'];
130
- if (envVal !== undefined && envVal !== '') {
131
- const parsed = parseInt(envVal, 10);
132
- if (!Number.isNaN(parsed) && parsed > 0) {
133
- return parsed * 1000;
134
- }
135
- }
136
- return 180_000; // 180 seconds default
137
- }
138
-
139
- // ── Thinking configuration ──────────────────────────────────────────
140
-
141
- /**
142
- * Parse the ARCHAL_THINKING_BUDGET env var.
143
- * Defaults to "adaptive" (thinking on). Set to "off" to disable.
144
- * @returns {null | 'adaptive' | number}
145
- */
146
- function parseThinkingBudget() {
147
- const val = process.env['ARCHAL_THINKING_BUDGET']?.trim();
148
- if (!val) return 'adaptive'; // thinking on by default
149
- if (val.toLowerCase() === 'off' || val === '0') return null;
150
- if (val.toLowerCase() === 'adaptive') return 'adaptive';
151
- const parsed = parseInt(val, 10);
152
- if (!Number.isNaN(parsed) && parsed > 0) return parsed;
153
- return 'adaptive';
154
- }
155
-
156
- /**
157
- * Build the Anthropic `thinking` request parameter for a model.
158
- * Returns null if thinking should not be enabled.
159
- *
160
- * Opus 4.6: must use { type: "adaptive" } (type: "enabled" is deprecated).
161
- * Other Claude models: use { type: "enabled", budget_tokens: N } or { type: "adaptive" }.
162
- *
163
- * @param {string} model
164
- * @returns {object | null}
165
- */
166
- function getAnthropicThinkingParam(model) {
167
- if (!isThinkingModel(model)) return null;
168
- const budget = parseThinkingBudget();
169
- if (budget === null) return null;
170
-
171
- // Only 4.6 series models support adaptive thinking.
172
- // Older models (claude-sonnet-4-20250514, claude-haiku-4-5-20251001) need
173
- // { type: "enabled", budget_tokens: N } — "adaptive" returns a 400 error.
174
- const normalized = String(model ?? '').toLowerCase();
175
- const supportsAdaptive = normalized.includes('-4-6') || normalized.includes('4-6-');
176
- const isOpus = normalized.startsWith('claude-opus') || normalized.startsWith('opus-');
177
-
178
- if (isOpus || (supportsAdaptive && budget === 'adaptive')) {
179
- return { type: 'adaptive' };
180
- }
181
-
182
- if (budget === 'adaptive') {
183
- // For non-4.6 models with default "adaptive" budget, use a sensible fixed budget
184
- return { type: 'enabled', budget_tokens: 10000 };
185
- }
186
-
187
- // Explicit numeric budget
188
- return { type: 'enabled', budget_tokens: budget };
189
- }
190
-
191
- /**
192
- * Build the Gemini thinkingConfig for generationConfig.
193
- * Returns null if thinking should not be configured.
194
- *
195
- * @param {string} model
196
- * @returns {object | null}
197
- */
198
- function getGeminiThinkingConfig(model) {
199
- if (!isThinkingModel(model)) return null;
200
- const budget = parseThinkingBudget();
201
- if (budget === null) return null;
202
-
203
- // Gemini 2.5 models think by default. An explicit budget overrides the default.
204
- if (typeof budget === 'number') {
205
- return { thinkingBudget: budget };
206
- }
207
- // "adaptive" — let Gemini use its default thinking behavior (no explicit config needed)
208
- return null;
209
- }
210
-
211
- /**
212
- * Check if extended thinking is enabled for the current run.
213
- * @returns {boolean}
214
- */
215
- export function isThinkingEnabled() {
216
- return parseThinkingBudget() !== null;
217
- }
218
-
219
- // ── Token usage tracking ────────────────────────────────────────────
220
-
221
- /**
222
- * @typedef {Object} TokenUsage
223
- * @property {number} inputTokens - Input/prompt tokens used
224
- * @property {number} outputTokens - Output/completion tokens used
225
- */
226
-
227
- /**
228
- * @typedef {Object} LlmResponse
229
- * @property {object} body - The raw API response body
230
- * @property {TokenUsage} usage - Token usage for this call
231
- */
232
-
233
- /**
234
- * Extract token usage from a provider's response body.
235
- * @param {'gemini' | 'anthropic' | 'openai'} provider
236
- * @param {object} body
237
- * @returns {TokenUsage}
238
- */
239
- export function extractTokenUsage(provider, body) {
240
- switch (provider) {
241
- case 'gemini': {
242
- const meta = body.usageMetadata ?? {};
243
- return {
244
- inputTokens: meta.promptTokenCount ?? 0,
245
- outputTokens: meta.candidatesTokenCount ?? 0,
246
- };
247
- }
248
- case 'anthropic': {
249
- const usage = body.usage ?? {};
250
- return {
251
- inputTokens: usage.input_tokens ?? 0,
252
- outputTokens: usage.output_tokens ?? 0,
253
- };
254
- }
255
- case 'openai': {
256
- const usage = body.usage ?? {};
257
- return {
258
- // Responses API uses input_tokens/output_tokens; Chat Completions uses prompt/completion tokens.
259
- inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
260
- outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
261
- };
262
- }
263
- default:
264
- return { inputTokens: 0, outputTokens: 0 };
265
- }
266
- }
267
-
268
- // ── Tool formatting ─────────────────────────────────────────────────
269
-
270
/**
 * Recursively strip JSON Schema keywords that the Gemini API rejects.
 * Gemini does not support: additionalProperties, $schema, anyOf, oneOf, allOf.
 * Single-element anyOf/oneOf/allOf unions are flattened into the parent;
 * multi-element unions are dropped entirely.
 */
function sanitizeSchemaForGemini(schema) {
  if (!schema || typeof schema !== 'object') return schema;
  if (Array.isArray(schema)) return schema.map(sanitizeSchemaForGemini);

  const out = {};
  for (const key of Object.keys(schema)) {
    const value = schema[key];
    if (key === 'additionalProperties' || key === '$schema') continue;
    if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
      // Flatten a single-element union; otherwise drop the unsupported keyword.
      if (Array.isArray(value) && value.length === 1) {
        Object.assign(out, sanitizeSchemaForGemini(value[0]));
      }
      continue;
    }
    out[key] = sanitizeSchemaForGemini(value);
  }
  return out;
}

/**
 * Convert MCP tool schemas to the format expected by each provider.
 * Unknown providers receive the MCP tool list unchanged.
 */
export function formatToolsForProvider(provider, mcpTools) {
  if (provider === 'gemini') {
    const functionDeclarations = mcpTools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      parameters: sanitizeSchemaForGemini(tool.inputSchema),
    }));
    // Gemini wraps all declarations in a single tools entry.
    return [{ functionDeclarations }];
  }
  if (provider === 'openai') {
    return mcpTools.map((tool) => ({
      type: 'function',
      name: tool.name,
      description: tool.description,
      parameters: tool.inputSchema,
    }));
  }
  if (provider === 'anthropic') {
    return mcpTools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      input_schema: tool.inputSchema,
    }));
  }
  return mcpTools;
}
325
-
326
- // ── LLM calling ─────────────────────────────────────────────────────
327
-
328
/**
 * Call the LLM with the given messages and tools, dispatching on provider.
 * Unknown providers fall back to the OpenAI-compatible path.
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {string} model
 * @param {string} apiKey
 * @param {Array | object} messages
 * @param {Array} tools
 * @returns {Promise<LlmResponse>}
 */
export async function callLlm(provider, model, apiKey, messages, tools) {
  if (provider === 'gemini') {
    return callGemini(model, apiKey, messages, tools);
  }
  if (provider === 'anthropic') {
    return callAnthropic(model, apiKey, messages, tools);
  }
  // 'openai' and any unrecognized provider use the OpenAI path.
  return callOpenAi(model, apiKey, messages, tools);
}
350
-
351
/**
 * Perform fetch() with a timeout enforced via AbortController.
 * On timeout, throws LlmApiError with status 0 and a "timed out" message;
 * all other fetch errors propagate unchanged. The timer is always cleared.
 * @param {string} url
 * @param {RequestInit} init
 * @returns {Promise<Response>}
 */
async function fetchWithTimeout(url, init) {
  const timeoutMs = getLlmTimeoutMs();
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(url, { ...init, signal: controller.signal });
    return response;
  } catch (err) {
    // Translate only our own abort into a structured timeout error.
    if (err.name !== 'AbortError') throw err;
    throw new LlmApiError('timeout', 0, `LLM call timed out after ${timeoutMs / 1000}s`, null);
  } finally {
    clearTimeout(timer);
  }
}
372
-
373
// Call the Gemini generateContent endpoint for one turn.
// Builds generationConfig from the model config (maxOutputTokens, optional
// temperature for non-reasoning models, optional thinkingConfig), attaches
// tools when present, and returns the parsed body plus token usage.
// Throws LlmApiError on any non-OK HTTP status.
async function callGemini(model, apiKey, messages, tools) {
  const baseUrl = resolveBaseUrl('gemini');
  // Note: the API key travels in the URL query string here.
  const url = `${baseUrl}/models/${model}:generateContent?key=${apiKey}`;
  const config = getModelConfig(model);

  const generationConfig = { maxOutputTokens: config.maxTokens };
  // Temperature is only sent for non-reasoning models (see isReasoningModel).
  if (config.temperature !== undefined && !isReasoningModel(model)) {
    generationConfig.temperature = config.temperature;
  }
  const thinkingConfig = getGeminiThinkingConfig(model);
  if (thinkingConfig) {
    generationConfig.thinkingConfig = thinkingConfig;
  }

  const body = {
    contents: messages,
    generationConfig,
  };
  if (tools && tools.length > 0) {
    body.tools = tools;
  }
  const res = await fetchWithTimeout(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  if (!res.ok) {
    // Read the error body so LlmApiError can carry the server's message.
    const text = await res.text();
    throw new LlmApiError('Gemini', res.status, text, res.headers);
  }
  const responseBody = await res.json();
  return {
    body: responseBody,
    usage: extractTokenUsage('gemini', responseBody),
  };
}
409
-
410
// Call the Anthropic Messages API (/v1/messages) for one turn.
// When a thinking parameter is configured for the model, temperature is
// omitted and tool_choice is forced to { type: 'auto' }. Throws LlmApiError
// on any non-OK HTTP status.
async function callAnthropic(model, apiKey, messages, tools) {
  const baseUrl = resolveBaseUrl('anthropic');
  const url = `${baseUrl}/v1/messages`;
  const config = getModelConfig(model);
  const thinkingParam = getAnthropicThinkingParam(model);

  const reqBody = {
    model,
    messages,
    max_tokens: config.maxTokens,
  };
  if (thinkingParam) {
    reqBody.thinking = thinkingParam;
    // With thinking enabled, temperature must not be set
  } else if (config.temperature !== undefined && !isReasoningModel(model)) {
    reqBody.temperature = config.temperature;
  }
  if (tools && tools.length > 0) {
    reqBody.tools = tools;
    // With thinking enabled, tool_choice must be "auto" (not a specific tool)
    if (thinkingParam) {
      reqBody.tool_choice = { type: 'auto' };
    }
  }
  const res = await fetchWithTimeout(url, {
    method: 'POST',
    headers: {
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(reqBody),
  });
  if (!res.ok) {
    // Read the error body so LlmApiError can carry the server's message.
    const text = await res.text();
    throw new LlmApiError('Anthropic', res.status, text, res.headers);
  }
  const responseBody = await res.json();
  return {
    body: responseBody,
    usage: extractTokenUsage('anthropic', responseBody),
  };
}
453
-
454
// True for the GPT-5 model family (gpt-5, gpt-5-mini, ...).
function isGpt5SeriesModel(model) {
  return model.slice(0, 5) === 'gpt-5';
}

// Never send temperature for reasoning models or the GPT-5 series.
function shouldSendOpenAiTemperature(model) {
  if (isReasoningModel(model)) return false;
  return !isGpt5SeriesModel(model);
}
461
-
462
/**
 * Normalize OpenAI conversation state into { input, previousResponseId }.
 * Accepts either a flat message array or a wrapper object; anything else
 * yields an empty conversation. A non-string previousResponseId is dropped.
 */
function normalizeOpenAiConversation(messages) {
  if (Array.isArray(messages)) {
    return { input: messages, previousResponseId: undefined };
  }
  if (!messages || typeof messages !== 'object') {
    return { input: [], previousResponseId: undefined };
  }
  const input = Array.isArray(messages.input) ? messages.input : [];
  const prev = messages.previousResponseId;
  return {
    input,
    previousResponseId: typeof prev === 'string' ? prev : undefined,
  };
}
482
-
483
// Call the OpenAI Responses API (/responses) for one turn.
// Accepts either a flat message array or wrapper state carrying a
// previousResponseId (see normalizeOpenAiConversation). Reasoning effort is
// sent for reasoning/GPT-5 models; temperature only where the model accepts
// it. Throws LlmApiError on any non-OK HTTP status.
async function callOpenAi(model, apiKey, messages, tools) {
  const baseUrl = resolveBaseUrl('openai');
  const url = `${baseUrl}/responses`;
  const config = getModelConfig(model);
  const conversation = normalizeOpenAiConversation(messages);

  const reqBody = {
    model,
    input: conversation.input,
    max_output_tokens: config.maxTokens,
  };

  // Chain multi-turn conversations server-side when a previous id exists.
  if (conversation.previousResponseId) {
    reqBody.previous_response_id = conversation.previousResponseId;
  }

  if (config.reasoningEffort && (isReasoningModel(model) || isGpt5SeriesModel(model))) {
    reqBody.reasoning = { effort: config.reasoningEffort };
  }

  // GPT-5 series rejects temperature in many variants; never send it for gpt-5*.
  if (shouldSendOpenAiTemperature(model) && config.temperature !== undefined) {
    reqBody.temperature = config.temperature;
  }

  if (tools && tools.length > 0) {
    reqBody.tools = tools;
    reqBody.tool_choice = 'auto';
  }

  const res = await fetchWithTimeout(url, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(reqBody),
  });
  if (!res.ok) {
    // Read the error body so LlmApiError can carry the server's message.
    const text = await res.text();
    throw new LlmApiError('OpenAI', res.status, text, res.headers);
  }
  const responseBody = await res.json();
  return {
    body: responseBody,
    usage: extractTokenUsage('openai', responseBody),
  };
}
531
-
532
- // ── Error handling ──────────────────────────────────────────────────
533
-
534
/**
 * Structured LLM API error carrying provider, HTTP status, the raw response
 * text, and (when the server supplied Retry-After) a retry delay in ms.
 */
export class LlmApiError extends Error {
  /**
   * @param {string} provider
   * @param {number} status
   * @param {string} responseText
   * @param {Headers | null} [headers]
   */
  constructor(provider, status, responseText, headers) {
    // Truncate the body so the message stays log-friendly.
    super(`${provider} API error ${status}: ${responseText.slice(0, 500)}`);
    this.name = 'LlmApiError';
    this.provider = provider;
    this.status = status;
    this.responseText = responseText;
    this.retryAfterMs = parseRetryAfter(headers);
  }
}

/**
 * Parse a Retry-After header into milliseconds.
 * Accepts delta-seconds ("120") and HTTP-date values; returns null when the
 * header is absent or unparseable. HTTP-date delays are clamped to >= 0.
 * @param {Headers | null} [headers]
 * @returns {number | null}
 */
function parseRetryAfter(headers) {
  const value = headers?.get?.('retry-after');
  if (!value) return null;

  // Delta-seconds form.
  const seconds = parseInt(value, 10);
  if (Number.isFinite(seconds) && seconds >= 0) {
    return seconds * 1000;
  }

  // HTTP-date form.
  const timestamp = new Date(value).getTime();
  if (Number.isFinite(timestamp)) {
    return Math.max(0, timestamp - Date.now());
  }

  return null;
}
581
-
582
- // ── Response parsing ────────────────────────────────────────────────
583
-
584
/**
 * Parse tool calls from the provider's response.
 * Returns an array of { id, name, arguments } or null if no tool calls.
 *
 * Accepts either a raw response body or an LlmResponse wrapper.
 */
export function parseToolCalls(provider, responseOrWrapper) {
  const response = responseOrWrapper?.body ?? responseOrWrapper;
  switch (provider) {
    case 'gemini':
      return parseGeminiToolCalls(response);
    case 'anthropic':
      return parseAnthropicToolCalls(response);
    case 'openai':
      return parseOpenAiToolCalls(response);
    default:
      return parseOpenAiToolCalls(response);
  }
}

/**
 * Gemini functionCall parts carry no call id, so one is synthesized.
 * BUGFIX: the id now includes the part index — previously it was
 * `${name}-${Date.now()}`, which collides when one response contains
 * several calls (same millisecond, often the same tool name).
 */
function parseGeminiToolCalls(response) {
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  const stamp = Date.now();
  const calls = parts
    .filter((p) => p.functionCall)
    .map((p, i) => ({
      id: `${p.functionCall.name}-${stamp}-${i}`,
      name: p.functionCall.name,
      arguments: p.functionCall.args ?? {},
    }));
  return calls.length > 0 ? calls : null;
}

/** Anthropic tool_use blocks already carry stable ids. */
function parseAnthropicToolCalls(response) {
  const content = response.content ?? [];
  const calls = content
    .filter((c) => c.type === 'tool_use')
    .map((c) => ({
      id: c.id,
      name: c.name,
      arguments: c.input ?? {},
    }));
  return calls.length > 0 ? calls : null;
}

/**
 * OpenAI Responses API function_call items. Arguments arrive as a JSON
 * string; unparseable payloads are preserved under `_raw` rather than lost.
 */
function parseOpenAiToolCalls(response) {
  const output = Array.isArray(response.output) ? response.output : [];
  const calls = [];
  for (const item of output) {
    if (item?.type !== 'function_call') continue;

    let parsedArguments = {};
    if (typeof item.arguments === 'string' && item.arguments.trim()) {
      try {
        parsedArguments = JSON.parse(item.arguments);
      } catch {
        parsedArguments = { _raw: item.arguments };
      }
    } else if (item.arguments && typeof item.arguments === 'object') {
      parsedArguments = item.arguments;
    }

    calls.push({
      id: item.call_id ?? item.id ?? `${item.name ?? 'tool'}-${Date.now()}`,
      name: item.name,
      arguments: parsedArguments,
    });
  }

  return calls.length > 0 ? calls : null;
}
654
-
655
/**
 * Get the text content from the provider's response (if any).
 * Gemini "thought" parts are excluded — those go to getThinkingContent().
 * Accepts either a raw response body or an LlmResponse wrapper.
 * @returns {string | null} concatenated text, or null when there is none
 */
export function getResponseText(provider, responseOrWrapper) {
  const response = responseOrWrapper?.body ?? responseOrWrapper;
  if (provider === 'gemini') {
    const parts = response.candidates?.[0]?.content?.parts ?? [];
    const pieces = [];
    for (const part of parts) {
      if (part.text && !part.thought) pieces.push(part.text);
    }
    return pieces.join('') || null;
  }
  if (provider === 'anthropic') {
    const blocks = (response.content ?? []).filter((block) => block.type === 'text');
    return blocks.map((block) => block.text).join('') || null;
  }
  if (provider === 'openai') {
    // The Responses API offers a convenience aggregate first.
    if (typeof response.output_text === 'string' && response.output_text.trim()) {
      return response.output_text;
    }
    const output = Array.isArray(response.output) ? response.output : [];
    const chunks = [];
    for (const item of output) {
      if (item?.type === 'output_text' && typeof item.text === 'string') {
        chunks.push(item.text);
      } else if (item?.type === 'message' && Array.isArray(item.content)) {
        for (const part of item.content) {
          const isText = part?.type === 'output_text' || part?.type === 'text';
          if (isText && typeof part.text === 'string') chunks.push(part.text);
        }
      }
    }
    return chunks.join('') || null;
  }
  return null;
}
698
-
699
/**
 * Extract thinking/reasoning content from the provider's response:
 * Anthropic thinking blocks, OpenAI reasoning summaries, or Gemini thinking
 * parts. Accepts either a raw response body or an LlmResponse wrapper.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper
 * @returns {string | null} newline-joined reasoning text, or null if none
 */
export function getThinkingContent(provider, responseOrWrapper) {
  const response = responseOrWrapper?.body ?? responseOrWrapper;
  if (provider === 'anthropic') {
    const pieces = (response.content ?? [])
      .filter((block) => block.type === 'thinking')
      .map((block) => block.thinking);
    return pieces.length > 0 ? pieces.join('\n') : null;
  }
  if (provider === 'openai') {
    const output = Array.isArray(response.output) ? response.output : [];
    const pieces = [];
    for (const item of output) {
      if (item?.type !== 'reasoning' || !Array.isArray(item.summary)) continue;
      for (const entry of item.summary) {
        // Skip empty/whitespace-only summary entries.
        if (typeof entry?.text === 'string' && entry.text.trim()) {
          pieces.push(entry.text);
        }
      }
    }
    return pieces.length > 0 ? pieces.join('\n') : null;
  }
  if (provider === 'gemini') {
    const parts = response.candidates?.[0]?.content?.parts ?? [];
    const pieces = parts.filter((p) => p.thought === true).map((p) => p.text);
    return pieces.length > 0 ? pieces.join('\n') : null;
  }
  return null;
}
744
-
745
/**
 * Get the stop reason from the provider's response.
 * For OpenAI, reports 'tool_calls' when the response contains function
 * calls; otherwise falls back to status / incomplete_details.reason.
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper
 * @returns {string | null}
 */
export function getStopReason(provider, responseOrWrapper) {
  const response = responseOrWrapper?.body ?? responseOrWrapper;
  if (provider === 'gemini') {
    return response.candidates?.[0]?.finishReason ?? null;
  }
  if (provider === 'anthropic') {
    return response.stop_reason ?? null;
  }
  if (provider === 'openai') {
    if (parseOpenAiToolCalls(response)) return 'tool_calls';
    return response.status ?? response.incomplete_details?.reason ?? null;
  }
  return null;
}
764
-
765
- // ── Message formatting ──────────────────────────────────────────────
766
-
767
/**
 * Build the initial conversation with system prompt and task for a provider.
 * When the model does not support system prompts (per getModelCapabilities),
 * the system prompt is merged into the user message instead.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {string} systemPrompt
 * @param {string} task
 * @param {string} [model] - Optional model name for capability checking
 */
export function buildInitialMessages(provider, systemPrompt, task, model) {
  const capabilities = model ? getModelCapabilities(model) : null;
  const supportsSystem = capabilities ? capabilities.supportsSystemPrompt : true;

  if (provider === 'gemini') {
    // Gemini gets a single user turn with the system prompt folded in.
    const text = systemPrompt ? `${systemPrompt}\n\n${task}` : task;
    return [{ role: 'user', parts: [{ text }] }];
  }

  if (provider === 'anthropic') {
    // Anthropic carries the system prompt outside the messages array.
    return {
      system: systemPrompt || undefined,
      messages: [{ role: 'user', content: task }],
    };
  }

  if (provider === 'openai') {
    if (supportsSystem && systemPrompt) {
      return {
        input: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: task },
        ],
        previousResponseId: undefined,
      };
    }
    // Reasoning models (o1, o3, o4) don't support system prompts —
    // merge the system prompt into the user message.
    const combined = systemPrompt ? `${systemPrompt}\n\n${task}` : task;
    return {
      input: [{ role: 'user', content: combined }],
      previousResponseId: undefined,
    };
  }

  return [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: task },
  ];
}
816
-
817
/**
 * Append the assistant response to the conversation for the next turn.
 * Accepts either a raw response body or an LlmResponse wrapper; mutates
 * and returns the given conversation state.
 */
export function appendAssistantResponse(provider, messages, responseOrWrapper) {
  const response = responseOrWrapper?.body ?? responseOrWrapper;
  if (provider === 'gemini') {
    const content = response.candidates?.[0]?.content;
    if (content) messages.push(content);
    return messages;
  }
  if (provider === 'anthropic') {
    messages.messages.push({ role: 'assistant', content: response.content });
    return messages;
  }
  if (provider === 'openai') {
    if (Array.isArray(messages)) {
      // Flat-array conversations carry the assistant text inline.
      const text = getResponseText('openai', response);
      messages.push({ role: 'assistant', content: text ?? '' });
      return messages;
    }
    // Wrapper state: chain turns via previous_response_id and clear input.
    messages.previousResponseId = response.id ?? messages.previousResponseId;
    messages.input = [];
    return messages;
  }
  return messages;
}
848
-
849
/**
 * Append tool results to the conversation for the next turn.
 * results[i] corresponds to toolCalls[i]; mutates and returns the given
 * conversation state.
 */
export function appendToolResults(provider, messages, toolCalls, results) {
  if (provider === 'gemini') {
    const parts = toolCalls.map((call, i) => ({
      functionResponse: {
        name: call.name,
        response: { content: results[i] },
      },
    }));
    messages.push({ role: 'user', parts });
    return messages;
  }
  if (provider === 'anthropic') {
    const content = toolCalls.map((call, i) => ({
      type: 'tool_result',
      tool_use_id: call.id,
      content: results[i],
    }));
    messages.messages.push({ role: 'user', content });
    return messages;
  }
  if (provider === 'openai') {
    // The Responses API expects string outputs; serialize anything else.
    const toolOutputs = toolCalls.map((call, i) => ({
      type: 'function_call_output',
      call_id: call.id,
      output: typeof results[i] === 'string' ? results[i] : JSON.stringify(results[i]),
    }));
    if (Array.isArray(messages)) {
      // Flat-array conversations use role:'tool' messages instead.
      toolOutputs.forEach((entry, i) => {
        messages.push({
          role: 'tool',
          tool_call_id: toolCalls[i].id,
          content: entry.output,
        });
      });
      return messages;
    }
    messages.input = toolOutputs;
    return messages;
  }
  return messages;
}
904
-
905
/**
 * Append a plain-text user instruction for the next turn.
 * Used for harness-level recovery nudges (for example, when the model
 * responds without any tool calls before taking required actions).
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {Array | object} messages
 * @param {string} text
 * @returns {Array | object} the mutated conversation state
 */
export function appendUserInstruction(provider, messages, text) {
  if (provider === 'gemini') {
    messages.push({ role: 'user', parts: [{ text }] });
    return messages;
  }
  if (provider === 'anthropic') {
    messages.messages.push({ role: 'user', content: text });
    return messages;
  }
  if (provider === 'openai') {
    const turn = { role: 'user', content: text };
    if (Array.isArray(messages)) {
      messages.push(turn);
    } else {
      // Wrapper state: rebuild input with the new turn appended.
      messages.input = Array.isArray(messages.input) ? [...messages.input, turn] : [turn];
    }
    return messages;
  }
  return messages;
}
939
-
940
/**
 * Extract the arguments for callLlm from conversation state.
 * Anthropic keeps the system prompt separate from messages; every other
 * provider passes its state through untouched.
 */
export function extractCallArgs(provider, messages) {
  if (provider !== 'anthropic') {
    return { messages };
  }
  return { system: messages.system, messages: messages.messages };
}
950
-
951
/**
 * Call the LLM with provider-appropriate message format.
 * Returns an LlmResponse with body and token usage.
 *
 * Anthropic is handled inline here (rather than via callAnthropic) because
 * its system prompt travels in a dedicated top-level `system` field rather
 * than inside the messages array.
 * @returns {Promise<LlmResponse>}
 */
export async function callLlmWithMessages(provider, model, apiKey, messagesOrWrapper, tools) {
  if (provider === 'anthropic') {
    const baseUrl = resolveBaseUrl('anthropic');
    const url = `${baseUrl}/v1/messages`;
    const config = getModelConfig(model);
    const thinkingParam = getAnthropicThinkingParam(model);

    const reqBody = {
      model,
      max_tokens: config.maxTokens,
      messages: messagesOrWrapper.messages,
    };
    // System prompt is a top-level request field for Anthropic.
    if (messagesOrWrapper.system) {
      reqBody.system = messagesOrWrapper.system;
    }
    if (thinkingParam) {
      reqBody.thinking = thinkingParam;
      // With thinking enabled, temperature must not be set
    } else if (config.temperature !== undefined && !isReasoningModel(model)) {
      reqBody.temperature = config.temperature;
    }
    if (tools && tools.length > 0) {
      reqBody.tools = tools;
      // With thinking enabled, tool_choice must be "auto" (not a specific tool)
      if (thinkingParam) {
        reqBody.tool_choice = { type: 'auto' };
      }
    }

    const res = await fetchWithTimeout(url, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(reqBody),
    });
    if (!res.ok) {
      // Read the error body so LlmApiError can carry the server's message.
      const text = await res.text();
      throw new LlmApiError('Anthropic', res.status, text, res.headers);
    }
    const responseBody = await res.json();
    return {
      body: responseBody,
      usage: extractTokenUsage('anthropic', responseBody),
    };
  }

  // Gemini uses flat message arrays; OpenAI accepts either arrays or wrapper state.
  return callLlm(provider, model, apiKey, messagesOrWrapper, tools);
}
1007
-
1008
- // ── Retry helper ────────────────────────────────────────────────────
1009
-
1010
const RETRYABLE_STATUS_CODES = new Set([429, 500, 502, 503, 529]);

/**
 * Retry a function on transient errors with exponential backoff.
 * Respects Retry-After headers from LlmApiError when available; 429s without
 * a header fall back to the wait time embedded in the response body, then to
 * a slower exponential schedule. All delays receive +/-20% jitter.
 *
 * @param {() => Promise<T>} fn
 * @param {number} [maxRetries=3]
 * @returns {Promise<T>}
 * @template T
 */
export async function withRetry(fn, maxRetries = 3) {
  // Decide whether this error is worth retrying at all.
  const isTransient = (err) => {
    if (err instanceof LlmApiError) {
      if (RETRYABLE_STATUS_CODES.has(err.status)) return true;
      // Also retry on timeouts (status 0 from fetchWithTimeout).
      return err.status === 0 && err.message.includes('timed out');
    }
    if (!err.message) return false;
    // Fallback: parse status from error message for backward compat.
    const statusMatch = err.message.match(/error (\d+)/);
    if (statusMatch && RETRYABLE_STATUS_CODES.has(parseInt(statusMatch[1], 10))) {
      return true;
    }
    return err.message.includes('timed out');
  };

  // Pick the base delay (before jitter) for the given attempt.
  const baseDelay = (err, attempt) => {
    if (err instanceof LlmApiError && err.retryAfterMs !== null) {
      // Cap server-provided Retry-After at 90 seconds to avoid long waits.
      return Math.min(err.retryAfterMs, 90_000);
    }
    if (err instanceof LlmApiError && err.status === 429) {
      // OpenAI embeds wait time in the message body for TPM limits when
      // no Retry-After header is present: "Please try again in 14.902s."
      const bodyMatch = err.responseText.match(/try again in (\d+(?:\.\d+)?)\s*s/i);
      if (bodyMatch) {
        // +500ms buffer on top of the server's requested wait.
        return Math.min(Math.ceil(parseFloat(bodyMatch[1]) * 1000) + 500, 90_000);
      }
      // 429 backoff: 5s, 10s, 20s, 40s (capped at 60s).
      return Math.min(5000 * 2 ** attempt, 60_000);
    }
    // Generic backoff: 1s, 2s, 4s, 8s, 16s (capped at 30s).
    return Math.min(1000 * 2 ** attempt, 30_000);
  };

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (err) {
      if (!isTransient(err) || attempt === maxRetries) throw err;

      // Apply +/-20% jitter so concurrent clients don't retry in lockstep.
      const base = baseDelay(err, attempt);
      const jitter = base * 0.2 * (Math.random() * 2 - 1);
      const delay = Math.max(0, Math.round(base + jitter));

      process.stderr.write(
        `[retry] Attempt ${attempt + 1}/${maxRetries} failed` +
        `${err.status ? ` (${err.status})` : ''}, ` +
        `retrying in ${(delay / 1000).toFixed(1)}s...\n`
      );

      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
4
+ * This is a thin re-export facade. Implementation lives in:
5
+ * - llm-config.mjs provider detection, API key/base URL, timeout, thinking budget
6
+ * - llm-call.mjs callLlm, callLlmWithMessages, LlmApiError, withRetry
7
+ * - llm-response.mjs response parsing, message building, tool formatting
8
+ */
9
+
10
// ── Config ──────────────────────────────────────────────────────────
// Provider detection, API key / base URL resolution, thinking toggle.
export {
  detectProvider,
  resolveApiKey,
  resolveBaseUrl,
  isThinkingEnabled,
} from './llm-config.mjs';

// ── Calling ─────────────────────────────────────────────────────────
// HTTP entry points, the structured API error, and the retry helper.
export {
  callLlm,
  callLlmWithMessages,
  LlmApiError,
  withRetry,
} from './llm-call.mjs';

// ── Response parsing & message building ─────────────────────────────
// Token usage, tool formatting/parsing, text and thinking extraction,
// and provider-specific conversation construction.
export {
  extractTokenUsage,
  formatToolsForProvider,
  parseToolCalls,
  getResponseText,
  getThinkingContent,
  getStopReason,
  buildInitialMessages,
  appendAssistantResponse,
  appendToolResults,
  appendUserInstruction,
  extractCallArgs,
} from './llm-response.mjs';