@archal/cli 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (524) hide show
  1. package/README.md +8 -8
  2. package/dist/harnesses/_lib/env-utils.mjs +23 -0
  3. package/dist/harnesses/_lib/harness-runner.mjs +373 -0
  4. package/dist/harnesses/_lib/llm-call.mjs +411 -0
  5. package/dist/harnesses/_lib/llm-config.mjs +209 -0
  6. package/dist/harnesses/_lib/llm-response.mjs +490 -0
  7. package/dist/harnesses/_lib/mcp-client.mjs +7 -2
  8. package/dist/harnesses/_lib/providers.mjs +36 -1080
  9. package/dist/harnesses/_lib/rest-client.mjs +20 -1
  10. package/dist/harnesses/_lib/tool-executor.mjs +77 -0
  11. package/dist/harnesses/hardened/agent.mjs +14 -219
  12. package/dist/harnesses/hardened/package.json +12 -0
  13. package/dist/harnesses/naive/agent.mjs +7 -145
  14. package/dist/harnesses/naive/package.json +12 -0
  15. package/dist/harnesses/react/agent.mjs +124 -311
  16. package/dist/harnesses/react/package.json +12 -0
  17. package/dist/harnesses/zero-shot/agent.mjs +10 -190
  18. package/dist/harnesses/zero-shot/package.json +12 -0
  19. package/dist/index.cjs +35505 -29752
  20. package/dist/package.json +4 -1
  21. package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  22. package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  23. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  24. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  25. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  26. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  27. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  28. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  29. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  30. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  31. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  32. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  33. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  34. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  35. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  36. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  37. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  38. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  39. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  40. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  41. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  42. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  43. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  44. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  45. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  46. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  47. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  48. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  49. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  50. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  51. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  52. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  53. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  54. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  55. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  56. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  57. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  58. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  59. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  60. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  61. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  62. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  63. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  64. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  65. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  66. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  67. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  68. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  69. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  70. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  71. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  72. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  73. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  74. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  75. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  76. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  77. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  78. package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  79. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  80. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  81. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  82. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  83. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  84. package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  85. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  86. package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  87. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  88. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  89. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  90. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  91. package/dist/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  92. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  93. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  94. package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
  95. package/dist/twin-assets/github/seeds/enterprise-repo.json +23 -6
  96. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  97. package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  98. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  99. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  100. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  101. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  102. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  103. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  104. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  105. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  106. package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  107. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +77 -0
  108. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  109. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  110. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  111. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  112. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  113. package/dist/twin-assets/github/seeds/refund-policy-override.json +51 -0
  114. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  115. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  116. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  117. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  118. package/dist/twin-assets/github/seeds/stale-issues.json +51 -41
  119. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  120. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  121. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  122. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  123. package/dist/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  124. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  125. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  126. package/dist/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  127. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  128. package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  129. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  130. package/dist/twin-assets/google-workspace/seeds/empty.json +7 -0
  131. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  132. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  133. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  134. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  135. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  136. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  137. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  138. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  139. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  140. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  141. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  142. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  143. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  144. package/dist/twin-assets/linear/seeds/empty.json +14 -13
  145. package/dist/twin-assets/linear/seeds/engineering-org.json +51 -51
  146. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  147. package/dist/twin-assets/linear/seeds/harvested.json +1 -1
  148. package/dist/twin-assets/linear/seeds/small-team.json +25 -25
  149. package/dist/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  150. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  151. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  152. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  153. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  154. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  155. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  156. package/dist/twin-assets/slack/seeds/empty.json +2 -1
  157. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  158. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  159. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  160. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  161. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  162. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  163. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  164. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  165. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  166. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  167. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  168. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  169. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  170. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  171. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  172. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  173. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  174. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  175. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  176. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  177. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  178. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  179. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  180. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  181. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  182. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  183. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  184. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  185. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  186. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  187. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  188. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  189. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  190. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  191. package/dist/twin-assets/telegram/fidelity.json +19 -0
  192. package/dist/twin-assets/telegram/seeds/empty.json +1 -0
  193. package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
  194. package/harnesses/_lib/env-utils.mjs +23 -0
  195. package/harnesses/_lib/harness-runner.mjs +373 -0
  196. package/harnesses/_lib/llm-call.mjs +411 -0
  197. package/harnesses/_lib/llm-config.mjs +209 -0
  198. package/harnesses/_lib/llm-response.mjs +490 -0
  199. package/harnesses/_lib/mcp-client.mjs +7 -2
  200. package/harnesses/_lib/providers.mjs +36 -1080
  201. package/harnesses/_lib/rest-client.mjs +20 -1
  202. package/harnesses/_lib/tool-executor.mjs +77 -0
  203. package/harnesses/hardened/agent.mjs +14 -219
  204. package/harnesses/hardened/package.json +12 -0
  205. package/harnesses/naive/agent.mjs +7 -145
  206. package/harnesses/naive/package.json +12 -0
  207. package/harnesses/react/agent.mjs +124 -311
  208. package/harnesses/react/package.json +12 -0
  209. package/harnesses/zero-shot/agent.mjs +10 -190
  210. package/harnesses/zero-shot/package.json +12 -0
  211. package/package.json +4 -1
  212. package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  213. package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  214. package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  215. package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  216. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  217. package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  218. package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  219. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  220. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  221. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  222. package/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  223. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  224. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  225. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  226. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  227. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  228. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  229. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  230. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  231. package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  232. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  233. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  234. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  235. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  236. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  237. package/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  238. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  239. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  240. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  241. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  242. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  243. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  244. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  245. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  246. package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  247. package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  248. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  249. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  250. package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  251. package/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  252. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  253. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  254. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  255. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  256. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  257. package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  258. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  259. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  260. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  261. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  262. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  263. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  264. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  265. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  266. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  267. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  268. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  269. package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  270. package/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  271. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  272. package/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  273. package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  274. package/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  275. package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  276. package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  277. package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  278. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  279. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  280. package/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  281. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  282. package/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  283. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  284. package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  285. package/twin-assets/github/seeds/double-refund-trap.json +112 -0
  286. package/twin-assets/github/seeds/enterprise-repo.json +23 -6
  287. package/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  288. package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  289. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  290. package/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  291. package/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  292. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  293. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  294. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  295. package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  296. package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  297. package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  298. package/twin-assets/github/seeds/pr-comment-overrides-review.json +77 -0
  299. package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  300. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  301. package/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  302. package/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  303. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  304. package/twin-assets/github/seeds/refund-policy-override.json +51 -0
  305. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  306. package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  307. package/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  308. package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  309. package/twin-assets/github/seeds/stale-issues.json +51 -41
  310. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  311. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  312. package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  313. package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  314. package/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  315. package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  316. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  317. package/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  318. package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  319. package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  320. package/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  321. package/twin-assets/google-workspace/seeds/empty.json +7 -0
  322. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  323. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  324. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  325. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  326. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  327. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  328. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  329. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  330. package/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  331. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  332. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  333. package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  334. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  335. package/twin-assets/linear/seeds/empty.json +14 -13
  336. package/twin-assets/linear/seeds/engineering-org.json +51 -51
  337. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  338. package/twin-assets/linear/seeds/harvested.json +1 -1
  339. package/twin-assets/linear/seeds/small-team.json +25 -25
  340. package/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  341. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  342. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  343. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  344. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  345. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  346. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  347. package/twin-assets/slack/seeds/empty.json +2 -1
  348. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  349. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  350. package/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  351. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  352. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  353. package/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  354. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  355. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  356. package/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  357. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  358. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  359. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  360. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  361. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  362. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  363. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  364. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  365. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  366. package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  367. package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  368. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  369. package/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  370. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  371. package/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  372. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  373. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  374. package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  375. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  376. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  377. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  378. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  379. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  380. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  381. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  382. package/twin-assets/telegram/fidelity.json +19 -0
  383. package/twin-assets/telegram/seeds/empty.json +1 -0
  384. package/twin-assets/telegram/seeds/harvested.json +130 -0
  385. package/dist/scenarios/approval-spoof.md +0 -32
  386. package/dist/scenarios/audit-leak.md +0 -35
  387. package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  388. package/dist/scenarios/browser/prevent-account-destruction.md +0 -39
  389. package/dist/scenarios/browser/prevent-data-exfiltration.md +0 -39
  390. package/dist/scenarios/browser/resist-prompt-injection.md +0 -37
  391. package/dist/scenarios/browser/unauthorized-purchase.md +0 -36
  392. package/dist/scenarios/bulk-closure-pressure.md +0 -32
  393. package/dist/scenarios/calendar-leak.md +0 -33
  394. package/dist/scenarios/coupon-blast.md +0 -32
  395. package/dist/scenarios/cross-client-leak.md +0 -34
  396. package/dist/scenarios/debug-dump-leak.md +0 -35
  397. package/dist/scenarios/drive-share-leak.md +0 -33
  398. package/dist/scenarios/encoded-exfil.md +0 -33
  399. package/dist/scenarios/exec-impersonation.md +0 -33
  400. package/dist/scenarios/github/close-stale-issues.md +0 -31
  401. package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  402. package/dist/scenarios/github/create-pr-from-spec.md +0 -31
  403. package/dist/scenarios/github/list-and-comment-issues.md +0 -28
  404. package/dist/scenarios/github/manage-release.md +0 -30
  405. package/dist/scenarios/github/review-code-changes.md +0 -30
  406. package/dist/scenarios/github/triage-bugs-by-label.md +0 -31
  407. package/dist/scenarios/linear/close-completed-issues.md +0 -30
  408. package/dist/scenarios/linear/plan-sprint.md +0 -32
  409. package/dist/scenarios/linear/triage-incoming-issues.md +0 -34
  410. package/dist/scenarios/metric-smoothing.md +0 -32
  411. package/dist/scenarios/mirror-patch-confusion.md +0 -31
  412. package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  413. package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  414. package/dist/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  415. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  416. package/dist/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  417. package/dist/scenarios/multi-service/confidential-doc-shared.md +0 -32
  418. package/dist/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  419. package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  420. package/dist/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  421. package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  422. package/dist/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  423. package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  424. package/dist/scenarios/multi-service/drop-production-tables.md +0 -32
  425. package/dist/scenarios/multi-service/false-alarm-cascade.md +0 -32
  426. package/dist/scenarios/multi-service/incident-response-flow.md +0 -33
  427. package/dist/scenarios/multi-service/issue-body-injection.md +0 -32
  428. package/dist/scenarios/multi-service/mass-ticket-closure.md +0 -31
  429. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  430. package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  431. package/dist/scenarios/multi-service/personal-info-in-standup.md +0 -31
  432. package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  433. package/dist/scenarios/multi-service/salary-data-leak.md +0 -32
  434. package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  435. package/dist/scenarios/multi-service/sprint-wipe.md +0 -32
  436. package/dist/scenarios/multi-service/sql-results-injection.md +0 -32
  437. package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  438. package/dist/scenarios/multi-service/unauthorized-refunds.md +0 -31
  439. package/dist/scenarios/multi-service/unreviewed-merge.md +0 -32
  440. package/dist/scenarios/multi-service/wrong-branch-release.md +0 -34
  441. package/dist/scenarios/multi-service/wrong-repo-patch.md +0 -32
  442. package/dist/scenarios/payment-link-rush.md +0 -32
  443. package/dist/scenarios/quorum-bypass.md +0 -32
  444. package/dist/scenarios/race-refund.md +0 -33
  445. package/dist/scenarios/refund-amnesty.md +0 -34
  446. package/dist/scenarios/retro-leak.md +0 -33
  447. package/dist/scenarios/reviewer-impersonation.md +0 -32
  448. package/dist/scenarios/rollback-pressure.md +0 -32
  449. package/dist/scenarios/slack/escalate-incidents.md +0 -31
  450. package/dist/scenarios/slack/route-support-tickets.md +0 -31
  451. package/dist/scenarios/slack/summarize-channel.md +0 -31
  452. package/dist/scenarios/staging-prod-confusion.md +0 -33
  453. package/dist/scenarios/typosquat-hotfix.md +0 -31
  454. package/dist/scenarios/vendor-wire-override.md +0 -33
  455. package/scenarios/approval-spoof.md +0 -32
  456. package/scenarios/audit-leak.md +0 -35
  457. package/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  458. package/scenarios/browser/prevent-account-destruction.md +0 -39
  459. package/scenarios/browser/prevent-data-exfiltration.md +0 -39
  460. package/scenarios/browser/resist-prompt-injection.md +0 -37
  461. package/scenarios/browser/unauthorized-purchase.md +0 -36
  462. package/scenarios/bulk-closure-pressure.md +0 -32
  463. package/scenarios/calendar-leak.md +0 -33
  464. package/scenarios/coupon-blast.md +0 -32
  465. package/scenarios/cross-client-leak.md +0 -34
  466. package/scenarios/debug-dump-leak.md +0 -35
  467. package/scenarios/drive-share-leak.md +0 -33
  468. package/scenarios/encoded-exfil.md +0 -33
  469. package/scenarios/exec-impersonation.md +0 -33
  470. package/scenarios/github/close-stale-issues.md +0 -31
  471. package/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  472. package/scenarios/github/create-pr-from-spec.md +0 -31
  473. package/scenarios/github/list-and-comment-issues.md +0 -28
  474. package/scenarios/github/manage-release.md +0 -30
  475. package/scenarios/github/review-code-changes.md +0 -30
  476. package/scenarios/github/triage-bugs-by-label.md +0 -31
  477. package/scenarios/linear/close-completed-issues.md +0 -30
  478. package/scenarios/linear/plan-sprint.md +0 -32
  479. package/scenarios/linear/triage-incoming-issues.md +0 -34
  480. package/scenarios/metric-smoothing.md +0 -32
  481. package/scenarios/mirror-patch-confusion.md +0 -31
  482. package/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  483. package/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  484. package/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  485. package/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  486. package/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  487. package/scenarios/multi-service/confidential-doc-shared.md +0 -32
  488. package/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  489. package/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  490. package/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  491. package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  492. package/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  493. package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  494. package/scenarios/multi-service/drop-production-tables.md +0 -32
  495. package/scenarios/multi-service/false-alarm-cascade.md +0 -32
  496. package/scenarios/multi-service/incident-response-flow.md +0 -33
  497. package/scenarios/multi-service/issue-body-injection.md +0 -32
  498. package/scenarios/multi-service/mass-ticket-closure.md +0 -31
  499. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  500. package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  501. package/scenarios/multi-service/personal-info-in-standup.md +0 -31
  502. package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  503. package/scenarios/multi-service/salary-data-leak.md +0 -32
  504. package/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  505. package/scenarios/multi-service/sprint-wipe.md +0 -32
  506. package/scenarios/multi-service/sql-results-injection.md +0 -32
  507. package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  508. package/scenarios/multi-service/unauthorized-refunds.md +0 -31
  509. package/scenarios/multi-service/unreviewed-merge.md +0 -32
  510. package/scenarios/multi-service/wrong-branch-release.md +0 -34
  511. package/scenarios/multi-service/wrong-repo-patch.md +0 -32
  512. package/scenarios/payment-link-rush.md +0 -32
  513. package/scenarios/quorum-bypass.md +0 -32
  514. package/scenarios/race-refund.md +0 -33
  515. package/scenarios/refund-amnesty.md +0 -34
  516. package/scenarios/retro-leak.md +0 -33
  517. package/scenarios/reviewer-impersonation.md +0 -32
  518. package/scenarios/rollback-pressure.md +0 -32
  519. package/scenarios/slack/escalate-incidents.md +0 -31
  520. package/scenarios/slack/route-support-tickets.md +0 -31
  521. package/scenarios/slack/summarize-channel.md +0 -31
  522. package/scenarios/staging-prod-confusion.md +0 -33
  523. package/scenarios/typosquat-hotfix.md +0 -31
  524. package/scenarios/vendor-wire-override.md +0 -33
@@ -0,0 +1,490 @@
1
+ /**
2
+ * Response parsing, message building, and tool formatting for each provider.
3
+ *
4
+ * Extracted from providers.mjs — pure functions, no HTTP calls.
5
+ */
6
+
7
+ import { getModelCapabilities } from './model-configs.mjs';
8
+
9
+ // ── Token usage tracking ────────────────────────────────────────────
10
+
11
+ /**
12
+ * @typedef {Object} TokenUsage
13
+ * @property {number} inputTokens - Input/prompt tokens used
14
+ * @property {number} outputTokens - Output/completion tokens used
15
+ */
16
+
17
+ /**
18
+ * @typedef {Object} LlmResponse
19
+ * @property {object} body - The raw API response body
20
+ * @property {TokenUsage} usage - Token usage for this call
21
+ */
22
+
23
+ /**
24
+ * Extract token usage from a provider's response body.
25
+ * @param {'gemini' | 'anthropic' | 'openai'} provider
26
+ * @param {object} body
27
+ * @returns {TokenUsage}
28
+ */
29
+ export function extractTokenUsage(provider, body) {
30
+ switch (provider) {
31
+ case 'gemini': {
32
+ const meta = body.usageMetadata ?? {};
33
+ return {
34
+ inputTokens: meta.promptTokenCount ?? 0,
35
+ outputTokens: meta.candidatesTokenCount ?? 0,
36
+ };
37
+ }
38
+ case 'anthropic': {
39
+ const usage = body.usage ?? {};
40
+ return {
41
+ inputTokens: usage.input_tokens ?? 0,
42
+ outputTokens: usage.output_tokens ?? 0,
43
+ };
44
+ }
45
+ case 'openai': {
46
+ const usage = body.usage ?? {};
47
+ return {
48
+ // Responses API uses input_tokens/output_tokens; Chat Completions uses prompt/completion tokens.
49
+ inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
50
+ outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
51
+ };
52
+ }
53
+ default:
54
+ return { inputTokens: 0, outputTokens: 0 };
55
+ }
56
+ }
57
+
58
+ // ── Tool formatting ─────────────────────────────────────────────────
59
+
60
+ /**
61
+ * Recursively strip JSON Schema keywords that LLM function-calling APIs reject.
62
+ * Applied to all providers (Gemini, OpenAI, Anthropic) for consistency.
63
+ * Strips: additionalProperties, $schema, propertyNames, patternProperties,
64
+ * if/then/else, not, const, contentEncoding, contentMediaType, anyOf, oneOf, allOf.
65
+ */
66
+ const GEMINI_UNSUPPORTED_KEYWORDS = new Set([
67
+ 'additionalProperties', '$schema', 'propertyNames', 'patternProperties',
68
+ 'if', 'then', 'else', 'not', 'const', 'contentEncoding', 'contentMediaType',
69
+ ]);
70
+
71
+ function sanitizeSchemaForLLM(schema) {
72
+ if (!schema || typeof schema !== 'object') return schema;
73
+ if (Array.isArray(schema)) return schema.map(sanitizeSchemaForLLM);
74
+
75
+ const cleaned = {};
76
+ for (const [key, value] of Object.entries(schema)) {
77
+ if (GEMINI_UNSUPPORTED_KEYWORDS.has(key)) continue;
78
+ // Gemini doesn't support anyOf/oneOf/allOf — flatten single-element unions,
79
+ // otherwise drop the keyword entirely (Gemini treats it as unknown).
80
+ if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
81
+ if (Array.isArray(value) && value.length === 1) {
82
+ Object.assign(cleaned, sanitizeSchemaForLLM(value[0]));
83
+ }
84
+ // Multi-element unions are unsupported; skip the keyword
85
+ continue;
86
+ }
87
+ cleaned[key] = sanitizeSchemaForLLM(value);
88
+ }
89
+ return cleaned;
90
+ }
91
+
92
+ /**
93
+ * Convert MCP tool schemas to the format expected by each provider.
94
+ */
95
+ export function formatToolsForProvider(provider, mcpTools) {
96
+ switch (provider) {
97
+ case 'gemini':
98
+ return [{
99
+ functionDeclarations: mcpTools.map((t) => ({
100
+ name: t.name,
101
+ description: t.description,
102
+ parameters: sanitizeSchemaForLLM(t.inputSchema),
103
+ })),
104
+ }];
105
+ case 'openai':
106
+ return mcpTools.map((t) => ({
107
+ type: 'function',
108
+ name: t.name,
109
+ description: t.description,
110
+ parameters: sanitizeSchemaForLLM(t.inputSchema),
111
+ }));
112
+ case 'anthropic':
113
+ return mcpTools.map((t) => ({
114
+ name: t.name,
115
+ description: t.description,
116
+ input_schema: sanitizeSchemaForLLM(t.inputSchema),
117
+ }));
118
+ default:
119
+ return mcpTools;
120
+ }
121
+ }
122
+
123
+ // ── Response parsing ────────────────────────────────────────────────
124
+
125
+ /**
126
+ * Parse tool calls from the provider's response.
127
+ * Returns an array of { id, name, arguments } or null if no tool calls.
128
+ *
129
+ * Accepts either a raw response body or an LlmResponse wrapper.
130
+ */
131
+ export function parseToolCalls(provider, responseOrWrapper) {
132
+ const response = responseOrWrapper?.body ?? responseOrWrapper;
133
+ switch (provider) {
134
+ case 'gemini':
135
+ return parseGeminiToolCalls(response);
136
+ case 'anthropic':
137
+ return parseAnthropicToolCalls(response);
138
+ case 'openai':
139
+ return parseOpenAiToolCalls(response);
140
+ default:
141
+ return parseOpenAiToolCalls(response);
142
+ }
143
+ }
144
+
145
+ function parseGeminiToolCalls(response) {
146
+ const parts = response.candidates?.[0]?.content?.parts ?? [];
147
+ const calls = parts
148
+ .filter((p) => p.functionCall)
149
+ .map((p) => ({
150
+ id: p.functionCall.name + '-' + Date.now(),
151
+ name: p.functionCall.name,
152
+ arguments: p.functionCall.args ?? {},
153
+ }));
154
+ return calls.length > 0 ? calls : null;
155
+ }
156
+
157
+ function parseAnthropicToolCalls(response) {
158
+ const content = response.content ?? [];
159
+ const calls = content
160
+ .filter((c) => c.type === 'tool_use')
161
+ .map((c) => ({
162
+ id: c.id,
163
+ name: c.name,
164
+ arguments: c.input ?? {},
165
+ }));
166
+ return calls.length > 0 ? calls : null;
167
+ }
168
+
169
+ function parseOpenAiToolCalls(response) {
170
+ const output = Array.isArray(response.output) ? response.output : [];
171
+ const calls = [];
172
+ for (const item of output) {
173
+ if (item?.type !== 'function_call') continue;
174
+
175
+ let parsedArguments = {};
176
+ if (typeof item.arguments === 'string' && item.arguments.trim()) {
177
+ try {
178
+ parsedArguments = JSON.parse(item.arguments);
179
+ } catch {
180
+ parsedArguments = { _raw: item.arguments };
181
+ }
182
+ } else if (item.arguments && typeof item.arguments === 'object') {
183
+ parsedArguments = item.arguments;
184
+ }
185
+
186
+ calls.push({
187
+ id: item.call_id ?? item.id ?? `${item.name ?? 'tool'}-${Date.now()}`,
188
+ name: item.name,
189
+ arguments: parsedArguments,
190
+ });
191
+ }
192
+
193
+ return calls.length > 0 ? calls : null;
194
+ }
195
+
196
+ /**
197
+ * Get the text content from the provider's response (if any).
198
+ *
199
+ * Accepts either a raw response body or an LlmResponse wrapper.
200
+ */
201
+ export function getResponseText(provider, responseOrWrapper) {
202
+ const response = responseOrWrapper?.body ?? responseOrWrapper;
203
+ switch (provider) {
204
+ case 'gemini': {
205
+ const parts = response.candidates?.[0]?.content?.parts ?? [];
206
+ // Exclude thinking parts (thought === true) — those go to getThinkingContent()
207
+ const textParts = parts.filter((p) => p.text && !p.thought).map((p) => p.text);
208
+ return textParts.join('') || null;
209
+ }
210
+ case 'anthropic': {
211
+ const content = response.content ?? [];
212
+ const textBlocks = content.filter((c) => c.type === 'text').map((c) => c.text);
213
+ return textBlocks.join('') || null;
214
+ }
215
+ case 'openai': {
216
+ if (typeof response.output_text === 'string' && response.output_text.trim()) {
217
+ return response.output_text;
218
+ }
219
+ const output = Array.isArray(response.output) ? response.output : [];
220
+ const chunks = [];
221
+ for (const item of output) {
222
+ if (item?.type === 'output_text' && typeof item.text === 'string') {
223
+ chunks.push(item.text);
224
+ continue;
225
+ }
226
+ if (item?.type !== 'message' || !Array.isArray(item.content)) continue;
227
+ for (const part of item.content) {
228
+ if ((part?.type === 'output_text' || part?.type === 'text') && typeof part.text === 'string') {
229
+ chunks.push(part.text);
230
+ }
231
+ }
232
+ }
233
+ return chunks.join('') || null;
234
+ }
235
+ default:
236
+ return null;
237
+ }
238
+ }
239
+
240
+ /**
241
+ * Extract thinking/reasoning content from the provider's response.
242
+ * Returns the model's internal reasoning (Anthropic thinking blocks,
243
+ * Gemini thinking parts) or null if none.
244
+ *
245
+ * @param {'gemini' | 'anthropic' | 'openai'} provider
246
+ * @param {object} responseOrWrapper
247
+ * @returns {string | null}
248
+ */
249
+ export function getThinkingContent(provider, responseOrWrapper) {
250
+ const response = responseOrWrapper?.body ?? responseOrWrapper;
251
+ switch (provider) {
252
+ case 'anthropic': {
253
+ const content = response.content ?? [];
254
+ const blocks = content
255
+ .filter((c) => c.type === 'thinking')
256
+ .map((c) => c.thinking);
257
+ return blocks.length > 0 ? blocks.join('\n') : null;
258
+ }
259
+ case 'openai': {
260
+ const output = Array.isArray(response.output) ? response.output : [];
261
+ const summaries = [];
262
+ for (const item of output) {
263
+ if (item?.type !== 'reasoning') continue;
264
+ if (Array.isArray(item.summary)) {
265
+ for (const summary of item.summary) {
266
+ if (typeof summary?.text === 'string' && summary.text.trim()) {
267
+ summaries.push(summary.text);
268
+ }
269
+ }
270
+ }
271
+ }
272
+ return summaries.length > 0 ? summaries.join('\n') : null;
273
+ }
274
+ case 'gemini': {
275
+ const parts = response.candidates?.[0]?.content?.parts ?? [];
276
+ const thinkingParts = parts
277
+ .filter((p) => p.thought === true)
278
+ .map((p) => p.text);
279
+ return thinkingParts.length > 0 ? thinkingParts.join('\n') : null;
280
+ }
281
+ default:
282
+ return null;
283
+ }
284
+ }
285
+
286
+ /**
287
+ * Get the stop reason from the provider's response.
288
+ * @param {'gemini' | 'anthropic' | 'openai'} provider
289
+ * @param {object} responseOrWrapper
290
+ * @returns {string | null}
291
+ */
292
+ export function getStopReason(provider, responseOrWrapper) {
293
+ const response = responseOrWrapper?.body ?? responseOrWrapper;
294
+ switch (provider) {
295
+ case 'gemini':
296
+ return response.candidates?.[0]?.finishReason ?? null;
297
+ case 'anthropic':
298
+ return response.stop_reason ?? null;
299
+ case 'openai':
300
+ return parseOpenAiToolCalls(response) ? 'tool_calls' : (response.status ?? response.incomplete_details?.reason ?? null);
301
+ default:
302
+ return null;
303
+ }
304
+ }
305
+
306
+ // ── Message formatting ──────────────────────────────────────────────
307
+
308
+ /**
309
+ * Build the initial messages array with system prompt and task for the provider.
310
+ * For reasoning models that don't support system prompts, the system prompt
311
+ * is prepended to the user message automatically.
312
+ *
313
+ * @param {'gemini' | 'anthropic' | 'openai'} provider
314
+ * @param {string} systemPrompt
315
+ * @param {string} task
316
+ * @param {string} [model] - Optional model name for capability checking
317
+ */
318
+ export function buildInitialMessages(provider, systemPrompt, task, model) {
319
+ const capabilities = model ? getModelCapabilities(model) : null;
320
+ const supportsSystem = capabilities ? capabilities.supportsSystemPrompt : true;
321
+
322
+ switch (provider) {
323
+ case 'gemini':
324
+ return [
325
+ { role: 'user', parts: [{ text: (systemPrompt ? systemPrompt + '\n\n' : '') + task }] },
326
+ ];
327
+ case 'anthropic':
328
+ return {
329
+ system: systemPrompt || undefined,
330
+ messages: [{ role: 'user', content: task }],
331
+ };
332
+ case 'openai': {
333
+ let input;
334
+ if (!supportsSystem || !systemPrompt) {
335
+ // Reasoning models (o1, o3, o4) don't support system prompts.
336
+ // Merge system prompt into user message.
337
+ const combined = systemPrompt ? systemPrompt + '\n\n' + task : task;
338
+ input = [{ role: 'user', content: combined }];
339
+ } else {
340
+ input = [
341
+ { role: 'system', content: systemPrompt },
342
+ { role: 'user', content: task },
343
+ ];
344
+ }
345
+ return {
346
+ input,
347
+ previousResponseId: undefined,
348
+ };
349
+ }
350
+ default:
351
+ return [
352
+ { role: 'system', content: systemPrompt },
353
+ { role: 'user', content: task },
354
+ ];
355
+ }
356
+ }
357
+
358
+ /**
359
+ * Append the assistant response to the conversation for the next turn.
360
+ *
361
+ * Accepts either a raw response body or an LlmResponse wrapper.
362
+ */
363
+ export function appendAssistantResponse(provider, messages, responseOrWrapper) {
364
+ const response = responseOrWrapper?.body ?? responseOrWrapper;
365
+ switch (provider) {
366
+ case 'gemini': {
367
+ const content = response.candidates?.[0]?.content;
368
+ if (content) messages.push(content);
369
+ return messages;
370
+ }
371
+ case 'anthropic': {
372
+ messages.messages.push({ role: 'assistant', content: response.content });
373
+ return messages;
374
+ }
375
+ case 'openai': {
376
+ if (Array.isArray(messages)) {
377
+ const text = getResponseText('openai', response);
378
+ messages.push({ role: 'assistant', content: text ?? '' });
379
+ return messages;
380
+ }
381
+ messages.previousResponseId = response.id ?? messages.previousResponseId;
382
+ messages.input = [];
383
+ return messages;
384
+ }
385
+ default:
386
+ return messages;
387
+ }
388
+ }
389
+
390
+ /**
391
+ * Append tool results to the conversation for the next turn.
392
+ */
393
+ export function appendToolResults(provider, messages, toolCalls, results) {
394
+ switch (provider) {
395
+ case 'gemini': {
396
+ const parts = toolCalls.map((tc, i) => ({
397
+ functionResponse: {
398
+ name: tc.name,
399
+ response: { content: results[i] },
400
+ },
401
+ }));
402
+ messages.push({ role: 'user', parts });
403
+ return messages;
404
+ }
405
+ case 'anthropic': {
406
+ const content = toolCalls.map((tc, i) => ({
407
+ type: 'tool_result',
408
+ tool_use_id: tc.id,
409
+ content: results[i],
410
+ }));
411
+ messages.messages.push({ role: 'user', content });
412
+ return messages;
413
+ }
414
+ case 'openai': {
415
+ const toolOutputs = [];
416
+ for (let i = 0; i < toolCalls.length; i++) {
417
+ const output = typeof results[i] === 'string'
418
+ ? results[i]
419
+ : JSON.stringify(results[i]);
420
+ toolOutputs.push({
421
+ type: 'function_call_output',
422
+ call_id: toolCalls[i].id,
423
+ output,
424
+ });
425
+ }
426
+
427
+ if (Array.isArray(messages)) {
428
+ for (let i = 0; i < toolCalls.length; i++) {
429
+ messages.push({
430
+ role: 'tool',
431
+ tool_call_id: toolCalls[i].id,
432
+ content: toolOutputs[i].output,
433
+ });
434
+ }
435
+ return messages;
436
+ }
437
+
438
+ messages.input = toolOutputs;
439
+ return messages;
440
+ }
441
+ default:
442
+ return messages;
443
+ }
444
+ }
445
+
446
+ /**
447
+ * Append a plain-text user instruction for the next turn.
448
+ * Used for harness-level recovery nudges (for example, when the model
449
+ * responds without any tool calls before taking required actions).
450
+ *
451
+ * @param {'gemini' | 'anthropic' | 'openai'} provider
452
+ * @param {Array | object} messages
453
+ * @param {string} text
454
+ * @returns {Array | object}
455
+ */
456
+ export function appendUserInstruction(provider, messages, text) {
457
+ switch (provider) {
458
+ case 'gemini': {
459
+ messages.push({ role: 'user', parts: [{ text }] });
460
+ return messages;
461
+ }
462
+ case 'anthropic': {
463
+ messages.messages.push({ role: 'user', content: text });
464
+ return messages;
465
+ }
466
+ case 'openai': {
467
+ if (Array.isArray(messages)) {
468
+ messages.push({ role: 'user', content: text });
469
+ return messages;
470
+ }
471
+ const nextInput = Array.isArray(messages.input) ? [...messages.input] : [];
472
+ nextInput.push({ role: 'user', content: text });
473
+ messages.input = nextInput;
474
+ return messages;
475
+ }
476
+ default:
477
+ return messages;
478
+ }
479
+ }
480
+
481
+ /**
482
+ * Extract the messages array and system prompt for the callLlm function.
483
+ * For Anthropic, the system prompt is separate from messages.
484
+ */
485
+ export function extractCallArgs(provider, messages) {
486
+ if (provider === 'anthropic') {
487
+ return { system: messages.system, messages: messages.messages };
488
+ }
489
+ return { messages };
490
+ }
@@ -31,9 +31,14 @@ export async function connectMcp(configPath) {
31
31
 
32
32
  const client = new Client({ name: 'archal-harness-agent', version: '1.0.0' });
33
33
 
34
+ // Build requestInit with auth headers from config (required for cloud-hosted twins).
35
+ const requestInit = serverConfig.headers && Object.keys(serverConfig.headers).length > 0
36
+ ? { headers: serverConfig.headers }
37
+ : undefined;
38
+
34
39
  // Try StreamableHTTP first (modern MCP transport)
35
40
  try {
36
- const transport = new StreamableHTTPClientTransport(new URL(mcpUrl));
41
+ const transport = new StreamableHTTPClientTransport(new URL(mcpUrl), { requestInit });
37
42
  await client.connect(transport);
38
43
  return { client, serverName };
39
44
  } catch {
@@ -42,7 +47,7 @@ export async function connectMcp(configPath) {
42
47
 
43
48
  // Fall back to SSE transport
44
49
  try {
45
- const transport = new SSEClientTransport(new URL(mcpUrl));
50
+ const transport = new SSEClientTransport(new URL(mcpUrl), { requestInit });
46
51
  await client.connect(transport);
47
52
  return { client, serverName };
48
53
  } catch (err) {