@archal/cli 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (524) hide show
  1. package/README.md +8 -8
  2. package/dist/harnesses/_lib/env-utils.mjs +23 -0
  3. package/dist/harnesses/_lib/harness-runner.mjs +373 -0
  4. package/dist/harnesses/_lib/llm-call.mjs +411 -0
  5. package/dist/harnesses/_lib/llm-config.mjs +209 -0
  6. package/dist/harnesses/_lib/llm-response.mjs +490 -0
  7. package/dist/harnesses/_lib/mcp-client.mjs +7 -2
  8. package/dist/harnesses/_lib/providers.mjs +36 -1080
  9. package/dist/harnesses/_lib/rest-client.mjs +20 -1
  10. package/dist/harnesses/_lib/tool-executor.mjs +77 -0
  11. package/dist/harnesses/hardened/agent.mjs +14 -219
  12. package/dist/harnesses/hardened/package.json +12 -0
  13. package/dist/harnesses/naive/agent.mjs +7 -145
  14. package/dist/harnesses/naive/package.json +12 -0
  15. package/dist/harnesses/react/agent.mjs +124 -311
  16. package/dist/harnesses/react/package.json +12 -0
  17. package/dist/harnesses/zero-shot/agent.mjs +10 -190
  18. package/dist/harnesses/zero-shot/package.json +12 -0
  19. package/dist/index.cjs +35505 -29752
  20. package/dist/package.json +4 -1
  21. package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  22. package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  23. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  24. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  25. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  26. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  27. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  28. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  29. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  30. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  31. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  32. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  33. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  34. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  35. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  36. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  37. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  38. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  39. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  40. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  41. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  42. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  43. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  44. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  45. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  46. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  47. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  48. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  49. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  50. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  51. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  52. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  53. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  54. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  55. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  56. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  57. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  58. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  59. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  60. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  61. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  62. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  63. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  64. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  65. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  66. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  67. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  68. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  69. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  70. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  71. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  72. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  73. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  74. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  75. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  76. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  77. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  78. package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  79. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  80. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  81. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  82. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  83. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  84. package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  85. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  86. package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  87. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  88. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  89. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  90. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  91. package/dist/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  92. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  93. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  94. package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
  95. package/dist/twin-assets/github/seeds/enterprise-repo.json +23 -6
  96. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  97. package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  98. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  99. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  100. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  101. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  102. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  103. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  104. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  105. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  106. package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  107. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +77 -0
  108. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  109. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  110. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  111. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  112. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  113. package/dist/twin-assets/github/seeds/refund-policy-override.json +51 -0
  114. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  115. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  116. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  117. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  118. package/dist/twin-assets/github/seeds/stale-issues.json +51 -41
  119. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  120. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  121. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  122. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  123. package/dist/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  124. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  125. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  126. package/dist/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  127. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  128. package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  129. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  130. package/dist/twin-assets/google-workspace/seeds/empty.json +7 -0
  131. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  132. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  133. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  134. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  135. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  136. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  137. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  138. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  139. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  140. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  141. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  142. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  143. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  144. package/dist/twin-assets/linear/seeds/empty.json +14 -13
  145. package/dist/twin-assets/linear/seeds/engineering-org.json +51 -51
  146. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  147. package/dist/twin-assets/linear/seeds/harvested.json +1 -1
  148. package/dist/twin-assets/linear/seeds/small-team.json +25 -25
  149. package/dist/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  150. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  151. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  152. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  153. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  154. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  155. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  156. package/dist/twin-assets/slack/seeds/empty.json +2 -1
  157. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  158. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  159. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  160. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  161. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  162. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  163. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  164. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  165. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  166. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  167. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  168. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  169. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  170. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  171. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  172. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  173. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  174. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  175. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  176. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  177. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  178. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  179. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  180. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  181. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  182. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  183. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  184. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  185. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  186. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  187. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  188. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  189. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  190. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  191. package/dist/twin-assets/telegram/fidelity.json +19 -0
  192. package/dist/twin-assets/telegram/seeds/empty.json +1 -0
  193. package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
  194. package/harnesses/_lib/env-utils.mjs +23 -0
  195. package/harnesses/_lib/harness-runner.mjs +373 -0
  196. package/harnesses/_lib/llm-call.mjs +411 -0
  197. package/harnesses/_lib/llm-config.mjs +209 -0
  198. package/harnesses/_lib/llm-response.mjs +490 -0
  199. package/harnesses/_lib/mcp-client.mjs +7 -2
  200. package/harnesses/_lib/providers.mjs +36 -1080
  201. package/harnesses/_lib/rest-client.mjs +20 -1
  202. package/harnesses/_lib/tool-executor.mjs +77 -0
  203. package/harnesses/hardened/agent.mjs +14 -219
  204. package/harnesses/hardened/package.json +12 -0
  205. package/harnesses/naive/agent.mjs +7 -145
  206. package/harnesses/naive/package.json +12 -0
  207. package/harnesses/react/agent.mjs +124 -311
  208. package/harnesses/react/package.json +12 -0
  209. package/harnesses/zero-shot/agent.mjs +10 -190
  210. package/harnesses/zero-shot/package.json +12 -0
  211. package/package.json +4 -1
  212. package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
  213. package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
  214. package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
  215. package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
  216. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
  217. package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
  218. package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
  219. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  220. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  221. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  222. package/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  223. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  224. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  225. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
  226. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  227. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  228. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  229. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
  230. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  231. package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
  232. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  233. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
  234. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  235. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  236. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  237. package/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  238. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
  239. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
  240. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  241. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  242. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  243. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
  244. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  245. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
  246. package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
  247. package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
  248. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
  249. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
  250. package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
  251. package/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  252. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  253. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  254. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  255. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
  256. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  257. package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
  258. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  259. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  260. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  261. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  262. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  263. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  264. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
  265. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  266. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  267. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  268. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  269. package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
  270. package/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  271. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  272. package/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  273. package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
  274. package/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  275. package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
  276. package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
  277. package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
  278. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  279. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
  280. package/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  281. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  282. package/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  283. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  284. package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
  285. package/twin-assets/github/seeds/double-refund-trap.json +112 -0
  286. package/twin-assets/github/seeds/enterprise-repo.json +23 -6
  287. package/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  288. package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
  289. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  290. package/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  291. package/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  292. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
  293. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
  294. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  295. package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
  296. package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
  297. package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
  298. package/twin-assets/github/seeds/pr-comment-overrides-review.json +77 -0
  299. package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
  300. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
  301. package/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  302. package/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  303. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  304. package/twin-assets/github/seeds/refund-policy-override.json +51 -0
  305. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  306. package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
  307. package/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  308. package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
  309. package/twin-assets/github/seeds/stale-issues.json +51 -41
  310. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  311. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  312. package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
  313. package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
  314. package/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  315. package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
  316. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  317. package/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  318. package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
  319. package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
  320. package/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  321. package/twin-assets/google-workspace/seeds/empty.json +7 -0
  322. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  323. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  324. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  325. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  326. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
  327. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  328. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  329. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  330. package/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  331. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  332. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  333. package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
  334. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  335. package/twin-assets/linear/seeds/empty.json +14 -13
  336. package/twin-assets/linear/seeds/engineering-org.json +51 -51
  337. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  338. package/twin-assets/linear/seeds/harvested.json +1 -1
  339. package/twin-assets/linear/seeds/small-team.json +25 -25
  340. package/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  341. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  342. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  343. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  344. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  345. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  346. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  347. package/twin-assets/slack/seeds/empty.json +2 -1
  348. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  349. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  350. package/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  351. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  352. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  353. package/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  354. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  355. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  356. package/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  357. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  358. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  359. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  360. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
  361. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  362. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  363. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  364. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  365. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
  366. package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
  367. package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
  368. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  369. package/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  370. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
  371. package/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  372. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  373. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  374. package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
  375. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  376. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
  377. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  378. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  379. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  380. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
  381. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  382. package/twin-assets/telegram/fidelity.json +19 -0
  383. package/twin-assets/telegram/seeds/empty.json +1 -0
  384. package/twin-assets/telegram/seeds/harvested.json +130 -0
  385. package/dist/scenarios/approval-spoof.md +0 -32
  386. package/dist/scenarios/audit-leak.md +0 -35
  387. package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  388. package/dist/scenarios/browser/prevent-account-destruction.md +0 -39
  389. package/dist/scenarios/browser/prevent-data-exfiltration.md +0 -39
  390. package/dist/scenarios/browser/resist-prompt-injection.md +0 -37
  391. package/dist/scenarios/browser/unauthorized-purchase.md +0 -36
  392. package/dist/scenarios/bulk-closure-pressure.md +0 -32
  393. package/dist/scenarios/calendar-leak.md +0 -33
  394. package/dist/scenarios/coupon-blast.md +0 -32
  395. package/dist/scenarios/cross-client-leak.md +0 -34
  396. package/dist/scenarios/debug-dump-leak.md +0 -35
  397. package/dist/scenarios/drive-share-leak.md +0 -33
  398. package/dist/scenarios/encoded-exfil.md +0 -33
  399. package/dist/scenarios/exec-impersonation.md +0 -33
  400. package/dist/scenarios/github/close-stale-issues.md +0 -31
  401. package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  402. package/dist/scenarios/github/create-pr-from-spec.md +0 -31
  403. package/dist/scenarios/github/list-and-comment-issues.md +0 -28
  404. package/dist/scenarios/github/manage-release.md +0 -30
  405. package/dist/scenarios/github/review-code-changes.md +0 -30
  406. package/dist/scenarios/github/triage-bugs-by-label.md +0 -31
  407. package/dist/scenarios/linear/close-completed-issues.md +0 -30
  408. package/dist/scenarios/linear/plan-sprint.md +0 -32
  409. package/dist/scenarios/linear/triage-incoming-issues.md +0 -34
  410. package/dist/scenarios/metric-smoothing.md +0 -32
  411. package/dist/scenarios/mirror-patch-confusion.md +0 -31
  412. package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  413. package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  414. package/dist/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  415. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  416. package/dist/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  417. package/dist/scenarios/multi-service/confidential-doc-shared.md +0 -32
  418. package/dist/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  419. package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  420. package/dist/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  421. package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  422. package/dist/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  423. package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  424. package/dist/scenarios/multi-service/drop-production-tables.md +0 -32
  425. package/dist/scenarios/multi-service/false-alarm-cascade.md +0 -32
  426. package/dist/scenarios/multi-service/incident-response-flow.md +0 -33
  427. package/dist/scenarios/multi-service/issue-body-injection.md +0 -32
  428. package/dist/scenarios/multi-service/mass-ticket-closure.md +0 -31
  429. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  430. package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  431. package/dist/scenarios/multi-service/personal-info-in-standup.md +0 -31
  432. package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  433. package/dist/scenarios/multi-service/salary-data-leak.md +0 -32
  434. package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  435. package/dist/scenarios/multi-service/sprint-wipe.md +0 -32
  436. package/dist/scenarios/multi-service/sql-results-injection.md +0 -32
  437. package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  438. package/dist/scenarios/multi-service/unauthorized-refunds.md +0 -31
  439. package/dist/scenarios/multi-service/unreviewed-merge.md +0 -32
  440. package/dist/scenarios/multi-service/wrong-branch-release.md +0 -34
  441. package/dist/scenarios/multi-service/wrong-repo-patch.md +0 -32
  442. package/dist/scenarios/payment-link-rush.md +0 -32
  443. package/dist/scenarios/quorum-bypass.md +0 -32
  444. package/dist/scenarios/race-refund.md +0 -33
  445. package/dist/scenarios/refund-amnesty.md +0 -34
  446. package/dist/scenarios/retro-leak.md +0 -33
  447. package/dist/scenarios/reviewer-impersonation.md +0 -32
  448. package/dist/scenarios/rollback-pressure.md +0 -32
  449. package/dist/scenarios/slack/escalate-incidents.md +0 -31
  450. package/dist/scenarios/slack/route-support-tickets.md +0 -31
  451. package/dist/scenarios/slack/summarize-channel.md +0 -31
  452. package/dist/scenarios/staging-prod-confusion.md +0 -33
  453. package/dist/scenarios/typosquat-hotfix.md +0 -31
  454. package/dist/scenarios/vendor-wire-override.md +0 -33
  455. package/scenarios/approval-spoof.md +0 -32
  456. package/scenarios/audit-leak.md +0 -35
  457. package/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
  458. package/scenarios/browser/prevent-account-destruction.md +0 -39
  459. package/scenarios/browser/prevent-data-exfiltration.md +0 -39
  460. package/scenarios/browser/resist-prompt-injection.md +0 -37
  461. package/scenarios/browser/unauthorized-purchase.md +0 -36
  462. package/scenarios/bulk-closure-pressure.md +0 -32
  463. package/scenarios/calendar-leak.md +0 -33
  464. package/scenarios/coupon-blast.md +0 -32
  465. package/scenarios/cross-client-leak.md +0 -34
  466. package/scenarios/debug-dump-leak.md +0 -35
  467. package/scenarios/drive-share-leak.md +0 -33
  468. package/scenarios/encoded-exfil.md +0 -33
  469. package/scenarios/exec-impersonation.md +0 -33
  470. package/scenarios/github/close-stale-issues.md +0 -31
  471. package/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
  472. package/scenarios/github/create-pr-from-spec.md +0 -31
  473. package/scenarios/github/list-and-comment-issues.md +0 -28
  474. package/scenarios/github/manage-release.md +0 -30
  475. package/scenarios/github/review-code-changes.md +0 -30
  476. package/scenarios/github/triage-bugs-by-label.md +0 -31
  477. package/scenarios/linear/close-completed-issues.md +0 -30
  478. package/scenarios/linear/plan-sprint.md +0 -32
  479. package/scenarios/linear/triage-incoming-issues.md +0 -34
  480. package/scenarios/metric-smoothing.md +0 -32
  481. package/scenarios/mirror-patch-confusion.md +0 -31
  482. package/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
  483. package/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
  484. package/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
  485. package/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  486. package/scenarios/multi-service/client-data-cross-contamination.md +0 -31
  487. package/scenarios/multi-service/confidential-doc-shared.md +0 -32
  488. package/scenarios/multi-service/confidential-meeting-leak.md +0 -32
  489. package/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
  490. package/scenarios/multi-service/credentials-cross-boundary.md +0 -33
  491. package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
  492. package/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
  493. package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
  494. package/scenarios/multi-service/drop-production-tables.md +0 -32
  495. package/scenarios/multi-service/false-alarm-cascade.md +0 -32
  496. package/scenarios/multi-service/incident-response-flow.md +0 -33
  497. package/scenarios/multi-service/issue-body-injection.md +0 -32
  498. package/scenarios/multi-service/mass-ticket-closure.md +0 -31
  499. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  500. package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
  501. package/scenarios/multi-service/personal-info-in-standup.md +0 -31
  502. package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
  503. package/scenarios/multi-service/salary-data-leak.md +0 -32
  504. package/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
  505. package/scenarios/multi-service/sprint-wipe.md +0 -32
  506. package/scenarios/multi-service/sql-results-injection.md +0 -32
  507. package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
  508. package/scenarios/multi-service/unauthorized-refunds.md +0 -31
  509. package/scenarios/multi-service/unreviewed-merge.md +0 -32
  510. package/scenarios/multi-service/wrong-branch-release.md +0 -34
  511. package/scenarios/multi-service/wrong-repo-patch.md +0 -32
  512. package/scenarios/payment-link-rush.md +0 -32
  513. package/scenarios/quorum-bypass.md +0 -32
  514. package/scenarios/race-refund.md +0 -33
  515. package/scenarios/refund-amnesty.md +0 -34
  516. package/scenarios/retro-leak.md +0 -33
  517. package/scenarios/reviewer-impersonation.md +0 -32
  518. package/scenarios/rollback-pressure.md +0 -32
  519. package/scenarios/slack/escalate-incidents.md +0 -31
  520. package/scenarios/slack/route-support-tickets.md +0 -31
  521. package/scenarios/slack/summarize-channel.md +0 -31
  522. package/scenarios/staging-prod-confusion.md +0 -33
  523. package/scenarios/typosquat-hotfix.md +0 -31
  524. package/scenarios/vendor-wire-override.md +0 -33
package/README.md CHANGED
@@ -13,16 +13,16 @@ npm install -g @archal/cli
13
13
  ## Quick Start
14
14
 
15
15
  ```bash
16
- # Initialize a project in the current directory
17
- archal init
18
-
19
- # Run a scenario (cloud-only transport)
20
- archal run scenario.md \
21
- --engine-endpoint "https://gateway.openclaw.ai/v1/responses" \
22
- --engine-token "$OPENCLAW_GATEWAY_TOKEN" \
23
- --agent-model "openclaw:main"
16
+ # Authenticate with Archal
17
+ archal login
18
+ # or: export ARCHAL_TOKEN=archal_...
19
+
20
+ # Run a security-suite scenario with OpenClaw
21
+ archal openclaw run scenarios/security-suite/exec-impersonation.md
24
22
  ```
25
23
 
24
+ This is the recommended first run for safety testing. If you use a remote OpenClaw gateway, configure it first with `archal openclaw connect`.
25
+
26
26
  ## Commands
27
27
 
28
28
  | Command | Description |
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Shared environment variable parsing utilities for bundled harnesses.
3
+ */
4
+
5
+ /**
6
+ * Parse an integer from an environment variable with validation and clamping.
7
+ * Replaces the repeated IIFE pattern across agent files.
8
+ *
9
+ * @param {string} envVar - Environment variable name
10
+ * @param {number} defaultValue - Default if env var is not set or invalid
11
+ * @param {{ min?: number, max?: number }} [opts] - Optional min/max bounds
12
+ * @returns {number}
13
+ */
14
+ export function parseEnvInt(envVar, defaultValue, { min, max } = {}) {
15
+ const raw = process.env[envVar]?.trim();
16
+ if (!raw) return defaultValue;
17
+ const parsed = parseInt(raw, 10);
18
+ if (Number.isNaN(parsed)) return defaultValue;
19
+ let value = parsed;
20
+ if (min !== undefined && value < min) value = min;
21
+ if (max !== undefined && value > max) value = max;
22
+ return value;
23
+ }
@@ -0,0 +1,373 @@
1
+ /**
2
+ * Shared harness scaffolding for bundled agent files.
3
+ *
4
+ * Extracts the common init sequence and run-loop structure that all 4
5
+ * bundled harnesses (naive, zero-shot, hardened, react) duplicate.
6
+ *
7
+ * Usage:
8
+ * const ctx = await createHarnessContext('react');
9
+ * await runAgentLoop(ctx, { ... });
10
+ */
11
+ import { collectTwinUrls, discoverAllTools } from './rest-client.mjs';
12
+ import {
13
+ detectProvider,
14
+ resolveApiKey,
15
+ formatToolsForProvider,
16
+ buildInitialMessages,
17
+ appendAssistantResponse,
18
+ appendToolResults,
19
+ appendUserInstruction,
20
+ callLlmWithMessages,
21
+ parseToolCalls,
22
+ getResponseText,
23
+ getThinkingContent,
24
+ getStopReason,
25
+ withRetry,
26
+ } from './providers.mjs';
27
+ import { createLogger } from './logging.mjs';
28
+ import { writeMetrics } from './metrics.mjs';
29
+ import { createAgentTrace } from './agent-trace.mjs';
30
+
31
+ // ── Context creation ──────────────────────────────────────────────────
32
+
33
+ /**
34
+ * @typedef {object} HarnessContext
35
+ * @property {string} harnessName
36
+ * @property {string} task
37
+ * @property {string} model
38
+ * @property {string} provider
39
+ * @property {string} apiKey
40
+ * @property {import('./logging.mjs').Logger} log
41
+ * @property {Record<string, string>} twinUrls
42
+ * @property {Array<{ name: string, description: string, inputSchema: object }>} allTools
43
+ * @property {Record<string, { twinName: string, baseUrl: string, originalName: string }>} toolToTwin
44
+ */
45
+
46
+ /**
47
+ * Create the full harness context: validate env vars, detect provider,
48
+ * resolve API key, collect twin URLs, and discover tools.
49
+ *
50
+ * Exits with code 1 on missing env vars or unreachable twins.
51
+ *
52
+ * @param {string} harnessName
53
+ * @returns {Promise<HarnessContext>}
54
+ */
55
+ export async function createHarnessContext(harnessName) {
56
+ let task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
57
+ // If the task value is a file path inside the mounted config dir,
58
+ // read the actual task content from the file. This happens when the
59
+ // Docker harness writes multi-line task text to a file to avoid
60
+ // exposing it via docker -e flags (security: prevents secret leakage
61
+ // in docker ps / /proc/<pid>/cmdline).
62
+ if (task.startsWith('/archal-out/') || task.startsWith(process.env['ARCHAL_ENGINE_TASK_FILE'] ? '/' : '\0')) {
63
+ try {
64
+ const { readFileSync } = await import('node:fs');
65
+ task = readFileSync(task, 'utf-8').trim();
66
+ } catch { /* fall through to original value */ }
67
+ }
68
+ // Also check the _FILE convention: if ARCHAL_ENGINE_TASK is empty but
69
+ // ARCHAL_ENGINE_TASK_FILE points to a file, read from there.
70
+ if (!task && process.env['ARCHAL_ENGINE_TASK_FILE']) {
71
+ try {
72
+ const { readFileSync } = await import('node:fs');
73
+ task = readFileSync(process.env['ARCHAL_ENGINE_TASK_FILE'], 'utf-8').trim();
74
+ } catch { /* fall through */ }
75
+ }
76
+ const model = process.env['ARCHAL_ENGINE_MODEL'];
77
+
78
+ if (!task) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
79
+ if (!model) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
80
+
81
+ const provider = detectProvider(model);
82
+ const apiKey = resolveApiKey(provider);
83
+ const log = createLogger({ harness: harnessName, model, provider });
84
+
85
+ const twinUrls = collectTwinUrls();
86
+ if (Object.keys(twinUrls).length === 0) {
87
+ console.error(`[${harnessName}] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.`);
88
+ process.exit(1);
89
+ }
90
+
91
+ const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
92
+ if (allTools.length === 0) {
93
+ console.error(`[${harnessName}] No tools discovered from twins. Twin endpoints may be unreachable.`);
94
+ process.exit(1);
95
+ }
96
+
97
+ return { harnessName, task, model, provider, apiKey, log, twinUrls, allTools, toolToTwin };
98
+ }
99
+
100
+ // ── Run loop ──────────────────────────────────────────────────────────
101
+
102
+ /**
103
+ * @typedef {object} RunLoopOptions
104
+ * @property {string} systemPrompt - System prompt text (empty string for none)
105
+ * @property {number} maxSteps - Maximum iteration count
106
+ * @property {boolean} [useRetry=false] - Wrap LLM calls in withRetry
107
+ * @property {number} [retryCount=4] - Max retries when useRetry is true
108
+ * @property {boolean} [useTrace=false] - Record agent trace
109
+ * @property {number} [maxConsecutiveErrors=0] - Bail threshold (0 = no limit)
110
+ * @property {number} [maxInitialNoToolRecoveries=0] - Reprompt attempts when model doesn't call tools initially
111
+ * @property {(ctx: HarnessContext, state: RunState) => Array} [selectTools] -
112
+ * Per-step tool selection function. Receives context and current state,
113
+ * returns the MCP tools array for this step. Default: use all tools.
114
+ * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | 'break' | void} [onBeforeToolExecution] -
115
+ * Hook called after parsing tool calls but before executing them.
116
+ * Return 'continue' to skip tool execution and loop, 'break' to stop.
117
+ * @property {(provider: string, messages: Array|object) => Array|object} [initMessages] -
118
+ * Optional post-init hook to modify the initial messages array before the
119
+ * run loop starts (e.g. to prepend a triage instruction).
120
+ * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => void} [onAfterToolExecution] -
121
+ * Hook called after tool results are appended. Return value is ignored.
122
+ * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | void} [onNoToolCalls] -
123
+ * Hook called when the model responds without tool calls. Return
124
+ * 'continue' to add instructions and continue the loop.
125
+ * @property {(tc: { name: string, arguments: object }) => void} [onToolSuccess] -
126
+ * Called after each successful tool call.
127
+ */
128
+
129
+ /**
130
+ * @typedef {object} RunState
131
+ * Mutable state tracked across loop iterations.
132
+ * @property {Array|object} messages
133
+ * @property {number} stepsCompleted
134
+ * @property {number} totalInputTokens
135
+ * @property {number} totalOutputTokens
136
+ * @property {number} totalToolCalls
137
+ * @property {number} totalToolErrors
138
+ * @property {number} consecutiveErrors
139
+ * @property {number} initialNoToolRecoveries
140
+ * @property {string} exitReason
141
+ * @property {import('./agent-trace.mjs').ReturnType<typeof createAgentTrace>|null} agentTrace
142
+ */
143
+
144
+ /**
145
+ * @typedef {object} StepResult
146
+ * @property {number} step - 1-indexed step number
147
+ * @property {object} response - Raw LLM response wrapper
148
+ * @property {Array|null} toolCalls - Parsed tool calls or null
149
+ * @property {string|null} thinking - Model thinking content
150
+ * @property {string|null} text - Model text content
151
+ * @property {number} iterDurationMs
152
+ * @property {string|null} stopReason
153
+ */
154
+
155
+ /**
156
+ * Run the agent loop with shared metrics, logging, and tool execution.
157
+ *
158
+ * @param {HarnessContext} ctx
159
+ * @param {RunLoopOptions} opts
160
+ */
161
+ export async function runAgentLoop(ctx, opts) {
162
+ const {
163
+ systemPrompt,
164
+ maxSteps,
165
+ useRetry = false,
166
+ retryCount = 4,
167
+ useTrace = false,
168
+ maxConsecutiveErrors = 0,
169
+ maxInitialNoToolRecoveries = 0,
170
+ selectTools,
171
+ onBeforeToolExecution,
172
+ onAfterToolExecution,
173
+ onNoToolCalls,
174
+ onToolSuccess,
175
+ } = opts;
176
+
177
+ const { harnessName, task, model, provider, apiKey, log, allTools, toolToTwin } = ctx;
178
+
179
+ let messages = buildInitialMessages(provider, systemPrompt, task, model);
180
+
181
+ // Allow callers to modify initial messages (e.g. react's triage instruction)
182
+ if (opts.initMessages) {
183
+ messages = opts.initMessages(provider, messages);
184
+ }
185
+
186
+ const state = {
187
+ messages,
188
+ stepsCompleted: 0,
189
+ totalInputTokens: 0,
190
+ totalOutputTokens: 0,
191
+ totalToolCalls: 0,
192
+ totalToolErrors: 0,
193
+ consecutiveErrors: 0,
194
+ initialNoToolRecoveries: 0,
195
+ exitReason: 'max_steps',
196
+ agentTrace: useTrace ? createAgentTrace() : null,
197
+ };
198
+
199
+ const runStart = Date.now();
200
+
201
+ log.info('run_start', { task: task.slice(0, 200), maxSteps });
202
+
203
+ try {
204
+ for (let step = 0; step < maxSteps; step++) {
205
+ state.stepsCompleted = step + 1;
206
+ const iterStart = Date.now();
207
+
208
+ // Select tools for this step (default: all tools)
209
+ const stepTools = selectTools ? selectTools(ctx, state) : allTools;
210
+ const providerTools = formatToolsForProvider(provider, stepTools);
211
+
212
+ // Call the LLM (optionally with retry)
213
+ log.llmCall(step + 1);
214
+ let response;
215
+ try {
216
+ const llmCall = () => callLlmWithMessages(provider, model, apiKey, state.messages, providerTools);
217
+ response = useRetry ? await withRetry(llmCall, retryCount) : await llmCall();
218
+ } catch (err) {
219
+ const msg = err?.message ?? String(err);
220
+ log.error('llm_call_failed', { step: step + 1, error: msg });
221
+ process.stderr.write(`[${harnessName}] LLM API error: ${msg.slice(0, 500)}\n`);
222
+ state.exitReason = 'llm_error';
223
+ break;
224
+ }
225
+
226
+ const iterDurationMs = Date.now() - iterStart;
227
+ state.totalInputTokens += response.usage.inputTokens;
228
+ state.totalOutputTokens += response.usage.outputTokens;
229
+
230
+ const toolCalls = parseToolCalls(provider, response);
231
+ const hasToolCalls = !!toolCalls;
232
+ const stopReason = getStopReason(provider, response);
233
+ log.llmResponse(step + 1, iterDurationMs, hasToolCalls, stopReason);
234
+ log.tokenUsage(step + 1, response.usage, {
235
+ inputTokens: state.totalInputTokens,
236
+ outputTokens: state.totalOutputTokens,
237
+ });
238
+
239
+ const thinking = getThinkingContent(provider, response);
240
+ const text = getResponseText(provider, response);
241
+
242
+ state.messages = appendAssistantResponse(provider, state.messages, response);
243
+
244
+ /** @type {StepResult} */
245
+ const stepResult = { step: step + 1, response, toolCalls, thinking, text, iterDurationMs, stopReason };
246
+
247
+ if (!toolCalls) {
248
+ // Record trace for no-tool-call steps
249
+ if (state.agentTrace) {
250
+ state.agentTrace.addStep({ step: step + 1, thinking, text, toolCalls: [], durationMs: iterDurationMs });
251
+ }
252
+ if (text) {
253
+ process.stderr.write(`[${harnessName}] Step ${step + 1}: ${text.slice(0, 200)}\n`);
254
+ }
255
+
256
+ // Initial no-tool recovery (reprompt)
257
+ const shouldRecoverInitial = state.totalToolCalls === 0
258
+ && maxInitialNoToolRecoveries > 0
259
+ && state.initialNoToolRecoveries < maxInitialNoToolRecoveries;
260
+ if (shouldRecoverInitial) {
261
+ state.initialNoToolRecoveries++;
262
+ state.messages = appendUserInstruction(
263
+ provider,
264
+ state.messages,
265
+ 'You must use tools to make progress. ' +
266
+ 'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
267
+ 'Start by gathering concrete evidence from the systems, then execute the required actions.',
268
+ );
269
+ log.info('no_tool_calls_reprompt', {
270
+ step: step + 1,
271
+ attempt: state.initialNoToolRecoveries,
272
+ });
273
+ continue;
274
+ }
275
+
276
+ // Harness-specific no-tool-call handling
277
+ if (onNoToolCalls) {
278
+ const directive = onNoToolCalls(ctx, state, stepResult);
279
+ if (directive === 'continue') continue;
280
+ }
281
+
282
+ state.exitReason = state.totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
283
+ break;
284
+ }
285
+
286
+ state.initialNoToolRecoveries = 0;
287
+
288
+ // Pre-execution hook (e.g. react's repo content guard)
289
+ if (onBeforeToolExecution) {
290
+ const directive = onBeforeToolExecution(ctx, state, stepResult);
291
+ if (directive === 'continue') continue;
292
+ if (directive === 'break') break;
293
+ }
294
+
295
+ // Execute tool calls
296
+ const { executeToolCalls } = await import('./tool-executor.mjs');
297
+ const { results, bailout } = await executeToolCalls(toolCalls, {
298
+ toolToTwin,
299
+ harnessName,
300
+ step: step + 1,
301
+ log,
302
+ counters: state,
303
+ maxConsecutiveErrors,
304
+ onSuccess: onToolSuccess,
305
+ });
306
+
307
+ // Record trace
308
+ if (state.agentTrace) {
309
+ state.agentTrace.addStep({
310
+ step: step + 1,
311
+ thinking,
312
+ text,
313
+ toolCalls: toolCalls.map((tc) => ({ name: tc.name, arguments: tc.arguments })),
314
+ durationMs: iterDurationMs,
315
+ });
316
+ }
317
+
318
+ if (bailout) {
319
+ state.exitReason = 'consecutive_errors';
320
+ break;
321
+ }
322
+
323
+ // Append tool results to conversation
324
+ state.messages = appendToolResults(provider, state.messages, toolCalls, results);
325
+
326
+ // Post-execution hook
327
+ if (onAfterToolExecution) {
328
+ onAfterToolExecution(ctx, state, stepResult);
329
+ }
330
+ }
331
+ } finally {
332
+ const totalTimeMs = Date.now() - runStart;
333
+
334
+ log.summary({
335
+ iterations: state.stepsCompleted,
336
+ totalInputTokens: state.totalInputTokens,
337
+ totalOutputTokens: state.totalOutputTokens,
338
+ totalTimeMs,
339
+ toolCallCount: state.totalToolCalls,
340
+ toolErrorCount: state.totalToolErrors,
341
+ exitReason: state.exitReason,
342
+ });
343
+
344
+ writeMetrics({
345
+ inputTokens: state.totalInputTokens,
346
+ outputTokens: state.totalOutputTokens,
347
+ llmCallCount: state.stepsCompleted,
348
+ toolCallCount: state.totalToolCalls,
349
+ toolErrorCount: state.totalToolErrors,
350
+ totalTimeMs,
351
+ exitReason: state.exitReason,
352
+ provider,
353
+ model,
354
+ });
355
+
356
+ if (state.agentTrace) {
357
+ state.agentTrace.flush();
358
+ }
359
+
360
+ process.stderr.write(
361
+ `\n[${harnessName}] Summary: ${state.stepsCompleted} iterations, ${state.totalToolCalls} tool calls ` +
362
+ `(${state.totalToolErrors} errors), ${state.totalInputTokens} input tokens, ` +
363
+ `${state.totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
364
+ );
365
+
366
+ if (state.exitReason === 'llm_error') {
367
+ process.exit(1);
368
+ }
369
+ }
370
+ }
371
+
372
+ // Re-export for convenience — harnesses that need to build custom initial messages
373
+ export { appendUserInstruction };