@archal/cli 0.9.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/README.md +9 -14
  2. package/dist/index.cjs +35736 -30817
  3. package/package.json +22 -12
  4. package/twin-assets/google-workspace/fidelity.json +9 -0
  5. package/twin-assets/jira/fidelity.json +17 -17
  6. package/twin-assets/ramp/fidelity.json +22 -0
  7. package/twin-assets/slack/fidelity.json +6 -7
  8. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  9. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  10. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  11. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  12. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  13. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  14. package/dist/harnesses/_lib/logging.mjs +0 -176
  15. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  16. package/dist/harnesses/_lib/metrics.mjs +0 -34
  17. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  18. package/dist/harnesses/_lib/providers.mjs +0 -39
  19. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  20. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  21. package/dist/harnesses/hardened/SAFETY.md +0 -53
  22. package/dist/harnesses/hardened/agent.mjs +0 -57
  23. package/dist/harnesses/hardened/archal-harness.json +0 -23
  24. package/dist/harnesses/hardened/package.json +0 -12
  25. package/dist/harnesses/naive/agent.mjs +0 -37
  26. package/dist/harnesses/naive/archal-harness.json +0 -21
  27. package/dist/harnesses/naive/package.json +0 -12
  28. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  29. package/dist/harnesses/openclaw/SOUL.md +0 -12
  30. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  31. package/dist/harnesses/openclaw/agent.mjs +0 -229
  32. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  33. package/dist/harnesses/react/agent.mjs +0 -233
  34. package/dist/harnesses/react/archal-harness.json +0 -22
  35. package/dist/harnesses/react/package.json +0 -12
  36. package/dist/harnesses/react/tool-selection.mjs +0 -66
  37. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  38. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  39. package/dist/harnesses/zero-shot/package.json +0 -12
  40. package/dist/package.json +0 -72
  41. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  42. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  43. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  44. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  45. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  46. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  47. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  48. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  49. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  50. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  51. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  52. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  53. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  54. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  55. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  56. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  57. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  58. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  59. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  60. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  61. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  62. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  63. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  64. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  65. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  66. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  67. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  68. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  69. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  70. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  71. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  72. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  73. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  74. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  75. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  76. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  77. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  78. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  79. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  80. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  81. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  82. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  83. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  84. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  85. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  86. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  87. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  88. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  89. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  90. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  91. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  92. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  93. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  94. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  95. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  96. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  97. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  98. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  99. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  100. package/dist/twin-assets/github/fidelity.json +0 -13
  101. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  102. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  103. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  104. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  105. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  106. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  107. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  108. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  109. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  110. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  111. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  112. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  113. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  114. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  115. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  116. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  117. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  118. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  119. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  120. package/dist/twin-assets/github/seeds/empty.json +0 -33
  121. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  122. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  123. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  124. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  125. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  126. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  127. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  128. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  129. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  130. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  131. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  132. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  133. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  134. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  135. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  136. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  137. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  138. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  139. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  140. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  141. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  142. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  143. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  144. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  146. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  147. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  148. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  149. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  150. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  151. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  152. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  153. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  154. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  155. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  156. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  157. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  158. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  159. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  160. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  161. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  162. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  163. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  164. package/dist/twin-assets/jira/fidelity.json +0 -40
  165. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  166. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  167. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  168. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  169. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  170. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  171. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  172. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  173. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  174. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  175. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  176. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  177. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  178. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  179. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  180. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  181. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  182. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  183. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  184. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  185. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  186. package/dist/twin-assets/linear/fidelity.json +0 -13
  187. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  188. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  189. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  190. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  191. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  192. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  193. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  194. package/dist/twin-assets/slack/fidelity.json +0 -14
  195. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  196. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  197. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  198. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  199. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  200. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  201. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  202. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  203. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  204. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  205. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  206. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  207. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  208. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  209. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  210. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  211. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  212. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  213. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  214. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  215. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  216. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  217. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  218. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  219. package/dist/twin-assets/stripe/fidelity.json +0 -22
  220. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  221. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  222. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  223. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  224. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  225. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  226. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  227. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  228. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  229. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  230. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  231. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  232. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  233. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  234. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  235. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  236. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  237. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  238. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  239. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  240. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  241. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  242. package/dist/twin-assets/supabase/fidelity.json +0 -13
  243. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  244. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  245. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  246. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  247. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  248. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  249. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  250. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  251. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  252. package/dist/twin-assets/telegram/fidelity.json +0 -19
  253. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  254. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  255. package/harnesses/_lib/agent-trace.mjs +0 -57
  256. package/harnesses/_lib/env-utils.mjs +0 -23
  257. package/harnesses/_lib/harness-runner.mjs +0 -373
  258. package/harnesses/_lib/llm-call.mjs +0 -411
  259. package/harnesses/_lib/llm-config.mjs +0 -209
  260. package/harnesses/_lib/llm-response.mjs +0 -490
  261. package/harnesses/_lib/logging.mjs +0 -176
  262. package/harnesses/_lib/mcp-client.mjs +0 -85
  263. package/harnesses/_lib/metrics.mjs +0 -34
  264. package/harnesses/_lib/model-configs.mjs +0 -521
  265. package/harnesses/_lib/providers.mjs +0 -39
  266. package/harnesses/_lib/rest-client.mjs +0 -150
  267. package/harnesses/_lib/tool-executor.mjs +0 -77
  268. package/harnesses/hardened/SAFETY.md +0 -53
  269. package/harnesses/hardened/agent.mjs +0 -57
  270. package/harnesses/hardened/archal-harness.json +0 -23
  271. package/harnesses/hardened/package.json +0 -12
  272. package/harnesses/naive/agent.mjs +0 -37
  273. package/harnesses/naive/archal-harness.json +0 -21
  274. package/harnesses/naive/package.json +0 -12
  275. package/harnesses/openclaw/AGENTS.md +0 -27
  276. package/harnesses/openclaw/SOUL.md +0 -12
  277. package/harnesses/openclaw/TOOLS.md +0 -20
  278. package/harnesses/openclaw/agent.mjs +0 -229
  279. package/harnesses/openclaw/archal-harness.json +0 -28
  280. package/harnesses/react/agent.mjs +0 -233
  281. package/harnesses/react/archal-harness.json +0 -22
  282. package/harnesses/react/package.json +0 -12
  283. package/harnesses/react/tool-selection.mjs +0 -66
  284. package/harnesses/zero-shot/agent.mjs +0 -31
  285. package/harnesses/zero-shot/archal-harness.json +0 -21
  286. package/harnesses/zero-shot/package.json +0 -12
  287. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  288. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  289. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  290. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  291. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  292. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  293. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  294. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  295. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  296. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  297. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  298. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  299. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  300. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  301. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  302. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  303. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  304. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  305. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  306. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  307. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  308. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  309. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  310. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  311. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  312. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  313. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  314. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  315. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  316. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  317. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  318. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  319. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  320. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  321. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  322. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  323. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  324. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  325. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  326. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  327. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  328. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  329. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  330. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  331. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  332. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  333. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  334. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  335. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  336. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  337. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  338. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  339. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  340. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  341. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  342. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  343. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  344. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  345. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  346. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  347. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  348. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  349. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  350. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  351. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  352. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  353. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  354. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  355. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  356. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  357. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  358. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  359. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  360. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  361. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  362. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  363. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  364. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  365. package/twin-assets/github/seeds/empty.json +0 -33
  366. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  367. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  368. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  369. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  370. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  371. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  372. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  373. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  374. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  375. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  376. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  377. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  378. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  379. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  380. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  381. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  382. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  383. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  384. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  385. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  386. package/twin-assets/github/seeds/rate-limited.json +0 -41
  387. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  388. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  389. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  390. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  391. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  392. package/twin-assets/github/seeds/small-project.json +0 -833
  393. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  394. package/twin-assets/github/seeds/stale-issues.json +0 -375
  395. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  396. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  397. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  398. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  399. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  400. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  401. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  402. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  403. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  404. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  405. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  406. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  407. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  408. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  409. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  410. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  411. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  412. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  413. package/twin-assets/jira/seeds/empty.json +0 -124
  414. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  415. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  416. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  417. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  418. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  419. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  420. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  421. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  422. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  423. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  424. package/twin-assets/jira/seeds/small-project.json +0 -246
  425. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  426. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  427. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  428. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  429. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  430. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  431. package/twin-assets/linear/seeds/empty.json +0 -171
  432. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  433. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  434. package/twin-assets/linear/seeds/harvested.json +0 -331
  435. package/twin-assets/linear/seeds/small-team.json +0 -584
  436. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  437. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  438. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  439. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  440. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  441. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  442. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  443. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  444. package/twin-assets/slack/seeds/empty.json +0 -136
  445. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  446. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  447. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  448. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  449. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  450. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  451. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  452. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  453. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  454. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  455. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  456. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  457. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  458. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  459. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  460. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  461. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  462. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  463. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  464. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  465. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  466. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  467. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  468. package/twin-assets/stripe/seeds/empty.json +0 -31
  469. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  470. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  471. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  472. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  473. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  474. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  475. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  476. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  477. package/twin-assets/stripe/seeds/small-business.json +0 -607
  478. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  479. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  480. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  481. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  482. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  483. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  484. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  485. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  486. package/twin-assets/supabase/seeds/empty.sql +0 -2
  487. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  488. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  489. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  490. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  491. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  492. package/twin-assets/telegram/seeds/empty.json +0 -1
  493. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,373 +0,0 @@
1
- /**
2
- * Shared harness scaffolding for bundled agent files.
3
- *
4
- * Extracts the common init sequence and run-loop structure that all 4
5
- * bundled harnesses (naive, zero-shot, hardened, react) duplicate.
6
- *
7
- * Usage:
8
- * const ctx = await createHarnessContext('react');
9
- * await runAgentLoop(ctx, { ... });
10
- */
11
- import { collectTwinUrls, discoverAllTools } from './rest-client.mjs';
12
- import {
13
- detectProvider,
14
- resolveApiKey,
15
- formatToolsForProvider,
16
- buildInitialMessages,
17
- appendAssistantResponse,
18
- appendToolResults,
19
- appendUserInstruction,
20
- callLlmWithMessages,
21
- parseToolCalls,
22
- getResponseText,
23
- getThinkingContent,
24
- getStopReason,
25
- withRetry,
26
- } from './providers.mjs';
27
- import { createLogger } from './logging.mjs';
28
- import { writeMetrics } from './metrics.mjs';
29
- import { createAgentTrace } from './agent-trace.mjs';
30
-
31
- // ── Context creation ──────────────────────────────────────────────────
32
-
33
- /**
34
- * @typedef {object} HarnessContext
35
- * @property {string} harnessName
36
- * @property {string} task
37
- * @property {string} model
38
- * @property {string} provider
39
- * @property {string} apiKey
40
- * @property {import('./logging.mjs').Logger} log
41
- * @property {Record<string, string>} twinUrls
42
- * @property {Array<{ name: string, description: string, inputSchema: object }>} allTools
43
- * @property {Record<string, { twinName: string, baseUrl: string, originalName: string }>} toolToTwin
44
- */
45
-
46
/**
 * Read a task description from a file and trim surrounding whitespace.
 *
 * Best-effort by design: any failure (missing file, permission error, path is
 * a directory, ...) yields `null` so the caller can keep the value it already
 * has instead of crashing.
 *
 * @param {string} filePath - Path of the file holding the task text.
 * @returns {Promise<string|null>} Trimmed file contents, or `null` if unreadable.
 */
async function readTaskFile(filePath) {
  try {
    // Imported lazily so node:fs is only loaded when a task file is actually used.
    const { readFileSync } = await import('node:fs');
    return readFileSync(filePath, 'utf-8').trim();
  } catch {
    return null;
  }
}

/**
 * Create the full harness context: validate env vars, detect provider,
 * resolve API key, collect twin URLs, and discover tools.
 *
 * Task resolution order:
 *   1. `ARCHAL_ENGINE_TASK` (trimmed). If it looks like a path -- it starts
 *      with `/archal-out/`, or it is absolute while `ARCHAL_ENGINE_TASK_FILE`
 *      is set -- the file contents replace it. An unreadable file leaves the
 *      original value untouched.
 *   2. If the task is still empty and `ARCHAL_ENGINE_TASK_FILE` is set, the
 *      task is read from that file.
 *
 * Exits the process with code 1 when the task or model is missing, when no
 * twin URLs are configured, or when no tools could be discovered.
 *
 * @param {string} harnessName - Name used for logging and error prefixes.
 * @returns {Promise<HarnessContext>}
 */
export async function createHarnessContext(harnessName) {
  const taskFile = process.env['ARCHAL_ENGINE_TASK_FILE'];
  let task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();

  // The Docker harness writes multi-line task text to a file rather than
  // passing it via `docker -e`, so the text does not leak through
  // `docker ps` or /proc/<pid>/cmdline. In that case the env var holds a path.
  const looksLikeTaskPath =
    task.startsWith('/archal-out/') || (Boolean(taskFile) && task.startsWith('/'));
  if (looksLikeTaskPath) {
    task = (await readTaskFile(task)) ?? task;
  }

  // _FILE convention: ARCHAL_ENGINE_TASK is empty but ARCHAL_ENGINE_TASK_FILE
  // points at the file that holds the task.
  if (!task && taskFile) {
    task = (await readTaskFile(taskFile)) ?? task;
  }

  const model = process.env['ARCHAL_ENGINE_MODEL'];

  if (!task) {
    console.error('ARCHAL_ENGINE_TASK not set or empty');
    process.exit(1);
  }
  if (!model) {
    console.error('ARCHAL_ENGINE_MODEL not set');
    process.exit(1);
  }

  const provider = detectProvider(model);
  const apiKey = resolveApiKey(provider);
  const log = createLogger({ harness: harnessName, model, provider });

  const twinUrls = collectTwinUrls();
  if (Object.keys(twinUrls).length === 0) {
    console.error(`[${harnessName}] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.`);
    process.exit(1);
  }

  const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
  if (allTools.length === 0) {
    console.error(`[${harnessName}] No tools discovered from twins. Twin endpoints may be unreachable.`);
    process.exit(1);
  }

  return { harnessName, task, model, provider, apiKey, log, twinUrls, allTools, toolToTwin };
}
99
-
100
- // ── Run loop ──────────────────────────────────────────────────────────
101
-
102
- /**
103
- * @typedef {object} RunLoopOptions
104
- * @property {string} systemPrompt - System prompt text (empty string for none)
105
- * @property {number} maxSteps - Maximum iteration count
106
- * @property {boolean} [useRetry=false] - Wrap LLM calls in withRetry
107
- * @property {number} [retryCount=4] - Max retries when useRetry is true
108
- * @property {boolean} [useTrace=false] - Record agent trace
109
- * @property {number} [maxConsecutiveErrors=0] - Bail threshold (0 = no limit)
110
- * @property {number} [maxInitialNoToolRecoveries=0] - Reprompt attempts when model doesn't call tools initially
111
- * @property {(ctx: HarnessContext, state: RunState) => Array} [selectTools] -
112
- * Per-step tool selection function. Receives context and current state,
113
- * returns the MCP tools array for this step. Default: use all tools.
114
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | 'break' | void} [onBeforeToolExecution] -
115
- * Hook called after parsing tool calls but before executing them.
116
- * Return 'continue' to skip tool execution and loop, 'break' to stop.
117
- * @property {(provider: string, messages: Array|object) => Array|object} [initMessages] -
118
- * Optional post-init hook to modify the initial messages array before the
119
- * run loop starts (e.g. to prepend a triage instruction).
120
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => void} [onAfterToolExecution] -
121
- * Hook called after tool results are appended. Return value is ignored.
122
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | void} [onNoToolCalls] -
123
- * Hook called when the model responds without tool calls. Return
124
- * 'continue' to add instructions and continue the loop.
125
- * @property {(tc: { name: string, arguments: object }) => void} [onToolSuccess] -
126
- * Called after each successful tool call.
127
- */
128
-
129
- /**
130
- * @typedef {object} RunState
131
- * Mutable state tracked across loop iterations.
132
- * @property {Array|object} messages
133
- * @property {number} stepsCompleted
134
- * @property {number} totalInputTokens
135
- * @property {number} totalOutputTokens
136
- * @property {number} totalToolCalls
137
- * @property {number} totalToolErrors
138
- * @property {number} consecutiveErrors
139
- * @property {number} initialNoToolRecoveries
140
- * @property {string} exitReason
141
- * @property {ReturnType<typeof createAgentTrace>|null} agentTrace
142
- */
143
-
144
- /**
145
- * @typedef {object} StepResult
146
- * @property {number} step - 1-indexed step number
147
- * @property {object} response - Raw LLM response wrapper
148
- * @property {Array|null} toolCalls - Parsed tool calls or null
149
- * @property {string|null} thinking - Model thinking content
150
- * @property {string|null} text - Model text content
151
- * @property {number} iterDurationMs
152
- * @property {string|null} stopReason
153
- */
154
-
155
/**
 * Run the agent loop with shared metrics, logging, and tool execution.
 *
 * Each step selects the tools to offer, calls the LLM (optionally with
 * retry), accumulates token usage, and appends the assistant response to the
 * conversation. If the model made no tool calls the loop either reprompts,
 * defers to `onNoToolCalls`, or stops; otherwise the tool calls are executed
 * and their results appended before the next step.
 *
 * Whatever happens, a summary is logged, metrics are written, and the agent
 * trace (if enabled) is flushed. `state.exitReason` ends up as one of:
 *   - 'max_steps'          the step budget ran out (also the initial value)
 *   - 'completed'          the model stopped calling tools after using some
 *   - 'no_tool_calls'      the model never called a tool
 *   - 'consecutive_errors' the tool executor requested a bailout
 *   - 'llm_error'          the LLM call failed; the process exits with code 1
 *   - 'unexpected_error'   something else threw; the error is rethrown
 * Hooks receive the mutable state and may set their own reason.
 *
 * @param {HarnessContext} ctx
 * @param {RunLoopOptions} opts
 * @returns {Promise<void>}
 */
export async function runAgentLoop(ctx, opts) {
  const {
    systemPrompt,
    maxSteps,
    useRetry = false,
    retryCount = 4,
    useTrace = false,
    maxConsecutiveErrors = 0,
    maxInitialNoToolRecoveries = 0,
    selectTools,
    onBeforeToolExecution,
    onAfterToolExecution,
    onNoToolCalls,
    onToolSuccess,
  } = opts;

  const { harnessName, task, model, provider, apiKey, log, allTools, toolToTwin } = ctx;

  let messages = buildInitialMessages(provider, systemPrompt, task, model);

  // Allow callers to modify initial messages (e.g. react's triage instruction).
  // Invoked through `opts` so a hook that relies on `this` keeps working.
  if (opts.initMessages) {
    messages = opts.initMessages(provider, messages);
  }

  const state = {
    messages,
    stepsCompleted: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalToolCalls: 0,
    totalToolErrors: 0,
    consecutiveErrors: 0,
    initialNoToolRecoveries: 0,
    exitReason: 'max_steps',
    agentTrace: useTrace ? createAgentTrace() : null,
  };

  // Loaded on first use and then reused, instead of re-importing every step.
  let executeToolCalls = null;

  const runStart = Date.now();

  log.info('run_start', { task: task.slice(0, 200), maxSteps });

  try {
    for (let step = 0; step < maxSteps; step++) {
      const stepNumber = step + 1;
      state.stepsCompleted = stepNumber;
      const iterStart = Date.now();

      // Select tools for this step (default: all tools)
      const stepTools = selectTools ? selectTools(ctx, state) : allTools;
      const providerTools = formatToolsForProvider(provider, stepTools);

      // Call the LLM (optionally with retry)
      log.llmCall(stepNumber);
      let response;
      try {
        const llmCall = () => callLlmWithMessages(provider, model, apiKey, state.messages, providerTools);
        response = useRetry ? await withRetry(llmCall, retryCount) : await llmCall();
      } catch (err) {
        const msg = err?.message ?? String(err);
        log.error('llm_call_failed', { step: stepNumber, error: msg });
        process.stderr.write(`[${harnessName}] LLM API error: ${msg.slice(0, 500)}\n`);
        state.exitReason = 'llm_error';
        break;
      }

      const iterDurationMs = Date.now() - iterStart;
      state.totalInputTokens += response.usage.inputTokens;
      state.totalOutputTokens += response.usage.outputTokens;

      const toolCalls = parseToolCalls(provider, response);
      const hasToolCalls = !!toolCalls;
      const stopReason = getStopReason(provider, response);
      log.llmResponse(stepNumber, iterDurationMs, hasToolCalls, stopReason);
      log.tokenUsage(stepNumber, response.usage, {
        inputTokens: state.totalInputTokens,
        outputTokens: state.totalOutputTokens,
      });

      const thinking = getThinkingContent(provider, response);
      const text = getResponseText(provider, response);

      state.messages = appendAssistantResponse(provider, state.messages, response);

      /** @type {StepResult} */
      const stepResult = { step: stepNumber, response, toolCalls, thinking, text, iterDurationMs, stopReason };

      if (!toolCalls) {
        // Record trace for no-tool-call steps
        if (state.agentTrace) {
          state.agentTrace.addStep({ step: stepNumber, thinking, text, toolCalls: [], durationMs: iterDurationMs });
        }
        if (text) {
          process.stderr.write(`[${harnessName}] Step ${stepNumber}: ${text.slice(0, 200)}\n`);
        }

        // Initial no-tool recovery: reprompt while the model has not used any
        // tool yet and the reprompt budget is not exhausted.
        const shouldRecoverInitial = state.totalToolCalls === 0
          && maxInitialNoToolRecoveries > 0
          && state.initialNoToolRecoveries < maxInitialNoToolRecoveries;
        if (shouldRecoverInitial) {
          state.initialNoToolRecoveries++;
          state.messages = appendUserInstruction(
            provider,
            state.messages,
            'You must use tools to make progress. ' +
              'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
              'Start by gathering concrete evidence from the systems, then execute the required actions.',
          );
          log.info('no_tool_calls_reprompt', {
            step: stepNumber,
            attempt: state.initialNoToolRecoveries,
          });
          continue;
        }

        // Harness-specific no-tool-call handling
        if (onNoToolCalls) {
          const directive = onNoToolCalls(ctx, state, stepResult);
          if (directive === 'continue') continue;
        }

        state.exitReason = state.totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
        break;
      }

      state.initialNoToolRecoveries = 0;

      // Pre-execution hook (e.g. react's repo content guard)
      if (onBeforeToolExecution) {
        const directive = onBeforeToolExecution(ctx, state, stepResult);
        if (directive === 'continue') continue;
        if (directive === 'break') break;
      }

      // Execute tool calls
      if (!executeToolCalls) {
        ({ executeToolCalls } = await import('./tool-executor.mjs'));
      }
      const { results, bailout } = await executeToolCalls(toolCalls, {
        toolToTwin,
        harnessName,
        step: stepNumber,
        log,
        counters: state,
        maxConsecutiveErrors,
        onSuccess: onToolSuccess,
      });

      // Record trace
      if (state.agentTrace) {
        state.agentTrace.addStep({
          step: stepNumber,
          thinking,
          text,
          toolCalls: toolCalls.map((tc) => ({ name: tc.name, arguments: tc.arguments })),
          durationMs: iterDurationMs,
        });
      }

      if (bailout) {
        state.exitReason = 'consecutive_errors';
        break;
      }

      // Append tool results to conversation
      state.messages = appendToolResults(provider, state.messages, toolCalls, results);

      // Post-execution hook
      if (onAfterToolExecution) {
        onAfterToolExecution(ctx, state, stepResult);
      }
    }
  } catch (err) {
    // Anything thrown out of the loop body (a hook, the tool executor, a
    // malformed response) would otherwise be reported with the initial
    // 'max_steps' reason. Record the failure, then let the error propagate.
    if (state.exitReason === 'max_steps') {
      state.exitReason = 'unexpected_error';
    }
    throw err;
  } finally {
    const totalTimeMs = Date.now() - runStart;

    log.summary({
      iterations: state.stepsCompleted,
      totalInputTokens: state.totalInputTokens,
      totalOutputTokens: state.totalOutputTokens,
      totalTimeMs,
      toolCallCount: state.totalToolCalls,
      toolErrorCount: state.totalToolErrors,
      exitReason: state.exitReason,
    });

    writeMetrics({
      inputTokens: state.totalInputTokens,
      outputTokens: state.totalOutputTokens,
      llmCallCount: state.stepsCompleted,
      toolCallCount: state.totalToolCalls,
      toolErrorCount: state.totalToolErrors,
      totalTimeMs,
      exitReason: state.exitReason,
      provider,
      model,
    });

    if (state.agentTrace) {
      state.agentTrace.flush();
    }

    process.stderr.write(
      `\n[${harnessName}] Summary: ${state.stepsCompleted} iterations, ${state.totalToolCalls} tool calls ` +
        `(${state.totalToolErrors} errors), ${state.totalInputTokens} input tokens, ` +
        `${state.totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
    );

    // An LLM failure is fatal for the harness: exit non-zero once the summary
    // and metrics above have been written.
    if (state.exitReason === 'llm_error') {
      process.exit(1);
    }
  }
}
371
-
372
- // Re-export for convenience — harnesses that need to build custom initial messages
373
- export { appendUserInstruction };