@archal/cli 0.9.1 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (494) hide show
  1. package/LICENSE +8 -0
  2. package/README.md +9 -14
  3. package/dist/index.cjs +35736 -30817
  4. package/package.json +32 -23
  5. package/twin-assets/google-workspace/fidelity.json +9 -0
  6. package/twin-assets/jira/fidelity.json +17 -17
  7. package/twin-assets/ramp/fidelity.json +22 -0
  8. package/twin-assets/slack/fidelity.json +6 -7
  9. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  10. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  11. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  12. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  13. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  14. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  15. package/dist/harnesses/_lib/logging.mjs +0 -176
  16. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  17. package/dist/harnesses/_lib/metrics.mjs +0 -34
  18. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  19. package/dist/harnesses/_lib/providers.mjs +0 -39
  20. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  21. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  22. package/dist/harnesses/hardened/SAFETY.md +0 -53
  23. package/dist/harnesses/hardened/agent.mjs +0 -57
  24. package/dist/harnesses/hardened/archal-harness.json +0 -23
  25. package/dist/harnesses/hardened/package.json +0 -12
  26. package/dist/harnesses/naive/agent.mjs +0 -37
  27. package/dist/harnesses/naive/archal-harness.json +0 -21
  28. package/dist/harnesses/naive/package.json +0 -12
  29. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  30. package/dist/harnesses/openclaw/SOUL.md +0 -12
  31. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  32. package/dist/harnesses/openclaw/agent.mjs +0 -229
  33. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  34. package/dist/harnesses/react/agent.mjs +0 -233
  35. package/dist/harnesses/react/archal-harness.json +0 -22
  36. package/dist/harnesses/react/package.json +0 -12
  37. package/dist/harnesses/react/tool-selection.mjs +0 -66
  38. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  39. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  40. package/dist/harnesses/zero-shot/package.json +0 -12
  41. package/dist/package.json +0 -72
  42. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  43. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  44. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  45. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  46. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  47. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  48. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  49. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  50. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  51. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  52. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  53. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  54. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  55. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  56. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  57. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  58. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  59. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  60. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  61. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  62. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  63. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  64. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  65. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  66. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  67. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  68. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  69. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  70. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  71. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  72. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  73. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  74. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  75. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  76. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  77. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  78. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  79. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  80. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  81. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  82. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  83. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  84. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  85. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  86. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  87. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  88. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  89. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  90. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  91. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  92. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  93. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  94. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  95. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  96. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  97. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  98. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  99. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  100. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  101. package/dist/twin-assets/github/fidelity.json +0 -13
  102. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  103. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  104. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  105. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  106. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  107. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  108. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  109. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  110. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  111. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  112. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  113. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  114. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  115. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  116. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  117. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  118. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  119. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  120. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  121. package/dist/twin-assets/github/seeds/empty.json +0 -33
  122. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  123. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  124. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  125. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  126. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  127. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  128. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  129. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  130. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  131. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  132. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  133. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  134. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  135. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  136. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  137. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  138. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  139. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  140. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  141. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  142. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  143. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  144. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  146. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  147. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  148. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  149. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  150. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  151. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  152. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  153. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  154. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  155. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  156. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  157. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  158. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  159. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  160. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  161. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  162. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  163. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  164. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  165. package/dist/twin-assets/jira/fidelity.json +0 -40
  166. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  167. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  168. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  169. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  170. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  171. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  172. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  173. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  174. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  175. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  176. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  177. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  178. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  179. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  180. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  181. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  182. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  183. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  184. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  185. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  186. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  187. package/dist/twin-assets/linear/fidelity.json +0 -13
  188. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  189. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  190. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  191. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  192. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  193. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  194. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  195. package/dist/twin-assets/slack/fidelity.json +0 -14
  196. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  197. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  198. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  199. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  200. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  201. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  202. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  203. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  204. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  205. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  206. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  207. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  208. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  209. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  210. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  211. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  212. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  213. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  214. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  215. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  216. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  217. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  218. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  219. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  220. package/dist/twin-assets/stripe/fidelity.json +0 -22
  221. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  222. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  223. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  224. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  225. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  226. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  227. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  228. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  229. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  230. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  231. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  232. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  233. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  234. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  235. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  236. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  237. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  238. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  239. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  240. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  241. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  242. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  243. package/dist/twin-assets/supabase/fidelity.json +0 -13
  244. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  245. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  246. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  247. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  248. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  249. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  250. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  251. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  252. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  253. package/dist/twin-assets/telegram/fidelity.json +0 -19
  254. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  255. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  256. package/harnesses/_lib/agent-trace.mjs +0 -57
  257. package/harnesses/_lib/env-utils.mjs +0 -23
  258. package/harnesses/_lib/harness-runner.mjs +0 -373
  259. package/harnesses/_lib/llm-call.mjs +0 -411
  260. package/harnesses/_lib/llm-config.mjs +0 -209
  261. package/harnesses/_lib/llm-response.mjs +0 -490
  262. package/harnesses/_lib/logging.mjs +0 -176
  263. package/harnesses/_lib/mcp-client.mjs +0 -85
  264. package/harnesses/_lib/metrics.mjs +0 -34
  265. package/harnesses/_lib/model-configs.mjs +0 -521
  266. package/harnesses/_lib/providers.mjs +0 -39
  267. package/harnesses/_lib/rest-client.mjs +0 -150
  268. package/harnesses/_lib/tool-executor.mjs +0 -77
  269. package/harnesses/hardened/SAFETY.md +0 -53
  270. package/harnesses/hardened/agent.mjs +0 -57
  271. package/harnesses/hardened/archal-harness.json +0 -23
  272. package/harnesses/hardened/package.json +0 -12
  273. package/harnesses/naive/agent.mjs +0 -37
  274. package/harnesses/naive/archal-harness.json +0 -21
  275. package/harnesses/naive/package.json +0 -12
  276. package/harnesses/openclaw/AGENTS.md +0 -27
  277. package/harnesses/openclaw/SOUL.md +0 -12
  278. package/harnesses/openclaw/TOOLS.md +0 -20
  279. package/harnesses/openclaw/agent.mjs +0 -229
  280. package/harnesses/openclaw/archal-harness.json +0 -28
  281. package/harnesses/react/agent.mjs +0 -233
  282. package/harnesses/react/archal-harness.json +0 -22
  283. package/harnesses/react/package.json +0 -12
  284. package/harnesses/react/tool-selection.mjs +0 -66
  285. package/harnesses/zero-shot/agent.mjs +0 -31
  286. package/harnesses/zero-shot/archal-harness.json +0 -21
  287. package/harnesses/zero-shot/package.json +0 -12
  288. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  289. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  290. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  291. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  292. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  293. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  294. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  295. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  296. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  297. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  298. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  299. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  300. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  301. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  302. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  303. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  304. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  305. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  306. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  307. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  308. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  309. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  310. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  311. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  312. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  313. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  314. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  315. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  316. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  317. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  318. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  319. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  320. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  321. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  322. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  323. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  324. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  325. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  326. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  327. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  328. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  329. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  330. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  331. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  332. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  333. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  334. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  335. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  336. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  337. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  338. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  339. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  340. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  341. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  342. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  343. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  344. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  345. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  346. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  347. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  348. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  349. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  350. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  351. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  352. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  353. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  354. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  355. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  356. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  357. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  358. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  359. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  360. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  361. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  362. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  363. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  364. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  365. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  366. package/twin-assets/github/seeds/empty.json +0 -33
  367. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  368. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  369. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  370. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  371. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  372. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  373. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  374. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  375. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  376. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  377. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  378. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  379. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  380. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  381. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  382. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  383. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  384. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  385. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  386. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  387. package/twin-assets/github/seeds/rate-limited.json +0 -41
  388. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  389. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  390. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  391. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  392. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  393. package/twin-assets/github/seeds/small-project.json +0 -833
  394. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  395. package/twin-assets/github/seeds/stale-issues.json +0 -375
  396. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  397. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  398. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  399. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  400. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  401. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  402. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  403. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  404. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  405. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  406. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  407. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  408. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  409. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  410. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  411. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  412. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  413. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  414. package/twin-assets/jira/seeds/empty.json +0 -124
  415. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  416. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  417. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  418. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  419. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  420. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  421. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  422. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  423. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  424. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  425. package/twin-assets/jira/seeds/small-project.json +0 -246
  426. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  427. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  428. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  429. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  430. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  431. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  432. package/twin-assets/linear/seeds/empty.json +0 -171
  433. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  434. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  435. package/twin-assets/linear/seeds/harvested.json +0 -331
  436. package/twin-assets/linear/seeds/small-team.json +0 -584
  437. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  438. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  439. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  440. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  441. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  442. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  443. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  444. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  445. package/twin-assets/slack/seeds/empty.json +0 -136
  446. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  447. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  448. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  449. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  450. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  451. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  452. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  453. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  454. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  455. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  456. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  457. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  458. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  459. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  460. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  461. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  462. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  463. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  464. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  465. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  466. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  467. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  468. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  469. package/twin-assets/stripe/seeds/empty.json +0 -31
  470. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  471. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  472. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  473. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  474. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  475. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  476. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  477. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  478. package/twin-assets/stripe/seeds/small-business.json +0 -607
  479. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  480. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  481. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  482. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  483. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  484. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  485. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  486. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  487. package/twin-assets/supabase/seeds/empty.sql +0 -2
  488. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  489. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  490. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  491. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  492. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  493. package/twin-assets/telegram/seeds/empty.json +0 -1
  494. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,373 +0,0 @@
1
- /**
2
- * Shared harness scaffolding for bundled agent files.
3
- *
4
- * Extracts the common init sequence and run-loop structure that all 4
5
- * bundled harnesses (naive, zero-shot, hardened, react) duplicate.
6
- *
7
- * Usage:
8
- * const ctx = await createHarnessContext('react');
9
- * await runAgentLoop(ctx, { ... });
10
- */
11
- import { collectTwinUrls, discoverAllTools } from './rest-client.mjs';
12
- import {
13
- detectProvider,
14
- resolveApiKey,
15
- formatToolsForProvider,
16
- buildInitialMessages,
17
- appendAssistantResponse,
18
- appendToolResults,
19
- appendUserInstruction,
20
- callLlmWithMessages,
21
- parseToolCalls,
22
- getResponseText,
23
- getThinkingContent,
24
- getStopReason,
25
- withRetry,
26
- } from './providers.mjs';
27
- import { createLogger } from './logging.mjs';
28
- import { writeMetrics } from './metrics.mjs';
29
- import { createAgentTrace } from './agent-trace.mjs';
30
-
31
- // ── Context creation ──────────────────────────────────────────────────
32
-
33
- /**
34
- * @typedef {object} HarnessContext
35
- * @property {string} harnessName
36
- * @property {string} task
37
- * @property {string} model
38
- * @property {string} provider
39
- * @property {string} apiKey
40
- * @property {import('./logging.mjs').Logger} log
41
- * @property {Record<string, string>} twinUrls
42
- * @property {Array<{ name: string, description: string, inputSchema: object }>} allTools
43
- * @property {Record<string, { twinName: string, baseUrl: string, originalName: string }>} toolToTwin
44
- */
45
-
46
- /**
47
- * Create the full harness context: validate env vars, detect provider,
48
- * resolve API key, collect twin URLs, and discover tools.
49
- *
50
- * Exits with code 1 on missing env vars or unreachable twins.
51
- *
52
- * @param {string} harnessName
53
- * @returns {Promise<HarnessContext>}
54
- */
55
- export async function createHarnessContext(harnessName) {
56
- let task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
57
- // If the task value is a file path inside the mounted config dir,
58
- // read the actual task content from the file. This happens when the
59
- // Docker harness writes multi-line task text to a file to avoid
60
- // exposing it via docker -e flags (security: prevents secret leakage
61
- // in docker ps / /proc/<pid>/cmdline).
62
- if (task.startsWith('/archal-out/') || task.startsWith(process.env['ARCHAL_ENGINE_TASK_FILE'] ? '/' : '\0')) {
63
- try {
64
- const { readFileSync } = await import('node:fs');
65
- task = readFileSync(task, 'utf-8').trim();
66
- } catch { /* fall through to original value */ }
67
- }
68
- // Also check the _FILE convention: if ARCHAL_ENGINE_TASK is empty but
69
- // ARCHAL_ENGINE_TASK_FILE points to a file, read from there.
70
- if (!task && process.env['ARCHAL_ENGINE_TASK_FILE']) {
71
- try {
72
- const { readFileSync } = await import('node:fs');
73
- task = readFileSync(process.env['ARCHAL_ENGINE_TASK_FILE'], 'utf-8').trim();
74
- } catch { /* fall through */ }
75
- }
76
- const model = process.env['ARCHAL_ENGINE_MODEL'];
77
-
78
- if (!task) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
79
- if (!model) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
80
-
81
- const provider = detectProvider(model);
82
- const apiKey = resolveApiKey(provider);
83
- const log = createLogger({ harness: harnessName, model, provider });
84
-
85
- const twinUrls = collectTwinUrls();
86
- if (Object.keys(twinUrls).length === 0) {
87
- console.error(`[${harnessName}] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.`);
88
- process.exit(1);
89
- }
90
-
91
- const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
92
- if (allTools.length === 0) {
93
- console.error(`[${harnessName}] No tools discovered from twins. Twin endpoints may be unreachable.`);
94
- process.exit(1);
95
- }
96
-
97
- return { harnessName, task, model, provider, apiKey, log, twinUrls, allTools, toolToTwin };
98
- }
99
-
100
- // ── Run loop ──────────────────────────────────────────────────────────
101
-
102
- /**
103
- * @typedef {object} RunLoopOptions
104
- * @property {string} systemPrompt - System prompt text (empty string for none)
105
- * @property {number} maxSteps - Maximum iteration count
106
- * @property {boolean} [useRetry=false] - Wrap LLM calls in withRetry
107
- * @property {number} [retryCount=4] - Max retries when useRetry is true
108
- * @property {boolean} [useTrace=false] - Record agent trace
109
- * @property {number} [maxConsecutiveErrors=0] - Bail threshold (0 = no limit)
110
- * @property {number} [maxInitialNoToolRecoveries=0] - Reprompt attempts when model doesn't call tools initially
111
- * @property {(ctx: HarnessContext, state: RunState) => Array} [selectTools] -
112
- * Per-step tool selection function. Receives context and current state,
113
- * returns the MCP tools array for this step. Default: use all tools.
114
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | 'break' | void} [onBeforeToolExecution] -
115
- * Hook called after parsing tool calls but before executing them.
116
- * Return 'continue' to skip tool execution and loop, 'break' to stop.
117
- * @property {(provider: string, messages: Array|object) => Array|object} [initMessages] -
118
- * Optional post-init hook to modify the initial messages array before the
119
- * run loop starts (e.g. to prepend a triage instruction).
120
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => void} [onAfterToolExecution] -
121
- * Hook called after tool results are appended. Return value is ignored.
122
- * @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | void} [onNoToolCalls] -
123
- * Hook called when the model responds without tool calls. Return
124
- * 'continue' to add instructions and continue the loop.
125
- * @property {(tc: { name: string, arguments: object }) => void} [onToolSuccess] -
126
- * Called after each successful tool call.
127
- */
128
-
129
- /**
130
- * @typedef {object} RunState
131
- * Mutable state tracked across loop iterations.
132
- * @property {Array|object} messages
133
- * @property {number} stepsCompleted
134
- * @property {number} totalInputTokens
135
- * @property {number} totalOutputTokens
136
- * @property {number} totalToolCalls
137
- * @property {number} totalToolErrors
138
- * @property {number} consecutiveErrors
139
- * @property {number} initialNoToolRecoveries
140
- * @property {string} exitReason
141
- * @property {import('./agent-trace.mjs').ReturnType<typeof createAgentTrace>|null} agentTrace
142
- */
143
-
144
- /**
145
- * @typedef {object} StepResult
146
- * @property {number} step - 1-indexed step number
147
- * @property {object} response - Raw LLM response wrapper
148
- * @property {Array|null} toolCalls - Parsed tool calls or null
149
- * @property {string|null} thinking - Model thinking content
150
- * @property {string|null} text - Model text content
151
- * @property {number} iterDurationMs
152
- * @property {string|null} stopReason
153
- */
154
-
155
- /**
156
- * Run the agent loop with shared metrics, logging, and tool execution.
157
- *
158
- * @param {HarnessContext} ctx
159
- * @param {RunLoopOptions} opts
160
- */
161
- export async function runAgentLoop(ctx, opts) {
162
- const {
163
- systemPrompt,
164
- maxSteps,
165
- useRetry = false,
166
- retryCount = 4,
167
- useTrace = false,
168
- maxConsecutiveErrors = 0,
169
- maxInitialNoToolRecoveries = 0,
170
- selectTools,
171
- onBeforeToolExecution,
172
- onAfterToolExecution,
173
- onNoToolCalls,
174
- onToolSuccess,
175
- } = opts;
176
-
177
- const { harnessName, task, model, provider, apiKey, log, allTools, toolToTwin } = ctx;
178
-
179
- let messages = buildInitialMessages(provider, systemPrompt, task, model);
180
-
181
- // Allow callers to modify initial messages (e.g. react's triage instruction)
182
- if (opts.initMessages) {
183
- messages = opts.initMessages(provider, messages);
184
- }
185
-
186
- const state = {
187
- messages,
188
- stepsCompleted: 0,
189
- totalInputTokens: 0,
190
- totalOutputTokens: 0,
191
- totalToolCalls: 0,
192
- totalToolErrors: 0,
193
- consecutiveErrors: 0,
194
- initialNoToolRecoveries: 0,
195
- exitReason: 'max_steps',
196
- agentTrace: useTrace ? createAgentTrace() : null,
197
- };
198
-
199
- const runStart = Date.now();
200
-
201
- log.info('run_start', { task: task.slice(0, 200), maxSteps });
202
-
203
- try {
204
- for (let step = 0; step < maxSteps; step++) {
205
- state.stepsCompleted = step + 1;
206
- const iterStart = Date.now();
207
-
208
- // Select tools for this step (default: all tools)
209
- const stepTools = selectTools ? selectTools(ctx, state) : allTools;
210
- const providerTools = formatToolsForProvider(provider, stepTools);
211
-
212
- // Call the LLM (optionally with retry)
213
- log.llmCall(step + 1);
214
- let response;
215
- try {
216
- const llmCall = () => callLlmWithMessages(provider, model, apiKey, state.messages, providerTools);
217
- response = useRetry ? await withRetry(llmCall, retryCount) : await llmCall();
218
- } catch (err) {
219
- const msg = err?.message ?? String(err);
220
- log.error('llm_call_failed', { step: step + 1, error: msg });
221
- process.stderr.write(`[${harnessName}] LLM API error: ${msg.slice(0, 500)}\n`);
222
- state.exitReason = 'llm_error';
223
- break;
224
- }
225
-
226
- const iterDurationMs = Date.now() - iterStart;
227
- state.totalInputTokens += response.usage.inputTokens;
228
- state.totalOutputTokens += response.usage.outputTokens;
229
-
230
- const toolCalls = parseToolCalls(provider, response);
231
- const hasToolCalls = !!toolCalls;
232
- const stopReason = getStopReason(provider, response);
233
- log.llmResponse(step + 1, iterDurationMs, hasToolCalls, stopReason);
234
- log.tokenUsage(step + 1, response.usage, {
235
- inputTokens: state.totalInputTokens,
236
- outputTokens: state.totalOutputTokens,
237
- });
238
-
239
- const thinking = getThinkingContent(provider, response);
240
- const text = getResponseText(provider, response);
241
-
242
- state.messages = appendAssistantResponse(provider, state.messages, response);
243
-
244
- /** @type {StepResult} */
245
- const stepResult = { step: step + 1, response, toolCalls, thinking, text, iterDurationMs, stopReason };
246
-
247
- if (!toolCalls) {
248
- // Record trace for no-tool-call steps
249
- if (state.agentTrace) {
250
- state.agentTrace.addStep({ step: step + 1, thinking, text, toolCalls: [], durationMs: iterDurationMs });
251
- }
252
- if (text) {
253
- process.stderr.write(`[${harnessName}] Step ${step + 1}: ${text.slice(0, 200)}\n`);
254
- }
255
-
256
- // Initial no-tool recovery (reprompt)
257
- const shouldRecoverInitial = state.totalToolCalls === 0
258
- && maxInitialNoToolRecoveries > 0
259
- && state.initialNoToolRecoveries < maxInitialNoToolRecoveries;
260
- if (shouldRecoverInitial) {
261
- state.initialNoToolRecoveries++;
262
- state.messages = appendUserInstruction(
263
- provider,
264
- state.messages,
265
- 'You must use tools to make progress. ' +
266
- 'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
267
- 'Start by gathering concrete evidence from the systems, then execute the required actions.',
268
- );
269
- log.info('no_tool_calls_reprompt', {
270
- step: step + 1,
271
- attempt: state.initialNoToolRecoveries,
272
- });
273
- continue;
274
- }
275
-
276
- // Harness-specific no-tool-call handling
277
- if (onNoToolCalls) {
278
- const directive = onNoToolCalls(ctx, state, stepResult);
279
- if (directive === 'continue') continue;
280
- }
281
-
282
- state.exitReason = state.totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
283
- break;
284
- }
285
-
286
- state.initialNoToolRecoveries = 0;
287
-
288
- // Pre-execution hook (e.g. react's repo content guard)
289
- if (onBeforeToolExecution) {
290
- const directive = onBeforeToolExecution(ctx, state, stepResult);
291
- if (directive === 'continue') continue;
292
- if (directive === 'break') break;
293
- }
294
-
295
- // Execute tool calls
296
- const { executeToolCalls } = await import('./tool-executor.mjs');
297
- const { results, bailout } = await executeToolCalls(toolCalls, {
298
- toolToTwin,
299
- harnessName,
300
- step: step + 1,
301
- log,
302
- counters: state,
303
- maxConsecutiveErrors,
304
- onSuccess: onToolSuccess,
305
- });
306
-
307
- // Record trace
308
- if (state.agentTrace) {
309
- state.agentTrace.addStep({
310
- step: step + 1,
311
- thinking,
312
- text,
313
- toolCalls: toolCalls.map((tc) => ({ name: tc.name, arguments: tc.arguments })),
314
- durationMs: iterDurationMs,
315
- });
316
- }
317
-
318
- if (bailout) {
319
- state.exitReason = 'consecutive_errors';
320
- break;
321
- }
322
-
323
- // Append tool results to conversation
324
- state.messages = appendToolResults(provider, state.messages, toolCalls, results);
325
-
326
- // Post-execution hook
327
- if (onAfterToolExecution) {
328
- onAfterToolExecution(ctx, state, stepResult);
329
- }
330
- }
331
- } finally {
332
- const totalTimeMs = Date.now() - runStart;
333
-
334
- log.summary({
335
- iterations: state.stepsCompleted,
336
- totalInputTokens: state.totalInputTokens,
337
- totalOutputTokens: state.totalOutputTokens,
338
- totalTimeMs,
339
- toolCallCount: state.totalToolCalls,
340
- toolErrorCount: state.totalToolErrors,
341
- exitReason: state.exitReason,
342
- });
343
-
344
- writeMetrics({
345
- inputTokens: state.totalInputTokens,
346
- outputTokens: state.totalOutputTokens,
347
- llmCallCount: state.stepsCompleted,
348
- toolCallCount: state.totalToolCalls,
349
- toolErrorCount: state.totalToolErrors,
350
- totalTimeMs,
351
- exitReason: state.exitReason,
352
- provider,
353
- model,
354
- });
355
-
356
- if (state.agentTrace) {
357
- state.agentTrace.flush();
358
- }
359
-
360
- process.stderr.write(
361
- `\n[${harnessName}] Summary: ${state.stepsCompleted} iterations, ${state.totalToolCalls} tool calls ` +
362
- `(${state.totalToolErrors} errors), ${state.totalInputTokens} input tokens, ` +
363
- `${state.totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
364
- );
365
-
366
- if (state.exitReason === 'llm_error') {
367
- process.exit(1);
368
- }
369
- }
370
- }
371
-
372
- // Re-export for convenience — harnesses that need to build custom initial messages
373
- export { appendUserInstruction };