@archal/cli 0.9.1 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (494) hide show
  1. package/LICENSE +8 -0
  2. package/README.md +9 -14
  3. package/dist/index.cjs +35736 -30817
  4. package/package.json +32 -23
  5. package/twin-assets/google-workspace/fidelity.json +9 -0
  6. package/twin-assets/jira/fidelity.json +17 -17
  7. package/twin-assets/ramp/fidelity.json +22 -0
  8. package/twin-assets/slack/fidelity.json +6 -7
  9. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  10. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  11. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  12. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  13. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  14. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  15. package/dist/harnesses/_lib/logging.mjs +0 -176
  16. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  17. package/dist/harnesses/_lib/metrics.mjs +0 -34
  18. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  19. package/dist/harnesses/_lib/providers.mjs +0 -39
  20. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  21. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  22. package/dist/harnesses/hardened/SAFETY.md +0 -53
  23. package/dist/harnesses/hardened/agent.mjs +0 -57
  24. package/dist/harnesses/hardened/archal-harness.json +0 -23
  25. package/dist/harnesses/hardened/package.json +0 -12
  26. package/dist/harnesses/naive/agent.mjs +0 -37
  27. package/dist/harnesses/naive/archal-harness.json +0 -21
  28. package/dist/harnesses/naive/package.json +0 -12
  29. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  30. package/dist/harnesses/openclaw/SOUL.md +0 -12
  31. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  32. package/dist/harnesses/openclaw/agent.mjs +0 -229
  33. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  34. package/dist/harnesses/react/agent.mjs +0 -233
  35. package/dist/harnesses/react/archal-harness.json +0 -22
  36. package/dist/harnesses/react/package.json +0 -12
  37. package/dist/harnesses/react/tool-selection.mjs +0 -66
  38. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  39. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  40. package/dist/harnesses/zero-shot/package.json +0 -12
  41. package/dist/package.json +0 -72
  42. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  43. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  44. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  45. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  46. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  47. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  48. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  49. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  50. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  51. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  52. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  53. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  54. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  55. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  56. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  57. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  58. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  59. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  60. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  61. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  62. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  63. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  64. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  65. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  66. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  67. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  68. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  69. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  70. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  71. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  72. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  73. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  74. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  75. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  76. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  77. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  78. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  79. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  80. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  81. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  82. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  83. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  84. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  85. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  86. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  87. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  88. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  89. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  90. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  91. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  92. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  93. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  94. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  95. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  96. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  97. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  98. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  99. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  100. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  101. package/dist/twin-assets/github/fidelity.json +0 -13
  102. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  103. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  104. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  105. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  106. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  107. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  108. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  109. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  110. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  111. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  112. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  113. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  114. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  115. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  116. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  117. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  118. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  119. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  120. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  121. package/dist/twin-assets/github/seeds/empty.json +0 -33
  122. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  123. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  124. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  125. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  126. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  127. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  128. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  129. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  130. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  131. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  132. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  133. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  134. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  135. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  136. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  137. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  138. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  139. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  140. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  141. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  142. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  143. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  144. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  146. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  147. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  148. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  149. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  150. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  151. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  152. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  153. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  154. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  155. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  156. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  157. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  158. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  159. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  160. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  161. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  162. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  163. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  164. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  165. package/dist/twin-assets/jira/fidelity.json +0 -40
  166. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  167. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  168. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  169. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  170. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  171. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  172. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  173. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  174. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  175. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  176. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  177. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  178. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  179. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  180. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  181. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  182. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  183. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  184. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  185. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  186. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  187. package/dist/twin-assets/linear/fidelity.json +0 -13
  188. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  189. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  190. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  191. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  192. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  193. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  194. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  195. package/dist/twin-assets/slack/fidelity.json +0 -14
  196. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  197. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  198. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  199. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  200. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  201. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  202. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  203. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  204. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  205. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  206. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  207. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  208. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  209. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  210. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  211. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  212. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  213. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  214. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  215. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  216. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  217. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  218. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  219. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  220. package/dist/twin-assets/stripe/fidelity.json +0 -22
  221. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  222. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  223. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  224. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  225. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  226. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  227. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  228. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  229. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  230. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  231. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  232. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  233. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  234. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  235. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  236. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  237. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  238. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  239. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  240. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  241. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  242. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  243. package/dist/twin-assets/supabase/fidelity.json +0 -13
  244. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  245. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  246. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  247. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  248. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  249. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  250. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  251. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  252. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  253. package/dist/twin-assets/telegram/fidelity.json +0 -19
  254. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  255. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  256. package/harnesses/_lib/agent-trace.mjs +0 -57
  257. package/harnesses/_lib/env-utils.mjs +0 -23
  258. package/harnesses/_lib/harness-runner.mjs +0 -373
  259. package/harnesses/_lib/llm-call.mjs +0 -411
  260. package/harnesses/_lib/llm-config.mjs +0 -209
  261. package/harnesses/_lib/llm-response.mjs +0 -490
  262. package/harnesses/_lib/logging.mjs +0 -176
  263. package/harnesses/_lib/mcp-client.mjs +0 -85
  264. package/harnesses/_lib/metrics.mjs +0 -34
  265. package/harnesses/_lib/model-configs.mjs +0 -521
  266. package/harnesses/_lib/providers.mjs +0 -39
  267. package/harnesses/_lib/rest-client.mjs +0 -150
  268. package/harnesses/_lib/tool-executor.mjs +0 -77
  269. package/harnesses/hardened/SAFETY.md +0 -53
  270. package/harnesses/hardened/agent.mjs +0 -57
  271. package/harnesses/hardened/archal-harness.json +0 -23
  272. package/harnesses/hardened/package.json +0 -12
  273. package/harnesses/naive/agent.mjs +0 -37
  274. package/harnesses/naive/archal-harness.json +0 -21
  275. package/harnesses/naive/package.json +0 -12
  276. package/harnesses/openclaw/AGENTS.md +0 -27
  277. package/harnesses/openclaw/SOUL.md +0 -12
  278. package/harnesses/openclaw/TOOLS.md +0 -20
  279. package/harnesses/openclaw/agent.mjs +0 -229
  280. package/harnesses/openclaw/archal-harness.json +0 -28
  281. package/harnesses/react/agent.mjs +0 -233
  282. package/harnesses/react/archal-harness.json +0 -22
  283. package/harnesses/react/package.json +0 -12
  284. package/harnesses/react/tool-selection.mjs +0 -66
  285. package/harnesses/zero-shot/agent.mjs +0 -31
  286. package/harnesses/zero-shot/archal-harness.json +0 -21
  287. package/harnesses/zero-shot/package.json +0 -12
  288. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  289. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  290. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  291. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  292. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  293. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  294. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  295. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  296. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  297. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  298. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  299. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  300. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  301. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  302. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  303. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  304. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  305. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  306. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  307. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  308. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  309. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  310. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  311. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  312. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  313. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  314. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  315. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  316. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  317. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  318. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  319. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  320. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  321. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  322. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  323. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  324. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  325. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  326. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  327. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  328. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  329. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  330. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  331. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  332. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  333. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  334. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  335. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  336. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  337. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  338. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  339. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  340. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  341. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  342. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  343. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  344. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  345. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  346. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  347. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  348. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  349. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  350. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  351. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  352. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  353. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  354. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  355. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  356. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  357. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  358. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  359. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  360. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  361. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  362. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  363. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  364. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  365. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  366. package/twin-assets/github/seeds/empty.json +0 -33
  367. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  368. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  369. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  370. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  371. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  372. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  373. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  374. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  375. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  376. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  377. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  378. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  379. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  380. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  381. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  382. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  383. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  384. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  385. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  386. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  387. package/twin-assets/github/seeds/rate-limited.json +0 -41
  388. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  389. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  390. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  391. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  392. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  393. package/twin-assets/github/seeds/small-project.json +0 -833
  394. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  395. package/twin-assets/github/seeds/stale-issues.json +0 -375
  396. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  397. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  398. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  399. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  400. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  401. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  402. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  403. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  404. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  405. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  406. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  407. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  408. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  409. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  410. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  411. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  412. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  413. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  414. package/twin-assets/jira/seeds/empty.json +0 -124
  415. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  416. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  417. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  418. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  419. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  420. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  421. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  422. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  423. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  424. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  425. package/twin-assets/jira/seeds/small-project.json +0 -246
  426. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  427. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  428. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  429. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  430. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  431. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  432. package/twin-assets/linear/seeds/empty.json +0 -171
  433. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  434. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  435. package/twin-assets/linear/seeds/harvested.json +0 -331
  436. package/twin-assets/linear/seeds/small-team.json +0 -584
  437. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  438. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  439. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  440. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  441. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  442. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  443. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  444. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  445. package/twin-assets/slack/seeds/empty.json +0 -136
  446. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  447. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  448. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  449. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  450. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  451. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  452. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  453. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  454. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  455. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  456. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  457. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  458. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  459. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  460. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  461. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  462. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  463. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  464. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  465. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  466. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  467. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  468. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  469. package/twin-assets/stripe/seeds/empty.json +0 -31
  470. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  471. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  472. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  473. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  474. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  475. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  476. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  477. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  478. package/twin-assets/stripe/seeds/small-business.json +0 -607
  479. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  480. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  481. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  482. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  483. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  484. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  485. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  486. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  487. package/twin-assets/supabase/seeds/empty.sql +0 -2
  488. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  489. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  490. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  491. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  492. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  493. package/twin-assets/telegram/seeds/empty.json +0 -1
  494. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,490 +0,0 @@
1
- /**
2
- * Response parsing, message building, and tool formatting for each provider.
3
- *
4
- * Extracted from providers.mjs — pure functions, no HTTP calls.
5
- */
6
-
7
- import { getModelCapabilities } from './model-configs.mjs';
8
-
9
- // ── Token usage tracking ────────────────────────────────────────────
10
-
11
- /**
12
- * @typedef {Object} TokenUsage
13
- * @property {number} inputTokens - Input/prompt tokens used
14
- * @property {number} outputTokens - Output/completion tokens used
15
- */
16
-
17
- /**
18
- * @typedef {Object} LlmResponse
19
- * @property {object} body - The raw API response body
20
- * @property {TokenUsage} usage - Token usage for this call
21
- */
22
-
23
/**
 * Normalise a provider's token counters into a {@link TokenUsage} record.
 *
 * Every counter the response does not carry is reported as 0, and an
 * unrecognised provider always yields zero usage.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} body - Raw API response body
 * @returns {TokenUsage}
 */
export function extractTokenUsage(provider, body) {
  if (provider === 'gemini') {
    const { promptTokenCount, candidatesTokenCount } = body.usageMetadata ?? {};
    return {
      inputTokens: promptTokenCount ?? 0,
      outputTokens: candidatesTokenCount ?? 0,
    };
  }

  if (provider === 'anthropic') {
    const counters = body.usage ?? {};
    return {
      inputTokens: counters.input_tokens ?? 0,
      outputTokens: counters.output_tokens ?? 0,
    };
  }

  if (provider === 'openai') {
    const counters = body.usage ?? {};
    // The Responses API reports input_tokens/output_tokens, while Chat
    // Completions reports prompt_tokens/completion_tokens; prefer the former.
    const inputTokens = counters.input_tokens ?? counters.prompt_tokens ?? 0;
    const outputTokens = counters.output_tokens ?? counters.completion_tokens ?? 0;
    return { inputTokens, outputTokens };
  }

  return { inputTokens: 0, outputTokens: 0 };
}
57
-
58
- // ── Tool formatting ─────────────────────────────────────────────────
59
-
60
/**
 * JSON Schema keywords that are removed outright before a tool schema is
 * handed to a provider. The same set is applied for every provider
 * (Gemini, OpenAI, Anthropic) so that all of them see the same schema.
 *
 * `anyOf` / `oneOf` / `allOf` are not listed here because they get special
 * treatment in {@link sanitizeSchemaForLLM}.
 */
const GEMINI_UNSUPPORTED_KEYWORDS = new Set([
  'additionalProperties', '$schema', 'propertyNames', 'patternProperties',
  'if', 'then', 'else', 'not', 'const', 'contentEncoding', 'contentMediaType',
]);

/**
 * Keywords whose value maps caller-chosen NAMES to sub-schemas. The names are
 * data, not schema keywords, so they must never be filtered.
 */
const SCHEMA_MAP_KEYWORDS = new Set(['properties', '$defs', 'definitions']);

/** Keywords whose value is literal data rather than a schema. */
const LITERAL_VALUE_KEYWORDS = new Set(['enum', 'default', 'examples']);

/**
 * Recursively strip JSON Schema keywords that LLM function-calling APIs reject.
 *
 * - Keywords in {@link GEMINI_UNSUPPORTED_KEYWORDS} are dropped.
 * - `anyOf` / `oneOf` / `allOf` with exactly one member are flattened into the
 *   parent schema; unions with any other shape are dropped.
 * - Names inside `properties` / `$defs` / `definitions` are always kept, even
 *   when they collide with a stripped keyword (for example a parameter called
 *   `not` or `if`); only the sub-schemas behind them are sanitized.
 * - Values of `enum` / `default` / `examples` are copied verbatim because they
 *   are payload data, not schemas.
 *
 * @param {unknown} schema - A schema node, an array of nodes, or a primitive
 * @returns {unknown} A sanitized copy; primitives and null are returned as-is
 */
function sanitizeSchemaForLLM(schema) {
  if (!schema || typeof schema !== 'object') return schema;
  if (Array.isArray(schema)) return schema.map(sanitizeSchemaForLLM);

  const cleaned = {};
  for (const [key, value] of Object.entries(schema)) {
    if (GEMINI_UNSUPPORTED_KEYWORDS.has(key)) continue;

    if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
      // Flatten single-element unions; anything else is skipped entirely.
      if (Array.isArray(value) && value.length === 1) {
        Object.assign(cleaned, sanitizeSchemaForLLM(value[0]));
      }
      continue;
    }

    if (LITERAL_VALUE_KEYWORDS.has(key)) {
      cleaned[key] = value;
      continue;
    }

    const isNameMap = SCHEMA_MAP_KEYWORDS.has(key)
      && value !== null
      && typeof value === 'object'
      && !Array.isArray(value);
    if (isNameMap) {
      // Sanitize each sub-schema but keep every name untouched.
      cleaned[key] = Object.fromEntries(
        Object.entries(value).map(([name, subSchema]) => [name, sanitizeSchemaForLLM(subSchema)]),
      );
      continue;
    }

    cleaned[key] = sanitizeSchemaForLLM(value);
  }
  return cleaned;
}
91
-
92
/**
 * Translate MCP tool descriptors into the tool-declaration shape of a provider.
 *
 * - gemini: one wrapper object holding a `functionDeclarations` array
 * - openai: flat `{ type: 'function', name, description, parameters }` entries
 * - anthropic: `{ name, description, input_schema }` entries
 * - anything else: the MCP tools are returned untouched
 *
 * Each input schema is passed through `sanitizeSchemaForLLM` first.
 *
 * @param {string} provider
 * @param {Array<{ name: string, description: string, inputSchema: object }>} mcpTools
 * @returns {Array<object>}
 */
export function formatToolsForProvider(provider, mcpTools) {
  if (provider === 'gemini') {
    const functionDeclarations = mcpTools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      parameters: sanitizeSchemaForLLM(tool.inputSchema),
    }));
    return [{ functionDeclarations }];
  }

  if (provider === 'openai') {
    return mcpTools.map((tool) => ({
      type: 'function',
      name: tool.name,
      description: tool.description,
      parameters: sanitizeSchemaForLLM(tool.inputSchema),
    }));
  }

  if (provider === 'anthropic') {
    return mcpTools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      input_schema: sanitizeSchemaForLLM(tool.inputSchema),
    }));
  }

  return mcpTools;
}
122
-
123
- // ── Response parsing ────────────────────────────────────────────────
124
-
125
/**
 * Extract the tool calls requested in a provider response.
 *
 * The second argument may be the raw response body or an LlmResponse wrapper;
 * when a wrapper is given its `body` is used.
 *
 * @param {string} provider
 * @param {object} responseOrWrapper
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The parsed calls, or null when the response contains none.
 */
export function parseToolCalls(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') return parseGeminiToolCalls(body);
  if (provider === 'anthropic') return parseAnthropicToolCalls(body);

  // 'openai' and every unrecognised provider share the OpenAI parser.
  return parseOpenAiToolCalls(body);
}
144
-
145
/**
 * Collect the function calls found in the first candidate of a Gemini response.
 *
 * The parsed parts carry no call identifier here, so one is synthesised as
 * `<name>-<timestamp>-<index>`. The index keeps IDs unique when the same
 * function is requested more than once in a single response; a bare
 * timestamp would give those calls identical IDs.
 *
 * @param {object} response - Raw Gemini response body
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The parsed calls, or null when there are none.
 */
function parseGeminiToolCalls(response) {
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  // Read the clock once so every ID of this response shares one timestamp.
  const timestamp = Date.now();
  const calls = parts
    .filter((part) => part.functionCall)
    .map((part, index) => ({
      id: `${part.functionCall.name}-${timestamp}-${index}`,
      name: part.functionCall.name,
      arguments: part.functionCall.args ?? {},
    }));
  return calls.length > 0 ? calls : null;
}
156
-
157
/**
 * Collect the `tool_use` blocks of an Anthropic response as tool calls.
 *
 * @param {object} response - Raw Anthropic response body
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The parsed calls, or null when the response has no `tool_use` block.
 */
function parseAnthropicToolCalls(response) {
  const blocks = response.content ?? [];
  const calls = blocks.flatMap((block) => {
    if (block.type !== 'tool_use') return [];
    return [{ id: block.id, name: block.name, arguments: block.input ?? {} }];
  });
  return calls.length === 0 ? null : calls;
}
168
-
169
/**
 * Collect the `function_call` items of an OpenAI Responses-style `output`
 * array as tool calls.
 *
 * Arguments may arrive as a JSON string or as an object. A string that is not
 * valid JSON is preserved under `{ _raw }` rather than discarded; a missing or
 * blank value becomes `{}`.
 *
 * The call ID is `call_id`, then `id`, then a synthesised
 * `<name>-<timestamp>-<index>`. The index keeps synthesised IDs unique when
 * several ID-less calls appear in one response.
 *
 * @param {object} response - Raw OpenAI response body
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The parsed calls, or null when there are none.
 */
function parseOpenAiToolCalls(response) {
  const output = Array.isArray(response.output) ? response.output : [];
  // Read the clock once so every fallback ID of this response shares it.
  const timestamp = Date.now();
  const calls = [];

  for (const [index, item] of output.entries()) {
    if (item?.type !== 'function_call') continue;

    let parsedArguments = {};
    if (typeof item.arguments === 'string' && item.arguments.trim()) {
      try {
        parsedArguments = JSON.parse(item.arguments);
      } catch {
        // Keep the unparseable payload so the caller can still inspect it.
        parsedArguments = { _raw: item.arguments };
      }
    } else if (item.arguments && typeof item.arguments === 'object') {
      parsedArguments = item.arguments;
    }

    calls.push({
      id: item.call_id ?? item.id ?? `${item.name ?? 'tool'}-${timestamp}-${index}`,
      name: item.name,
      arguments: parsedArguments,
    });
  }

  return calls.length > 0 ? calls : null;
}
195
-
196
/**
 * Return the user-visible text of a provider response, or null if it has none.
 *
 * The second argument may be the raw response body or an LlmResponse wrapper;
 * when a wrapper is given its `body` is used.
 *
 * @param {string} provider
 * @param {object} responseOrWrapper
 * @returns {string | null}
 */
export function getResponseText(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const parts = body.candidates?.[0]?.content?.parts ?? [];
    // Parts flagged `thought` are reasoning, not answer text, so leave them out.
    const visible = parts.flatMap((part) => (part.text && !part.thought ? [part.text] : []));
    return visible.join('') || null;
  }

  if (provider === 'anthropic') {
    const blocks = body.content ?? [];
    const texts = blocks.flatMap((block) => (block.type === 'text' ? [block.text] : []));
    return texts.join('') || null;
  }

  if (provider === 'openai') {
    // A non-blank top-level `output_text` wins over the structured output.
    const { output_text: directText } = body;
    if (typeof directText === 'string' && directText.trim()) {
      return directText;
    }

    const pieces = [];
    const items = Array.isArray(body.output) ? body.output : [];
    for (const item of items) {
      if (item?.type === 'output_text' && typeof item.text === 'string') {
        pieces.push(item.text);
        continue;
      }

      const isMessage = item?.type === 'message' && Array.isArray(item.content);
      if (!isMessage) continue;

      const messageTexts = item.content
        .filter((part) => (part?.type === 'output_text' || part?.type === 'text')
          && typeof part.text === 'string')
        .map((part) => part.text);
      pieces.push(...messageTexts);
    }
    return pieces.join('') || null;
  }

  return null;
}
239
-
240
/**
 * Return the reasoning text exposed by a provider response, joined with
 * newlines, or null when there is none.
 *
 * Sources per provider:
 * - anthropic: the `thinking` field of blocks whose type is `thinking`
 * - openai: non-blank `summary[].text` entries of `reasoning` output items
 * - gemini: the `text` of first-candidate parts where `thought === true`
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw body or an LlmResponse wrapper
 * @returns {string | null}
 */
export function getThinkingContent(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;
  const joinLines = (lines) => (lines.length > 0 ? lines.join('\n') : null);

  if (provider === 'anthropic') {
    const blocks = body.content ?? [];
    return joinLines(
      blocks.filter((block) => block.type === 'thinking').map((block) => block.thinking),
    );
  }

  if (provider === 'openai') {
    const items = Array.isArray(body.output) ? body.output : [];
    const summaries = items
      .filter((item) => item?.type === 'reasoning' && Array.isArray(item.summary))
      .flatMap((item) => item.summary)
      .filter((entry) => typeof entry?.text === 'string' && entry.text.trim())
      .map((entry) => entry.text);
    return joinLines(summaries);
  }

  if (provider === 'gemini') {
    const parts = body.candidates?.[0]?.content?.parts ?? [];
    return joinLines(
      parts.filter((part) => part.thought === true).map((part) => part.text),
    );
  }

  return null;
}
285
-
286
/**
 * Report why the provider stopped generating.
 *
 * - gemini: `finishReason` of the first candidate
 * - anthropic: `stop_reason`
 * - openai: `'tool_calls'` when the response contains function calls,
 *   otherwise `status`, then `incomplete_details.reason`
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw body or an LlmResponse wrapper
 * @returns {string | null} The reason, or null when it cannot be determined
 */
export function getStopReason(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    return body.candidates?.[0]?.finishReason ?? null;
  }

  if (provider === 'anthropic') {
    return body.stop_reason ?? null;
  }

  if (provider === 'openai') {
    if (parseOpenAiToolCalls(body)) return 'tool_calls';
    return body.status ?? body.incomplete_details?.reason ?? null;
  }

  return null;
}
305
-
306
- // ── Message formatting ──────────────────────────────────────────────
307
-
308
/**
 * Create the opening conversation state for a provider.
 *
 * Shape per provider:
 * - gemini: an array with one user turn; the system prompt, when present, is
 *   prepended to the task text
 * - anthropic: `{ system, messages }` with the system prompt kept separate
 * - openai: `{ input, previousResponseId }`; the system prompt becomes its own
 *   message unless the model lacks system-prompt support or the prompt is
 *   empty, in which case it is merged into the user message
 * - anything else: a plain `[system, user]` message array
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {string} systemPrompt
 * @param {string} task
 * @param {string} [model] - Optional model name used for the capability lookup
 * @returns {Array | object}
 */
export function buildInitialMessages(provider, systemPrompt, task, model) {
  const capabilities = model ? getModelCapabilities(model) : null;
  // Without a known model, assume system prompts are supported.
  const supportsSystem = capabilities ? capabilities.supportsSystemPrompt : true;

  if (provider === 'gemini') {
    const preamble = systemPrompt ? systemPrompt + '\n\n' : '';
    return [{ role: 'user', parts: [{ text: preamble + task }] }];
  }

  if (provider === 'anthropic') {
    return {
      system: systemPrompt || undefined,
      messages: [{ role: 'user', content: task }],
    };
  }

  if (provider === 'openai') {
    // Reasoning models (o1, o3, o4) don't support system prompts, so the
    // prompt is folded into the user message for them.
    const mergeIntoUser = !supportsSystem || !systemPrompt;
    const input = mergeIntoUser
      ? [{ role: 'user', content: systemPrompt ? systemPrompt + '\n\n' + task : task }]
      : [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: task },
      ];
    return { input, previousResponseId: undefined };
  }

  return [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: task },
  ];
}
357
-
358
/**
 * Record the assistant's reply in the conversation state, mutating and
 * returning `messages`.
 *
 * - gemini: pushes the first candidate's `content`, if there is one
 * - anthropic: pushes an assistant turn carrying `response.content`
 * - openai, array state: pushes an assistant turn with the reply text
 * - openai, object state: remembers the response id for chaining and clears
 *   the pending `input`
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for the provider
 * @param {object} responseOrWrapper - Raw body or an LlmResponse wrapper
 * @returns {Array | object} The same `messages` value that was passed in
 */
export function appendAssistantResponse(provider, messages, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const candidateContent = body.candidates?.[0]?.content;
    if (candidateContent) {
      messages.push(candidateContent);
    }
    return messages;
  }

  if (provider === 'anthropic') {
    messages.messages.push({ role: 'assistant', content: body.content });
    return messages;
  }

  if (provider === 'openai') {
    if (!Array.isArray(messages)) {
      // Keep the previous id when this response does not carry one.
      messages.previousResponseId = body.id ?? messages.previousResponseId;
      messages.input = [];
      return messages;
    }
    const replyText = getResponseText('openai', body);
    messages.push({ role: 'assistant', content: replyText ?? '' });
    return messages;
  }

  return messages;
}
389
-
390
/**
 * Feed tool results back into the conversation state, mutating and returning
 * `messages`. `results[i]` is the result of `toolCalls[i]`.
 *
 * - gemini: one user turn of `functionResponse` parts keyed by tool name
 * - anthropic: one user turn of `tool_result` blocks keyed by `tool_use_id`
 * - openai, array state: one `tool` message per call
 * - openai, object state: replaces `input` with `function_call_output` items
 *
 * For openai, results that are not strings are JSON-encoded.
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for the provider
 * @param {Array<{ id: string, name: string }>} toolCalls
 * @param {Array} results
 * @returns {Array | object} The same `messages` value that was passed in
 */
export function appendToolResults(provider, messages, toolCalls, results) {
  if (provider === 'gemini') {
    const parts = toolCalls.map((call, index) => ({
      functionResponse: {
        name: call.name,
        response: { content: results[index] },
      },
    }));
    messages.push({ role: 'user', parts });
    return messages;
  }

  if (provider === 'anthropic') {
    const content = toolCalls.map((call, index) => ({
      type: 'tool_result',
      tool_use_id: call.id,
      content: results[index],
    }));
    messages.messages.push({ role: 'user', content });
    return messages;
  }

  if (provider === 'openai') {
    // Encode every output up front, before any state is touched.
    const toolOutputs = Array.from(toolCalls, (call, index) => {
      const result = results[index];
      return {
        type: 'function_call_output',
        call_id: call.id,
        output: typeof result === 'string' ? result : JSON.stringify(result),
      };
    });

    if (!Array.isArray(messages)) {
      messages.input = toolOutputs;
      return messages;
    }

    for (const { call_id: callId, output } of toolOutputs) {
      messages.push({ role: 'tool', tool_call_id: callId, content: output });
    }
    return messages;
  }

  return messages;
}
445
-
446
/**
 * Add a plain-text user turn to the conversation state, mutating and
 * returning `messages`. The harness uses this for recovery nudges, for
 * example when the model answers without making a required tool call.
 *
 * For the object-shaped openai state the instruction is appended to a copy of
 * any pending `input`, which then replaces it.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {Array | object} messages
 * @param {string} text
 * @returns {Array | object}
 */
export function appendUserInstruction(provider, messages, text) {
  if (provider === 'gemini') {
    messages.push({ role: 'user', parts: [{ text }] });
    return messages;
  }

  if (provider === 'anthropic') {
    messages.messages.push({ role: 'user', content: text });
    return messages;
  }

  if (provider === 'openai') {
    const userTurn = { role: 'user', content: text };
    if (Array.isArray(messages)) {
      messages.push(userTurn);
    } else {
      const pending = Array.isArray(messages.input) ? messages.input : [];
      messages.input = [...pending, userTurn];
    }
    return messages;
  }

  return messages;
}
480
-
481
/**
 * Split the conversation state into the arguments handed to the LLM call.
 *
 * Anthropic keeps the system prompt apart from the message list, so its state
 * is unpacked into `{ system, messages }`. Every other provider gets its state
 * back unchanged under `messages`.
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for the provider
 * @returns {{ system?: string, messages: Array | object }}
 */
export function extractCallArgs(provider, messages) {
  return provider === 'anthropic'
    ? { system: messages.system, messages: messages.messages }
    : { messages };
}
@@ -1,176 +0,0 @@
1
- /**
2
- * Structured logging helper for bundled harnesses.
3
- * Outputs JSON lines (one JSON object per line) to stderr.
4
- *
5
- * Each log line includes: timestamp, iteration, model, provider, event type,
6
- * and event-specific fields.
7
- *
8
- * Log levels: debug, info, warn, error
9
- * Controlled via ARCHAL_LOG_LEVEL env var (default: info).
10
- */
11
-
12
- // ── Log levels ──────────────────────────────────────────────────────
13
-
14
/**
 * Numeric severity of each log level; a message is written when its level is
 * at or above the configured threshold.
 * @enum {number}
 */
const LOG_LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
};

/**
 * Translate a level name into its numeric threshold.
 *
 * The name is matched case-insensitively and surrounding whitespace is
 * ignored. Only the own keys of LOG_LEVELS count, so a value such as
 * "constructor" can no longer resolve to an inherited, non-numeric member and
 * silently disable level filtering. Anything unrecognised falls back to `info`.
 *
 * @param {string | undefined} name - Requested level name, e.g. from the environment
 * @returns {number}
 */
function resolveLogLevel(name) {
  const key = typeof name === 'string' ? name.trim().toLowerCase() : '';
  const isKnown = Object.prototype.hasOwnProperty.call(LOG_LEVELS, key);
  return isKnown ? LOG_LEVELS[key] : LOG_LEVELS.info;
}

// Threshold for this process, read once from ARCHAL_LOG_LEVEL (default: info).
const currentLevel = resolveLogLevel(process.env['ARCHAL_LOG_LEVEL']);
23
-
24
- // ── Logger factory ──────────────────────────────────────────────────
25
-
26
- /**
27
- * @typedef {Object} LogContext
28
- * @property {string} harness - Harness name (e.g. "react")
29
- * @property {string} model - Model identifier
30
- * @property {string} provider - Provider name
31
- */
32
-
33
- /**
34
- * @typedef {Object} Logger
35
- * @property {function} debug - Log at debug level
36
- * @property {function} info - Log at info level
37
- * @property {function} warn - Log at warn level
38
- * @property {function} error - Log at error level
39
- * @property {function} tokenUsage - Log token usage event
40
- * @property {function} toolCall - Log tool call event
41
- * @property {function} toolError - Log tool error event
42
- * @property {function} llmCall - Log LLM call event
43
- * @property {function} llmResponse - Log LLM response event
44
- * @property {function} summary - Log run summary event
45
- */
46
-
47
/**
 * Create a structured logger bound to a harness context.
 *
 * Every call writes one JSON object per line to stderr, carrying the
 * timestamp, level, harness, model, provider and event name, plus the
 * iteration (when given) and any event-specific fields. Lines below the
 * configured threshold are dropped.
 *
 * The convenience methods never throw because of what is being logged:
 * tool arguments that cannot be serialized and error values that are not
 * strings are converted to a printable form instead.
 *
 * @param {LogContext} context
 * @returns {Logger}
 */
export function createLogger(context) {
  const { harness, model, provider } = context;

  /**
   * Write a structured log line to stderr.
   * @param {'debug' | 'info' | 'warn' | 'error'} level
   * @param {string} event
   * @param {Record<string, unknown>} [fields]
   * @param {number} [iteration]
   */
  function log(level, event, fields = {}, iteration = undefined) {
    if (LOG_LEVELS[level] < currentLevel) return;

    const line = {
      ts: new Date().toISOString(),
      level,
      harness,
      model,
      provider,
      event,
      ...(iteration !== undefined ? { iteration } : {}),
      ...fields,
    };
    process.stderr.write(JSON.stringify(line) + '\n');
  }

  /**
   * Render tool arguments as a string for logging.
   * JSON.stringify returns undefined for undefined/functions/symbols and
   * throws on cycles or BigInt; in those cases a short type tag is used.
   * @param {unknown} args
   * @returns {string}
   */
  function describeArgs(args) {
    let serialized;
    try {
      serialized = JSON.stringify(args);
    } catch {
      // Deliberate best effort: a log line must not abort the harness run.
      serialized = undefined;
    }
    return serialized ?? `[${typeof args}]`;
  }

  return {
    debug: (event, fields, iteration) => log('debug', event, fields, iteration),
    info: (event, fields, iteration) => log('info', event, fields, iteration),
    warn: (event, fields, iteration) => log('warn', event, fields, iteration),
    error: (event, fields, iteration) => log('error', event, fields, iteration),

    /**
     * Log token usage for an LLM call.
     * @param {number} iteration
     * @param {object} usage - { inputTokens, outputTokens }
     * @param {object} cumulative - { inputTokens, outputTokens }
     */
    tokenUsage(iteration, usage, cumulative) {
      log('info', 'token_usage', {
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        cumulativeInputTokens: cumulative.inputTokens,
        cumulativeOutputTokens: cumulative.outputTokens,
      }, iteration);
    },

    /**
     * Log a tool call.
     * @param {number} iteration
     * @param {string} toolName
     * @param {object} args - Tool arguments (serialized, then cut to 200 chars)
     * @param {number} durationMs
     */
    toolCall(iteration, toolName, args, durationMs) {
      log('info', 'tool_call', {
        tool: toolName,
        args: truncate(describeArgs(args), 200),
        durationMs,
      }, iteration);
    },

    /**
     * Log a tool error.
     * @param {number} iteration
     * @param {string} toolName
     * @param {string} errorMessage - Cut to 500 chars; non-strings are stringified
     */
    toolError(iteration, toolName, errorMessage) {
      log('error', 'tool_error', {
        tool: toolName,
        error: truncate(String(errorMessage), 500),
      }, iteration);
    },

    /**
     * Log an LLM call start.
     * @param {number} iteration
     */
    llmCall(iteration) {
      log('debug', 'llm_call_start', {}, iteration);
    },

    /**
     * Log an LLM response.
     * @param {number} iteration
     * @param {number} durationMs
     * @param {boolean} hasToolCalls
     * @param {string|null} stopReason - Omitted from the line when falsy
     */
    llmResponse(iteration, durationMs, hasToolCalls, stopReason) {
      log('info', 'llm_response', {
        durationMs,
        hasToolCalls,
        ...(stopReason ? { stopReason } : {}),
      }, iteration);
    },

    /**
     * Log a run summary at the end.
     * @param {object} stats
     * @param {number} stats.iterations
     * @param {number} stats.totalInputTokens
     * @param {number} stats.totalOutputTokens
     * @param {number} stats.totalTimeMs
     * @param {number} stats.toolCallCount
     * @param {number} stats.toolErrorCount
     * @param {string} stats.exitReason
     */
    summary(stats) {
      log('info', 'run_summary', stats);
    },
  };
}
166
-
167
/**
 * Shorten a value's string form to at most `maxLen` characters, ending in an
 * ellipsis when something was cut off.
 *
 * Non-string input is converted with String(); null and undefined become the
 * empty string, so results such as the `undefined` that JSON.stringify can
 * return do not crash the logger. When `maxLen` leaves no room for the
 * three-character ellipsis the text is simply cut to `maxLen`.
 *
 * @param {string} str
 * @param {number} maxLen
 * @returns {string}
 */
function truncate(str, maxLen) {
  const text = str == null ? '' : String(str);
  if (text.length <= maxLen) return text;
  // Too narrow for "...": a negative slice end would return too much text.
  if (maxLen <= 3) return text.slice(0, Math.max(0, maxLen));
  return text.slice(0, maxLen - 3) + '...';
}