@archal/cli 0.9.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/README.md +9 -14
  2. package/dist/index.cjs +35736 -30817
  3. package/package.json +22 -12
  4. package/twin-assets/google-workspace/fidelity.json +9 -0
  5. package/twin-assets/jira/fidelity.json +17 -17
  6. package/twin-assets/ramp/fidelity.json +22 -0
  7. package/twin-assets/slack/fidelity.json +6 -7
  8. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  9. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  10. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  11. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  12. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  13. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  14. package/dist/harnesses/_lib/logging.mjs +0 -176
  15. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  16. package/dist/harnesses/_lib/metrics.mjs +0 -34
  17. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  18. package/dist/harnesses/_lib/providers.mjs +0 -39
  19. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  20. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  21. package/dist/harnesses/hardened/SAFETY.md +0 -53
  22. package/dist/harnesses/hardened/agent.mjs +0 -57
  23. package/dist/harnesses/hardened/archal-harness.json +0 -23
  24. package/dist/harnesses/hardened/package.json +0 -12
  25. package/dist/harnesses/naive/agent.mjs +0 -37
  26. package/dist/harnesses/naive/archal-harness.json +0 -21
  27. package/dist/harnesses/naive/package.json +0 -12
  28. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  29. package/dist/harnesses/openclaw/SOUL.md +0 -12
  30. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  31. package/dist/harnesses/openclaw/agent.mjs +0 -229
  32. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  33. package/dist/harnesses/react/agent.mjs +0 -233
  34. package/dist/harnesses/react/archal-harness.json +0 -22
  35. package/dist/harnesses/react/package.json +0 -12
  36. package/dist/harnesses/react/tool-selection.mjs +0 -66
  37. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  38. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  39. package/dist/harnesses/zero-shot/package.json +0 -12
  40. package/dist/package.json +0 -72
  41. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  42. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  43. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  44. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  45. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  46. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  47. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  48. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  49. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  50. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  51. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  52. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  53. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  54. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  55. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  56. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  57. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  58. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  59. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  60. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  61. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  62. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  63. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  64. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  65. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  66. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  67. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  68. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  69. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  70. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  71. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  72. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  73. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  74. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  75. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  76. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  77. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  78. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  79. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  80. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  81. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  82. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  83. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  84. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  85. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  86. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  87. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  88. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  89. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  90. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  91. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  92. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  93. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  94. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  95. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  96. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  97. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  98. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  99. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  100. package/dist/twin-assets/github/fidelity.json +0 -13
  101. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  102. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  103. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  104. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  105. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  106. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  107. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  108. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  109. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  110. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  111. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  112. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  113. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  114. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  115. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  116. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  117. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  118. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  119. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  120. package/dist/twin-assets/github/seeds/empty.json +0 -33
  121. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  122. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  123. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  124. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  125. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  126. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  127. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  128. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  129. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  130. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  131. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  132. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  133. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  134. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  135. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  136. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  137. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  138. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  139. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  140. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  141. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  142. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  143. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  144. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  146. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  147. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  148. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  149. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  150. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  151. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  152. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  153. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  154. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  155. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  156. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  157. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  158. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  159. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  160. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  161. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  162. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  163. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  164. package/dist/twin-assets/jira/fidelity.json +0 -40
  165. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  166. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  167. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  168. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  169. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  170. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  171. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  172. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  173. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  174. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  175. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  176. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  177. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  178. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  179. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  180. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  181. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  182. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  183. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  184. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  185. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  186. package/dist/twin-assets/linear/fidelity.json +0 -13
  187. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  188. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  189. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  190. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  191. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  192. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  193. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  194. package/dist/twin-assets/slack/fidelity.json +0 -14
  195. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  196. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  197. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  198. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  199. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  200. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  201. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  202. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  203. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  204. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  205. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  206. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  207. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  208. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  209. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  210. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  211. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  212. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  213. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  214. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  215. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  216. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  217. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  218. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  219. package/dist/twin-assets/stripe/fidelity.json +0 -22
  220. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  221. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  222. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  223. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  224. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  225. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  226. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  227. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  228. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  229. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  230. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  231. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  232. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  233. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  234. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  235. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  236. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  237. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  238. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  239. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  240. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  241. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  242. package/dist/twin-assets/supabase/fidelity.json +0 -13
  243. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  244. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  245. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  246. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  247. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  248. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  249. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  250. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  251. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  252. package/dist/twin-assets/telegram/fidelity.json +0 -19
  253. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  254. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  255. package/harnesses/_lib/agent-trace.mjs +0 -57
  256. package/harnesses/_lib/env-utils.mjs +0 -23
  257. package/harnesses/_lib/harness-runner.mjs +0 -373
  258. package/harnesses/_lib/llm-call.mjs +0 -411
  259. package/harnesses/_lib/llm-config.mjs +0 -209
  260. package/harnesses/_lib/llm-response.mjs +0 -490
  261. package/harnesses/_lib/logging.mjs +0 -176
  262. package/harnesses/_lib/mcp-client.mjs +0 -85
  263. package/harnesses/_lib/metrics.mjs +0 -34
  264. package/harnesses/_lib/model-configs.mjs +0 -521
  265. package/harnesses/_lib/providers.mjs +0 -39
  266. package/harnesses/_lib/rest-client.mjs +0 -150
  267. package/harnesses/_lib/tool-executor.mjs +0 -77
  268. package/harnesses/hardened/SAFETY.md +0 -53
  269. package/harnesses/hardened/agent.mjs +0 -57
  270. package/harnesses/hardened/archal-harness.json +0 -23
  271. package/harnesses/hardened/package.json +0 -12
  272. package/harnesses/naive/agent.mjs +0 -37
  273. package/harnesses/naive/archal-harness.json +0 -21
  274. package/harnesses/naive/package.json +0 -12
  275. package/harnesses/openclaw/AGENTS.md +0 -27
  276. package/harnesses/openclaw/SOUL.md +0 -12
  277. package/harnesses/openclaw/TOOLS.md +0 -20
  278. package/harnesses/openclaw/agent.mjs +0 -229
  279. package/harnesses/openclaw/archal-harness.json +0 -28
  280. package/harnesses/react/agent.mjs +0 -233
  281. package/harnesses/react/archal-harness.json +0 -22
  282. package/harnesses/react/package.json +0 -12
  283. package/harnesses/react/tool-selection.mjs +0 -66
  284. package/harnesses/zero-shot/agent.mjs +0 -31
  285. package/harnesses/zero-shot/archal-harness.json +0 -21
  286. package/harnesses/zero-shot/package.json +0 -12
  287. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  288. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  289. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  290. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  291. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  292. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  293. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  294. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  295. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  296. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  297. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  298. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  299. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  300. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  301. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  302. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  303. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  304. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  305. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  306. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  307. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  308. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  309. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  310. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  311. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  312. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  313. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  314. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  315. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  316. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  317. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  318. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  319. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  320. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  321. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  322. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  323. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  324. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  325. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  326. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  327. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  328. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  329. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  330. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  331. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  332. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  333. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  334. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  335. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  336. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  337. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  338. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  339. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  340. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  341. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  342. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  343. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  344. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  345. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  346. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  347. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  348. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  349. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  350. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  351. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  352. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  353. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  354. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  355. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  356. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  357. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  358. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  359. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  360. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  361. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  362. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  363. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  364. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  365. package/twin-assets/github/seeds/empty.json +0 -33
  366. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  367. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  368. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  369. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  370. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  371. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  372. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  373. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  374. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  375. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  376. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  377. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  378. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  379. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  380. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  381. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  382. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  383. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  384. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  385. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  386. package/twin-assets/github/seeds/rate-limited.json +0 -41
  387. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  388. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  389. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  390. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  391. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  392. package/twin-assets/github/seeds/small-project.json +0 -833
  393. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  394. package/twin-assets/github/seeds/stale-issues.json +0 -375
  395. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  396. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  397. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  398. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  399. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  400. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  401. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  402. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  403. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  404. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  405. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  406. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  407. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  408. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  409. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  410. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  411. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  412. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  413. package/twin-assets/jira/seeds/empty.json +0 -124
  414. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  415. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  416. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  417. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  418. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  419. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  420. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  421. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  422. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  423. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  424. package/twin-assets/jira/seeds/small-project.json +0 -246
  425. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  426. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  427. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  428. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  429. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  430. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  431. package/twin-assets/linear/seeds/empty.json +0 -171
  432. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  433. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  434. package/twin-assets/linear/seeds/harvested.json +0 -331
  435. package/twin-assets/linear/seeds/small-team.json +0 -584
  436. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  437. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  438. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  439. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  440. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  441. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  442. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  443. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  444. package/twin-assets/slack/seeds/empty.json +0 -136
  445. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  446. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  447. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  448. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  449. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  450. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  451. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  452. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  453. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  454. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  455. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  456. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  457. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  458. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  459. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  460. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  461. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  462. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  463. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  464. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  465. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  466. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  467. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  468. package/twin-assets/stripe/seeds/empty.json +0 -31
  469. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  470. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  471. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  472. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  473. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  474. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  475. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  476. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  477. package/twin-assets/stripe/seeds/small-business.json +0 -607
  478. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  479. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  480. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  481. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  482. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  483. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  484. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  485. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  486. package/twin-assets/supabase/seeds/empty.sql +0 -2
  487. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  488. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  489. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  490. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  491. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  492. package/twin-assets/telegram/seeds/empty.json +0 -1
  493. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,490 +0,0 @@
1
- /**
2
- * Response parsing, message building, and tool formatting for each provider.
3
- *
4
- * Extracted from providers.mjs — pure functions, no HTTP calls.
5
- */
6
-
7
- import { getModelCapabilities } from './model-configs.mjs';
8
-
9
- // ── Token usage tracking ────────────────────────────────────────────
10
-
11
- /**
12
- * @typedef {Object} TokenUsage
13
- * @property {number} inputTokens - Input/prompt tokens used
14
- * @property {number} outputTokens - Output/completion tokens used
15
- */
16
-
17
- /**
18
- * @typedef {Object} LlmResponse
19
- * @property {object} body - The raw API response body
20
- * @property {TokenUsage} usage - Token usage for this call
21
- */
22
-
23
/**
 * Read the input/output token counts out of a provider response body.
 *
 * Each provider reports usage under a different key; counts that are absent
 * are reported as 0. An unrecognised provider always yields zero counts and
 * never touches `body`.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} body - Raw API response body
 * @returns {TokenUsage}
 */
export function extractTokenUsage(provider, body) {
  if (provider === 'gemini') {
    const { promptTokenCount, candidatesTokenCount } = body.usageMetadata ?? {};
    return {
      inputTokens: promptTokenCount ?? 0,
      outputTokens: candidatesTokenCount ?? 0,
    };
  }

  if (provider === 'anthropic') {
    const { input_tokens: consumed, output_tokens: produced } = body.usage ?? {};
    return {
      inputTokens: consumed ?? 0,
      outputTokens: produced ?? 0,
    };
  }

  if (provider === 'openai') {
    const counts = body.usage ?? {};
    // The Responses API reports input_tokens/output_tokens, while Chat
    // Completions reports prompt_tokens/completion_tokens; prefer the former.
    return {
      inputTokens: counts.input_tokens ?? counts.prompt_tokens ?? 0,
      outputTokens: counts.output_tokens ?? counts.completion_tokens ?? 0,
    };
  }

  return { inputTokens: 0, outputTokens: 0 };
}
57
-
58
- // ── Tool formatting ─────────────────────────────────────────────────
59
-
60
/**
 * JSON Schema keywords that LLM function-calling APIs reject.
 * Despite the name, the set is applied to every provider (Gemini, OpenAI,
 * Anthropic) for consistency. `anyOf`/`oneOf`/`allOf` are handled separately
 * inside sanitizeSchemaForLLM because single-element unions are flattened
 * rather than dropped.
 */
const GEMINI_UNSUPPORTED_KEYWORDS = new Set([
  'additionalProperties', '$schema', 'propertyNames', 'patternProperties',
  'if', 'then', 'else', 'not', 'const', 'contentEncoding', 'contentMediaType',
]);

/**
 * Recursively strip JSON Schema keywords that LLM function-calling APIs reject.
 *
 * - Keywords listed in GEMINI_UNSUPPORTED_KEYWORDS are removed.
 * - `anyOf`/`oneOf`/`allOf` with exactly one member are flattened into the
 *   parent schema; multi-member unions are dropped.
 * - The keys of a `properties` map are parameter names chosen by the tool
 *   author, not schema keywords, so they are always kept (a parameter called
 *   `const` or `not` must not disappear); only their sub-schemas are cleaned.
 *
 * @param {unknown} schema - A JSON Schema node (object, array, or primitive)
 * @returns {unknown} A sanitized copy; non-object input is returned as-is
 */
function sanitizeSchemaForLLM(schema) {
  if (!schema || typeof schema !== 'object') return schema;
  if (Array.isArray(schema)) return schema.map((entry) => sanitizeSchemaForLLM(entry));

  const cleaned = {};
  for (const [key, value] of Object.entries(schema)) {
    if (GEMINI_UNSUPPORTED_KEYWORDS.has(key)) continue;

    // Gemini doesn't support anyOf/oneOf/allOf: flatten single-element
    // unions, otherwise drop the keyword entirely.
    if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
      if (Array.isArray(value) && value.length === 1) {
        Object.assign(cleaned, sanitizeSchemaForLLM(value[0]));
      }
      continue;
    }

    // Property names live in a different namespace from schema keywords, so
    // sanitize each sub-schema without filtering the names themselves.
    if (key === 'properties' && value && typeof value === 'object' && !Array.isArray(value)) {
      cleaned[key] = Object.fromEntries(
        Object.entries(value).map(([propName, propSchema]) => [
          propName,
          sanitizeSchemaForLLM(propSchema),
        ]),
      );
      continue;
    }

    cleaned[key] = sanitizeSchemaForLLM(value);
  }
  return cleaned;
}
91
-
92
/**
 * Translate MCP tool descriptors into the tool-declaration shape a provider
 * expects. Every known provider receives a sanitized copy of the tool's
 * input schema; an unknown provider gets the MCP tools back untouched.
 *
 * @param {string} provider - 'gemini', 'openai', 'anthropic', or anything else
 * @param {Array<{name: string, description?: string, inputSchema?: object}>} mcpTools
 * @returns {Array<object>} Provider-specific tool declarations
 */
export function formatToolsForProvider(provider, mcpTools) {
  if (provider === 'gemini') {
    // Gemini wants a single tool entry wrapping all function declarations.
    const functionDeclarations = mcpTools.map(({ name, description, inputSchema }) => ({
      name,
      description,
      parameters: sanitizeSchemaForLLM(inputSchema),
    }));
    return [{ functionDeclarations }];
  }

  if (provider === 'openai') {
    return mcpTools.map(({ name, description, inputSchema }) => ({
      type: 'function',
      name,
      description,
      parameters: sanitizeSchemaForLLM(inputSchema),
    }));
  }

  if (provider === 'anthropic') {
    return mcpTools.map(({ name, description, inputSchema }) => ({
      name,
      description,
      input_schema: sanitizeSchemaForLLM(inputSchema),
    }));
  }

  return mcpTools;
}
122
-
123
- // ── Response parsing ────────────────────────────────────────────────
124
-
125
/**
 * Pull the tool calls out of a provider response.
 *
 * The argument may be the raw response body or an LlmResponse wrapper; a
 * wrapper's `body` is unwrapped first. Any provider other than 'gemini' or
 * 'anthropic' is parsed with the OpenAI parser.
 *
 * @param {string} provider
 * @param {object} responseOrWrapper
 * @returns {Array<{id: string, name: string, arguments: object}> | null}
 *   The parsed calls, or null when the response contains none
 */
export function parseToolCalls(provider, responseOrWrapper) {
  const payload = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    return parseGeminiToolCalls(payload);
  }
  if (provider === 'anthropic') {
    return parseAnthropicToolCalls(payload);
  }
  // 'openai' and every unrecognised provider share the OpenAI parser.
  return parseOpenAiToolCalls(payload);
}
144
-
145
/**
 * Extract function calls from a Gemini response body.
 *
 * Only the first candidate is inspected. Gemini does not supply call ids in
 * the parts read here, so one is synthesized from the function name, a
 * timestamp, and the call's position. The position keeps ids unique when the
 * same function is called more than once in a single response (a bare
 * timestamp would collide).
 *
 * @param {object} response - Raw Gemini response body
 * @returns {Array<{id: string, name: string, arguments: object}> | null}
 *   The parsed calls, or null when the response contains none
 */
function parseGeminiToolCalls(response) {
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  const stamp = Date.now();
  const calls = parts
    .filter((p) => p?.functionCall)
    .map((p, index) => ({
      id: `${p.functionCall.name}-${stamp}-${index}`,
      name: p.functionCall.name,
      arguments: p.functionCall.args ?? {},
    }));
  return calls.length > 0 ? calls : null;
}
156
-
157
/**
 * Extract `tool_use` blocks from an Anthropic response body.
 *
 * @param {object} response - Raw Anthropic response body
 * @returns {Array<{id: string, name: string, arguments: object}> | null}
 *   One entry per tool_use block (missing `input` becomes `{}`), or null
 *   when the response has no tool_use blocks
 */
function parseAnthropicToolCalls(response) {
  const blocks = response.content ?? [];
  const toolUses = blocks.flatMap((block) => {
    if (block.type !== 'tool_use') return [];
    return [{
      id: block.id,
      name: block.name,
      arguments: block.input ?? {},
    }];
  });
  if (toolUses.length === 0) return null;
  return toolUses;
}
168
-
169
/**
 * Extract `function_call` items from an OpenAI response body's `output`
 * array.
 *
 * Arguments arrive either as a JSON string or as an object:
 * - a JSON string that parses to an object is used as-is;
 * - a string that fails to parse, or parses to `null` or a primitive, is
 *   preserved as `{ _raw: <string> }` so `arguments` is always an object;
 * - an object is passed through unchanged;
 * - anything else yields `{}`.
 *
 * The id is `call_id`, then `id`; if neither is present one is synthesized
 * from the name, a timestamp, and the call's position so that several
 * id-less calls in one response do not collide.
 *
 * @param {object} response - Raw OpenAI response body
 * @returns {Array<{id: string, name: string, arguments: object}> | null}
 *   The parsed calls, or null when the response contains none
 */
function parseOpenAiToolCalls(response) {
  const output = Array.isArray(response.output) ? response.output : [];
  const calls = [];
  for (const item of output) {
    if (item?.type !== 'function_call') continue;

    let parsedArguments = {};
    if (typeof item.arguments === 'string' && item.arguments.trim()) {
      try {
        const decoded = JSON.parse(item.arguments);
        // Valid JSON is not necessarily an object ("null", "5", "\"x\"").
        parsedArguments = decoded !== null && typeof decoded === 'object'
          ? decoded
          : { _raw: item.arguments };
      } catch {
        // Keep the unparseable payload so the caller can still inspect it.
        parsedArguments = { _raw: item.arguments };
      }
    } else if (item.arguments && typeof item.arguments === 'object') {
      parsedArguments = item.arguments;
    }

    calls.push({
      id: item.call_id ?? item.id ?? `${item.name ?? 'tool'}-${Date.now()}-${calls.length}`,
      name: item.name,
      arguments: parsedArguments,
    });
  }

  return calls.length > 0 ? calls : null;
}
195
-
196
/**
 * Return the user-visible text of a provider response, or null when there
 * is none (or the provider is not recognised).
 *
 * The argument may be the raw response body or an LlmResponse wrapper.
 *
 * @param {string} provider
 * @param {object} responseOrWrapper
 * @returns {string | null}
 */
export function getResponseText(provider, responseOrWrapper) {
  const payload = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const parts = payload.candidates?.[0]?.content?.parts ?? [];
    // Parts flagged `thought` are reasoning, surfaced by getThinkingContent().
    const visible = parts.flatMap((part) => (part.text && !part.thought ? [part.text] : []));
    return visible.join('') || null;
  }

  if (provider === 'anthropic') {
    const blocks = payload.content ?? [];
    const texts = blocks.flatMap((block) => (block.type === 'text' ? [block.text] : []));
    return texts.join('') || null;
  }

  if (provider === 'openai') {
    // A non-blank top-level `output_text` wins over the itemised output.
    const direct = payload.output_text;
    if (typeof direct === 'string' && direct.trim()) {
      return direct;
    }

    const pieces = [];
    const items = Array.isArray(payload.output) ? payload.output : [];
    for (const item of items) {
      if (item?.type === 'output_text' && typeof item.text === 'string') {
        pieces.push(item.text);
      } else if (item?.type === 'message' && Array.isArray(item.content)) {
        for (const part of item.content) {
          const isTextual = part?.type === 'output_text' || part?.type === 'text';
          if (isTextual && typeof part.text === 'string') {
            pieces.push(part.text);
          }
        }
      }
    }
    return pieces.join('') || null;
  }

  return null;
}
239
-
240
/**
 * Collect the model's reasoning text from a provider response.
 *
 * Sources: Anthropic `thinking` blocks, OpenAI `reasoning` item summaries
 * (blank summaries skipped), and Gemini parts flagged `thought === true`.
 * Fragments are joined with newlines.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw body or LlmResponse wrapper
 * @returns {string | null} Joined reasoning text, or null if there is none
 */
export function getThinkingContent(provider, responseOrWrapper) {
  const payload = responseOrWrapper?.body ?? responseOrWrapper;
  let fragments;

  if (provider === 'anthropic') {
    const blocks = payload.content ?? [];
    fragments = blocks.flatMap((block) => (block.type === 'thinking' ? [block.thinking] : []));
  } else if (provider === 'openai') {
    fragments = [];
    const items = Array.isArray(payload.output) ? payload.output : [];
    for (const item of items) {
      if (item?.type !== 'reasoning' || !Array.isArray(item.summary)) continue;
      for (const entry of item.summary) {
        if (typeof entry?.text === 'string' && entry.text.trim()) {
          fragments.push(entry.text);
        }
      }
    }
  } else if (provider === 'gemini') {
    const parts = payload.candidates?.[0]?.content?.parts ?? [];
    fragments = parts.flatMap((part) => (part.thought === true ? [part.text] : []));
  } else {
    return null;
  }

  return fragments.length > 0 ? fragments.join('\n') : null;
}
285
-
286
/**
 * Report why the model stopped generating.
 *
 * Gemini: the first candidate's `finishReason`. Anthropic: `stop_reason`.
 * OpenAI: the literal 'tool_calls' when the response contains function
 * calls, otherwise `status`, then `incomplete_details.reason`.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw body or LlmResponse wrapper
 * @returns {string | null} The stop reason, or null when unavailable
 */
export function getStopReason(provider, responseOrWrapper) {
  const payload = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    return payload.candidates?.[0]?.finishReason ?? null;
  }
  if (provider === 'anthropic') {
    return payload.stop_reason ?? null;
  }
  if (provider === 'openai') {
    if (parseOpenAiToolCalls(payload)) {
      return 'tool_calls';
    }
    return payload.status ?? payload.incomplete_details?.reason ?? null;
  }
  return null;
}
305
-
306
- // ── Message formatting ──────────────────────────────────────────────
307
-
308
/**
 * Create the opening conversation state for a provider.
 *
 * The return shape differs per provider:
 * - gemini: an array with one user turn; the system prompt (if any) is
 *   prepended to the task text.
 * - anthropic: `{ system, messages }` with the system prompt kept separate.
 * - openai: `{ input, previousResponseId }`. When the model lacks system
 *   prompt support, or no system prompt is given, everything goes into one
 *   user message.
 * - anything else: a system message followed by a user message.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {string} systemPrompt
 * @param {string} task
 * @param {string} [model] - Optional model name for capability checking
 * @returns {Array | object} Provider-specific conversation state
 */
export function buildInitialMessages(provider, systemPrompt, task, model) {
  // Without a model (or without capability data) assume system prompts work.
  let supportsSystem = true;
  if (model) {
    const capabilities = getModelCapabilities(model);
    if (capabilities) {
      supportsSystem = capabilities.supportsSystemPrompt;
    }
  }

  if (provider === 'gemini') {
    const preamble = systemPrompt ? systemPrompt + '\n\n' : '';
    return [{ role: 'user', parts: [{ text: preamble + task }] }];
  }

  if (provider === 'anthropic') {
    return {
      system: systemPrompt || undefined,
      messages: [{ role: 'user', content: task }],
    };
  }

  if (provider === 'openai') {
    const useSystemRole = supportsSystem && systemPrompt;
    // Reasoning models (o1, o3, o4) don't support system prompts, so the
    // system prompt is folded into the user message for them.
    const input = useSystemRole
      ? [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: task },
      ]
      : [{ role: 'user', content: systemPrompt ? systemPrompt + '\n\n' + task : task }];
    return { input, previousResponseId: undefined };
  }

  return [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: task },
  ];
}
357
-
358
/**
 * Record the assistant's reply in the conversation state so the next turn
 * can build on it. The conversation state is mutated and returned.
 *
 * The response may be the raw body or an LlmResponse wrapper.
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for this provider
 * @param {object} responseOrWrapper
 * @returns {Array | object} The same `messages` value
 */
export function appendAssistantResponse(provider, messages, responseOrWrapper) {
  const payload = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const modelTurn = payload.candidates?.[0]?.content;
    if (modelTurn) {
      messages.push(modelTurn);
    }
  } else if (provider === 'anthropic') {
    messages.messages.push({ role: 'assistant', content: payload.content });
  } else if (provider === 'openai') {
    if (Array.isArray(messages)) {
      // Array-style history: store the reply text as an assistant message.
      const replyText = getResponseText('openai', payload);
      messages.push({ role: 'assistant', content: replyText ?? '' });
    } else {
      // Object-style state: remember the response id and clear the pending
      // input instead of replaying the whole history.
      messages.previousResponseId = payload.id ?? messages.previousResponseId;
      messages.input = [];
    }
  }

  return messages;
}
389
-
390
/**
 * Append tool results to the conversation for the next turn.
 * The conversation state is mutated and returned.
 *
 * `results[i]` is the result for `toolCalls[i]`.
 * - gemini: one user turn containing a `functionResponse` part per call.
 * - anthropic: one user message containing a `tool_result` block per call.
 * - openai: results are sent as strings. Non-string results are JSON-encoded,
 *   and a missing result is encoded as "null" (JSON.stringify(undefined)
 *   yields `undefined`, which would otherwise drop the field from the request
 *   entirely). Array-style history gets `tool` role messages; object-style
 *   state gets `function_call_output` items as the next `input`.
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for this provider
 * @param {Array<{id: string, name: string}>} toolCalls
 * @param {Array<unknown>} results
 * @returns {Array | object} The same `messages` value
 */
export function appendToolResults(provider, messages, toolCalls, results) {
  switch (provider) {
    case 'gemini': {
      const parts = toolCalls.map((tc, i) => ({
        functionResponse: {
          name: tc.name,
          response: { content: results[i] },
        },
      }));
      messages.push({ role: 'user', parts });
      return messages;
    }
    case 'anthropic': {
      const content = toolCalls.map((tc, i) => ({
        type: 'tool_result',
        tool_use_id: tc.id,
        content: results[i],
      }));
      messages.messages.push({ role: 'user', content });
      return messages;
    }
    case 'openai': {
      // Always produce a string so the output field survives serialization.
      const encode = (result) => {
        if (typeof result === 'string') return result;
        return JSON.stringify(result ?? null) ?? '';
      };
      const toolOutputs = toolCalls.map((tc, i) => ({
        type: 'function_call_output',
        call_id: tc.id,
        output: encode(results[i]),
      }));

      if (Array.isArray(messages)) {
        for (const { call_id: callId, output } of toolOutputs) {
          messages.push({
            role: 'tool',
            tool_call_id: callId,
            content: output,
          });
        }
        return messages;
      }

      messages.input = toolOutputs;
      return messages;
    }
    default:
      return messages;
  }
}
445
-
446
/**
 * Add a plain-text user message to the conversation state.
 *
 * Intended for harness-level recovery nudges, e.g. when the model replied
 * without any tool calls before taking required actions. The conversation
 * state is mutated and returned; unknown providers are left untouched.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {Array | object} messages
 * @param {string} text
 * @returns {Array | object}
 */
export function appendUserInstruction(provider, messages, text) {
  if (provider === 'gemini') {
    messages.push({ role: 'user', parts: [{ text }] });
    return messages;
  }

  if (provider === 'anthropic') {
    messages.messages.push({ role: 'user', content: text });
    return messages;
  }

  if (provider === 'openai') {
    const userTurn = { role: 'user', content: text };
    if (Array.isArray(messages)) {
      messages.push(userTurn);
    } else {
      // Build a fresh input array so the previous one is never mutated.
      const pending = Array.isArray(messages.input) ? messages.input : [];
      messages.input = [...pending, userTurn];
    }
    return messages;
  }

  return messages;
}
480
-
481
/**
 * Split the conversation state into the pieces the callLlm function takes.
 *
 * Anthropic keeps the system prompt apart from the message list, so both are
 * returned; every other provider's state is passed through whole.
 *
 * @param {string} provider
 * @param {Array | object} messages - Conversation state for this provider
 * @returns {{ system?: string, messages: Array | object }}
 */
export function extractCallArgs(provider, messages) {
  if (provider !== 'anthropic') {
    return { messages };
  }
  const { system, messages: turns } = messages;
  return { system, messages: turns };
}
@@ -1,176 +0,0 @@
1
- /**
2
- * Structured logging helper for bundled harnesses.
3
- * Outputs JSON lines (one JSON object per line) to stderr.
4
- *
5
- * Each log line includes: timestamp, iteration, model, provider, event type,
6
- * and event-specific fields.
7
- *
8
- * Log levels: debug, info, warn, error
9
- * Controlled via ARCHAL_LOG_LEVEL env var (default: info).
10
- */
11
-
12
- // ── Log levels ──────────────────────────────────────────────────────
13
-
14
/**
 * Severity ranks for the supported log levels; a higher number is more severe.
 * @enum {number}
 */
const LOG_LEVELS = { debug: 0, info: 1, warn: 2, error: 3 };

// Minimum severity that gets written. Read once, at module load, from
// ARCHAL_LOG_LEVEL (case-insensitive); unset or unrecognised values mean info.
const requestedLevel = process.env['ARCHAL_LOG_LEVEL']?.toLowerCase() ?? 'info';
const currentLevel = LOG_LEVELS[requestedLevel] ?? LOG_LEVELS.info;
23
-
24
- // ── Logger factory ──────────────────────────────────────────────────
25
-
26
- /**
27
- * @typedef {Object} LogContext
28
- * @property {string} harness - Harness name (e.g. "react")
29
- * @property {string} model - Model identifier
30
- * @property {string} provider - Provider name
31
- */
32
-
33
- /**
34
- * @typedef {Object} Logger
35
- * @property {function} debug - Log at debug level
36
- * @property {function} info - Log at info level
37
- * @property {function} warn - Log at warn level
38
- * @property {function} error - Log at error level
39
- * @property {function} tokenUsage - Log token usage event
40
- * @property {function} toolCall - Log tool call event
41
- * @property {function} toolError - Log tool error event
42
- * @property {function} llmCall - Log LLM call event
43
- * @property {function} llmResponse - Log LLM response event
44
- * @property {function} summary - Log run summary event
45
- */
46
-
47
/**
 * Create a structured logger bound to a harness context.
 *
 * Every emitted line is one JSON object written to stderr, carrying the
 * timestamp, level, harness, model, provider and event name, plus the
 * optional iteration and any event-specific fields. Lines below the
 * module-level `currentLevel` are skipped.
 *
 * The event helpers never throw on awkward payloads: tool arguments that
 * cannot be JSON-encoded (undefined, cyclic, BigInt) and non-string error
 * values are converted to a printable string before truncation.
 *
 * @param {LogContext} context
 * @returns {Logger}
 */
export function createLogger(context) {
  const { harness, model, provider } = context;

  /**
   * JSON-encode a value for logging without ever throwing.
   * JSON.stringify returns undefined for undefined/functions and throws on
   * cycles and BigInt, so both cases fall back to a plain string.
   * @param {unknown} value
   * @returns {string}
   */
  function safeStringify(value) {
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return '[unserializable]';
    }
  }

  /**
   * Write a structured log line to stderr.
   * @param {'debug' | 'info' | 'warn' | 'error'} level
   * @param {string} event
   * @param {Record<string, unknown>} [fields]
   * @param {number} [iteration]
   */
  function log(level, event, fields = {}, iteration = undefined) {
    if (LOG_LEVELS[level] < currentLevel) return;

    const line = {
      ts: new Date().toISOString(),
      level,
      harness,
      model,
      provider,
      event,
      ...(iteration !== undefined ? { iteration } : {}),
      ...fields,
    };
    process.stderr.write(JSON.stringify(line) + '\n');
  }

  return {
    debug: (event, fields, iteration) => log('debug', event, fields, iteration),
    info: (event, fields, iteration) => log('info', event, fields, iteration),
    warn: (event, fields, iteration) => log('warn', event, fields, iteration),
    error: (event, fields, iteration) => log('error', event, fields, iteration),

    /**
     * Log token usage for an LLM call.
     * @param {number} iteration
     * @param {object} usage - { inputTokens, outputTokens }
     * @param {object} cumulative - { inputTokens, outputTokens }
     */
    tokenUsage(iteration, usage, cumulative) {
      log('info', 'token_usage', {
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        cumulativeInputTokens: cumulative.inputTokens,
        cumulativeOutputTokens: cumulative.outputTokens,
      }, iteration);
    },

    /**
     * Log a tool call.
     * @param {number} iteration
     * @param {string} toolName
     * @param {object} args - Tool arguments (JSON-encoded, truncated to 200 chars)
     * @param {number} durationMs
     */
    toolCall(iteration, toolName, args, durationMs) {
      log('info', 'tool_call', {
        tool: toolName,
        args: truncate(safeStringify(args), 200),
        durationMs,
      }, iteration);
    },

    /**
     * Log a tool error.
     * @param {number} iteration
     * @param {string} toolName
     * @param {string} errorMessage - Truncated to 500 chars; non-strings are stringified
     */
    toolError(iteration, toolName, errorMessage) {
      log('error', 'tool_error', {
        tool: toolName,
        error: truncate(String(errorMessage), 500),
      }, iteration);
    },

    /**
     * Log an LLM call start (debug level).
     * @param {number} iteration
     */
    llmCall(iteration) {
      log('debug', 'llm_call_start', {}, iteration);
    },

    /**
     * Log an LLM response.
     * @param {number} iteration
     * @param {number} durationMs
     * @param {boolean} hasToolCalls
     * @param {string|null} stopReason - Omitted from the line when falsy
     */
    llmResponse(iteration, durationMs, hasToolCalls, stopReason) {
      log('info', 'llm_response', {
        durationMs,
        hasToolCalls,
        ...(stopReason ? { stopReason } : {}),
      }, iteration);
    },

    /**
     * Log a run summary at the end.
     * @param {object} stats
     * @param {number} stats.iterations
     * @param {number} stats.totalInputTokens
     * @param {number} stats.totalOutputTokens
     * @param {number} stats.totalTimeMs
     * @param {number} stats.toolCallCount
     * @param {number} stats.toolErrorCount
     * @param {string} stats.exitReason
     */
    summary(stats) {
      log('info', 'run_summary', stats);
    },
  };
}
166
-
167
/**
 * Truncate a string to a maximum length, marking the cut with an ellipsis.
 *
 * The result is never longer than `maxLen`. When `maxLen` is below 3 there
 * is no room for the three-character ellipsis, so the text is cut hard (a
 * negative slice end would otherwise return a string longer than the limit).
 * Non-string input is converted with String() instead of throwing.
 *
 * @param {string} str
 * @param {number} maxLen
 * @returns {string}
 */
function truncate(str, maxLen) {
  const text = typeof str === 'string' ? str : String(str);
  if (text.length <= maxLen) return text;
  if (maxLen < 3) return text.slice(0, Math.max(0, maxLen));
  return text.slice(0, maxLen - 3) + '...';
}