@archal/cli 0.9.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/README.md +9 -14
  2. package/dist/index.cjs +35736 -30817
  3. package/package.json +22 -12
  4. package/twin-assets/google-workspace/fidelity.json +9 -0
  5. package/twin-assets/jira/fidelity.json +17 -17
  6. package/twin-assets/ramp/fidelity.json +22 -0
  7. package/twin-assets/slack/fidelity.json +6 -7
  8. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  9. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  10. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  11. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  12. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  13. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  14. package/dist/harnesses/_lib/logging.mjs +0 -176
  15. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  16. package/dist/harnesses/_lib/metrics.mjs +0 -34
  17. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  18. package/dist/harnesses/_lib/providers.mjs +0 -39
  19. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  20. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  21. package/dist/harnesses/hardened/SAFETY.md +0 -53
  22. package/dist/harnesses/hardened/agent.mjs +0 -57
  23. package/dist/harnesses/hardened/archal-harness.json +0 -23
  24. package/dist/harnesses/hardened/package.json +0 -12
  25. package/dist/harnesses/naive/agent.mjs +0 -37
  26. package/dist/harnesses/naive/archal-harness.json +0 -21
  27. package/dist/harnesses/naive/package.json +0 -12
  28. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  29. package/dist/harnesses/openclaw/SOUL.md +0 -12
  30. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  31. package/dist/harnesses/openclaw/agent.mjs +0 -229
  32. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  33. package/dist/harnesses/react/agent.mjs +0 -233
  34. package/dist/harnesses/react/archal-harness.json +0 -22
  35. package/dist/harnesses/react/package.json +0 -12
  36. package/dist/harnesses/react/tool-selection.mjs +0 -66
  37. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  38. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  39. package/dist/harnesses/zero-shot/package.json +0 -12
  40. package/dist/package.json +0 -72
  41. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  42. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  43. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  44. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  45. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  46. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  47. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  48. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  49. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  50. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  51. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  52. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  53. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  54. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  55. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  56. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  57. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  58. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  59. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  60. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  61. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  62. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  63. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  64. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  65. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  66. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  67. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  68. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  69. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  70. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  71. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  72. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  73. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  74. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  75. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  76. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  77. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  78. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  79. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  80. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  81. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  82. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  83. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  84. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  85. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  86. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  87. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  88. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  89. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  90. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  91. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  92. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  93. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  94. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  95. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  96. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  97. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  98. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  99. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  100. package/dist/twin-assets/github/fidelity.json +0 -13
  101. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  102. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  103. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  104. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  105. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  106. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  107. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  108. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  109. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  110. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  111. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  112. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  113. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  114. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  115. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  116. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  117. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  118. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  119. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  120. package/dist/twin-assets/github/seeds/empty.json +0 -33
  121. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  122. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  123. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  124. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  125. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  126. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  127. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  128. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  129. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  130. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  131. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  132. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  133. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  134. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  135. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  136. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  137. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  138. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  139. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  140. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  141. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  142. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  143. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  144. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  146. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  147. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  148. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  149. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  150. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  151. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  152. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  153. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  154. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  155. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  156. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  157. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  158. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  159. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  160. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  161. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  162. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  163. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  164. package/dist/twin-assets/jira/fidelity.json +0 -40
  165. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  166. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  167. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  168. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  169. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  170. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  171. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  172. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  173. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  174. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  175. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  176. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  177. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  178. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  179. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  180. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  181. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  182. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  183. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  184. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  185. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  186. package/dist/twin-assets/linear/fidelity.json +0 -13
  187. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  188. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  189. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  190. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  191. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  192. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  193. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  194. package/dist/twin-assets/slack/fidelity.json +0 -14
  195. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  196. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  197. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  198. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  199. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  200. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  201. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  202. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  203. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  204. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  205. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  206. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  207. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  208. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  209. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  210. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  211. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  212. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  213. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  214. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  215. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  216. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  217. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  218. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  219. package/dist/twin-assets/stripe/fidelity.json +0 -22
  220. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  221. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  222. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  223. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  224. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  225. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  226. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  227. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  228. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  229. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  230. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  231. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  232. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  233. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  234. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  235. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  236. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  237. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  238. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  239. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  240. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  241. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  242. package/dist/twin-assets/supabase/fidelity.json +0 -13
  243. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  244. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  245. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  246. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  247. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  248. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  249. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  250. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  251. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  252. package/dist/twin-assets/telegram/fidelity.json +0 -19
  253. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  254. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  255. package/harnesses/_lib/agent-trace.mjs +0 -57
  256. package/harnesses/_lib/env-utils.mjs +0 -23
  257. package/harnesses/_lib/harness-runner.mjs +0 -373
  258. package/harnesses/_lib/llm-call.mjs +0 -411
  259. package/harnesses/_lib/llm-config.mjs +0 -209
  260. package/harnesses/_lib/llm-response.mjs +0 -490
  261. package/harnesses/_lib/logging.mjs +0 -176
  262. package/harnesses/_lib/mcp-client.mjs +0 -85
  263. package/harnesses/_lib/metrics.mjs +0 -34
  264. package/harnesses/_lib/model-configs.mjs +0 -521
  265. package/harnesses/_lib/providers.mjs +0 -39
  266. package/harnesses/_lib/rest-client.mjs +0 -150
  267. package/harnesses/_lib/tool-executor.mjs +0 -77
  268. package/harnesses/hardened/SAFETY.md +0 -53
  269. package/harnesses/hardened/agent.mjs +0 -57
  270. package/harnesses/hardened/archal-harness.json +0 -23
  271. package/harnesses/hardened/package.json +0 -12
  272. package/harnesses/naive/agent.mjs +0 -37
  273. package/harnesses/naive/archal-harness.json +0 -21
  274. package/harnesses/naive/package.json +0 -12
  275. package/harnesses/openclaw/AGENTS.md +0 -27
  276. package/harnesses/openclaw/SOUL.md +0 -12
  277. package/harnesses/openclaw/TOOLS.md +0 -20
  278. package/harnesses/openclaw/agent.mjs +0 -229
  279. package/harnesses/openclaw/archal-harness.json +0 -28
  280. package/harnesses/react/agent.mjs +0 -233
  281. package/harnesses/react/archal-harness.json +0 -22
  282. package/harnesses/react/package.json +0 -12
  283. package/harnesses/react/tool-selection.mjs +0 -66
  284. package/harnesses/zero-shot/agent.mjs +0 -31
  285. package/harnesses/zero-shot/archal-harness.json +0 -21
  286. package/harnesses/zero-shot/package.json +0 -12
  287. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  288. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  289. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  290. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  291. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  292. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  293. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  294. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  295. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  296. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  297. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  298. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  299. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  300. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  301. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  302. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  303. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  304. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  305. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  306. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  307. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  308. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  309. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  310. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  311. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  312. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  313. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  314. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  315. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  316. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  317. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  318. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  319. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  320. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  321. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  322. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  323. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  324. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  325. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  326. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  327. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  328. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  329. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  330. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  331. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  332. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  333. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  334. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  335. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  336. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  337. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  338. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  339. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  340. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  341. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  342. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  343. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  344. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  345. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  346. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  347. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  348. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  349. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  350. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  351. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  352. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  353. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  354. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  355. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  356. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  357. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  358. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  359. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  360. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  361. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  362. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  363. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  364. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  365. package/twin-assets/github/seeds/empty.json +0 -33
  366. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  367. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  368. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  369. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  370. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  371. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  372. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  373. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  374. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  375. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  376. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  377. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  378. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  379. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  380. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  381. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  382. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  383. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  384. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  385. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  386. package/twin-assets/github/seeds/rate-limited.json +0 -41
  387. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  388. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  389. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  390. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  391. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  392. package/twin-assets/github/seeds/small-project.json +0 -833
  393. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  394. package/twin-assets/github/seeds/stale-issues.json +0 -375
  395. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  396. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  397. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  398. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  399. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  400. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  401. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  402. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  403. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  404. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  405. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  406. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  407. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  408. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  409. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  410. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  411. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  412. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  413. package/twin-assets/jira/seeds/empty.json +0 -124
  414. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  415. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  416. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  417. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  418. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  419. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  420. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  421. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  422. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  423. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  424. package/twin-assets/jira/seeds/small-project.json +0 -246
  425. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  426. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  427. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  428. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  429. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  430. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  431. package/twin-assets/linear/seeds/empty.json +0 -171
  432. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  433. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  434. package/twin-assets/linear/seeds/harvested.json +0 -331
  435. package/twin-assets/linear/seeds/small-team.json +0 -584
  436. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  437. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  438. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  439. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  440. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  441. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  442. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  443. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  444. package/twin-assets/slack/seeds/empty.json +0 -136
  445. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  446. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  447. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  448. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  449. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  450. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  451. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  452. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  453. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  454. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  455. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  456. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  457. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  458. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  459. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  460. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  461. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  462. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  463. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  464. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  465. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  466. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  467. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  468. package/twin-assets/stripe/seeds/empty.json +0 -31
  469. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  470. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  471. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  472. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  473. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  474. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  475. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  476. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  477. package/twin-assets/stripe/seeds/small-business.json +0 -607
  478. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  479. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  480. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  481. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  482. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  483. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  484. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  485. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  486. package/twin-assets/supabase/seeds/empty.sql +0 -2
  487. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  488. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  489. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  490. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  491. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  492. package/twin-assets/telegram/seeds/empty.json +0 -1
  493. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,85 +0,0 @@
1
- /**
2
- * Shared MCP client helper for bundled harnesses.
3
- * Connects to cloud-hosted twins via HTTP MCP transport.
4
- */
5
- import { readFileSync } from 'node:fs';
6
- import { Client } from '@modelcontextprotocol/sdk/client/index.js';
7
- import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
8
- import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
9
-
10
/**
 * Connect to the first MCP server listed in the ARCHAL_MCP_CONFIG JSON file.
 *
 * StreamableHTTP is attempted first. If that attempt fails for any reason, an
 * SSE connection to the same URL is attempted with a fresh client instance.
 *
 * @param {string} configPath - Path to a JSON file containing an `mcpServers` map.
 * @returns {Promise<{ client: Client, serverName: string }>}
 * @throws {Error} When the path is empty, the config lists no servers, the
 *   first server has no `url`, or both transports fail to connect.
 */
export async function connectMcp(configPath) {
  if (!configPath) {
    throw new Error('ARCHAL_MCP_CONFIG is not set — no MCP server config available');
  }

  const config = JSON.parse(readFileSync(configPath, 'utf-8'));
  // A config without `mcpServers` is reported as "no servers" rather than
  // surfacing a TypeError from Object.keys(undefined).
  const servers = config?.mcpServers ?? {};
  const serverName = Object.keys(servers)[0];
  if (!serverName) {
    throw new Error('No MCP servers found in config');
  }

  const serverConfig = servers[serverName];
  const mcpUrl = serverConfig?.url;
  if (!mcpUrl) {
    throw new Error(`MCP server "${serverName}" has no URL — cannot connect via HTTP`);
  }

  // Forward headers from the config (e.g. auth headers) only when some are present.
  const headers = serverConfig.headers;
  const requestInit = headers && Object.keys(headers).length > 0 ? { headers } : undefined;

  const clientInfo = { name: 'archal-harness-agent', version: '1.0.0' };
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  // Try StreamableHTTP first (modern MCP transport).
  let streamableError;
  try {
    const client = new Client(clientInfo);
    const transport = new StreamableHTTPClientTransport(new URL(mcpUrl), { requestInit });
    await client.connect(transport);
    return { client, serverName };
  } catch (err) {
    // Kept so the final error can report why the preferred transport failed.
    streamableError = err;
  }

  // Fall back to SSE with a new client, so no state from the failed attempt
  // carries over into this one.
  try {
    const client = new Client(clientInfo);
    const transport = new SSEClientTransport(new URL(mcpUrl), { requestInit });
    await client.connect(transport);
    return { client, serverName };
  } catch (err) {
    throw new Error(
      `Failed to connect to MCP server "${serverName}" at ${mcpUrl}: ${describe(err)}`
        + ` (StreamableHTTP attempt failed first: ${describe(streamableError)})`,
      { cause: err },
    );
  }
}
59
-
60
/**
 * List the tools exposed by the MCP server in a normalized shape.
 * A missing description becomes '' and a missing input schema becomes {}.
 * @param {Client} client
 * @returns {Promise<Array<{ name: string, description: string, inputSchema: object }>>}
 */
export async function discoverTools(client) {
  const listing = await client.listTools();
  const normalized = [];
  for (const tool of listing.tools) {
    normalized.push({
      name: tool.name,
      description: tool.description ?? '',
      inputSchema: tool.inputSchema ?? {},
    });
  }
  return normalized;
}
73
-
74
/**
 * Invoke a tool on the MCP server and flatten its content parts to text.
 * Parts are joined with newlines; a part without `text` contributes an empty
 * string. When the result carries no `content` at all, 'No output' is returned.
 * @param {Client} client
 * @param {string} name
 * @param {object} args
 * @returns {Promise<string>}
 */
export async function callTool(client, name, args) {
  const request = { name, arguments: args ?? {} };
  const result = await client.callTool(request);
  const parts = result.content;
  if (parts === undefined || parts === null) {
    return 'No output';
  }
  return parts.map((part) => part.text ?? '').join('\n');
}
@@ -1,34 +0,0 @@
1
- /**
2
- * Structured metrics writer for archal harnesses.
3
- *
4
- * Writes a JSON metrics file to the path specified by ARCHAL_METRICS_FILE.
5
- * The orchestrator creates this path, reads it after the harness exits, and
6
- * flows the data into RunResult.tokenUsage and telemetry.
7
- *
8
- * Safe no-op when ARCHAL_METRICS_FILE is not set (external harnesses that
9
- * don't know about this protocol, or older orchestrator versions).
10
- *
11
- * @param {object} metrics
12
- * @param {number} metrics.inputTokens
13
- * @param {number} metrics.outputTokens
14
- * @param {number} metrics.llmCallCount
15
- * @param {number} metrics.toolCallCount
16
- * @param {number} metrics.toolErrorCount
17
- * @param {number} metrics.totalTimeMs
18
- * @param {string} metrics.exitReason
19
- * @param {string} [metrics.provider]
20
- * @param {string} [metrics.model]
21
- */
22
- import { writeFileSync } from 'node:fs';
23
-
24
export function writeMetrics(metrics) {
  const target = process.env.ARCHAL_METRICS_FILE;
  if (target) {
    try {
      // `version` is listed first so a `version` key in `metrics` overrides it.
      writeFileSync(target, JSON.stringify({ version: 1, ...metrics }));
    } catch {
      // Metrics are best-effort: a failed write must not break the harness.
    }
  }
}
@@ -1,521 +0,0 @@
1
- /**
2
- * Model configuration system for bundled harnesses.
3
- *
4
- * Provides default configs per model family, known capabilities,
5
- * and a merge function: hardcoded defaults -> model family defaults -> env overrides.
6
- *
7
- * Zero dependencies — pure data and functions.
8
- */
9
-
10
- // ── Model capabilities ──────────────────────────────────────────────
11
-
12
- /**
13
- * @typedef {Object} ModelCapabilities
14
- * @property {boolean} supportsTools - Can use function/tool calling
15
- * @property {boolean} supportsSystemPrompt - Accepts a system prompt
16
- * @property {boolean} supportsReasoning - Has reasoning/thinking mode (o1, o3, etc.)
17
- * @property {boolean} supportsThinking - Has extended thinking / reasoning trace (Anthropic, Gemini 2.5)
18
- * @property {number} maxContextWindow - Max context window in tokens
19
- * @property {boolean} supportsStreaming - Supports streaming responses
20
- */
21
-
22
- /**
23
- * @typedef {Object} ModelConfig
24
- * @property {number} [maxTokens] - Max completion tokens
25
- * @property {number} [temperature] - Sampling temperature
26
- * @property {string} [reasoningEffort] - For reasoning models: low/medium/high
27
- * @property {number} [topP] - Top-p sampling
28
- */
29
-
30
- /**
31
- * @typedef {'working' | 'degraded' | 'broken' | 'untested'} BenchmarkStatus
32
- */
33
-
34
- /**
35
- * @typedef {Object} ModelInfo
36
- * @property {string} family - Model family key
37
- * @property {string} provider - Provider name
38
- * @property {ModelCapabilities} capabilities
39
- * @property {ModelConfig} defaults - Default config for this model
40
- * @property {BenchmarkStatus} benchmarkStatus - Status from benchmark testing
41
- * @property {string} [benchmarkNotes] - Notes about benchmark performance
42
- */
43
-
44
- // ── Known model registry ────────────────────────────────────────────
45
-
46
/**
 * Registry of known models, keyed by lower-case model identifier.
 * Every entry declares all ModelCapabilities fields so lookups never yield
 * `undefined` for a capability flag.
 * @type {Record<string, ModelInfo>}
 */
const MODEL_REGISTRY = {
  // ── Anthropic ──
  'claude-opus-4-6': {
    family: 'claude-opus',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Top performer across all scenarios. Reliable tool use.',
  },
  'claude-sonnet-4-6': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Strong performance, good cost/quality balance.',
  },
  'claude-sonnet-4-20250514': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Solid tool use. Slightly behind claude-sonnet-4-6.',
  },
  'claude-haiku-4-5-20251001': {
    family: 'claude-haiku',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Struggles with multi-step reasoning.',
  },

  // ── OpenAI: GPT ──
  'gpt-4o': {
    family: 'gpt-4o',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Reliable tool use. Good all-around performer.',
  },
  'gpt-4o-mini': {
    family: 'gpt-4o-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Acceptable for simple scenarios.',
  },
  'gpt-4.1': {
    family: 'gpt-4.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Large context window. Strong at complex scenarios.',
  },

  'gpt-5.1': {
    family: 'gpt-5.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      // Previously omitted; set to match the other GPT entries.
      // NOTE(review): confirm this is the intended value for gpt-5.1.
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768 },
    benchmarkStatus: 'untested',
  },

  // ── OpenAI: Reasoning ──
  'o1': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Tool calling works but slow.',
  },
  'o1-mini': {
    family: 'o1-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Cheaper but less reliable.',
  },
  'o1-preview': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: false,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'broken',
    benchmarkNotes: 'No tool calling support. Cannot complete agentic tasks.',
  },
  'o3-mini': {
    family: 'o3-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Good reasoning, fast. No system prompt — task in user message.',
  },
  'o4-mini': {
    family: 'o4-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'untested',
  },

  // ── Gemini ──
  'gemini-2.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },

  // ── Gemini 3.x ──
  'gemini-3.0-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-3.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
};
321
-
322
- // ── Family defaults ─────────────────────────────────────────────────
323
-
324
/**
 * Default config per model family, keyed by the family names used in the
 * registry and returned by family detection.
 * @type {Record<string, ModelConfig>}
 */
const FAMILY_DEFAULTS = Object.fromEntries([
  // Anthropic
  ['claude-opus', { maxTokens: 32768, temperature: 0.2 }],
  ['claude-sonnet', { maxTokens: 32768, temperature: 0.2 }],
  ['claude-haiku', { maxTokens: 16384, temperature: 0.2 }],
  // OpenAI GPT
  ['gpt-4o', { maxTokens: 32768, temperature: 0.2 }],
  ['gpt-4o-mini', { maxTokens: 32768, temperature: 0.2 }],
  ['gpt-4.1', { maxTokens: 65536, temperature: 0.2 }],
  ['gpt-5.1', { maxTokens: 32768 }],
  // OpenAI reasoning: reasoningEffort instead of temperature
  ['o1', { maxTokens: 65536, reasoningEffort: 'medium' }],
  ['o1-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  ['o3-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  ['o4-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  // Gemini
  ['gemini-flash', { maxTokens: 16384, temperature: 0.2 }],
  ['gemini-pro', { maxTokens: 32768, temperature: 0.2 }],
]);
340
-
341
/**
 * Baseline config applied before family, model, and env overrides.
 * @type {ModelConfig}
 */
const GLOBAL_DEFAULTS = { maxTokens: 32768, temperature: 0.2 };
346
-
347
- // ── Lookup functions ────────────────────────────────────────────────
348
-
349
/**
 * Get the model info from the registry.
 *
 * The lookup is case-insensitive and only matches the registry's own keys, so
 * names that collide with Object.prototype members (e.g. 'constructor') are
 * treated as unknown.
 *
 * @param {string} model
 * @returns {ModelInfo | null} The registry entry, or null for unknown models.
 */
export function getModelInfo(model) {
  const key = String(model ?? '').toLowerCase();
  return Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)
    ? MODEL_REGISTRY[key]
    : null;
}
358
-
359
/**
 * Get the capabilities of a model.
 *
 * Registered models (matched case-insensitively, own keys only) return their
 * registry capabilities object. Anything else gets a fresh object of
 * permissive defaults.
 *
 * @param {string} model
 * @returns {ModelCapabilities}
 */
export function getModelCapabilities(model) {
  const key = String(model ?? '').toLowerCase();
  // Own-property check: names like 'toString' must not resolve to inherited
  // Object.prototype members, which have no `capabilities`.
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    return MODEL_REGISTRY[key].capabilities;
  }

  // Sensible defaults for unknown models — assume thinking is supported
  return {
    supportsTools: true,
    supportsSystemPrompt: true,
    supportsReasoning: false,
    supportsThinking: true,
    maxContextWindow: 128000,
    supportsStreaming: true,
  };
}
379
-
380
/**
 * Detect the model family from the model name.
 *
 * Tries an exact, case-insensitive lookup in the registry first and falls
 * back to prefix heuristics. The heuristics are order-sensitive: more
 * specific prefixes are tested before the broader ones that contain them.
 *
 * @param {string} model
 * @returns {string | null} The family key, or null when nothing matches.
 */
export function detectModelFamily(model) {
  const normalized = String(model ?? '').toLowerCase();
  // Own-property check so names such as 'constructor' do not resolve to an
  // inherited Object.prototype member (which would yield `undefined`).
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    return MODEL_REGISTRY[normalized].family;
  }

  // Prefix-based heuristic for unregistered models
  if (normalized.startsWith('claude-opus') || normalized.startsWith('opus-')) return 'claude-opus';
  if (normalized.startsWith('claude-sonnet') || normalized.startsWith('sonnet-')) return 'claude-sonnet';
  if (normalized.startsWith('claude-haiku') || normalized.startsWith('haiku-')) return 'claude-haiku';
  if (normalized.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
  if (normalized.startsWith('gpt-4o')) return 'gpt-4o';
  if (normalized.startsWith('gpt-4.1')) return 'gpt-4.1';
  if (normalized.startsWith('gpt-5')) return 'gpt-5.1';
  if (normalized.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
  if (normalized.startsWith('o1-mini')) return 'o1-mini';
  if (normalized.startsWith('o1')) return 'o1';
  if (normalized.startsWith('o3-mini')) return 'o3-mini';
  if (normalized.startsWith('o4-mini')) return 'o4-mini';
  if (normalized.startsWith('gemini') && normalized.includes('pro')) return 'gemini-pro';
  if (normalized.startsWith('gemini') && normalized.includes('flash')) return 'gemini-flash';

  return null;
}
409
-
410
- // ── Config merge ────────────────────────────────────────────────────
411
-
412
/**
 * Parse env var overrides for model config.
 *
 * Reads ARCHAL_MAX_TOKENS, ARCHAL_TEMPERATURE and ARCHAL_REASONING_EFFORT.
 * Only fields that are explicitly set to a valid value are returned; unset,
 * blank, or invalid values are ignored so the defaults stay in effect.
 *
 * Values are parsed strictly with Number(): the whole string must be numeric.
 * Partially numeric input such as '12abc' is ignored, and '1e5' is read as
 * 100000 rather than being truncated to 1.
 *
 * @returns {Partial<ModelConfig>}
 */
function getEnvOverrides() {
  /** @type {Partial<ModelConfig>} */
  const overrides = {};

  // Unset, empty, and whitespace-only values all count as "not provided".
  const readEnv = (name) => {
    const text = process.env[name]?.trim();
    return text ? text : undefined;
  };

  const maxTokens = readEnv('ARCHAL_MAX_TOKENS');
  if (maxTokens !== undefined) {
    const parsed = Number(maxTokens);
    // Must be a positive whole number of tokens.
    if (Number.isInteger(parsed) && parsed > 0) {
      overrides.maxTokens = parsed;
    }
  }

  const temperature = readEnv('ARCHAL_TEMPERATURE');
  if (temperature !== undefined) {
    const parsed = Number(temperature);
    if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 2) {
      overrides.temperature = parsed;
    }
  }

  const reasoning = readEnv('ARCHAL_REASONING_EFFORT')?.toLowerCase();
  if (reasoning !== undefined && ['low', 'medium', 'high'].includes(reasoning)) {
    overrides.reasoningEffort = reasoning;
  }

  return overrides;
}
446
-
447
/**
 * Get the merged configuration for a model.
 * Priority: env var overrides > model-specific defaults > family defaults > global defaults.
 *
 * The model-specific lookup is case-insensitive and restricted to the
 * registry's own keys, matching how the family is detected. Without that,
 * a differently-cased name would receive family defaults but miss its
 * model-specific ones.
 *
 * @param {string} model - Model identifier
 * @returns {ModelConfig} A new object; the default tables are not mutated.
 */
export function getModelConfig(model) {
  const key = String(model ?? '').toLowerCase();
  const family = detectModelFamily(model);
  const familyDefaults = family ? (FAMILY_DEFAULTS[family] ?? {}) : {};
  const modelDefaults = Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)
    ? (MODEL_REGISTRY[key].defaults ?? {})
    : {};
  const envOverrides = getEnvOverrides();

  // Later spreads win, which implements the documented priority order.
  return {
    ...GLOBAL_DEFAULTS,
    ...familyDefaults,
    ...modelDefaults,
    ...envOverrides,
  };
}
467
-
468
/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * Reasoning models don't support temperature and use reasoning_effort instead.
 *
 * Registered models (matched case-insensitively, own keys only) answer from
 * their `supportsReasoning` capability. Unregistered names fall back to a
 * pattern that accepts a bare series name ('o3') as well as a suffixed one
 * ('o3-pro').
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isReasoningModel(model) {
  const key = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    return Boolean(MODEL_REGISTRY[key].capabilities.supportsReasoning);
  }
  // Fallback heuristic: 'o1' / 'o3' / 'o4', alone or followed by '-…'.
  return /^o[134](?:-|$)/.test(key);
}
480
-
481
/**
 * Check if a model supports extended thinking (Anthropic thinking blocks, Gemini thinking parts).
 *
 * Registered models (matched case-insensitively, own keys only) answer from
 * their `supportsThinking` capability; an entry that omits the flag is treated
 * as supporting thinking. Every unregistered model is assumed to support
 * thinking. The earlier per-prefix heuristics all resolved to `true`, so they
 * are collapsed into that single default.
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isThinkingModel(model) {
  const normalized = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    // `?? true` keeps the return type boolean when an entry omits the flag.
    return MODEL_REGISTRY[normalized].capabilities.supportsThinking ?? true;
  }
  return true; // default to true for unknown models
}
501
-
502
/**
 * List the identifiers of every model in the registry, in registry order.
 * @returns {string[]}
 */
export function listKnownModels() {
  return Object.entries(MODEL_REGISTRY).map(([modelName]) => modelName);
}
509
-
510
/**
 * Group all known model names by their benchmark status.
 * All four status buckets are always present, even when empty; names keep
 * registry order within each bucket.
 * @returns {Record<BenchmarkStatus, string[]>}
 */
export function listModelsByStatus() {
  /** @type {Record<string, string[]>} */
  const buckets = { working: [], degraded: [], broken: [], untested: [] };
  Object.keys(MODEL_REGISTRY).forEach((modelName) => {
    const { benchmarkStatus } = MODEL_REGISTRY[modelName];
    buckets[benchmarkStatus].push(modelName);
  });
  return buckets;
}
@@ -1,39 +0,0 @@
1
- /**
2
- * Shared provider detection and LLM calling for bundled harnesses.
3
- *
4
- * This is a thin re-export facade. Implementation lives in:
5
- * - llm-config.mjs — provider detection, API key/base URL, timeout, thinking budget
6
- * - llm-call.mjs — callLlm, callLlmWithMessages, LlmApiError, withRetry
7
- * - llm-response.mjs — response parsing, message building, tool formatting
8
- */
9
-
10
- // ── Config ──────────────────────────────────────────────────────────
11
- export {
12
- detectProvider,
13
- resolveApiKey,
14
- resolveBaseUrl,
15
- isThinkingEnabled,
16
- } from './llm-config.mjs';
17
-
18
- // ── Calling ─────────────────────────────────────────────────────────
19
- export {
20
- callLlm,
21
- callLlmWithMessages,
22
- LlmApiError,
23
- withRetry,
24
- } from './llm-call.mjs';
25
-
26
- // ── Response parsing & message building ─────────────────────────────
27
- export {
28
- extractTokenUsage,
29
- formatToolsForProvider,
30
- parseToolCalls,
31
- getResponseText,
32
- getThinkingContent,
33
- getStopReason,
34
- buildInitialMessages,
35
- appendAssistantResponse,
36
- appendToolResults,
37
- appendUserInstruction,
38
- extractCallArgs,
39
- } from './llm-response.mjs';