@archal/cli 0.9.1 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (494)
  1. package/LICENSE +8 -0
  2. package/README.md +9 -14
  3. package/dist/index.cjs +35736 -30817
  4. package/package.json +32 -23
  5. package/twin-assets/google-workspace/fidelity.json +9 -0
  6. package/twin-assets/jira/fidelity.json +17 -17
  7. package/twin-assets/ramp/fidelity.json +22 -0
  8. package/twin-assets/slack/fidelity.json +6 -7
  9. package/dist/harnesses/_lib/agent-trace.mjs +0 -57
  10. package/dist/harnesses/_lib/env-utils.mjs +0 -23
  11. package/dist/harnesses/_lib/harness-runner.mjs +0 -373
  12. package/dist/harnesses/_lib/llm-call.mjs +0 -411
  13. package/dist/harnesses/_lib/llm-config.mjs +0 -209
  14. package/dist/harnesses/_lib/llm-response.mjs +0 -490
  15. package/dist/harnesses/_lib/logging.mjs +0 -176
  16. package/dist/harnesses/_lib/mcp-client.mjs +0 -85
  17. package/dist/harnesses/_lib/metrics.mjs +0 -34
  18. package/dist/harnesses/_lib/model-configs.mjs +0 -521
  19. package/dist/harnesses/_lib/providers.mjs +0 -39
  20. package/dist/harnesses/_lib/rest-client.mjs +0 -150
  21. package/dist/harnesses/_lib/tool-executor.mjs +0 -77
  22. package/dist/harnesses/hardened/SAFETY.md +0 -53
  23. package/dist/harnesses/hardened/agent.mjs +0 -57
  24. package/dist/harnesses/hardened/archal-harness.json +0 -23
  25. package/dist/harnesses/hardened/package.json +0 -12
  26. package/dist/harnesses/naive/agent.mjs +0 -37
  27. package/dist/harnesses/naive/archal-harness.json +0 -21
  28. package/dist/harnesses/naive/package.json +0 -12
  29. package/dist/harnesses/openclaw/AGENTS.md +0 -27
  30. package/dist/harnesses/openclaw/SOUL.md +0 -12
  31. package/dist/harnesses/openclaw/TOOLS.md +0 -20
  32. package/dist/harnesses/openclaw/agent.mjs +0 -229
  33. package/dist/harnesses/openclaw/archal-harness.json +0 -28
  34. package/dist/harnesses/react/agent.mjs +0 -233
  35. package/dist/harnesses/react/archal-harness.json +0 -22
  36. package/dist/harnesses/react/package.json +0 -12
  37. package/dist/harnesses/react/tool-selection.mjs +0 -66
  38. package/dist/harnesses/zero-shot/agent.mjs +0 -31
  39. package/dist/harnesses/zero-shot/archal-harness.json +0 -21
  40. package/dist/harnesses/zero-shot/package.json +0 -12
  41. package/dist/package.json +0 -72
  42. package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  43. package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  44. package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  45. package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  46. package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  47. package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  48. package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  49. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  50. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  51. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  52. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  53. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  54. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  55. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  56. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  57. package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  58. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  59. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  60. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  61. package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  62. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  63. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  64. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  65. package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  66. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  67. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  68. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  69. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  70. package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  71. package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  72. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  73. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  74. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  75. package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  76. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  77. package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  78. package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  79. package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  80. package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  81. package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  82. package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  83. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  84. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  85. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  86. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  87. package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  88. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  89. package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  90. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  91. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  92. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  93. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  94. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  95. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  96. package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  97. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  98. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  99. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  100. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  101. package/dist/twin-assets/github/fidelity.json +0 -13
  102. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  103. package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  104. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  105. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  106. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  107. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  108. package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  109. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  110. package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  111. package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  112. package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  113. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  114. package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  115. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  116. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  117. package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  118. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  119. package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  120. package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
  121. package/dist/twin-assets/github/seeds/empty.json +0 -33
  122. package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -268
  123. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  124. package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  125. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  126. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  127. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  128. package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  129. package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  130. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  131. package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
  132. package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
  133. package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  134. package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  135. package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
  136. package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  137. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  138. package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  139. package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  140. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  141. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  142. package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
  143. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  144. package/dist/twin-assets/github/seeds/refund-policy-override.json +0 -51
  145. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  146. package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  147. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  148. package/dist/twin-assets/github/seeds/small-project.json +0 -833
  149. package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  150. package/dist/twin-assets/github/seeds/stale-issues.json +0 -375
  151. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  152. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  153. package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  154. package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
  155. package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  156. package/dist/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  157. package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  158. package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  159. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  160. package/dist/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  161. package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  162. package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  163. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  164. package/dist/twin-assets/google-workspace/seeds/empty.json +0 -7
  165. package/dist/twin-assets/jira/fidelity.json +0 -40
  166. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  167. package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
  168. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  169. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  170. package/dist/twin-assets/jira/seeds/empty.json +0 -124
  171. package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
  172. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  173. package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
  174. package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
  175. package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  176. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  177. package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
  178. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  179. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  180. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  181. package/dist/twin-assets/jira/seeds/small-project.json +0 -246
  182. package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
  183. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  184. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  185. package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  186. package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  187. package/dist/twin-assets/linear/fidelity.json +0 -13
  188. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  189. package/dist/twin-assets/linear/seeds/empty.json +0 -171
  190. package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
  191. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  192. package/dist/twin-assets/linear/seeds/harvested.json +0 -331
  193. package/dist/twin-assets/linear/seeds/small-team.json +0 -584
  194. package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  195. package/dist/twin-assets/slack/fidelity.json +0 -14
  196. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  197. package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  198. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  199. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  200. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  201. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  202. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  203. package/dist/twin-assets/slack/seeds/empty.json +0 -136
  204. package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
  205. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  206. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  207. package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
  208. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  209. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  210. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  211. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  212. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  213. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  214. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  215. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  216. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  217. package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  218. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  219. package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  220. package/dist/twin-assets/stripe/fidelity.json +0 -22
  221. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  222. package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  223. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  224. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  225. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  226. package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  227. package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  228. package/dist/twin-assets/stripe/seeds/empty.json +0 -31
  229. package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  230. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  231. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  232. package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  233. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  234. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  235. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  236. package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  237. package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
  238. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  239. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  240. package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  241. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  242. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  243. package/dist/twin-assets/supabase/fidelity.json +0 -13
  244. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  245. package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  246. package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  247. package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
  248. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  249. package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  250. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  251. package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  252. package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
  253. package/dist/twin-assets/telegram/fidelity.json +0 -19
  254. package/dist/twin-assets/telegram/seeds/empty.json +0 -1
  255. package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
  256. package/harnesses/_lib/agent-trace.mjs +0 -57
  257. package/harnesses/_lib/env-utils.mjs +0 -23
  258. package/harnesses/_lib/harness-runner.mjs +0 -373
  259. package/harnesses/_lib/llm-call.mjs +0 -411
  260. package/harnesses/_lib/llm-config.mjs +0 -209
  261. package/harnesses/_lib/llm-response.mjs +0 -490
  262. package/harnesses/_lib/logging.mjs +0 -176
  263. package/harnesses/_lib/mcp-client.mjs +0 -85
  264. package/harnesses/_lib/metrics.mjs +0 -34
  265. package/harnesses/_lib/model-configs.mjs +0 -521
  266. package/harnesses/_lib/providers.mjs +0 -39
  267. package/harnesses/_lib/rest-client.mjs +0 -150
  268. package/harnesses/_lib/tool-executor.mjs +0 -77
  269. package/harnesses/hardened/SAFETY.md +0 -53
  270. package/harnesses/hardened/agent.mjs +0 -57
  271. package/harnesses/hardened/archal-harness.json +0 -23
  272. package/harnesses/hardened/package.json +0 -12
  273. package/harnesses/naive/agent.mjs +0 -37
  274. package/harnesses/naive/archal-harness.json +0 -21
  275. package/harnesses/naive/package.json +0 -12
  276. package/harnesses/openclaw/AGENTS.md +0 -27
  277. package/harnesses/openclaw/SOUL.md +0 -12
  278. package/harnesses/openclaw/TOOLS.md +0 -20
  279. package/harnesses/openclaw/agent.mjs +0 -229
  280. package/harnesses/openclaw/archal-harness.json +0 -28
  281. package/harnesses/react/agent.mjs +0 -233
  282. package/harnesses/react/archal-harness.json +0 -22
  283. package/harnesses/react/package.json +0 -12
  284. package/harnesses/react/tool-selection.mjs +0 -66
  285. package/harnesses/zero-shot/agent.mjs +0 -31
  286. package/harnesses/zero-shot/archal-harness.json +0 -21
  287. package/harnesses/zero-shot/package.json +0 -12
  288. package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
  289. package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
  290. package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
  291. package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
  292. package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
  293. package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
  294. package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
  295. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +0 -66
  296. package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
  297. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
  298. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +0 -86
  299. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +0 -60
  300. package/scenarios/multi-service/churn-save-offer-already-applied.md +0 -54
  301. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +0 -42
  302. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +0 -46
  303. package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
  304. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +0 -58
  305. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +0 -67
  306. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +0 -52
  307. package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
  308. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +0 -56
  309. package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
  310. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +0 -55
  311. package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
  312. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +0 -67
  313. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +0 -93
  314. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +0 -50
  315. package/scenarios/multi-service/investor-update-posted-to-general.md +0 -72
  316. package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
  317. package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
  318. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +0 -75
  319. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +0 -67
  320. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +0 -57
  321. package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
  322. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +0 -66
  323. package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
  324. package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
  325. package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
  326. package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
  327. package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
  328. package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
  329. package/scenarios/multi-service/pr-review-approver-is-author.md +0 -55
  330. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +0 -66
  331. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +0 -59
  332. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +0 -56
  333. package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
  334. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +0 -46
  335. package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
  336. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +0 -77
  337. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +0 -54
  338. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +0 -70
  339. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +0 -59
  340. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +0 -94
  341. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +0 -59
  342. package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
  343. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +0 -48
  344. package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
  345. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +0 -65
  346. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +0 -50
  347. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +0 -63
  348. package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
  349. package/twin-assets/github/seeds/bulk-user-deletion.json +0 -59
  350. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +0 -51
  351. package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
  352. package/twin-assets/github/seeds/ci-fix-exfil-env.json +0 -73
  353. package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
  354. package/twin-assets/github/seeds/codeowners-bypass-pr.json +0 -77
  355. package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
  356. package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
  357. package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
  358. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +0 -51
  359. package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
  360. package/twin-assets/github/seeds/customer-billing-pii-leak.json +0 -55
  361. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +0 -131
  362. package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
  363. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +0 -69
  364. package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
  365. package/twin-assets/github/seeds/double-refund-trap.json +0 -112
  366. package/twin-assets/github/seeds/empty.json +0 -33
  367. package/twin-assets/github/seeds/enterprise-repo.json +0 -268
  368. package/twin-assets/github/seeds/env-var-leak-via-debug.json +0 -64
  369. package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
  370. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +0 -63
  371. package/twin-assets/github/seeds/incremental-permissions-drift.json +0 -86
  372. package/twin-assets/github/seeds/investor-update-general-leak.json +0 -58
  373. package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
  374. package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
  375. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -67
  376. package/twin-assets/github/seeds/large-backlog.json +0 -1820
  377. package/twin-assets/github/seeds/merge-conflict.json +0 -66
  378. package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
  379. package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
  380. package/twin-assets/github/seeds/permissions-denied.json +0 -50
  381. package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
  382. package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -77
  383. package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
  384. package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
  385. package/twin-assets/github/seeds/pr-review-approver-is-author.json +0 -68
  386. package/twin-assets/github/seeds/q1-revenue-leak.json +0 -56
  387. package/twin-assets/github/seeds/rate-limited.json +0 -41
  388. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +0 -54
  389. package/twin-assets/github/seeds/refund-policy-override.json +0 -51
  390. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +0 -51
  391. package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
  392. package/twin-assets/github/seeds/release-notes-credential-leak.json +0 -63
  393. package/twin-assets/github/seeds/small-project.json +0 -833
  394. package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
  395. package/twin-assets/github/seeds/stale-issues.json +0 -375
  396. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +0 -67
  397. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +0 -51
  398. package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
  399. package/twin-assets/github/seeds/temporal-workflow.json +0 -389
  400. package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
  401. package/twin-assets/github/seeds/triage-policy-injection.json +0 -72
  402. package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
  403. package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
  404. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +0 -62
  405. package/twin-assets/github/seeds/webhook-url-swap.json +0 -65
  406. package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
  407. package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
  408. package/twin-assets/google-workspace/seeds/assistant-baseline.json +0 -95
  409. package/twin-assets/google-workspace/seeds/empty.json +0 -7
  410. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +0 -35
  411. package/twin-assets/jira/seeds/conflict-states.json +0 -162
  412. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +0 -26
  413. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +0 -14
  414. package/twin-assets/jira/seeds/empty.json +0 -124
  415. package/twin-assets/jira/seeds/enterprise.json +0 -3143
  416. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -14
  417. package/twin-assets/jira/seeds/large-backlog.json +0 -3377
  418. package/twin-assets/jira/seeds/permissions-denied.json +0 -143
  419. package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
  420. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +0 -14
  421. package/twin-assets/jira/seeds/rate-limited.json +0 -123
  422. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +0 -241
  423. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +0 -45
  424. package/twin-assets/jira/seeds/rls-bypass-migration.json +0 -185
  425. package/twin-assets/jira/seeds/small-project.json +0 -246
  426. package/twin-assets/jira/seeds/sprint-active.json +0 -1299
  427. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +0 -83
  428. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +0 -82
  429. package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
  430. package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
  431. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +0 -646
  432. package/twin-assets/linear/seeds/empty.json +0 -171
  433. package/twin-assets/linear/seeds/engineering-org.json +0 -874
  434. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +0 -237
  435. package/twin-assets/linear/seeds/harvested.json +0 -331
  436. package/twin-assets/linear/seeds/small-team.json +0 -584
  437. package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
  438. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +0 -261
  439. package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
  440. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +0 -25
  441. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +0 -19
  442. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +0 -301
  443. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +0 -25
  444. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +0 -26
  445. package/twin-assets/slack/seeds/empty.json +0 -136
  446. package/twin-assets/slack/seeds/engineering-team.json +0 -1966
  447. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +0 -27
  448. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +0 -22
  449. package/twin-assets/slack/seeds/incident-active.json +0 -1021
  450. package/twin-assets/slack/seeds/investor-update-general-leak.json +0 -274
  451. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +0 -18
  452. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +0 -18
  453. package/twin-assets/slack/seeds/q1-revenue-leak.json +0 -297
  454. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +0 -176
  455. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +0 -24
  456. package/twin-assets/slack/seeds/rls-bypass-migration.json +0 -28
  457. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +0 -28
  458. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +0 -27
  459. package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
  460. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +0 -349
  461. package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
  462. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +0 -42
  463. package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
  464. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +0 -47
  465. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +0 -45
  466. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +0 -274
  467. package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
  468. package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
  469. package/twin-assets/stripe/seeds/empty.json +0 -31
  470. package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
  471. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +0 -51
  472. package/twin-assets/stripe/seeds/investor-update-general-leak.json +0 -4154
  473. package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
  474. package/twin-assets/stripe/seeds/q1-revenue-leak.json +0 -559
  475. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +0 -343
  476. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +0 -44
  477. package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
  478. package/twin-assets/stripe/seeds/small-business.json +0 -607
  479. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +0 -46
  480. package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
  481. package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
  482. package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
  483. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +0 -64
  484. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +0 -122
  485. package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
  486. package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
  487. package/twin-assets/supabase/seeds/empty.sql +0 -2
  488. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +0 -112
  489. package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
  490. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +0 -125
  491. package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
  492. package/twin-assets/supabase/seeds/small-project.sql +0 -134
  493. package/twin-assets/telegram/seeds/empty.json +0 -1
  494. package/twin-assets/telegram/seeds/harvested.json +0 -130
@@ -1,85 +0,0 @@
1
- /**
2
- * Shared MCP client helper for bundled harnesses.
3
- * Connects to cloud-hosted twins via HTTP MCP transport.
4
- */
5
- import { readFileSync } from 'node:fs';
6
- import { Client } from '@modelcontextprotocol/sdk/client/index.js';
7
- import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
8
- import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
9
-
10
/**
 * Connect to the first MCP server listed in the ARCHAL_MCP_CONFIG JSON file.
 *
 * The StreamableHTTP transport is tried first; if that attempt fails the SSE
 * transport is tried. Each attempt gets its own Client instance so a client
 * left half-connected by the failed attempt is never reused.
 *
 * @param {string} configPath - Path to the JSON config file (expected shape:
 *   `{ mcpServers: { [name]: { url, headers? } } }`).
 * @returns {Promise<{ client: Client, serverName: string }>}
 * @throws {Error} When no path is given, the config lists no server, the
 *   server entry has no URL, or both transports fail to connect.
 */
export async function connectMcp(configPath) {
  if (!configPath) {
    throw new Error('ARCHAL_MCP_CONFIG is not set — no MCP server config available');
  }

  const config = JSON.parse(readFileSync(configPath, 'utf-8'));
  // A config without an `mcpServers` object is reported as "no servers"
  // instead of surfacing an opaque TypeError from Object.keys(undefined).
  const servers = config?.mcpServers ?? {};
  const serverName = Object.keys(servers)[0];
  if (!serverName) {
    throw new Error('No MCP servers found in config');
  }

  const serverConfig = servers[serverName];
  const mcpUrl = serverConfig?.url;
  if (!mcpUrl) {
    throw new Error(`MCP server "${serverName}" has no URL — cannot connect via HTTP`);
  }

  const clientInfo = { name: 'archal-harness-agent', version: '1.0.0' };

  // Forward auth headers from the config when any are present.
  const headers = serverConfig.headers;
  const requestInit = headers && Object.keys(headers).length > 0 ? { headers } : undefined;

  // Attempt 1: StreamableHTTP (modern MCP transport).
  let streamableHttpError;
  try {
    const client = new Client(clientInfo);
    await client.connect(new StreamableHTTPClientTransport(new URL(mcpUrl), { requestInit }));
    return { client, serverName };
  } catch (err) {
    // Remembered so it can be attached to the final error if SSE fails too.
    streamableHttpError = err;
  }

  // Attempt 2: SSE transport, with a fresh client.
  try {
    const client = new Client(clientInfo);
    await client.connect(new SSEClientTransport(new URL(mcpUrl), { requestInit }));
    return { client, serverName };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    const failure = new Error(
      `Failed to connect to MCP server "${serverName}" at ${mcpUrl}: ${reason}`,
      { cause: err },
    );
    failure.streamableHttpError = streamableHttpError;
    throw failure;
  }
}
59
-
60
/**
 * List the tools exposed by the MCP server in a normalised shape.
 *
 * @param {Client} client - Connected MCP client; only `listTools()` is used.
 * @returns {Promise<Array<{ name: string, description: string, inputSchema: object }>>}
 *   One entry per tool; a missing description becomes '' and a missing
 *   input schema becomes {}.
 */
export async function discoverTools(client) {
  const listing = await client.listTools();

  const summarise = (tool) => {
    const summary = { name: tool.name };
    summary.description = tool.description ?? '';
    summary.inputSchema = tool.inputSchema ?? {};
    return summary;
  };

  return listing.tools.map((tool) => summarise(tool));
}
73
-
74
/**
 * Call a tool on the MCP server and return its text content.
 *
 * The `text` of every content part is joined with newlines (parts without
 * text contribute an empty string). When the result carries no content, or
 * the joined text is empty, the placeholder 'No output' is returned.
 *
 * @param {Client} client - Connected MCP client; only `callTool()` is used.
 * @param {string} name - Tool name.
 * @param {object} [args] - Tool arguments; defaults to {} when null/undefined.
 * @returns {Promise<string>}
 */
export async function callTool(client, name, args) {
  const result = await client.callTool({ name, arguments: args ?? {} });
  const parts = Array.isArray(result?.content) ? result.content : [];
  const text = parts.map((part) => part?.text ?? '').join('\n');
  // An empty content array used to yield '' because `??` only guards
  // null/undefined; fall back explicitly on empty output instead.
  return text === '' ? 'No output' : text;
}
@@ -1,34 +0,0 @@
1
- /**
2
- * Structured metrics writer for archal harnesses.
3
- *
4
- * Writes a JSON metrics file to the path specified by ARCHAL_METRICS_FILE.
5
- * The orchestrator creates this path, reads it after the harness exits, and
6
- * flows the data into RunResult.tokenUsage and telemetry.
7
- *
8
- * Safe no-op when ARCHAL_METRICS_FILE is not set (external harnesses that
9
- * don't know about this protocol, or older orchestrator versions).
10
- *
11
- * @param {object} metrics
12
- * @param {number} metrics.inputTokens
13
- * @param {number} metrics.outputTokens
14
- * @param {number} metrics.llmCallCount
15
- * @param {number} metrics.toolCallCount
16
- * @param {number} metrics.toolErrorCount
17
- * @param {number} metrics.totalTimeMs
18
- * @param {string} metrics.exitReason
19
- * @param {string} [metrics.provider]
20
- * @param {string} [metrics.model]
21
- */
22
- import { writeFileSync } from 'node:fs';
23
-
24
/**
 * Serialise the harness metrics as JSON to the file named by the
 * ARCHAL_METRICS_FILE environment variable.
 *
 * Does nothing when the variable is unset or empty. The payload is the
 * given metrics with a leading `version: 1` field (a `version` key in
 * `metrics` takes precedence). Any failure while building or writing the
 * payload is ignored.
 *
 * @param {object} metrics - Metrics fields to persist.
 * @returns {void}
 */
export function writeMetrics(metrics) {
  const target = process.env.ARCHAL_METRICS_FILE;
  if (target) {
    try {
      writeFileSync(target, JSON.stringify({ version: 1, ...metrics }));
    } catch {
      // Best-effort only: a failed metrics write must not fail the harness.
    }
  }
}
@@ -1,521 +0,0 @@
1
- /**
2
- * Model configuration system for bundled harnesses.
3
- *
4
- * Provides default configs per model family, known capabilities,
5
- * and a merge function: hardcoded defaults -> model family defaults -> env overrides.
6
- *
7
- * Zero dependencies — pure data and functions.
8
- */
9
-
10
- // ── Model capabilities ──────────────────────────────────────────────
11
-
12
- /**
13
- * @typedef {Object} ModelCapabilities
14
- * @property {boolean} supportsTools - Can use function/tool calling
15
- * @property {boolean} supportsSystemPrompt - Accepts a system prompt
16
- * @property {boolean} supportsReasoning - Has reasoning/thinking mode (o1, o3, etc.)
17
- * @property {boolean} supportsThinking - Has extended thinking / reasoning trace (Anthropic, Gemini 2.5)
18
- * @property {number} maxContextWindow - Max context window in tokens
19
- * @property {boolean} supportsStreaming - Supports streaming responses
20
- */
21
-
22
- /**
23
- * @typedef {Object} ModelConfig
24
- * @property {number} [maxTokens] - Max completion tokens
25
- * @property {number} [temperature] - Sampling temperature
26
- * @property {string} [reasoningEffort] - For reasoning models: low/medium/high
27
- * @property {number} [topP] - Top-p sampling
28
- */
29
-
30
- /**
31
- * @typedef {'working' | 'degraded' | 'broken' | 'untested'} BenchmarkStatus
32
- */
33
-
34
- /**
35
- * @typedef {Object} ModelInfo
36
- * @property {string} family - Model family key
37
- * @property {string} provider - Provider name
38
- * @property {ModelCapabilities} capabilities
39
- * @property {ModelConfig} defaults - Default config for this model
40
- * @property {BenchmarkStatus} benchmarkStatus - Status from benchmark testing
41
- * @property {string} [benchmarkNotes] - Notes about benchmark performance
42
- */
43
-
44
- // ── Known model registry ────────────────────────────────────────────
45
-
46
/**
 * Registry of known models, keyed by lowercase model identifier.
 *
 * Every entry declares the full ModelCapabilities set so lookups never
 * yield `undefined` for a capability flag.
 *
 * @type {Record<string, ModelInfo>}
 */
const MODEL_REGISTRY = {
  // ── Anthropic ──
  'claude-opus-4-6': {
    family: 'claude-opus',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Top performer across all scenarios. Reliable tool use.',
  },
  'claude-sonnet-4-6': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Strong performance, good cost/quality balance.',
  },
  'claude-sonnet-4-20250514': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Solid tool use. Slightly behind claude-sonnet-4-6.',
  },
  'claude-haiku-4-5-20251001': {
    family: 'claude-haiku',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Struggles with multi-step reasoning.',
  },

  // ── OpenAI: GPT ──
  'gpt-4o': {
    family: 'gpt-4o',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Reliable tool use. Good all-around performer.',
  },
  'gpt-4o-mini': {
    family: 'gpt-4o-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Acceptable for simple scenarios.',
  },
  'gpt-4.1': {
    family: 'gpt-4.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Large context window. Strong at complex scenarios.',
  },

  'gpt-5.1': {
    family: 'gpt-5.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      // Previously omitted; set to match the other GPT entries and the
      // default assumed for unknown models.
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768 },
    benchmarkStatus: 'untested',
  },

  // ── OpenAI: Reasoning ──
  'o1': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Tool calling works but slow.',
  },
  'o1-mini': {
    family: 'o1-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Cheaper but less reliable.',
  },
  'o1-preview': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: false,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'broken',
    benchmarkNotes: 'No tool calling support. Cannot complete agentic tasks.',
  },
  'o3-mini': {
    family: 'o3-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Good reasoning, fast. No system prompt — task in user message.',
  },
  'o4-mini': {
    family: 'o4-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'untested',
  },

  // ── Gemini ──
  'gemini-2.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },

  // ── Gemini 3.x ──
  'gemini-3.0-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-3.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
};
321
-
322
- // ── Family defaults ─────────────────────────────────────────────────
323
-
324
/**
 * Default ModelConfig per model family, keyed by family name.
 *
 * @type {Record<string, ModelConfig>}
 */
const FAMILY_DEFAULTS = Object.fromEntries([
  // Anthropic
  ['claude-opus', { maxTokens: 32768, temperature: 0.2 }],
  ['claude-sonnet', { maxTokens: 32768, temperature: 0.2 }],
  ['claude-haiku', { maxTokens: 16384, temperature: 0.2 }],
  // OpenAI GPT
  ['gpt-4o', { maxTokens: 32768, temperature: 0.2 }],
  ['gpt-4o-mini', { maxTokens: 32768, temperature: 0.2 }],
  ['gpt-4.1', { maxTokens: 65536, temperature: 0.2 }],
  ['gpt-5.1', { maxTokens: 32768 }],
  // OpenAI o-series: reasoningEffort instead of temperature
  ['o1', { maxTokens: 65536, reasoningEffort: 'medium' }],
  ['o1-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  ['o3-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  ['o4-mini', { maxTokens: 32768, reasoningEffort: 'medium' }],
  // Gemini
  ['gemini-flash', { maxTokens: 16384, temperature: 0.2 }],
  ['gemini-pro', { maxTokens: 32768, temperature: 0.2 }],
]);
340
-
341
/**
 * Baseline ModelConfig used as the first layer of the config merge.
 *
 * @type {ModelConfig}
 */
const GLOBAL_DEFAULTS = { maxTokens: 32768, temperature: 0.2 };
346
-
347
- // ── Lookup functions ────────────────────────────────────────────────
348
-
349
/**
 * Look up a model in the registry.
 *
 * The identifier is lower-cased before the lookup (the same normalisation
 * detectModelFamily and isThinkingModel apply), and only the registry's own
 * keys match — names like 'constructor' resolve to null.
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {ModelInfo | null} The registry entry, or null for unknown models.
 */
export function getModelInfo(model) {
  const key = String(model ?? '').toLowerCase();
  return Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)
    ? MODEL_REGISTRY[key]
    : null;
}
358
-
359
/**
 * Get the capabilities of a model.
 *
 * Registered models (matched case-insensitively, own registry keys only)
 * return their declared capabilities layered over the unknown-model
 * defaults, so a flag missing from a registry entry is still defined.
 * Unknown models get the defaults. A fresh object is returned on every
 * call, so callers cannot mutate the registry through it.
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {ModelCapabilities}
 */
export function getModelCapabilities(model) {
  // Defaults for unknown models — assume thinking is supported.
  const fallback = {
    supportsTools: true,
    supportsSystemPrompt: true,
    supportsReasoning: false,
    supportsThinking: true,
    maxContextWindow: 128000,
    supportsStreaming: true,
  };

  const key = String(model ?? '').toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    return fallback;
  }
  return { ...fallback, ...MODEL_REGISTRY[key].capabilities };
}
379
-
380
/**
 * Resolve the family key for a model name.
 *
 * The lower-cased name is looked up in the registry first; when it is not
 * registered, an ordered list of prefix rules is applied (more specific
 * prefixes are listed before the broader ones they overlap with).
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {string | null} Family key, or null when nothing matches.
 */
export function detectModelFamily(model) {
  const id = String(model ?? '').toLowerCase();

  const registered = MODEL_REGISTRY[id];
  if (registered) return registered.family;

  // Ordered [prefixes, family] rules; the first rule with a matching prefix wins.
  const prefixRules = [
    [['claude-opus', 'opus-'], 'claude-opus'],
    [['claude-sonnet', 'sonnet-'], 'claude-sonnet'],
    [['claude-haiku', 'haiku-'], 'claude-haiku'],
    [['gpt-4o-mini'], 'gpt-4o-mini'],
    [['gpt-4o'], 'gpt-4o'],
    [['gpt-4.1'], 'gpt-4.1'],
    [['gpt-5'], 'gpt-5.1'],
    [['gpt-4'], 'gpt-4o'], // assume 4o-class
    [['o1-mini'], 'o1-mini'],
    [['o1'], 'o1'],
    [['o3-mini'], 'o3-mini'],
    [['o4-mini'], 'o4-mini'],
  ];
  for (const [prefixes, family] of prefixRules) {
    if (prefixes.some((prefix) => id.startsWith(prefix))) {
      return family;
    }
  }

  // Gemini names are classified by tier keyword; 'pro' is checked before 'flash'.
  if (id.startsWith('gemini')) {
    if (id.includes('pro')) return 'gemini-pro';
    if (id.includes('flash')) return 'gemini-flash';
  }

  return null;
}
409
-
410
- // ── Config merge ────────────────────────────────────────────────────
411
-
412
/**
 * Parse env var overrides for model config.
 *
 * Reads ARCHAL_MAX_TOKENS, ARCHAL_TEMPERATURE and ARCHAL_REASONING_EFFORT.
 * Only fields that are explicitly set AND valid are returned; anything else
 * is ignored. Values are trimmed and parsed strictly with Number(), so a
 * malformed value such as '32k' or '0.5x' is rejected rather than being
 * truncated to its numeric prefix.
 *
 * - maxTokens: positive integer.
 * - temperature: number in [0, 2].
 * - reasoningEffort: 'low' | 'medium' | 'high' (case-insensitive).
 *
 * @returns {Partial<ModelConfig>}
 */
function getEnvOverrides() {
  /** @type {Partial<ModelConfig>} */
  const overrides = {};

  // Unset and whitespace-only variables both read as ''.
  const readEnv = (name) => (process.env[name] ?? '').trim();

  const maxTokens = readEnv('ARCHAL_MAX_TOKENS');
  if (maxTokens !== '') {
    const parsed = Number(maxTokens);
    if (Number.isSafeInteger(parsed) && parsed > 0) {
      overrides.maxTokens = parsed;
    }
  }

  const temperature = readEnv('ARCHAL_TEMPERATURE');
  if (temperature !== '') {
    const parsed = Number(temperature);
    // NaN fails both comparisons, so no separate NaN check is needed.
    if (parsed >= 0 && parsed <= 2) {
      overrides.temperature = parsed;
    }
  }

  const reasoning = readEnv('ARCHAL_REASONING_EFFORT').toLowerCase();
  if (['low', 'medium', 'high'].includes(reasoning)) {
    overrides.reasoningEffort = reasoning;
  }

  return overrides;
}
446
-
447
/**
 * Get the merged configuration for a model.
 * Priority: env var overrides > model-specific defaults > family defaults > global defaults.
 *
 * Model-specific defaults are looked up with the lower-cased identifier
 * (own registry keys only), matching the normalisation detectModelFamily
 * applies, so a differently-cased name resolves both layers consistently.
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {ModelConfig}
 */
export function getModelConfig(model) {
  const key = String(model ?? '').toLowerCase();

  const family = detectModelFamily(model);
  const familyDefaults = family ? (FAMILY_DEFAULTS[family] ?? {}) : {};

  const isRegistered = Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key);
  const modelDefaults = isRegistered ? (MODEL_REGISTRY[key].defaults ?? {}) : {};

  // Later spreads win, which yields the priority order documented above.
  return {
    ...GLOBAL_DEFAULTS,
    ...familyDefaults,
    ...modelDefaults,
    ...getEnvOverrides(),
  };
}
467
-
468
/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * Reasoning models don't support temperature and use reasoning_effort instead.
 *
 * Registered models (matched case-insensitively, own registry keys only)
 * report their `supportsReasoning` capability. Unregistered names fall back
 * to a pattern that accepts a bare series id ('o3') as well as a suffixed
 * one ('o3-pro').
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {boolean}
 */
export function isReasoningModel(model) {
  const key = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    return Boolean(MODEL_REGISTRY[key].capabilities.supportsReasoning);
  }
  // Fallback heuristic: 'o1' / 'o3' / 'o4', alone or followed by '-suffix'.
  return /^o[134](-|$)/.test(key);
}
480
-
481
/**
 * Check if a model supports extended thinking (Anthropic thinking blocks, Gemini thinking parts).
 *
 * Registered models (matched case-insensitively, own registry keys only)
 * report their `supportsThinking` capability; an entry that omits the flag
 * is treated as supporting thinking. Every unregistered model is assumed to
 * support thinking.
 *
 * @param {string} model - Model identifier (case-insensitive).
 * @returns {boolean}
 */
export function isThinkingModel(model) {
  const key = String(model ?? '').toLowerCase();
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key)) {
    // `?? true` keeps the return type boolean when an entry lacks the flag.
    return MODEL_REGISTRY[key].capabilities.supportsThinking ?? true;
  }
  // The former per-provider prefix checks all returned true, as did the
  // final default, so they collapse into this single default.
  return true;
}
501
-
502
/**
 * List the identifiers of every model in the registry, in registry order.
 *
 * @returns {string[]}
 */
export function listKnownModels() {
  return Object.entries(MODEL_REGISTRY).map(([modelName]) => modelName);
}
509
-
510
/**
 * Group the registry's model names by their benchmark status.
 *
 * All four status buckets are always present in the result, even when empty;
 * names appear within a bucket in registry order.
 *
 * @returns {Record<BenchmarkStatus, string[]>}
 */
export function listModelsByStatus() {
  /** @type {Record<string, string[]>} */
  const buckets = { working: [], degraded: [], broken: [], untested: [] };
  Object.keys(MODEL_REGISTRY).forEach((modelName) => {
    const status = MODEL_REGISTRY[modelName].benchmarkStatus;
    buckets[status].push(modelName);
  });
  return buckets;
}
@@ -1,39 +0,0 @@
1
- /**
2
- * Shared provider detection and LLM calling for bundled harnesses.
3
- *
4
- * This is a thin re-export facade. Implementation lives in:
5
- * - llm-config.mjs — provider detection, API key/base URL, timeout, thinking budget
6
- * - llm-call.mjs — callLlm, callLlmWithMessages, LlmApiError, withRetry
7
- * - llm-response.mjs — response parsing, message building, tool formatting
8
- */
9
-
10
- // ── Config ──────────────────────────────────────────────────────────
11
- export {
12
- detectProvider,
13
- resolveApiKey,
14
- resolveBaseUrl,
15
- isThinkingEnabled,
16
- } from './llm-config.mjs';
17
-
18
- // ── Calling ─────────────────────────────────────────────────────────
19
- export {
20
- callLlm,
21
- callLlmWithMessages,
22
- LlmApiError,
23
- withRetry,
24
- } from './llm-call.mjs';
25
-
26
- // ── Response parsing & message building ─────────────────────────────
27
- export {
28
- extractTokenUsage,
29
- formatToolsForProvider,
30
- parseToolCalls,
31
- getResponseText,
32
- getThinkingContent,
33
- getStopReason,
34
- buildInitialMessages,
35
- appendAssistantResponse,
36
- appendToolResults,
37
- appendUserInstruction,
38
- extractCallArgs,
39
- } from './llm-response.mjs';