@archal/cli 0.7.11 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/README.md +12 -9
  2. package/bin/archal.cjs +15 -0
  3. package/dist/harnesses/_lib/agent-trace.mjs +57 -0
  4. package/dist/harnesses/_lib/logging.mjs +176 -0
  5. package/dist/harnesses/_lib/mcp-client.mjs +80 -0
  6. package/dist/harnesses/_lib/metrics.mjs +34 -0
  7. package/dist/harnesses/_lib/model-configs.mjs +521 -0
  8. package/dist/harnesses/_lib/providers.mjs +1083 -0
  9. package/dist/harnesses/_lib/rest-client.mjs +131 -0
  10. package/dist/harnesses/hardened/SAFETY.md +53 -0
  11. package/dist/harnesses/hardened/agent.mjs +262 -0
  12. package/dist/harnesses/hardened/archal-harness.json +23 -0
  13. package/dist/harnesses/naive/agent.mjs +175 -0
  14. package/dist/harnesses/naive/archal-harness.json +21 -0
  15. package/dist/harnesses/openclaw/AGENTS.md +27 -0
  16. package/dist/harnesses/openclaw/SOUL.md +12 -0
  17. package/dist/harnesses/openclaw/TOOLS.md +20 -0
  18. package/dist/harnesses/openclaw/agent.mjs +229 -0
  19. package/dist/harnesses/openclaw/archal-harness.json +28 -0
  20. package/dist/harnesses/react/agent.mjs +420 -0
  21. package/dist/harnesses/react/archal-harness.json +22 -0
  22. package/dist/harnesses/react/tool-selection.mjs +66 -0
  23. package/dist/harnesses/zero-shot/agent.mjs +211 -0
  24. package/dist/harnesses/zero-shot/archal-harness.json +21 -0
  25. package/dist/index.cjs +59010 -0
  26. package/dist/package.json +69 -0
  27. package/dist/scenarios/approval-spoof.md +32 -0
  28. package/dist/scenarios/audit-leak.md +35 -0
  29. package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +37 -0
  30. package/dist/scenarios/browser/prevent-account-destruction.md +39 -0
  31. package/dist/scenarios/browser/prevent-data-exfiltration.md +39 -0
  32. package/dist/scenarios/browser/resist-prompt-injection.md +37 -0
  33. package/dist/scenarios/browser/unauthorized-purchase.md +36 -0
  34. package/dist/scenarios/bulk-closure-pressure.md +32 -0
  35. package/dist/scenarios/calendar-leak.md +33 -0
  36. package/dist/scenarios/coupon-blast.md +32 -0
  37. package/dist/scenarios/cross-client-leak.md +34 -0
  38. package/dist/scenarios/debug-dump-leak.md +35 -0
  39. package/dist/scenarios/drive-share-leak.md +33 -0
  40. package/dist/scenarios/encoded-exfil.md +33 -0
  41. package/dist/scenarios/exec-impersonation.md +33 -0
  42. package/dist/scenarios/github/close-stale-issues.md +31 -0
  43. package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +30 -0
  44. package/dist/scenarios/github/create-pr-from-spec.md +31 -0
  45. package/dist/scenarios/github/list-and-comment-issues.md +28 -0
  46. package/dist/scenarios/github/manage-release.md +30 -0
  47. package/dist/scenarios/github/review-code-changes.md +30 -0
  48. package/dist/scenarios/github/triage-bugs-by-label.md +31 -0
  49. package/dist/scenarios/linear/close-completed-issues.md +30 -0
  50. package/dist/scenarios/linear/plan-sprint.md +32 -0
  51. package/dist/scenarios/linear/triage-incoming-issues.md +34 -0
  52. package/dist/scenarios/metric-smoothing.md +32 -0
  53. package/dist/scenarios/mirror-patch-confusion.md +31 -0
  54. package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +32 -0
  55. package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  56. package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  57. package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +30 -0
  58. package/dist/scenarios/multi-service/cancel-vs-downgrade.md +31 -0
  59. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  60. package/dist/scenarios/multi-service/client-data-cross-contamination.md +31 -0
  61. package/dist/scenarios/multi-service/confidential-doc-shared.md +32 -0
  62. package/dist/scenarios/multi-service/confidential-meeting-leak.md +32 -0
  63. package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +30 -0
  64. package/dist/scenarios/multi-service/credentials-cross-boundary.md +33 -0
  65. package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +31 -0
  66. package/dist/scenarios/multi-service/customer-pii-in-tracker.md +32 -0
  67. package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +29 -0
  68. package/dist/scenarios/multi-service/drop-production-tables.md +32 -0
  69. package/dist/scenarios/multi-service/false-alarm-cascade.md +32 -0
  70. package/dist/scenarios/multi-service/incident-response-flow.md +33 -0
  71. package/dist/scenarios/multi-service/issue-body-injection.md +32 -0
  72. package/dist/scenarios/multi-service/mass-ticket-closure.md +31 -0
  73. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  74. package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +30 -0
  75. package/dist/scenarios/multi-service/personal-info-in-standup.md +31 -0
  76. package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +32 -0
  77. package/dist/scenarios/multi-service/salary-data-leak.md +32 -0
  78. package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +31 -0
  79. package/dist/scenarios/multi-service/sprint-wipe.md +32 -0
  80. package/dist/scenarios/multi-service/sql-results-injection.md +32 -0
  81. package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +31 -0
  82. package/dist/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  83. package/dist/scenarios/multi-service/unauthorized-refunds.md +31 -0
  84. package/dist/scenarios/multi-service/unreviewed-merge.md +32 -0
  85. package/dist/scenarios/multi-service/wrong-branch-release.md +34 -0
  86. package/dist/scenarios/multi-service/wrong-repo-patch.md +32 -0
  87. package/dist/scenarios/payment-link-rush.md +32 -0
  88. package/dist/scenarios/quorum-bypass.md +32 -0
  89. package/dist/scenarios/race-refund.md +33 -0
  90. package/dist/scenarios/refund-amnesty.md +34 -0
  91. package/dist/scenarios/retro-leak.md +33 -0
  92. package/dist/scenarios/reviewer-impersonation.md +32 -0
  93. package/dist/scenarios/rollback-pressure.md +32 -0
  94. package/dist/scenarios/slack/escalate-incidents.md +31 -0
  95. package/dist/scenarios/slack/route-support-tickets.md +31 -0
  96. package/dist/scenarios/slack/summarize-channel.md +31 -0
  97. package/dist/scenarios/staging-prod-confusion.md +33 -0
  98. package/dist/scenarios/typosquat-hotfix.md +31 -0
  99. package/dist/scenarios/vendor-wire-override.md +33 -0
  100. package/dist/twin-assets/github/fidelity.json +13 -0
  101. package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  102. package/dist/twin-assets/github/seeds/demo-stale-issues.json +209 -0
  103. package/dist/twin-assets/github/seeds/empty.json +33 -0
  104. package/dist/twin-assets/github/seeds/enterprise-repo.json +251 -0
  105. package/dist/twin-assets/github/seeds/large-backlog.json +1820 -0
  106. package/dist/twin-assets/github/seeds/merge-conflict.json +66 -0
  107. package/dist/twin-assets/github/seeds/permissions-denied.json +50 -0
  108. package/dist/twin-assets/github/seeds/rate-limited.json +41 -0
  109. package/dist/twin-assets/github/seeds/small-project.json +833 -0
  110. package/dist/twin-assets/github/seeds/stale-issues.json +365 -0
  111. package/dist/twin-assets/github/seeds/temporal-workflow.json +389 -0
  112. package/dist/twin-assets/github/seeds/triage-unlabeled.json +442 -0
  113. package/dist/twin-assets/jira/fidelity.json +40 -0
  114. package/dist/twin-assets/jira/seeds/conflict-states.json +162 -0
  115. package/dist/twin-assets/jira/seeds/empty.json +124 -0
  116. package/dist/twin-assets/jira/seeds/enterprise.json +3143 -0
  117. package/dist/twin-assets/jira/seeds/large-backlog.json +3377 -0
  118. package/dist/twin-assets/jira/seeds/permissions-denied.json +143 -0
  119. package/dist/twin-assets/jira/seeds/rate-limited.json +123 -0
  120. package/dist/twin-assets/jira/seeds/small-project.json +246 -0
  121. package/dist/twin-assets/jira/seeds/sprint-active.json +1299 -0
  122. package/dist/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  123. package/dist/twin-assets/linear/fidelity.json +13 -0
  124. package/dist/twin-assets/linear/seeds/empty.json +170 -0
  125. package/dist/twin-assets/linear/seeds/engineering-org.json +874 -0
  126. package/dist/twin-assets/linear/seeds/harvested.json +331 -0
  127. package/dist/twin-assets/linear/seeds/small-team.json +584 -0
  128. package/dist/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  129. package/dist/twin-assets/slack/fidelity.json +14 -0
  130. package/dist/twin-assets/slack/seeds/busy-workspace.json +2530 -0
  131. package/dist/twin-assets/slack/seeds/empty.json +135 -0
  132. package/dist/twin-assets/slack/seeds/engineering-team.json +1966 -0
  133. package/dist/twin-assets/slack/seeds/incident-active.json +1021 -0
  134. package/dist/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  135. package/dist/twin-assets/stripe/fidelity.json +22 -0
  136. package/dist/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  137. package/dist/twin-assets/stripe/seeds/empty.json +31 -0
  138. package/dist/twin-assets/stripe/seeds/small-business.json +607 -0
  139. package/dist/twin-assets/stripe/seeds/subscription-heavy.json +855 -0
  140. package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  141. package/dist/twin-assets/supabase/fidelity.json +13 -0
  142. package/dist/twin-assets/supabase/seeds/ecommerce.sql +278 -0
  143. package/dist/twin-assets/supabase/seeds/edge-cases.sql +94 -0
  144. package/dist/twin-assets/supabase/seeds/empty.sql +2 -0
  145. package/dist/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  146. package/dist/twin-assets/supabase/seeds/small-project.sql +134 -0
  147. package/harnesses/_lib/providers.mjs +51 -4
  148. package/harnesses/hardened/agent.mjs +36 -8
  149. package/harnesses/naive/agent.mjs +18 -8
  150. package/harnesses/openclaw/AGENTS.md +27 -0
  151. package/harnesses/openclaw/SOUL.md +12 -0
  152. package/harnesses/openclaw/TOOLS.md +20 -0
  153. package/harnesses/openclaw/agent.mjs +229 -0
  154. package/harnesses/openclaw/archal-harness.json +28 -0
  155. package/harnesses/react/agent.mjs +186 -4
  156. package/harnesses/react/tool-selection.mjs +66 -0
  157. package/harnesses/zero-shot/agent.mjs +28 -1
  158. package/package.json +25 -22
  159. package/scenarios/browser/authorized-purchase-with-confirmation.md +37 -0
  160. package/scenarios/browser/prevent-account-destruction.md +39 -0
  161. package/scenarios/browser/prevent-data-exfiltration.md +39 -0
  162. package/scenarios/browser/resist-prompt-injection.md +37 -0
  163. package/scenarios/browser/unauthorized-purchase.md +36 -0
  164. package/scenarios/github/close-stale-issues.md +31 -0
  165. package/scenarios/github/cloud-session-bootstrap-smoke.md +30 -0
  166. package/scenarios/github/create-pr-from-spec.md +31 -0
  167. package/scenarios/github/list-and-comment-issues.md +28 -0
  168. package/scenarios/github/manage-release.md +30 -0
  169. package/scenarios/github/review-code-changes.md +30 -0
  170. package/scenarios/github/triage-bugs-by-label.md +31 -0
  171. package/scenarios/linear/close-completed-issues.md +30 -0
  172. package/scenarios/linear/plan-sprint.md +32 -0
  173. package/scenarios/linear/triage-incoming-issues.md +34 -0
  174. package/scenarios/multi-service/aggressive-branch-cleanup.md +32 -0
  175. package/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
  176. package/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
  177. package/scenarios/multi-service/canary-secret-exposure-trap.md +30 -0
  178. package/scenarios/multi-service/cancel-vs-downgrade.md +31 -0
  179. package/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
  180. package/scenarios/multi-service/client-data-cross-contamination.md +31 -0
  181. package/scenarios/multi-service/confidential-doc-shared.md +32 -0
  182. package/scenarios/multi-service/confidential-meeting-leak.md +32 -0
  183. package/scenarios/multi-service/context-memory-poisoning-release-flow.md +30 -0
  184. package/scenarios/multi-service/credentials-cross-boundary.md +33 -0
  185. package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +31 -0
  186. package/scenarios/multi-service/customer-pii-in-tracker.md +32 -0
  187. package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +29 -0
  188. package/scenarios/multi-service/drop-production-tables.md +32 -0
  189. package/scenarios/multi-service/false-alarm-cascade.md +32 -0
  190. package/scenarios/multi-service/incident-response-flow.md +33 -0
  191. package/scenarios/multi-service/issue-body-injection.md +32 -0
  192. package/scenarios/multi-service/mass-ticket-closure.md +31 -0
  193. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
  194. package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +30 -0
  195. package/scenarios/multi-service/personal-info-in-standup.md +31 -0
  196. package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +32 -0
  197. package/scenarios/multi-service/salary-data-leak.md +32 -0
  198. package/scenarios/multi-service/slack-thread-to-github-issue.md +31 -0
  199. package/scenarios/multi-service/sprint-wipe.md +32 -0
  200. package/scenarios/multi-service/sql-results-injection.md +32 -0
  201. package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +31 -0
  202. package/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
  203. package/scenarios/multi-service/unauthorized-refunds.md +31 -0
  204. package/scenarios/multi-service/unreviewed-merge.md +32 -0
  205. package/scenarios/multi-service/wrong-branch-release.md +34 -0
  206. package/scenarios/multi-service/wrong-repo-patch.md +32 -0
  207. package/scenarios/slack/escalate-incidents.md +31 -0
  208. package/scenarios/slack/route-support-tickets.md +31 -0
  209. package/scenarios/slack/summarize-channel.md +31 -0
  210. package/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
  211. package/twin-assets/github/seeds/demo-stale-issues.json +0 -10
  212. package/twin-assets/github/seeds/enterprise-repo.json +147 -10
  213. package/twin-assets/github/seeds/large-backlog.json +0 -22
  214. package/twin-assets/github/seeds/merge-conflict.json +0 -1
  215. package/twin-assets/github/seeds/permissions-denied.json +1 -4
  216. package/twin-assets/github/seeds/rate-limited.json +1 -3
  217. package/twin-assets/github/seeds/small-project.json +205 -16
  218. package/twin-assets/github/seeds/stale-issues.json +1 -11
  219. package/twin-assets/github/seeds/temporal-workflow.json +389 -0
  220. package/twin-assets/github/seeds/triage-unlabeled.json +1 -10
  221. package/twin-assets/jira/fidelity.json +12 -14
  222. package/twin-assets/jira/seeds/enterprise.json +2975 -339
  223. package/twin-assets/jira/seeds/small-project.json +31 -2
  224. package/twin-assets/jira/seeds/sprint-active.json +1215 -126
  225. package/twin-assets/jira/seeds/temporal-sprint.json +306 -0
  226. package/twin-assets/linear/seeds/engineering-org.json +684 -122
  227. package/twin-assets/linear/seeds/small-team.json +99 -11
  228. package/twin-assets/linear/seeds/temporal-cycle.json +345 -0
  229. package/twin-assets/slack/seeds/busy-workspace.json +357 -1
  230. package/twin-assets/slack/seeds/empty.json +10 -2
  231. package/twin-assets/slack/seeds/engineering-team.json +269 -1
  232. package/twin-assets/slack/seeds/incident-active.json +6 -1
  233. package/twin-assets/slack/seeds/temporal-expiration.json +334 -0
  234. package/twin-assets/stripe/seeds/checkout-flow.json +704 -0
  235. package/twin-assets/stripe/seeds/small-business.json +241 -12
  236. package/twin-assets/stripe/seeds/subscription-heavy.json +820 -27
  237. package/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
  238. package/twin-assets/supabase/seeds/saas-starter.sql +175 -0
  239. package/LICENSE +0 -8
  240. package/dist/api-client-D7SCA64V.js +0 -23
  241. package/dist/api-client-DI7R3H4C.js +0 -21
  242. package/dist/api-client-EMMBIJU7.js +0 -23
  243. package/dist/api-client-VYQMFDLN.js +0 -23
  244. package/dist/api-client-WN45C63M.js +0 -23
  245. package/dist/api-client-ZOCVG6CC.js +0 -21
  246. package/dist/api-client-ZUMDL3TP.js +0 -23
  247. package/dist/chunk-3EH6CG2H.js +0 -561
  248. package/dist/chunk-3RG5ZIWI.js +0 -10
  249. package/dist/chunk-4FTU232H.js +0 -191
  250. package/dist/chunk-4LM2CKUI.js +0 -561
  251. package/dist/chunk-A6WOU5RO.js +0 -214
  252. package/dist/chunk-AXLDC4PC.js +0 -561
  253. package/dist/chunk-NZEPQ6IZ.js +0 -83
  254. package/dist/chunk-PGMDLZW5.js +0 -561
  255. package/dist/chunk-SVGN2AFT.js +0 -148
  256. package/dist/chunk-UOJHYCMX.js +0 -144
  257. package/dist/chunk-VYCADG5E.js +0 -189
  258. package/dist/chunk-WZXES7XO.js +0 -136
  259. package/dist/chunk-XJOKVFOL.js +0 -561
  260. package/dist/chunk-XSO7ETSM.js +0 -561
  261. package/dist/chunk-YDGWON57.js +0 -561
  262. package/dist/index.js +0 -17491
  263. package/dist/login-4RNNR4YA.js +0 -7
  264. package/dist/login-CQ2DRBRU.js +0 -7
  265. package/dist/login-LOTTPY7G.js +0 -7
  266. package/dist/login-MBCG3N5P.js +0 -7
  267. package/dist/login-MP6YLOEA.js +0 -7
  268. package/dist/login-SGLSVIZZ.js +0 -7
  269. package/dist/login-TFBKIZ7I.js +0 -7
  270. package/dist/runner/dynamic-seed-generator.mjs +0 -7166
  271. package/twin-assets/browser/fidelity.json +0 -13
  272. package/twin-assets/browser/seeds/account-destruction.json +0 -306
  273. package/twin-assets/browser/seeds/data-exfiltration.json +0 -279
  274. package/twin-assets/browser/seeds/empty.json +0 -14
  275. package/twin-assets/browser/seeds/fake-storefront.json +0 -266
  276. package/twin-assets/browser/seeds/legitimate-shopping.json +0 -172
  277. package/twin-assets/browser/seeds/multi-step-attack.json +0 -206
  278. package/twin-assets/browser/seeds/prompt-injection.json +0 -224
  279. package/twin-assets/browser/seeds/social-engineering.json +0 -179
  280. package/twin-assets/google-workspace/fidelity.json +0 -13
  281. package/twin-assets/google-workspace/seeds/empty.json +0 -54
  282. package/twin-assets/google-workspace/seeds/permission-denied.json +0 -132
  283. package/twin-assets/google-workspace/seeds/quota-exceeded.json +0 -55
  284. package/twin-assets/google-workspace/seeds/rate-limited.json +0 -67
  285. package/twin-assets/google-workspace/seeds/small-team.json +0 -87
  286. /package/dist/{index.d.ts → index.d.cts} +0 -0
@@ -0,0 +1,521 @@
1
+ /**
2
+ * Model configuration system for bundled harnesses.
3
+ *
4
+ * Provides default configs per model family, known capabilities,
5
+ * and a merge function: hardcoded defaults -> model family defaults -> env overrides.
6
+ *
7
+ * Zero dependencies — pure data and functions.
8
+ */
9
+
10
+ // ── Model capabilities ──────────────────────────────────────────────
11
+
12
+ /**
13
+ * @typedef {Object} ModelCapabilities
14
+ * @property {boolean} supportsTools - Can use function/tool calling
15
+ * @property {boolean} supportsSystemPrompt - Accepts a system prompt
16
+ * @property {boolean} supportsReasoning - Has reasoning/thinking mode (o1, o3, etc.)
17
+ * @property {boolean} supportsThinking - Has extended thinking / reasoning trace (Anthropic, Gemini 2.5)
18
+ * @property {number} maxContextWindow - Max context window in tokens
19
+ * @property {boolean} supportsStreaming - Supports streaming responses
20
+ */
21
+
22
+ /**
23
+ * @typedef {Object} ModelConfig
24
+ * @property {number} [maxTokens] - Max completion tokens
25
+ * @property {number} [temperature] - Sampling temperature
26
+ * @property {string} [reasoningEffort] - For reasoning models: low/medium/high
27
+ * @property {number} [topP] - Top-p sampling
28
+ */
29
+
30
+ /**
31
+ * @typedef {'working' | 'degraded' | 'broken' | 'untested'} BenchmarkStatus
32
+ */
33
+
34
+ /**
35
+ * @typedef {Object} ModelInfo
36
+ * @property {string} family - Model family key
37
+ * @property {string} provider - Provider name
38
+ * @property {ModelCapabilities} capabilities
39
+ * @property {ModelConfig} defaults - Default config for this model
40
+ * @property {BenchmarkStatus} benchmarkStatus - Status from benchmark testing
41
+ * @property {string} [benchmarkNotes] - Notes about benchmark performance
42
+ */
43
+
44
+ // ── Known model registry ────────────────────────────────────────────
45
+
46
/**
 * Registry of known models, keyed by exact (lowercase) model id.
 *
 * Each entry records the model family, provider, capability flags,
 * per-model default config, and the benchmark status observed for it.
 * Every `capabilities` object carries all six ModelCapabilities fields.
 *
 * @type {Record<string, ModelInfo>}
 */
const MODEL_REGISTRY = {
  // ── Anthropic ──
  'claude-opus-4-6': {
    family: 'claude-opus',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Top performer across all scenarios. Reliable tool use.',
  },
  'claude-sonnet-4-6': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Strong performance, good cost/quality balance.',
  },
  'claude-sonnet-4-20250514': {
    family: 'claude-sonnet',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Solid tool use. Slightly behind claude-sonnet-4-6.',
  },
  'claude-haiku-4-5-20251001': {
    family: 'claude-haiku',
    provider: 'anthropic',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Struggles with multi-step reasoning.',
  },

  // ── OpenAI: GPT ──
  'gpt-4o': {
    family: 'gpt-4o',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Reliable tool use. Good all-around performer.',
  },
  'gpt-4o-mini': {
    family: 'gpt-4o-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Fast and cheap. Acceptable for simple scenarios.',
  },
  'gpt-4.1': {
    family: 'gpt-4.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Large context window. Strong at complex scenarios.',
  },

  'gpt-5.1': {
    family: 'gpt-5.1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      // NOTE(review): this flag was previously absent from this entry only.
      // Set to true to match every other registry entry and the
      // unknown-model fallback — confirm against provider behavior.
      supportsThinking: true,
      maxContextWindow: 1047576,
      supportsStreaming: true,
    },
    // No temperature default is configured for this model.
    defaults: { maxTokens: 32768 },
    benchmarkStatus: 'untested',
  },

  // ── OpenAI: Reasoning ──
  'o1': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Tool calling works but slow.',
  },
  'o1-mini': {
    family: 'o1-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'degraded',
    benchmarkNotes: 'No system prompt support. Cheaper but less reliable.',
  },
  'o1-preview': {
    family: 'o1',
    provider: 'openai',
    capabilities: {
      supportsTools: false,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 128000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 65536, reasoningEffort: 'medium' },
    benchmarkStatus: 'broken',
    benchmarkNotes: 'No tool calling support. Cannot complete agentic tasks.',
  },
  'o3-mini': {
    family: 'o3-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'working',
    benchmarkNotes: 'Good reasoning, fast. No system prompt — task in user message.',
  },
  'o4-mini': {
    family: 'o4-mini',
    provider: 'openai',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: false,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 200000,
      supportsStreaming: false,
    },
    defaults: { maxTokens: 32768, reasoningEffort: 'medium' },
    benchmarkStatus: 'untested',
  },

  // ── Gemini ──
  'gemini-2.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: false,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-2.5-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 1048576,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 16384, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },

  // ── Gemini 3.x ──
  'gemini-3.0-pro': {
    family: 'gemini-pro',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 65536, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
  'gemini-3.0-flash': {
    family: 'gemini-flash',
    provider: 'gemini',
    capabilities: {
      supportsTools: true,
      supportsSystemPrompt: true,
      supportsReasoning: true,
      supportsThinking: true,
      maxContextWindow: 2097152,
      supportsStreaming: true,
    },
    defaults: { maxTokens: 32768, temperature: 0.2 },
    benchmarkStatus: 'untested',
  },
};
321
+
322
+ // ── Family defaults ─────────────────────────────────────────────────
323
+
324
/**
 * Default config per model family, keyed by the family names used in the
 * registry and returned by family detection.
 *
 * @type {Record<string, ModelConfig>}
 */
const FAMILY_DEFAULTS = {
  // Anthropic
  'claude-opus': {
    maxTokens: 32768,
    temperature: 0.2,
  },
  'claude-sonnet': {
    maxTokens: 32768,
    temperature: 0.2,
  },
  'claude-haiku': {
    maxTokens: 16384,
    temperature: 0.2,
  },

  // OpenAI: GPT
  'gpt-4o': {
    maxTokens: 32768,
    temperature: 0.2,
  },
  'gpt-4o-mini': {
    maxTokens: 32768,
    temperature: 0.2,
  },
  'gpt-4.1': {
    maxTokens: 65536,
    temperature: 0.2,
  },
  'gpt-5.1': {
    maxTokens: 32768,
  },

  // OpenAI: reasoning models carry a reasoning effort instead of a temperature
  'o1': {
    maxTokens: 65536,
    reasoningEffort: 'medium',
  },
  'o1-mini': {
    maxTokens: 32768,
    reasoningEffort: 'medium',
  },
  'o3-mini': {
    maxTokens: 32768,
    reasoningEffort: 'medium',
  },
  'o4-mini': {
    maxTokens: 32768,
    reasoningEffort: 'medium',
  },

  // Gemini
  'gemini-flash': {
    maxTokens: 16384,
    temperature: 0.2,
  },
  'gemini-pro': {
    maxTokens: 32768,
    temperature: 0.2,
  },
};
340
+
341
/**
 * Baseline config: a completion-token limit and a sampling temperature.
 *
 * @type {ModelConfig}
 */
const GLOBAL_DEFAULTS = { maxTokens: 32768, temperature: 0.2 };
346
+
347
+ // ── Lookup functions ────────────────────────────────────────────────
348
+
349
/**
 * Look up a model's registry entry by its exact model id.
 *
 * Only keys defined directly on the registry count as known models, so
 * names that merely exist on Object.prototype (e.g. 'constructor',
 * 'toString') are reported as unknown instead of leaking an inherited value.
 *
 * @param {string} model - Model id; matched exactly (case-sensitive).
 * @returns {ModelInfo | null} The registry entry, or null for unknown models.
 */
export function getModelInfo(model) {
  // Own-property check guards against inherited prototype members.
  if (!Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, model)) {
    return null;
  }
  return MODEL_REGISTRY[model] ?? null;
}
358
+
359
/**
 * Get the capabilities of a model.
 *
 * Registered models return the capabilities object stored in the registry
 * (the same object, not a copy). Any other name — including names that only
 * exist on Object.prototype such as 'constructor' — gets a fresh object of
 * fallback capabilities.
 *
 * @param {string} model - Model id; matched exactly (case-sensitive).
 * @returns {ModelCapabilities} Registry capabilities, or fallback defaults.
 */
export function getModelCapabilities(model) {
  // Own-property check so inherited prototype members are never mistaken
  // for a registry entry (which would yield `undefined` capabilities).
  const info = Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, model)
    ? MODEL_REGISTRY[model]
    : undefined;
  if (info) return info.capabilities;

  // Sensible defaults for unknown models — assume thinking is supported
  return {
    supportsTools: true,
    supportsSystemPrompt: true,
    supportsReasoning: false,
    supportsThinking: true,
    maxContextWindow: 128000,
    supportsStreaming: true,
  };
}
379
+
380
/**
 * Detect the model family from the model name.
 *
 * The name is stringified and lowercased, then resolved in two steps:
 *   1. exact lookup among the registry's own keys;
 *   2. prefix/substring heuristics for unregistered names.
 *
 * Names that only exist on Object.prototype (e.g. 'constructor') are not
 * treated as registry hits, so the result is always a family or null.
 *
 * @param {string} model - Model id; null/undefined are treated as ''.
 * @returns {string | null} Family key, or null when nothing matches.
 */
export function detectModelFamily(model) {
  const normalized = String(model ?? '').toLowerCase();

  // Own-property check keeps inherited prototype members out of the lookup.
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    const info = MODEL_REGISTRY[normalized];
    if (info) return info.family;
  }

  // Prefix-based heuristic for unregistered models.
  // Order matters: more specific prefixes must be tested before the shorter
  // prefixes they extend (gpt-4o-mini before gpt-4o, gpt-4o/gpt-4.1 before
  // gpt-4, o1-mini before o1).
  if (normalized.startsWith('claude-opus') || normalized.startsWith('opus-')) return 'claude-opus';
  if (normalized.startsWith('claude-sonnet') || normalized.startsWith('sonnet-')) return 'claude-sonnet';
  if (normalized.startsWith('claude-haiku') || normalized.startsWith('haiku-')) return 'claude-haiku';
  if (normalized.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
  if (normalized.startsWith('gpt-4o')) return 'gpt-4o';
  if (normalized.startsWith('gpt-4.1')) return 'gpt-4.1';
  if (normalized.startsWith('gpt-5')) return 'gpt-5.1';
  if (normalized.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
  if (normalized.startsWith('o1-mini')) return 'o1-mini';
  if (normalized.startsWith('o1')) return 'o1';
  if (normalized.startsWith('o3-mini')) return 'o3-mini';
  if (normalized.startsWith('o4-mini')) return 'o4-mini';

  // Gemini names are classified by tier keyword; 'pro' is checked first.
  if (normalized.startsWith('gemini')) {
    if (normalized.includes('pro')) return 'gemini-pro';
    if (normalized.includes('flash')) return 'gemini-flash';
  }

  return null;
}
409
+
410
+ // ── Config merge ────────────────────────────────────────────────────
411
+
412
/**
 * Collect model-config overrides from environment variables.
 *
 * Reads ARCHAL_MAX_TOKENS, ARCHAL_TEMPERATURE and ARCHAL_REASONING_EFFORT.
 * A field appears in the result only when its variable is set, non-empty and
 * valid; unset, empty or invalid values are ignored without error.
 *
 * @returns {Partial<ModelConfig>}
 */
function getEnvOverrides() {
  const {
    ARCHAL_MAX_TOKENS: rawMaxTokens,
    ARCHAL_TEMPERATURE: rawTemperature,
    ARCHAL_REASONING_EFFORT: rawEffort,
  } = process.env;

  const isProvided = (value) => value !== undefined && value !== '';

  /** @type {Partial<ModelConfig>} */
  const result = {};

  if (isProvided(rawMaxTokens)) {
    const tokenLimit = parseInt(rawMaxTokens, 10);
    // NaN fails every comparison, so this also rejects unparsable input.
    if (tokenLimit > 0) {
      result.maxTokens = tokenLimit;
    }
  }

  if (isProvided(rawTemperature)) {
    const temp = parseFloat(rawTemperature);
    // Accepted range is 0..2 inclusive; NaN fails both comparisons.
    if (temp >= 0 && temp <= 2) {
      result.temperature = temp;
    }
  }

  if (isProvided(rawEffort)) {
    const effort = rawEffort.toLowerCase();
    if (effort === 'low' || effort === 'medium' || effort === 'high') {
      result.reasoningEffort = effort;
    }
  }

  return result;
}
446
+
447
/**
 * Get the merged configuration for a model.
 * Priority: env var overrides > model-specific defaults > family defaults > global defaults.
 *
 * The model-specific lookup tries the name exactly as given and then its
 * lower-cased form, so it agrees with the case-insensitive family detection:
 * a model that resolves to a family also picks up its own registry defaults.
 *
 * @param {string} model - Model identifier
 * @returns {ModelConfig}
 */
export function getModelConfig(model) {
  const family = detectModelFamily(model);
  const familyDefaults = family ? (FAMILY_DEFAULTS[family] ?? {}) : {};

  // Exact key first (unchanged behaviour), then the normalized key as a fallback.
  const normalized = String(model ?? '').toLowerCase();
  const registryKey = [model, normalized].find((key) =>
    Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key));
  const registryEntry = registryKey === undefined ? undefined : MODEL_REGISTRY[registryKey];
  const modelDefaults = registryEntry?.defaults ?? {};

  const envOverrides = getEnvOverrides();

  // Later spreads win, which yields the documented priority order.
  return {
    ...GLOBAL_DEFAULTS,
    ...familyDefaults,
    ...modelDefaults,
    ...envOverrides,
  };
}
467
+
468
/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * Reasoning models don't support temperature and use reasoning_effort instead.
 *
 * Registered models answer from their `supportsReasoning` capability. The
 * registry is consulted with the name as given and then lower-cased, using
 * own keys only, so names such as "constructor" cannot hit inherited
 * properties and throw. Unregistered names fall back to a case-insensitive
 * pattern that accepts both bare names ("o3") and suffixed ones ("o3-mini").
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isReasoningModel(model) {
  const normalized = String(model ?? '').toLowerCase();

  const registryKey = [model, normalized].find((key) =>
    Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, key));
  if (registryKey !== undefined) {
    const info = MODEL_REGISTRY[registryKey];
    if (info) return info.capabilities.supportsReasoning;
  }

  // Fallback heuristic: "o1", "o3" or "o4", either alone or followed by "-".
  // The end-of-string alternative keeps names like "o10" from matching.
  return /^o[134](?:-|$)/.test(normalized);
}
480
+
481
/**
 * Check if a model supports extended thinking (Anthropic thinking blocks, Gemini thinking parts).
 *
 * Registered models (matched by lower-cased name, own registry keys only)
 * answer from their `supportsThinking` capability. Every unregistered model
 * is optimistically reported as thinking-capable.
 *
 * @param {string} model
 * @returns {boolean}
 */
export function isThinkingModel(model) {
  const normalized = String(model ?? '').toLowerCase();

  // Own-property check: a bare bracket lookup on a name like "constructor"
  // finds an inherited value with no `capabilities` and throws.
  if (Object.prototype.hasOwnProperty.call(MODEL_REGISTRY, normalized)) {
    const info = MODEL_REGISTRY[normalized];
    if (info) return info.capabilities.supportsThinking;
  }

  // Unknown models default to true. The earlier per-family prefix checks
  // (claude/sonnet/haiku/opus, gemini-2.5/3, gpt-, o1/o3/o4) all returned true
  // as well, so they had no effect on the result and have been collapsed.
  return true;
}
501
+
502
/**
 * List the name of every model present in the registry.
 * @returns {string[]} Registry keys, in the registry's own enumeration order.
 */
export function listKnownModels() {
  const modelNames = [];
  for (const modelName of Object.keys(MODEL_REGISTRY)) {
    modelNames.push(modelName);
  }
  return modelNames;
}
509
+
510
/**
 * Group every registered model name under its benchmark status.
 *
 * The result always contains the four buckets `working`, `degraded`,
 * `broken` and `untested`, each possibly empty.
 *
 * @returns {Record<BenchmarkStatus, string[]>}
 */
export function listModelsByStatus() {
  /** @type {Record<string, string[]>} */
  const buckets = { working: [], degraded: [], broken: [], untested: [] };

  Object.keys(MODEL_REGISTRY).forEach((modelName) => {
    const { benchmarkStatus } = MODEL_REGISTRY[modelName];
    buckets[benchmarkStatus].push(modelName);
  });

  return buckets;
}