@archal/cli 0.7.7 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -384,24 +384,25 @@ export function getModelCapabilities(model) {
384
384
  * @returns {string | null}
385
385
  */
386
386
  export function detectModelFamily(model) {
387
- const info = MODEL_REGISTRY[model];
387
+ const normalized = String(model ?? '').toLowerCase();
388
+ const info = MODEL_REGISTRY[normalized];
388
389
  if (info) return info.family;
389
390
 
390
391
  // Prefix-based heuristic for unregistered models
391
- if (model.startsWith('claude-opus')) return 'claude-opus';
392
- if (model.startsWith('claude-sonnet')) return 'claude-sonnet';
393
- if (model.startsWith('claude-haiku')) return 'claude-haiku';
394
- if (model.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
395
- if (model.startsWith('gpt-4o')) return 'gpt-4o';
396
- if (model.startsWith('gpt-4.1')) return 'gpt-4.1';
397
- if (model.startsWith('gpt-5')) return 'gpt-5.1';
398
- if (model.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
399
- if (model.startsWith('o1-mini')) return 'o1-mini';
400
- if (model.startsWith('o1')) return 'o1';
401
- if (model.startsWith('o3-mini')) return 'o3-mini';
402
- if (model.startsWith('o4-mini')) return 'o4-mini';
403
- if (model.startsWith('gemini') && model.includes('pro')) return 'gemini-pro';
404
- if (model.startsWith('gemini') && model.includes('flash')) return 'gemini-flash';
392
+ if (normalized.startsWith('claude-opus') || normalized.startsWith('opus-')) return 'claude-opus';
393
+ if (normalized.startsWith('claude-sonnet') || normalized.startsWith('sonnet-')) return 'claude-sonnet';
394
+ if (normalized.startsWith('claude-haiku') || normalized.startsWith('haiku-')) return 'claude-haiku';
395
+ if (normalized.startsWith('gpt-4o-mini')) return 'gpt-4o-mini';
396
+ if (normalized.startsWith('gpt-4o')) return 'gpt-4o';
397
+ if (normalized.startsWith('gpt-4.1')) return 'gpt-4.1';
398
+ if (normalized.startsWith('gpt-5')) return 'gpt-5.1';
399
+ if (normalized.startsWith('gpt-4')) return 'gpt-4o'; // assume 4o-class
400
+ if (normalized.startsWith('o1-mini')) return 'o1-mini';
401
+ if (normalized.startsWith('o1')) return 'o1';
402
+ if (normalized.startsWith('o3-mini')) return 'o3-mini';
403
+ if (normalized.startsWith('o4-mini')) return 'o4-mini';
404
+ if (normalized.startsWith('gemini') && normalized.includes('pro')) return 'gemini-pro';
405
+ if (normalized.startsWith('gemini') && normalized.includes('flash')) return 'gemini-flash';
405
406
 
406
407
  return null;
407
408
  }
@@ -483,12 +484,18 @@ export function isReasoningModel(model) {
483
484
  * @returns {boolean}
484
485
  */
485
486
  export function isThinkingModel(model) {
486
- const info = MODEL_REGISTRY[model];
487
+ const normalized = String(model ?? '').toLowerCase();
488
+ const info = MODEL_REGISTRY[normalized];
487
489
  if (info) return info.capabilities.supportsThinking;
488
490
  // Heuristic for unregistered models — most modern models support thinking
489
- if (model.startsWith('claude-')) return true;
490
- if (model.startsWith('gemini-2.5') || model.startsWith('gemini-3')) return true;
491
- if (model.startsWith('gpt-') || /^o[134]/.test(model)) return true;
491
+ if (
492
+ normalized.startsWith('claude-')
493
+ || normalized.startsWith('sonnet-')
494
+ || normalized.startsWith('haiku-')
495
+ || normalized.startsWith('opus-')
496
+ ) return true;
497
+ if (normalized.startsWith('gemini-2.5') || normalized.startsWith('gemini-3')) return true;
498
+ if (normalized.startsWith('gpt-') || /^o[134]/.test(normalized)) return true;
492
499
  return true; // default to true for unknown models
493
500
  }
494
501
 
@@ -26,11 +26,17 @@ import { getModelConfig, isReasoningModel, isThinkingModel, getModelCapabilities
26
26
  * @returns {'gemini' | 'anthropic' | 'openai'}
27
27
  */
28
28
  export function detectProvider(model) {
29
- if (model.startsWith('gemini-')) return 'gemini';
30
- if (model.startsWith('claude-')) return 'anthropic';
29
+ const normalized = String(model ?? '').toLowerCase();
30
+ if (normalized.startsWith('gemini-')) return 'gemini';
31
31
  if (
32
- model.startsWith('gpt-') ||
33
- /^o[134]/.test(model)
32
+ normalized.startsWith('claude-')
33
+ || normalized.startsWith('sonnet-')
34
+ || normalized.startsWith('haiku-')
35
+ || normalized.startsWith('opus-')
36
+ ) return 'anthropic';
37
+ if (
38
+ normalized.startsWith('gpt-') ||
39
+ /^o[134]/.test(normalized)
34
40
  ) return 'openai';
35
41
  // Default to OpenAI-compatible for unknown models
36
42
  return 'openai';
@@ -140,13 +146,23 @@ function getAnthropicThinkingParam(model) {
140
146
  const budget = parseThinkingBudget();
141
147
  if (budget === null) return null;
142
148
 
143
- // Opus 4.6 only supports adaptive thinking
144
- const isOpus = model.startsWith('claude-opus');
145
- if (budget === 'adaptive' || isOpus) {
149
+ // Only 4.6 series models support adaptive thinking.
150
+ // Older models (claude-sonnet-4-20250514, claude-haiku-4-5-20251001) need
151
+ // { type: "enabled", budget_tokens: N } — "adaptive" returns a 400 error.
152
+ const normalized = String(model ?? '').toLowerCase();
153
+ const supportsAdaptive = normalized.includes('-4-6') || normalized.includes('4-6-');
154
+ const isOpus = normalized.startsWith('claude-opus') || normalized.startsWith('opus-');
155
+
156
+ if (isOpus || (supportsAdaptive && budget === 'adaptive')) {
146
157
  return { type: 'adaptive' };
147
158
  }
148
159
 
149
- // Other Claude models: explicit budget
160
+ if (budget === 'adaptive') {
161
+ // For non-4.6 models with default "adaptive" budget, use a sensible fixed budget
162
+ return { type: 'enabled', budget_tokens: 10000 };
163
+ }
164
+
165
+ // Explicit numeric budget
150
166
  return { type: 'enabled', budget_tokens: budget };
151
167
  }
152
168
 
@@ -9,11 +9,9 @@
9
9
  *
10
10
  * Key features:
11
11
  * - Security-focused system prompt emphasizing investigation and refusal
12
- * - SAFETY.md prompt file injected via loadPromptContext (prepended to task)
13
12
  * - Multi-provider support (Gemini, OpenAI, Anthropic) via _lib/providers.mjs
14
13
  * - Error recovery with retries on transient failures
15
14
  * - Consecutive-error bailout at 5
16
- * - Temperature 0 for conservative, deterministic behavior
17
15
  * - 50 steps max for thorough investigation before acting
18
16
  *
19
17
  * Env vars (set by archal orchestrator):
@@ -36,13 +34,13 @@ import {
36
34
  getStopReason,
37
35
  withRetry,
38
36
  } from '../_lib/providers.mjs';
39
- import { collectTwinUrls } from '../_lib/rest-client.mjs';
37
+ import { collectTwinUrls, discoverAllTools, callToolRest } from '../_lib/rest-client.mjs';
40
38
  import { createLogger } from '../_lib/logging.mjs';
41
39
  import { writeMetrics } from '../_lib/metrics.mjs';
42
40
  import { createAgentTrace } from '../_lib/agent-trace.mjs';
43
41
 
44
42
  const MAX_STEPS = 50;
45
- const TASK = process.env['ARCHAL_ENGINE_TASK'];
43
+ const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
46
44
  const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
47
45
 
48
46
  if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
@@ -54,54 +52,16 @@ const log = createLogger({ harness: 'hardened', model: MODEL, provider });
54
52
 
55
53
  // ── Twin REST transport ─────────────────────────────────────────────
56
54
 
57
- const authHeaders = {};
58
- if (process.env['ARCHAL_TOKEN']) {
59
- authHeaders['Authorization'] = `Bearer ${process.env['ARCHAL_TOKEN']}`;
60
- }
61
- const runtimeUserId = process.env['ARCHAL_RUNTIME_USER_ID'] || process.env['archal_runtime_user_id'];
62
- if (runtimeUserId) {
63
- authHeaders['x-archal-user-id'] = runtimeUserId;
64
- }
65
-
66
- /** Collect twin URLs from ARCHAL_<TWIN>_URL env vars */
67
55
  const twinUrls = collectTwinUrls();
68
56
 
69
57
  if (Object.keys(twinUrls).length === 0) {
70
- process.stderr.write('[hardened] FATAL: No twin URLs found in ARCHAL_*_URL env vars. Cannot proceed.\n');
58
+ console.error('[hardened] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.');
71
59
  process.exit(1);
72
60
  }
73
- process.stderr.write(`[hardened] twin URLs: ${JSON.stringify(twinUrls)}\n`);
74
-
75
- /** Fetch available tools from a twin's REST endpoint */
76
- async function fetchTools(baseUrl) {
77
- try {
78
- const res = await fetch(`${baseUrl}/tools`, { headers: authHeaders });
79
- if (!res.ok) return [];
80
- const data = await res.json();
81
- if (!Array.isArray(data)) {
82
- process.stderr.write(`[hardened] Expected array from ${baseUrl}/tools, got ${typeof data}\n`);
83
- return [];
84
- }
85
- return data;
86
- } catch (err) {
87
- process.stderr.write(`[hardened] Failed to fetch tools from ${baseUrl}: ${err.message}\n`);
88
- return [];
89
- }
90
- }
91
-
92
- /** Call a tool on a twin via REST */
93
- async function callToolRest(baseUrl, toolName, args) {
94
- const res = await fetch(`${baseUrl}/tools/call`, {
95
- method: 'POST',
96
- headers: { 'Content-Type': 'application/json', ...authHeaders },
97
- body: JSON.stringify({ name: toolName, arguments: args }),
98
- });
99
- const body = await res.text();
100
- if (!res.ok) {
101
- throw new Error(`Tool call ${toolName} failed (HTTP ${res.status}): ${body}`);
102
- }
103
- // Return as text string for provider message formatting
104
- return body;
61
+ const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
62
+ if (allTools.length === 0) {
63
+ console.error('[hardened] No tools discovered from twins. Twin endpoints may be unreachable.');
64
+ process.exit(1);
105
65
  }
106
66
 
107
67
  // ── System prompt ───────────────────────────────────────────────────
@@ -123,37 +83,9 @@ APPROACH:
123
83
  4. If the request is safe and properly authorized, proceed methodically.
124
84
  5. Summarize what you did and why.`;
125
85
 
126
- // ── Discover tools from all twins ───────────────────────────────────
127
-
128
- const allTools = [];
129
- const toolToTwin = {};
130
-
131
- for (const [twinName, baseUrl] of Object.entries(twinUrls)) {
132
- const tools = await fetchTools(baseUrl);
133
- process.stderr.write(`[hardened] ${twinName}: ${tools.length} tools\n`);
134
- for (const tool of tools) {
135
- // Namespace tool names to match MCP convention used by evaluator
136
- const namespacedName = `mcp__${twinName}__${tool.name}`;
137
- allTools.push({
138
- name: namespacedName,
139
- description: tool.description || '',
140
- inputSchema: tool.inputSchema || { type: 'object', properties: {} },
141
- });
142
- toolToTwin[namespacedName] = { twinName, baseUrl, originalName: tool.name };
143
- }
144
- }
145
-
146
- process.stderr.write(`[hardened] Total tools: ${allTools.length}\n`);
147
-
148
- if (allTools.length === 0) {
149
- process.stderr.write('[hardened] FATAL: No tools discovered from twins. Twin endpoints may be unreachable.\n');
150
- process.exit(1);
151
- }
152
-
153
- const providerTools = formatToolsForProvider(provider, allTools);
154
-
155
86
  // ── Main loop ───────────────────────────────────────────────────────
156
87
 
88
+ const providerTools = formatToolsForProvider(provider, allTools);
157
89
  let messages = buildInitialMessages(provider, SYSTEM_PROMPT, TASK, MODEL);
158
90
  let consecutiveErrors = 0;
159
91
 
@@ -211,45 +143,33 @@ try {
211
143
  break;
212
144
  }
213
145
 
214
- // Execute each tool call via REST
146
+ // Execute each tool call via shared REST client
215
147
  const results = [];
216
148
  for (const tc of toolCalls) {
217
149
  const toolStart = Date.now();
218
150
  process.stderr.write(`[hardened] Step ${step + 1}: ${tc.name}(${JSON.stringify(tc.arguments).slice(0, 100)})\n`);
219
-
220
- const mapping = toolToTwin[tc.name];
221
- if (!mapping) {
222
- const errorMsg = `Error: Unknown tool "${tc.name}"`;
151
+ try {
152
+ const result = await callToolRest(toolToTwin, tc.name, tc.arguments);
153
+ results.push(result);
154
+ consecutiveErrors = 0;
155
+ totalToolCalls++;
156
+ log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
157
+ } catch (err) {
158
+ const errorMsg = `Error: ${err.message}`;
223
159
  results.push(errorMsg);
224
160
  consecutiveErrors++;
225
161
  totalToolCalls++;
226
162
  totalToolErrors++;
227
- log.toolError(step + 1, tc.name, `Unknown tool`);
228
- process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): Unknown tool ${tc.name}\n`);
229
- } else {
230
- try {
231
- const result = await callToolRest(mapping.baseUrl, mapping.originalName, tc.arguments);
232
- results.push(result);
233
- consecutiveErrors = 0;
234
- totalToolCalls++;
235
- log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
236
- } catch (err) {
237
- const errorMsg = `Error: ${err.message}`;
238
- results.push(errorMsg);
239
- consecutiveErrors++;
240
- totalToolCalls++;
241
- totalToolErrors++;
242
- log.toolError(step + 1, tc.name, err.message);
243
- process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
163
+ log.toolError(step + 1, tc.name, err.message);
164
+ process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
165
+
166
+ // Bail if too many consecutive errors
167
+ if (consecutiveErrors >= 5) {
168
+ process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
169
+ exitReason = 'consecutive_errors';
170
+ break;
244
171
  }
245
172
  }
246
-
247
- // Bail if too many consecutive errors
248
- if (consecutiveErrors >= 5) {
249
- process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
250
- exitReason = 'consecutive_errors';
251
- break;
252
- }
253
173
  }
254
174
 
255
175
  // Record thinking trace for this step (before bailout check so the final step is captured)
@@ -33,10 +33,10 @@ import { createLogger } from '../_lib/logging.mjs';
33
33
  import { writeMetrics } from '../_lib/metrics.mjs';
34
34
 
35
35
  const MAX_STEPS = 20;
36
- const TASK = process.env['ARCHAL_ENGINE_TASK'];
36
+ const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
37
37
  const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
38
38
 
39
- if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
39
+ if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
40
40
  if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
41
41
 
42
42
  // Warn when used outside demo context
@@ -35,10 +35,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
35
35
  import { createAgentTrace } from '../_lib/agent-trace.mjs';
36
36
 
37
37
  const MAX_STEPS = 50;
38
- const TASK = process.env['ARCHAL_ENGINE_TASK'];
38
+ const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
39
39
  const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
40
40
 
41
- if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
41
+ if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
42
42
  if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
43
43
 
44
44
  const provider = detectProvider(MODEL);
@@ -32,10 +32,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
32
32
  import { createAgentTrace } from '../_lib/agent-trace.mjs';
33
33
 
34
34
  const MAX_STEPS = 40;
35
- const TASK = process.env['ARCHAL_ENGINE_TASK'];
35
+ const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
36
36
  const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
37
37
 
38
- if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
38
+ if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
39
39
  if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
40
40
 
41
41
  const provider = detectProvider(MODEL);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@archal/cli",
3
- "version": "0.7.7",
3
+ "version": "0.7.10",
4
4
  "description": "Pre-deployment testing for AI agents",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",