@archal/cli 0.7.9 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@
  * Env var overrides:
  * ARCHAL_MAX_TOKENS — Max completion tokens (default from model-configs)
  * ARCHAL_TEMPERATURE — Sampling temperature
- * ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default 120)
+ * ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default 180)
  * ARCHAL_OPENAI_BASE_URL — Override OpenAI base URL (for proxies, Azure, etc.)
  * ARCHAL_ANTHROPIC_BASE_URL — Override Anthropic base URL
  * ARCHAL_GEMINI_BASE_URL — Override Gemini base URL
@@ -48,19 +48,41 @@ const PROVIDER_ENV_VARS = {
   openai: 'OPENAI_API_KEY',
 };
 
+function inferKeyProvider(key) {
+  if (!key) return null;
+  if (key.startsWith('AIzaSy')) return 'gemini';
+  if (key.startsWith('sk-ant-')) return 'anthropic';
+  if (key.startsWith('sk-')) return 'openai';
+  return null;
+}
+
 /**
  * Resolve the API key for the detected provider.
  * Priority: ARCHAL_ENGINE_API_KEY > provider-specific env var.
+ * If ARCHAL_ENGINE_API_KEY clearly belongs to a different provider, fall back
+ * to provider-specific key when available, otherwise fail with a clear error.
  * @param {string} provider
  * @returns {string}
  */
 export function resolveApiKey(provider) {
-  const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
-  if (engineKey) return engineKey;
-
   const envVar = PROVIDER_ENV_VARS[provider] ?? 'OPENAI_API_KEY';
-  const key = process.env[envVar]?.trim();
-  if (key) return key;
+  const providerKey = process.env[envVar]?.trim();
+  const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
+  if (engineKey) {
+    const inferred = inferKeyProvider(engineKey);
+    if (!inferred || inferred === provider) return engineKey;
+    if (providerKey) {
+      process.stderr.write(
+        `[harness] Warning: ARCHAL_ENGINE_API_KEY appears to be for ${inferred}; using ${envVar} for ${provider} model.\n`,
+      );
+      return providerKey;
+    }
+    throw new Error(
+      `ARCHAL_ENGINE_API_KEY appears to be for ${inferred}, but provider "${provider}" requires ${envVar}. ` +
+      `Set ${envVar} or use a ${inferred} model.`
+    );
+  }
+  if (providerKey) return providerKey;
 
   throw new Error(
     `No API key found for provider "${provider}". ` +
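A rough illustration of the new fallback behavior, assuming resolveApiKey and inferKeyProvider work as shown in the hunk above (the key values below are made-up placeholders, not real keys):

// Engine key looks like a Gemini key, but the selected model is Anthropic.
process.env['ARCHAL_ENGINE_API_KEY'] = 'AIzaSyEXAMPLE-placeholder';
process.env['ANTHROPIC_API_KEY'] = 'sk-ant-EXAMPLE-placeholder';

// 0.7.9 returned the engine key unconditionally. 0.7.11 detects the mismatch,
// warns on stderr, and falls back to ANTHROPIC_API_KEY.
resolveApiKey('anthropic'); // returns the ANTHROPIC_API_KEY value

// If ANTHROPIC_API_KEY were unset, the same call would now throw instead of
// sending the mismatched key to the Anthropic API.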
@@ -111,7 +133,7 @@ function getLlmTimeoutMs() {
       return parsed * 1000;
     }
   }
-  return 120_000; // 120 seconds default
+  return 180_000; // 180 seconds default
 }
 
 // ── Thinking configuration ──────────────────────────────────────────
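The hunk shows only the tail of getLlmTimeoutMs. A minimal sketch of the full function that is consistent with these lines; the env parsing above the visible context is assumed, not confirmed by this diff:

function getLlmTimeoutMs() {
  // Assumed: ARCHAL_LLM_TIMEOUT is read as whole seconds and validated.
  const raw = process.env['ARCHAL_LLM_TIMEOUT']?.trim();
  if (raw) {
    const parsed = parseInt(raw, 10);
    if (!Number.isNaN(parsed) && parsed > 0) {
      return parsed * 1000;
    }
  }
  return 180_000; // 180 seconds default (was 120_000 in 0.7.9)
}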
@@ -9,11 +9,9 @@
  *
  * Key features:
  * - Security-focused system prompt emphasizing investigation and refusal
- * - SAFETY.md prompt file injected via loadPromptContext (prepended to task)
  * - Multi-provider support (Gemini, OpenAI, Anthropic) via _lib/providers.mjs
  * - Error recovery with retries on transient failures
  * - Consecutive-error bailout at 5
- * - Temperature 0 for conservative, deterministic behavior
  * - 50 steps max for thorough investigation before acting
  *
  * Env vars (set by archal orchestrator):
@@ -36,13 +34,13 @@ import {
   getStopReason,
   withRetry,
 } from '../_lib/providers.mjs';
-import { collectTwinUrls } from '../_lib/rest-client.mjs';
+import { collectTwinUrls, discoverAllTools, callToolRest } from '../_lib/rest-client.mjs';
 import { createLogger } from '../_lib/logging.mjs';
 import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 
 const MAX_STEPS = 50;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
 
 if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
@@ -54,54 +52,16 @@ const log = createLogger({ harness: 'hardened', model: MODEL, provider });
 
 // ── Twin REST transport ─────────────────────────────────────────────
 
-const authHeaders = {};
-if (process.env['ARCHAL_TOKEN']) {
-  authHeaders['Authorization'] = `Bearer ${process.env['ARCHAL_TOKEN']}`;
-}
-const runtimeUserId = process.env['ARCHAL_RUNTIME_USER_ID'] || process.env['archal_runtime_user_id'];
-if (runtimeUserId) {
-  authHeaders['x-archal-user-id'] = runtimeUserId;
-}
-
-/** Collect twin URLs from ARCHAL_<TWIN>_URL env vars */
 const twinUrls = collectTwinUrls();
 
 if (Object.keys(twinUrls).length === 0) {
-  process.stderr.write('[hardened] FATAL: No twin URLs found in ARCHAL_*_URL env vars. Cannot proceed.\n');
+  console.error('[hardened] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.');
   process.exit(1);
 }
-process.stderr.write(`[hardened] twin URLs: ${JSON.stringify(twinUrls)}\n`);
-
-/** Fetch available tools from a twin's REST endpoint */
-async function fetchTools(baseUrl) {
-  try {
-    const res = await fetch(`${baseUrl}/tools`, { headers: authHeaders });
-    if (!res.ok) return [];
-    const data = await res.json();
-    if (!Array.isArray(data)) {
-      process.stderr.write(`[hardened] Expected array from ${baseUrl}/tools, got ${typeof data}\n`);
-      return [];
-    }
-    return data;
-  } catch (err) {
-    process.stderr.write(`[hardened] Failed to fetch tools from ${baseUrl}: ${err.message}\n`);
-    return [];
-  }
-}
-
-/** Call a tool on a twin via REST */
-async function callToolRest(baseUrl, toolName, args) {
-  const res = await fetch(`${baseUrl}/tools/call`, {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json', ...authHeaders },
-    body: JSON.stringify({ name: toolName, arguments: args }),
-  });
-  const body = await res.text();
-  if (!res.ok) {
-    throw new Error(`Tool call ${toolName} failed (HTTP ${res.status}): ${body}`);
-  }
-  // Return as text string for provider message formatting
-  return body;
+const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
+if (allTools.length === 0) {
+  console.error('[hardened] No tools discovered from twins. Twin endpoints may be unreachable.');
+  process.exit(1);
 }
 
 // ── System prompt ───────────────────────────────────────────────────
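discoverAllTools lives in _lib/rest-client.mjs and its implementation is not shown in this diff. A sketch of what it presumably does, assuming it keeps the inline discovery logic removed in the next hunk (per-twin GET of <baseUrl>/tools and mcp__<twin>__<tool> namespacing); the auth headers and per-twin logging from the old fetchTools are omitted here:

// Hypothetical sketch, inferred from the deleted inline code and the new call site.
export async function discoverAllTools(twinUrls) {
  const tools = [];
  const toolToTwin = {};
  for (const [twinName, baseUrl] of Object.entries(twinUrls)) {
    const res = await fetch(`${baseUrl}/tools`);
    const data = res.ok ? await res.json() : [];
    const twinTools = Array.isArray(data) ? data : [];
    for (const tool of twinTools) {
      // Namespace tool names to match the MCP convention used by the evaluator.
      const namespacedName = `mcp__${twinName}__${tool.name}`;
      tools.push({
        name: namespacedName,
        description: tool.description || '',
        inputSchema: tool.inputSchema || { type: 'object', properties: {} },
      });
      toolToTwin[namespacedName] = { twinName, baseUrl, originalName: tool.name };
    }
  }
  return { tools, toolToTwin };
}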
@@ -123,37 +83,9 @@ APPROACH:
 4. If the request is safe and properly authorized, proceed methodically.
 5. Summarize what you did and why.`;
 
-// ── Discover tools from all twins ───────────────────────────────────
-
-const allTools = [];
-const toolToTwin = {};
-
-for (const [twinName, baseUrl] of Object.entries(twinUrls)) {
-  const tools = await fetchTools(baseUrl);
-  process.stderr.write(`[hardened] ${twinName}: ${tools.length} tools\n`);
-  for (const tool of tools) {
-    // Namespace tool names to match MCP convention used by evaluator
-    const namespacedName = `mcp__${twinName}__${tool.name}`;
-    allTools.push({
-      name: namespacedName,
-      description: tool.description || '',
-      inputSchema: tool.inputSchema || { type: 'object', properties: {} },
-    });
-    toolToTwin[namespacedName] = { twinName, baseUrl, originalName: tool.name };
-  }
-}
-
-process.stderr.write(`[hardened] Total tools: ${allTools.length}\n`);
-
-if (allTools.length === 0) {
-  process.stderr.write('[hardened] FATAL: No tools discovered from twins. Twin endpoints may be unreachable.\n');
-  process.exit(1);
-}
-
-const providerTools = formatToolsForProvider(provider, allTools);
-
 // ── Main loop ───────────────────────────────────────────────────────
 
+const providerTools = formatToolsForProvider(provider, allTools);
 let messages = buildInitialMessages(provider, SYSTEM_PROMPT, TASK, MODEL);
 let consecutiveErrors = 0;
 
@@ -175,10 +107,19 @@ try {
 
     // Call the LLM with retry on transient errors
     log.llmCall(step + 1);
-    const response = await withRetry(
-      () => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
-      2,
-    );
+    let response;
+    try {
+      response = await withRetry(
+        () => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
+        2,
+      );
+    } catch (err) {
+      const msg = err?.message ?? String(err);
+      log.error('llm_call_failed', { step: step + 1, error: msg });
+      process.stderr.write(`[hardened] LLM API error: ${msg.slice(0, 500)}\n`);
+      exitReason = 'llm_error';
+      break;
+    }
 
     const iterDurationMs = Date.now() - iterStart;
     totalInputTokens += response.usage.inputTokens;
@@ -211,45 +152,33 @@ try {
       break;
     }
 
-    // Execute each tool call via REST
+    // Execute each tool call via shared REST client
    const results = [];
    for (const tc of toolCalls) {
      const toolStart = Date.now();
      process.stderr.write(`[hardened] Step ${step + 1}: ${tc.name}(${JSON.stringify(tc.arguments).slice(0, 100)})\n`);
-
-      const mapping = toolToTwin[tc.name];
-      if (!mapping) {
-        const errorMsg = `Error: Unknown tool "${tc.name}"`;
+      try {
+        const result = await callToolRest(toolToTwin, tc.name, tc.arguments);
+        results.push(result);
+        consecutiveErrors = 0;
+        totalToolCalls++;
+        log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
+      } catch (err) {
+        const errorMsg = `Error: ${err.message}`;
         results.push(errorMsg);
         consecutiveErrors++;
         totalToolCalls++;
         totalToolErrors++;
-        log.toolError(step + 1, tc.name, `Unknown tool`);
-        process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): Unknown tool ${tc.name}\n`);
-      } else {
-        try {
-          const result = await callToolRest(mapping.baseUrl, mapping.originalName, tc.arguments);
-          results.push(result);
-          consecutiveErrors = 0;
-          totalToolCalls++;
-          log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
-        } catch (err) {
-          const errorMsg = `Error: ${err.message}`;
-          results.push(errorMsg);
-          consecutiveErrors++;
-          totalToolCalls++;
-          totalToolErrors++;
-          log.toolError(step + 1, tc.name, err.message);
-          process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
+        log.toolError(step + 1, tc.name, err.message);
+        process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
+
+        // Bail if too many consecutive errors
+        if (consecutiveErrors >= 5) {
+          process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
+          exitReason = 'consecutive_errors';
+          break;
         }
       }
-
-      // Bail if too many consecutive errors
-      if (consecutiveErrors >= 5) {
-        process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
-        exitReason = 'consecutive_errors';
-        break;
-      }
    }
 
    // Record thinking trace for this step (before bailout check so the final step is captured)
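The harness now delegates to the shared callToolRest from _lib/rest-client.mjs, whose implementation is not part of this diff. A sketch of its presumed shape, inferred from the new call site callToolRest(toolToTwin, name, args) and the deleted inline version (auth headers omitted); unknown tool names now presumably surface as thrown errors caught by the loop above rather than as a separate branch:

// Hypothetical sketch only; the real signature may differ.
export async function callToolRest(toolToTwin, toolName, args) {
  const mapping = toolToTwin[toolName];
  if (!mapping) throw new Error(`Unknown tool "${toolName}"`);
  const res = await fetch(`${mapping.baseUrl}/tools/call`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name: mapping.originalName, arguments: args }),
  });
  const body = await res.text();
  if (!res.ok) {
    throw new Error(`Tool call ${toolName} failed (HTTP ${res.status}): ${body}`);
  }
  return body; // text, for provider message formatting
}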
@@ -298,4 +227,8 @@ try {
     `(${totalToolErrors} errors), ${totalInputTokens} input tokens, ` +
     `${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
   );
+
+  if (exitReason === 'llm_error') {
+    process.exit(1);
+  }
 }
@@ -33,10 +33,10 @@ import { createLogger } from '../_lib/logging.mjs';
 import { writeMetrics } from '../_lib/metrics.mjs';
 
 const MAX_STEPS = 20;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
 
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 
 // Warn when used outside demo context
@@ -84,7 +84,16 @@ try {
     const iterStart = Date.now();
 
     log.llmCall(step + 1);
-    const response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
+    let response;
+    try {
+      response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
+    } catch (err) {
+      const msg = err?.message ?? String(err);
+      log.error('llm_call_failed', { step: step + 1, error: msg });
+      process.stderr.write(`[naive] LLM API error: ${msg.slice(0, 500)}\n`);
+      exitReason = 'llm_error';
+      break;
+    }
 
     const iterDurationMs = Date.now() - iterStart;
     totalInputTokens += response.usage.inputTokens;
@@ -150,4 +159,7 @@ try {
     `${(totalTimeMs / 1000).toFixed(1)}s total\n`
   );
 
+  if (exitReason === 'llm_error') {
+    process.exit(1);
+  }
 }
@@ -6,7 +6,7 @@
  * - Structured system prompt encouraging step-by-step reasoning
  * - Error recovery with retries on transient failures
  * - Context-aware done detection
- * - Max 50 steps safety limit
+ * - Configurable step limit (default 80, cap 200 via ARCHAL_MAX_STEPS)
  * - Token usage and timing instrumentation
  *
  * Env vars (set by archal orchestrator):
@@ -34,11 +34,25 @@ import { createLogger } from '../_lib/logging.mjs';
 import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 
-const MAX_STEPS = 50;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const DEFAULT_MAX_STEPS = 80;
+const MAX_STEPS = (() => {
+  const raw = process.env['ARCHAL_MAX_STEPS']?.trim();
+  if (!raw) return DEFAULT_MAX_STEPS;
+  const parsed = parseInt(raw, 10);
+  if (Number.isNaN(parsed) || parsed <= 0) return DEFAULT_MAX_STEPS;
+  return Math.min(parsed, 200);
+})();
+const MAX_CONSECUTIVE_ERRORS = (() => {
+  const raw = process.env['ARCHAL_MAX_CONSECUTIVE_ERRORS']?.trim();
+  if (!raw) return 8;
+  const parsed = parseInt(raw, 10);
+  if (Number.isNaN(parsed) || parsed <= 0) return 8;
+  return Math.min(parsed, 20);
+})();
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
 
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 
 const provider = detectProvider(MODEL);
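A quick illustration of how the new limits resolve, following the clamping logic in the hunk above (the example values are arbitrary):

// ARCHAL_MAX_STEPS unset                 → MAX_STEPS = 80 (DEFAULT_MAX_STEPS)
// ARCHAL_MAX_STEPS='120'                 → MAX_STEPS = 120
// ARCHAL_MAX_STEPS='500'                 → MAX_STEPS = 200 (hard cap)
// ARCHAL_MAX_STEPS='abc' or '-3'         → MAX_STEPS = 80 (invalid values fall back to the default)
// ARCHAL_MAX_CONSECUTIVE_ERRORS unset    → MAX_CONSECUTIVE_ERRORS = 8
// ARCHAL_MAX_CONSECUTIVE_ERRORS='50'     → MAX_CONSECUTIVE_ERRORS = 20 (hard cap)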
@@ -95,10 +109,19 @@ try {
 
     // Call the LLM with retry on transient errors
     log.llmCall(step + 1);
-    const response = await withRetry(
-      () => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
-      2,
-    );
+    let response;
+    try {
+      response = await withRetry(
+        () => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
+        2,
+      );
+    } catch (err) {
+      const msg = err?.message ?? String(err);
+      log.error('llm_call_failed', { step: step + 1, error: msg });
+      process.stderr.write(`[react] LLM API error: ${msg.slice(0, 500)}\n`);
+      exitReason = 'llm_error';
+      break;
+    }
 
     const iterDurationMs = Date.now() - iterStart;
     totalInputTokens += response.usage.inputTokens;
@@ -154,7 +177,7 @@ try {
         process.stderr.write(`[react] Tool error (${consecutiveErrors}): ${err.message}\n`);
 
         // Bail if too many consecutive errors
-        if (consecutiveErrors >= 5) {
+        if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
           process.stderr.write('[react] Too many consecutive tool errors — stopping.\n');
           exitReason = 'consecutive_errors';
           break;
@@ -171,7 +194,7 @@ try {
       durationMs: iterDurationMs,
     });
 
-    if (consecutiveErrors >= 5) break;
+    if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) break;
 
     // Append tool results to conversation
     messages = appendToolResults(provider, messages, toolCalls, results);
@@ -209,4 +232,7 @@ try {
     `${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
   );
 
+  if (exitReason === 'llm_error') {
+    process.exit(1);
+  }
 }
@@ -32,10 +32,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
 import { createAgentTrace } from '../_lib/agent-trace.mjs';
 
 const MAX_STEPS = 40;
-const TASK = process.env['ARCHAL_ENGINE_TASK'];
+const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
 const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
 
-if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
+if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
 if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
 
 const provider = detectProvider(MODEL);
@@ -77,7 +77,16 @@ try {
     const iterStart = Date.now();
 
     log.llmCall(step + 1);
-    const response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
+    let response;
+    try {
+      response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
+    } catch (err) {
+      const msg = err?.message ?? String(err);
+      log.error('llm_call_failed', { step: step + 1, error: msg });
+      process.stderr.write(`[zero-shot] LLM API error: ${msg.slice(0, 500)}\n`);
+      exitReason = 'llm_error';
+      break;
+    }
 
     const iterDurationMs = Date.now() - iterStart;
     totalInputTokens += response.usage.inputTokens;
@@ -169,4 +178,7 @@ try {
     `${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
   );
 
+  if (exitReason === 'llm_error') {
+    process.exit(1);
+  }
 }
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@archal/cli",
-  "version": "0.7.9",
+  "version": "0.7.11",
   "description": "Pre-deployment testing for AI agents",
   "type": "module",
   "main": "dist/index.js",