@archal/cli 0.7.9 → 0.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +697 -146
- package/harnesses/_lib/providers.mjs +29 -7
- package/harnesses/hardened/agent.mjs +42 -109
- package/harnesses/naive/agent.mjs +15 -3
- package/harnesses/react/agent.mjs +36 -10
- package/harnesses/zero-shot/agent.mjs +15 -3
- package/package.json +1 -1
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Env var overrides:
|
|
6
6
|
* ARCHAL_MAX_TOKENS — Max completion tokens (default from model-configs)
|
|
7
7
|
* ARCHAL_TEMPERATURE — Sampling temperature
|
|
8
|
-
* ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default
|
|
8
|
+
* ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default 180)
|
|
9
9
|
* ARCHAL_OPENAI_BASE_URL — Override OpenAI base URL (for proxies, Azure, etc.)
|
|
10
10
|
* ARCHAL_ANTHROPIC_BASE_URL — Override Anthropic base URL
|
|
11
11
|
* ARCHAL_GEMINI_BASE_URL — Override Gemini base URL
|
|
@@ -48,19 +48,41 @@ const PROVIDER_ENV_VARS = {
|
|
|
48
48
|
openai: 'OPENAI_API_KEY',
|
|
49
49
|
};
|
|
50
50
|
|
|
51
|
+
function inferKeyProvider(key) {
|
|
52
|
+
if (!key) return null;
|
|
53
|
+
if (key.startsWith('AIzaSy')) return 'gemini';
|
|
54
|
+
if (key.startsWith('sk-ant-')) return 'anthropic';
|
|
55
|
+
if (key.startsWith('sk-')) return 'openai';
|
|
56
|
+
return null;
|
|
57
|
+
}
|
|
58
|
+
|
|
51
59
|
/**
|
|
52
60
|
* Resolve the API key for the detected provider.
|
|
53
61
|
* Priority: ARCHAL_ENGINE_API_KEY > provider-specific env var.
|
|
62
|
+
* If ARCHAL_ENGINE_API_KEY clearly belongs to a different provider, fall back
|
|
63
|
+
* to provider-specific key when available, otherwise fail with a clear error.
|
|
54
64
|
* @param {string} provider
|
|
55
65
|
* @returns {string}
|
|
56
66
|
*/
|
|
57
67
|
export function resolveApiKey(provider) {
|
|
58
|
-
const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
|
|
59
|
-
if (engineKey) return engineKey;
|
|
60
|
-
|
|
61
68
|
const envVar = PROVIDER_ENV_VARS[provider] ?? 'OPENAI_API_KEY';
|
|
62
|
-
const
|
|
63
|
-
|
|
69
|
+
const providerKey = process.env[envVar]?.trim();
|
|
70
|
+
const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
|
|
71
|
+
if (engineKey) {
|
|
72
|
+
const inferred = inferKeyProvider(engineKey);
|
|
73
|
+
if (!inferred || inferred === provider) return engineKey;
|
|
74
|
+
if (providerKey) {
|
|
75
|
+
process.stderr.write(
|
|
76
|
+
`[harness] Warning: ARCHAL_ENGINE_API_KEY appears to be for ${inferred}; using ${envVar} for ${provider} model.\n`,
|
|
77
|
+
);
|
|
78
|
+
return providerKey;
|
|
79
|
+
}
|
|
80
|
+
throw new Error(
|
|
81
|
+
`ARCHAL_ENGINE_API_KEY appears to be for ${inferred}, but provider "${provider}" requires ${envVar}. ` +
|
|
82
|
+
`Set ${envVar} or use a ${inferred} model.`
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
if (providerKey) return providerKey;
|
|
64
86
|
|
|
65
87
|
throw new Error(
|
|
66
88
|
`No API key found for provider "${provider}". ` +
|
|
@@ -111,7 +133,7 @@ function getLlmTimeoutMs() {
|
|
|
111
133
|
return parsed * 1000;
|
|
112
134
|
}
|
|
113
135
|
}
|
|
114
|
-
return
|
|
136
|
+
return 180_000; // 180 seconds default
|
|
115
137
|
}
|
|
116
138
|
|
|
117
139
|
// ── Thinking configuration ──────────────────────────────────────────
|
|
@@ -9,11 +9,9 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Key features:
|
|
11
11
|
* - Security-focused system prompt emphasizing investigation and refusal
|
|
12
|
-
* - SAFETY.md prompt file injected via loadPromptContext (prepended to task)
|
|
13
12
|
* - Multi-provider support (Gemini, OpenAI, Anthropic) via _lib/providers.mjs
|
|
14
13
|
* - Error recovery with retries on transient failures
|
|
15
14
|
* - Consecutive-error bailout at 5
|
|
16
|
-
* - Temperature 0 for conservative, deterministic behavior
|
|
17
15
|
* - 50 steps max for thorough investigation before acting
|
|
18
16
|
*
|
|
19
17
|
* Env vars (set by archal orchestrator):
|
|
@@ -36,13 +34,13 @@ import {
|
|
|
36
34
|
getStopReason,
|
|
37
35
|
withRetry,
|
|
38
36
|
} from '../_lib/providers.mjs';
|
|
39
|
-
import { collectTwinUrls } from '../_lib/rest-client.mjs';
|
|
37
|
+
import { collectTwinUrls, discoverAllTools, callToolRest } from '../_lib/rest-client.mjs';
|
|
40
38
|
import { createLogger } from '../_lib/logging.mjs';
|
|
41
39
|
import { writeMetrics } from '../_lib/metrics.mjs';
|
|
42
40
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
43
41
|
|
|
44
42
|
const MAX_STEPS = 50;
|
|
45
|
-
const TASK = process.env['ARCHAL_ENGINE_TASK'];
|
|
43
|
+
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
46
44
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
47
45
|
|
|
48
46
|
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
|
|
@@ -54,54 +52,16 @@ const log = createLogger({ harness: 'hardened', model: MODEL, provider });
|
|
|
54
52
|
|
|
55
53
|
// ── Twin REST transport ─────────────────────────────────────────────
|
|
56
54
|
|
|
57
|
-
const authHeaders = {};
|
|
58
|
-
if (process.env['ARCHAL_TOKEN']) {
|
|
59
|
-
authHeaders['Authorization'] = `Bearer ${process.env['ARCHAL_TOKEN']}`;
|
|
60
|
-
}
|
|
61
|
-
const runtimeUserId = process.env['ARCHAL_RUNTIME_USER_ID'] || process.env['archal_runtime_user_id'];
|
|
62
|
-
if (runtimeUserId) {
|
|
63
|
-
authHeaders['x-archal-user-id'] = runtimeUserId;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/** Collect twin URLs from ARCHAL_<TWIN>_URL env vars */
|
|
67
55
|
const twinUrls = collectTwinUrls();
|
|
68
56
|
|
|
69
57
|
if (Object.keys(twinUrls).length === 0) {
|
|
70
|
-
|
|
58
|
+
console.error('[hardened] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.');
|
|
71
59
|
process.exit(1);
|
|
72
60
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
try {
|
|
78
|
-
const res = await fetch(`${baseUrl}/tools`, { headers: authHeaders });
|
|
79
|
-
if (!res.ok) return [];
|
|
80
|
-
const data = await res.json();
|
|
81
|
-
if (!Array.isArray(data)) {
|
|
82
|
-
process.stderr.write(`[hardened] Expected array from ${baseUrl}/tools, got ${typeof data}\n`);
|
|
83
|
-
return [];
|
|
84
|
-
}
|
|
85
|
-
return data;
|
|
86
|
-
} catch (err) {
|
|
87
|
-
process.stderr.write(`[hardened] Failed to fetch tools from ${baseUrl}: ${err.message}\n`);
|
|
88
|
-
return [];
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
/** Call a tool on a twin via REST */
|
|
93
|
-
async function callToolRest(baseUrl, toolName, args) {
|
|
94
|
-
const res = await fetch(`${baseUrl}/tools/call`, {
|
|
95
|
-
method: 'POST',
|
|
96
|
-
headers: { 'Content-Type': 'application/json', ...authHeaders },
|
|
97
|
-
body: JSON.stringify({ name: toolName, arguments: args }),
|
|
98
|
-
});
|
|
99
|
-
const body = await res.text();
|
|
100
|
-
if (!res.ok) {
|
|
101
|
-
throw new Error(`Tool call ${toolName} failed (HTTP ${res.status}): ${body}`);
|
|
102
|
-
}
|
|
103
|
-
// Return as text string for provider message formatting
|
|
104
|
-
return body;
|
|
61
|
+
const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
|
|
62
|
+
if (allTools.length === 0) {
|
|
63
|
+
console.error('[hardened] No tools discovered from twins. Twin endpoints may be unreachable.');
|
|
64
|
+
process.exit(1);
|
|
105
65
|
}
|
|
106
66
|
|
|
107
67
|
// ── System prompt ───────────────────────────────────────────────────
|
|
@@ -123,37 +83,9 @@ APPROACH:
|
|
|
123
83
|
4. If the request is safe and properly authorized, proceed methodically.
|
|
124
84
|
5. Summarize what you did and why.`;
|
|
125
85
|
|
|
126
|
-
// ── Discover tools from all twins ───────────────────────────────────
|
|
127
|
-
|
|
128
|
-
const allTools = [];
|
|
129
|
-
const toolToTwin = {};
|
|
130
|
-
|
|
131
|
-
for (const [twinName, baseUrl] of Object.entries(twinUrls)) {
|
|
132
|
-
const tools = await fetchTools(baseUrl);
|
|
133
|
-
process.stderr.write(`[hardened] ${twinName}: ${tools.length} tools\n`);
|
|
134
|
-
for (const tool of tools) {
|
|
135
|
-
// Namespace tool names to match MCP convention used by evaluator
|
|
136
|
-
const namespacedName = `mcp__${twinName}__${tool.name}`;
|
|
137
|
-
allTools.push({
|
|
138
|
-
name: namespacedName,
|
|
139
|
-
description: tool.description || '',
|
|
140
|
-
inputSchema: tool.inputSchema || { type: 'object', properties: {} },
|
|
141
|
-
});
|
|
142
|
-
toolToTwin[namespacedName] = { twinName, baseUrl, originalName: tool.name };
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
process.stderr.write(`[hardened] Total tools: ${allTools.length}\n`);
|
|
147
|
-
|
|
148
|
-
if (allTools.length === 0) {
|
|
149
|
-
process.stderr.write('[hardened] FATAL: No tools discovered from twins. Twin endpoints may be unreachable.\n');
|
|
150
|
-
process.exit(1);
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
const providerTools = formatToolsForProvider(provider, allTools);
|
|
154
|
-
|
|
155
86
|
// ── Main loop ───────────────────────────────────────────────────────
|
|
156
87
|
|
|
88
|
+
const providerTools = formatToolsForProvider(provider, allTools);
|
|
157
89
|
let messages = buildInitialMessages(provider, SYSTEM_PROMPT, TASK, MODEL);
|
|
158
90
|
let consecutiveErrors = 0;
|
|
159
91
|
|
|
@@ -175,10 +107,19 @@ try {
|
|
|
175
107
|
|
|
176
108
|
// Call the LLM with retry on transient errors
|
|
177
109
|
log.llmCall(step + 1);
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
110
|
+
let response;
|
|
111
|
+
try {
|
|
112
|
+
response = await withRetry(
|
|
113
|
+
() => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
|
|
114
|
+
2,
|
|
115
|
+
);
|
|
116
|
+
} catch (err) {
|
|
117
|
+
const msg = err?.message ?? String(err);
|
|
118
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
119
|
+
process.stderr.write(`[hardened] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
120
|
+
exitReason = 'llm_error';
|
|
121
|
+
break;
|
|
122
|
+
}
|
|
182
123
|
|
|
183
124
|
const iterDurationMs = Date.now() - iterStart;
|
|
184
125
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -211,45 +152,33 @@ try {
|
|
|
211
152
|
break;
|
|
212
153
|
}
|
|
213
154
|
|
|
214
|
-
// Execute each tool call via REST
|
|
155
|
+
// Execute each tool call via shared REST client
|
|
215
156
|
const results = [];
|
|
216
157
|
for (const tc of toolCalls) {
|
|
217
158
|
const toolStart = Date.now();
|
|
218
159
|
process.stderr.write(`[hardened] Step ${step + 1}: ${tc.name}(${JSON.stringify(tc.arguments).slice(0, 100)})\n`);
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
160
|
+
try {
|
|
161
|
+
const result = await callToolRest(toolToTwin, tc.name, tc.arguments);
|
|
162
|
+
results.push(result);
|
|
163
|
+
consecutiveErrors = 0;
|
|
164
|
+
totalToolCalls++;
|
|
165
|
+
log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
|
|
166
|
+
} catch (err) {
|
|
167
|
+
const errorMsg = `Error: ${err.message}`;
|
|
223
168
|
results.push(errorMsg);
|
|
224
169
|
consecutiveErrors++;
|
|
225
170
|
totalToolCalls++;
|
|
226
171
|
totalToolErrors++;
|
|
227
|
-
log.toolError(step + 1, tc.name,
|
|
228
|
-
process.stderr.write(`[hardened] Tool error (${consecutiveErrors}):
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
log.toolCall(step + 1, tc.name, tc.arguments, Date.now() - toolStart);
|
|
236
|
-
} catch (err) {
|
|
237
|
-
const errorMsg = `Error: ${err.message}`;
|
|
238
|
-
results.push(errorMsg);
|
|
239
|
-
consecutiveErrors++;
|
|
240
|
-
totalToolCalls++;
|
|
241
|
-
totalToolErrors++;
|
|
242
|
-
log.toolError(step + 1, tc.name, err.message);
|
|
243
|
-
process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
|
|
172
|
+
log.toolError(step + 1, tc.name, err.message);
|
|
173
|
+
process.stderr.write(`[hardened] Tool error (${consecutiveErrors}): ${err.message}\n`);
|
|
174
|
+
|
|
175
|
+
// Bail if too many consecutive errors
|
|
176
|
+
if (consecutiveErrors >= 5) {
|
|
177
|
+
process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
|
|
178
|
+
exitReason = 'consecutive_errors';
|
|
179
|
+
break;
|
|
244
180
|
}
|
|
245
181
|
}
|
|
246
|
-
|
|
247
|
-
// Bail if too many consecutive errors
|
|
248
|
-
if (consecutiveErrors >= 5) {
|
|
249
|
-
process.stderr.write('[hardened] Too many consecutive tool errors — stopping.\n');
|
|
250
|
-
exitReason = 'consecutive_errors';
|
|
251
|
-
break;
|
|
252
|
-
}
|
|
253
182
|
}
|
|
254
183
|
|
|
255
184
|
// Record thinking trace for this step (before bailout check so the final step is captured)
|
|
@@ -298,4 +227,8 @@ try {
|
|
|
298
227
|
`(${totalToolErrors} errors), ${totalInputTokens} input tokens, ` +
|
|
299
228
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
300
229
|
);
|
|
230
|
+
|
|
231
|
+
if (exitReason === 'llm_error') {
|
|
232
|
+
process.exit(1);
|
|
233
|
+
}
|
|
301
234
|
}
|
|
@@ -33,10 +33,10 @@ import { createLogger } from '../_lib/logging.mjs';
|
|
|
33
33
|
import { writeMetrics } from '../_lib/metrics.mjs';
|
|
34
34
|
|
|
35
35
|
const MAX_STEPS = 20;
|
|
36
|
-
const TASK = process.env['ARCHAL_ENGINE_TASK'];
|
|
36
|
+
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
37
37
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
38
38
|
|
|
39
|
-
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
|
|
39
|
+
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
|
|
40
40
|
if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
|
|
41
41
|
|
|
42
42
|
// Warn when used outside demo context
|
|
@@ -84,7 +84,16 @@ try {
|
|
|
84
84
|
const iterStart = Date.now();
|
|
85
85
|
|
|
86
86
|
log.llmCall(step + 1);
|
|
87
|
-
|
|
87
|
+
let response;
|
|
88
|
+
try {
|
|
89
|
+
response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
|
|
90
|
+
} catch (err) {
|
|
91
|
+
const msg = err?.message ?? String(err);
|
|
92
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
93
|
+
process.stderr.write(`[naive] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
94
|
+
exitReason = 'llm_error';
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
88
97
|
|
|
89
98
|
const iterDurationMs = Date.now() - iterStart;
|
|
90
99
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -150,4 +159,7 @@ try {
|
|
|
150
159
|
`${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
151
160
|
);
|
|
152
161
|
|
|
162
|
+
if (exitReason === 'llm_error') {
|
|
163
|
+
process.exit(1);
|
|
164
|
+
}
|
|
153
165
|
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* - Structured system prompt encouraging step-by-step reasoning
|
|
7
7
|
* - Error recovery with retries on transient failures
|
|
8
8
|
* - Context-aware done detection
|
|
9
|
-
* -
|
|
9
|
+
* - Configurable step limit (default 80, cap 200 via ARCHAL_MAX_STEPS)
|
|
10
10
|
* - Token usage and timing instrumentation
|
|
11
11
|
*
|
|
12
12
|
* Env vars (set by archal orchestrator):
|
|
@@ -34,11 +34,25 @@ import { createLogger } from '../_lib/logging.mjs';
|
|
|
34
34
|
import { writeMetrics } from '../_lib/metrics.mjs';
|
|
35
35
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
36
36
|
|
|
37
|
-
const
|
|
38
|
-
const
|
|
37
|
+
const DEFAULT_MAX_STEPS = 80;
|
|
38
|
+
const MAX_STEPS = (() => {
|
|
39
|
+
const raw = process.env['ARCHAL_MAX_STEPS']?.trim();
|
|
40
|
+
if (!raw) return DEFAULT_MAX_STEPS;
|
|
41
|
+
const parsed = parseInt(raw, 10);
|
|
42
|
+
if (Number.isNaN(parsed) || parsed <= 0) return DEFAULT_MAX_STEPS;
|
|
43
|
+
return Math.min(parsed, 200);
|
|
44
|
+
})();
|
|
45
|
+
const MAX_CONSECUTIVE_ERRORS = (() => {
|
|
46
|
+
const raw = process.env['ARCHAL_MAX_CONSECUTIVE_ERRORS']?.trim();
|
|
47
|
+
if (!raw) return 8;
|
|
48
|
+
const parsed = parseInt(raw, 10);
|
|
49
|
+
if (Number.isNaN(parsed) || parsed <= 0) return 8;
|
|
50
|
+
return Math.min(parsed, 20);
|
|
51
|
+
})();
|
|
52
|
+
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
39
53
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
40
54
|
|
|
41
|
-
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
|
|
55
|
+
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
|
|
42
56
|
if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
|
|
43
57
|
|
|
44
58
|
const provider = detectProvider(MODEL);
|
|
@@ -95,10 +109,19 @@ try {
|
|
|
95
109
|
|
|
96
110
|
// Call the LLM with retry on transient errors
|
|
97
111
|
log.llmCall(step + 1);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
112
|
+
let response;
|
|
113
|
+
try {
|
|
114
|
+
response = await withRetry(
|
|
115
|
+
() => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
|
|
116
|
+
2,
|
|
117
|
+
);
|
|
118
|
+
} catch (err) {
|
|
119
|
+
const msg = err?.message ?? String(err);
|
|
120
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
121
|
+
process.stderr.write(`[react] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
122
|
+
exitReason = 'llm_error';
|
|
123
|
+
break;
|
|
124
|
+
}
|
|
102
125
|
|
|
103
126
|
const iterDurationMs = Date.now() - iterStart;
|
|
104
127
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -154,7 +177,7 @@ try {
|
|
|
154
177
|
process.stderr.write(`[react] Tool error (${consecutiveErrors}): ${err.message}\n`);
|
|
155
178
|
|
|
156
179
|
// Bail if too many consecutive errors
|
|
157
|
-
if (consecutiveErrors >=
|
|
180
|
+
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
|
|
158
181
|
process.stderr.write('[react] Too many consecutive tool errors — stopping.\n');
|
|
159
182
|
exitReason = 'consecutive_errors';
|
|
160
183
|
break;
|
|
@@ -171,7 +194,7 @@ try {
|
|
|
171
194
|
durationMs: iterDurationMs,
|
|
172
195
|
});
|
|
173
196
|
|
|
174
|
-
if (consecutiveErrors >=
|
|
197
|
+
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) break;
|
|
175
198
|
|
|
176
199
|
// Append tool results to conversation
|
|
177
200
|
messages = appendToolResults(provider, messages, toolCalls, results);
|
|
@@ -209,4 +232,7 @@ try {
|
|
|
209
232
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
210
233
|
);
|
|
211
234
|
|
|
235
|
+
if (exitReason === 'llm_error') {
|
|
236
|
+
process.exit(1);
|
|
237
|
+
}
|
|
212
238
|
}
|
|
@@ -32,10 +32,10 @@ import { writeMetrics } from '../_lib/metrics.mjs';
|
|
|
32
32
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
33
33
|
|
|
34
34
|
const MAX_STEPS = 40;
|
|
35
|
-
const TASK = process.env['ARCHAL_ENGINE_TASK'];
|
|
35
|
+
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
36
36
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
37
37
|
|
|
38
|
-
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set'); process.exit(1); }
|
|
38
|
+
if (!TASK) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
|
|
39
39
|
if (!MODEL) { console.error('ARCHAL_ENGINE_MODEL not set'); process.exit(1); }
|
|
40
40
|
|
|
41
41
|
const provider = detectProvider(MODEL);
|
|
@@ -77,7 +77,16 @@ try {
|
|
|
77
77
|
const iterStart = Date.now();
|
|
78
78
|
|
|
79
79
|
log.llmCall(step + 1);
|
|
80
|
-
|
|
80
|
+
let response;
|
|
81
|
+
try {
|
|
82
|
+
response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
|
|
83
|
+
} catch (err) {
|
|
84
|
+
const msg = err?.message ?? String(err);
|
|
85
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
86
|
+
process.stderr.write(`[zero-shot] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
87
|
+
exitReason = 'llm_error';
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
81
90
|
|
|
82
91
|
const iterDurationMs = Date.now() - iterStart;
|
|
83
92
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -169,4 +178,7 @@ try {
|
|
|
169
178
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
170
179
|
);
|
|
171
180
|
|
|
181
|
+
if (exitReason === 'llm_error') {
|
|
182
|
+
process.exit(1);
|
|
183
|
+
}
|
|
172
184
|
}
|