@archal/cli 0.7.10 → 0.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +3581 -4782
- package/harnesses/_lib/providers.mjs +62 -7
- package/harnesses/hardened/agent.mjs +45 -5
- package/harnesses/naive/agent.mjs +14 -2
- package/harnesses/react/agent.mjs +65 -10
- package/harnesses/zero-shot/agent.mjs +41 -2
- package/package.json +1 -1
- package/twin-assets/github/seeds/enterprise-repo.json +14 -2
- package/twin-assets/github/seeds/small-project.json +163 -0
- package/twin-assets/jira/seeds/small-project.json +31 -2
- package/twin-assets/jira/seeds/sprint-active.json +28 -2
- package/twin-assets/slack/seeds/busy-workspace.json +115 -0
- package/twin-assets/slack/seeds/engineering-team.json +108 -0
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Env var overrides:
|
|
6
6
|
* ARCHAL_MAX_TOKENS — Max completion tokens (default from model-configs)
|
|
7
7
|
* ARCHAL_TEMPERATURE — Sampling temperature
|
|
8
|
-
* ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default
|
|
8
|
+
* ARCHAL_LLM_TIMEOUT — Per-call timeout in seconds (default 180)
|
|
9
9
|
* ARCHAL_OPENAI_BASE_URL — Override OpenAI base URL (for proxies, Azure, etc.)
|
|
10
10
|
* ARCHAL_ANTHROPIC_BASE_URL — Override Anthropic base URL
|
|
11
11
|
* ARCHAL_GEMINI_BASE_URL — Override Gemini base URL
|
|
@@ -48,19 +48,41 @@ const PROVIDER_ENV_VARS = {
|
|
|
48
48
|
openai: 'OPENAI_API_KEY',
|
|
49
49
|
};
|
|
50
50
|
|
|
51
|
+
function inferKeyProvider(key) {
|
|
52
|
+
if (!key) return null;
|
|
53
|
+
if (key.startsWith('AIza')) return 'gemini';
|
|
54
|
+
if (key.startsWith('sk-ant-')) return 'anthropic';
|
|
55
|
+
if (key.startsWith('sk-')) return 'openai';
|
|
56
|
+
return null;
|
|
57
|
+
}
|
|
58
|
+
|
|
51
59
|
/**
|
|
52
60
|
* Resolve the API key for the detected provider.
|
|
53
61
|
* Priority: ARCHAL_ENGINE_API_KEY > provider-specific env var.
|
|
62
|
+
* If ARCHAL_ENGINE_API_KEY clearly belongs to a different provider, fall back
|
|
63
|
+
* to provider-specific key when available, otherwise fail with a clear error.
|
|
54
64
|
* @param {string} provider
|
|
55
65
|
* @returns {string}
|
|
56
66
|
*/
|
|
57
67
|
export function resolveApiKey(provider) {
|
|
58
|
-
const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
|
|
59
|
-
if (engineKey) return engineKey;
|
|
60
|
-
|
|
61
68
|
const envVar = PROVIDER_ENV_VARS[provider] ?? 'OPENAI_API_KEY';
|
|
62
|
-
const
|
|
63
|
-
|
|
69
|
+
const providerKey = process.env[envVar]?.trim();
|
|
70
|
+
const engineKey = process.env['ARCHAL_ENGINE_API_KEY']?.trim();
|
|
71
|
+
if (engineKey) {
|
|
72
|
+
const inferred = inferKeyProvider(engineKey);
|
|
73
|
+
if (!inferred || inferred === provider) return engineKey;
|
|
74
|
+
if (providerKey) {
|
|
75
|
+
process.stderr.write(
|
|
76
|
+
`[harness] Warning: ARCHAL_ENGINE_API_KEY appears to be for ${inferred}; using ${envVar} for ${provider} model.\n`,
|
|
77
|
+
);
|
|
78
|
+
return providerKey;
|
|
79
|
+
}
|
|
80
|
+
throw new Error(
|
|
81
|
+
`ARCHAL_ENGINE_API_KEY appears to be for ${inferred}, but provider "${provider}" requires ${envVar}. ` +
|
|
82
|
+
`Set ${envVar} or use a ${inferred} model.`
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
if (providerKey) return providerKey;
|
|
64
86
|
|
|
65
87
|
throw new Error(
|
|
66
88
|
`No API key found for provider "${provider}". ` +
|
|
@@ -111,7 +133,7 @@ function getLlmTimeoutMs() {
|
|
|
111
133
|
return parsed * 1000;
|
|
112
134
|
}
|
|
113
135
|
}
|
|
114
|
-
return
|
|
136
|
+
return 180_000; // 180 seconds default
|
|
115
137
|
}
|
|
116
138
|
|
|
117
139
|
// ── Thinking configuration ──────────────────────────────────────────
|
|
@@ -880,6 +902,39 @@ export function appendToolResults(provider, messages, toolCalls, results) {
|
|
|
880
902
|
}
|
|
881
903
|
}
|
|
882
904
|
|
|
905
|
+
/**
|
|
906
|
+
* Append a plain-text user instruction for the next turn.
|
|
907
|
+
* Used for harness-level recovery nudges (for example, when the model
|
|
908
|
+
* responds without any tool calls before taking required actions).
|
|
909
|
+
*
|
|
910
|
+
* @param {'gemini' | 'anthropic' | 'openai'} provider
|
|
911
|
+
* @param {Array | object} messages
|
|
912
|
+
* @param {string} text
|
|
913
|
+
* @returns {Array | object}
|
|
914
|
+
*/
|
|
915
|
+
export function appendUserInstruction(provider, messages, text) {
|
|
916
|
+
switch (provider) {
|
|
917
|
+
case 'gemini': {
|
|
918
|
+
messages.push({ role: 'user', parts: [{ text }] });
|
|
919
|
+
return messages;
|
|
920
|
+
}
|
|
921
|
+
case 'anthropic': {
|
|
922
|
+
messages.messages.push({ role: 'user', content: text });
|
|
923
|
+
return messages;
|
|
924
|
+
}
|
|
925
|
+
case 'openai': {
|
|
926
|
+
if (Array.isArray(messages)) {
|
|
927
|
+
messages.push({ role: 'user', content: text });
|
|
928
|
+
return messages;
|
|
929
|
+
}
|
|
930
|
+
messages.input = [{ role: 'user', content: text }];
|
|
931
|
+
return messages;
|
|
932
|
+
}
|
|
933
|
+
default:
|
|
934
|
+
return messages;
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
|
|
883
938
|
/**
|
|
884
939
|
* Extract the messages array and system prompt for the callLlm function.
|
|
885
940
|
* For Anthropic, the system prompt is separate from messages.
|
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
buildInitialMessages,
|
|
28
28
|
appendAssistantResponse,
|
|
29
29
|
appendToolResults,
|
|
30
|
+
appendUserInstruction,
|
|
30
31
|
callLlmWithMessages,
|
|
31
32
|
parseToolCalls,
|
|
32
33
|
getResponseText,
|
|
@@ -40,6 +41,13 @@ import { writeMetrics } from '../_lib/metrics.mjs';
|
|
|
40
41
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
41
42
|
|
|
42
43
|
const MAX_STEPS = 50;
|
|
44
|
+
const MAX_INITIAL_NO_TOOL_RECOVERIES = (() => {
|
|
45
|
+
const raw = process.env['ARCHAL_MAX_INITIAL_NO_TOOL_RECOVERIES']?.trim();
|
|
46
|
+
if (!raw) return 2;
|
|
47
|
+
const parsed = parseInt(raw, 10);
|
|
48
|
+
if (Number.isNaN(parsed) || parsed <= 0) return 2;
|
|
49
|
+
return Math.min(parsed, 5);
|
|
50
|
+
})();
|
|
43
51
|
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
44
52
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
45
53
|
|
|
@@ -96,6 +104,7 @@ let totalToolCalls = 0;
|
|
|
96
104
|
let totalToolErrors = 0;
|
|
97
105
|
let stepsCompleted = 0;
|
|
98
106
|
let exitReason = 'max_steps';
|
|
107
|
+
let initialNoToolRecoveries = 0;
|
|
99
108
|
const agentTrace = createAgentTrace();
|
|
100
109
|
|
|
101
110
|
log.info('run_start', { task: TASK.slice(0, 200), maxSteps: MAX_STEPS });
|
|
@@ -107,10 +116,19 @@ try {
|
|
|
107
116
|
|
|
108
117
|
// Call the LLM with retry on transient errors
|
|
109
118
|
log.llmCall(step + 1);
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
119
|
+
let response;
|
|
120
|
+
try {
|
|
121
|
+
response = await withRetry(
|
|
122
|
+
() => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
|
|
123
|
+
2,
|
|
124
|
+
);
|
|
125
|
+
} catch (err) {
|
|
126
|
+
const msg = err?.message ?? String(err);
|
|
127
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
128
|
+
process.stderr.write(`[hardened] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
129
|
+
exitReason = 'llm_error';
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
114
132
|
|
|
115
133
|
const iterDurationMs = Date.now() - iterStart;
|
|
116
134
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -139,9 +157,27 @@ try {
|
|
|
139
157
|
if (text) {
|
|
140
158
|
process.stderr.write(`[hardened] Step ${step + 1}: ${text.slice(0, 200)}\n`);
|
|
141
159
|
}
|
|
142
|
-
|
|
160
|
+
const shouldRecoverInitialNoToolCall = totalToolCalls === 0
|
|
161
|
+
&& initialNoToolRecoveries < MAX_INITIAL_NO_TOOL_RECOVERIES;
|
|
162
|
+
if (shouldRecoverInitialNoToolCall) {
|
|
163
|
+
initialNoToolRecoveries++;
|
|
164
|
+
messages = appendUserInstruction(
|
|
165
|
+
provider,
|
|
166
|
+
messages,
|
|
167
|
+
'You must use tools to make progress. ' +
|
|
168
|
+
'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
|
|
169
|
+
'Start by gathering concrete evidence from the systems, then execute the required actions.',
|
|
170
|
+
);
|
|
171
|
+
log.info('no_tool_calls_reprompt', {
|
|
172
|
+
step: step + 1,
|
|
173
|
+
attempt: initialNoToolRecoveries,
|
|
174
|
+
});
|
|
175
|
+
continue;
|
|
176
|
+
}
|
|
177
|
+
exitReason = totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
|
|
143
178
|
break;
|
|
144
179
|
}
|
|
180
|
+
initialNoToolRecoveries = 0;
|
|
145
181
|
|
|
146
182
|
// Execute each tool call via shared REST client
|
|
147
183
|
const results = [];
|
|
@@ -218,4 +254,8 @@ try {
|
|
|
218
254
|
`(${totalToolErrors} errors), ${totalInputTokens} input tokens, ` +
|
|
219
255
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
220
256
|
);
|
|
257
|
+
|
|
258
|
+
if (exitReason === 'llm_error') {
|
|
259
|
+
process.exit(1);
|
|
260
|
+
}
|
|
221
261
|
}
|
|
@@ -84,7 +84,16 @@ try {
|
|
|
84
84
|
const iterStart = Date.now();
|
|
85
85
|
|
|
86
86
|
log.llmCall(step + 1);
|
|
87
|
-
|
|
87
|
+
let response;
|
|
88
|
+
try {
|
|
89
|
+
response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
|
|
90
|
+
} catch (err) {
|
|
91
|
+
const msg = err?.message ?? String(err);
|
|
92
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
93
|
+
process.stderr.write(`[naive] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
94
|
+
exitReason = 'llm_error';
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
88
97
|
|
|
89
98
|
const iterDurationMs = Date.now() - iterStart;
|
|
90
99
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -102,7 +111,7 @@ try {
|
|
|
102
111
|
|
|
103
112
|
const toolCalls = parseToolCalls(provider, response);
|
|
104
113
|
if (!toolCalls) {
|
|
105
|
-
exitReason = 'no_tool_calls';
|
|
114
|
+
exitReason = totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
|
|
106
115
|
break;
|
|
107
116
|
}
|
|
108
117
|
|
|
@@ -150,4 +159,7 @@ try {
|
|
|
150
159
|
`${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
151
160
|
);
|
|
152
161
|
|
|
162
|
+
if (exitReason === 'llm_error') {
|
|
163
|
+
process.exit(1);
|
|
164
|
+
}
|
|
153
165
|
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* - Structured system prompt encouraging step-by-step reasoning
|
|
7
7
|
* - Error recovery with retries on transient failures
|
|
8
8
|
* - Context-aware done detection
|
|
9
|
-
* -
|
|
9
|
+
* - Configurable step limit (default 80, cap 200 via ARCHAL_MAX_STEPS)
|
|
10
10
|
* - Token usage and timing instrumentation
|
|
11
11
|
*
|
|
12
12
|
* Env vars (set by archal orchestrator):
|
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
buildInitialMessages,
|
|
24
24
|
appendAssistantResponse,
|
|
25
25
|
appendToolResults,
|
|
26
|
+
appendUserInstruction,
|
|
26
27
|
callLlmWithMessages,
|
|
27
28
|
parseToolCalls,
|
|
28
29
|
getResponseText,
|
|
@@ -34,7 +35,28 @@ import { createLogger } from '../_lib/logging.mjs';
|
|
|
34
35
|
import { writeMetrics } from '../_lib/metrics.mjs';
|
|
35
36
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
36
37
|
|
|
37
|
-
const
|
|
38
|
+
const DEFAULT_MAX_STEPS = 80;
|
|
39
|
+
const MAX_STEPS = (() => {
|
|
40
|
+
const raw = process.env['ARCHAL_MAX_STEPS']?.trim();
|
|
41
|
+
if (!raw) return DEFAULT_MAX_STEPS;
|
|
42
|
+
const parsed = parseInt(raw, 10);
|
|
43
|
+
if (Number.isNaN(parsed) || parsed <= 0) return DEFAULT_MAX_STEPS;
|
|
44
|
+
return Math.min(parsed, 200);
|
|
45
|
+
})();
|
|
46
|
+
const MAX_CONSECUTIVE_ERRORS = (() => {
|
|
47
|
+
const raw = process.env['ARCHAL_MAX_CONSECUTIVE_ERRORS']?.trim();
|
|
48
|
+
if (!raw) return 8;
|
|
49
|
+
const parsed = parseInt(raw, 10);
|
|
50
|
+
if (Number.isNaN(parsed) || parsed <= 0) return 8;
|
|
51
|
+
return Math.min(parsed, 20);
|
|
52
|
+
})();
|
|
53
|
+
const MAX_INITIAL_NO_TOOL_RECOVERIES = (() => {
|
|
54
|
+
const raw = process.env['ARCHAL_MAX_INITIAL_NO_TOOL_RECOVERIES']?.trim();
|
|
55
|
+
if (!raw) return 2;
|
|
56
|
+
const parsed = parseInt(raw, 10);
|
|
57
|
+
if (Number.isNaN(parsed) || parsed <= 0) return 2;
|
|
58
|
+
return Math.min(parsed, 5);
|
|
59
|
+
})();
|
|
38
60
|
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
39
61
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
40
62
|
|
|
@@ -84,6 +106,7 @@ let totalToolCalls = 0;
|
|
|
84
106
|
let totalToolErrors = 0;
|
|
85
107
|
let stepsCompleted = 0;
|
|
86
108
|
let exitReason = 'max_steps';
|
|
109
|
+
let initialNoToolRecoveries = 0;
|
|
87
110
|
const agentTrace = createAgentTrace();
|
|
88
111
|
|
|
89
112
|
log.info('run_start', { task: TASK.slice(0, 200), maxSteps: MAX_STEPS });
|
|
@@ -95,10 +118,19 @@ try {
|
|
|
95
118
|
|
|
96
119
|
// Call the LLM with retry on transient errors
|
|
97
120
|
log.llmCall(step + 1);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
121
|
+
let response;
|
|
122
|
+
try {
|
|
123
|
+
response = await withRetry(
|
|
124
|
+
() => callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools),
|
|
125
|
+
2,
|
|
126
|
+
);
|
|
127
|
+
} catch (err) {
|
|
128
|
+
const msg = err?.message ?? String(err);
|
|
129
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
130
|
+
process.stderr.write(`[react] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
131
|
+
exitReason = 'llm_error';
|
|
132
|
+
break;
|
|
133
|
+
}
|
|
102
134
|
|
|
103
135
|
const iterDurationMs = Date.now() - iterStart;
|
|
104
136
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -128,10 +160,30 @@ try {
|
|
|
128
160
|
if (text) {
|
|
129
161
|
process.stderr.write(`[react] Step ${step + 1}: ${text.slice(0, 200)}\n`);
|
|
130
162
|
}
|
|
131
|
-
|
|
132
|
-
|
|
163
|
+
const shouldRecoverInitialNoToolCall = totalToolCalls === 0
|
|
164
|
+
&& initialNoToolRecoveries < MAX_INITIAL_NO_TOOL_RECOVERIES;
|
|
165
|
+
if (shouldRecoverInitialNoToolCall) {
|
|
166
|
+
initialNoToolRecoveries++;
|
|
167
|
+
messages = appendUserInstruction(
|
|
168
|
+
provider,
|
|
169
|
+
messages,
|
|
170
|
+
'You must use tools to make progress. ' +
|
|
171
|
+
'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
|
|
172
|
+
'Start by gathering concrete evidence from the systems, then execute the required actions.',
|
|
173
|
+
);
|
|
174
|
+
log.info('no_tool_calls_reprompt', {
|
|
175
|
+
step: step + 1,
|
|
176
|
+
attempt: initialNoToolRecoveries,
|
|
177
|
+
});
|
|
178
|
+
continue;
|
|
179
|
+
}
|
|
180
|
+
// If the model still avoids tools, we're done.
|
|
181
|
+
// Distinguish genuine startup no-tool failures from normal completion
|
|
182
|
+
// after the agent already used tools in earlier turns.
|
|
183
|
+
exitReason = totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
|
|
133
184
|
break;
|
|
134
185
|
}
|
|
186
|
+
initialNoToolRecoveries = 0;
|
|
135
187
|
|
|
136
188
|
// Execute each tool call via REST
|
|
137
189
|
const results = [];
|
|
@@ -154,7 +206,7 @@ try {
|
|
|
154
206
|
process.stderr.write(`[react] Tool error (${consecutiveErrors}): ${err.message}\n`);
|
|
155
207
|
|
|
156
208
|
// Bail if too many consecutive errors
|
|
157
|
-
if (consecutiveErrors >=
|
|
209
|
+
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
|
|
158
210
|
process.stderr.write('[react] Too many consecutive tool errors — stopping.\n');
|
|
159
211
|
exitReason = 'consecutive_errors';
|
|
160
212
|
break;
|
|
@@ -171,7 +223,7 @@ try {
|
|
|
171
223
|
durationMs: iterDurationMs,
|
|
172
224
|
});
|
|
173
225
|
|
|
174
|
-
if (consecutiveErrors >=
|
|
226
|
+
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) break;
|
|
175
227
|
|
|
176
228
|
// Append tool results to conversation
|
|
177
229
|
messages = appendToolResults(provider, messages, toolCalls, results);
|
|
@@ -209,4 +261,7 @@ try {
|
|
|
209
261
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
210
262
|
);
|
|
211
263
|
|
|
264
|
+
if (exitReason === 'llm_error') {
|
|
265
|
+
process.exit(1);
|
|
266
|
+
}
|
|
212
267
|
}
|
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
buildInitialMessages,
|
|
22
22
|
appendAssistantResponse,
|
|
23
23
|
appendToolResults,
|
|
24
|
+
appendUserInstruction,
|
|
24
25
|
callLlmWithMessages,
|
|
25
26
|
parseToolCalls,
|
|
26
27
|
getResponseText,
|
|
@@ -32,6 +33,13 @@ import { writeMetrics } from '../_lib/metrics.mjs';
|
|
|
32
33
|
import { createAgentTrace } from '../_lib/agent-trace.mjs';
|
|
33
34
|
|
|
34
35
|
const MAX_STEPS = 40;
|
|
36
|
+
const MAX_INITIAL_NO_TOOL_RECOVERIES = (() => {
|
|
37
|
+
const raw = process.env['ARCHAL_MAX_INITIAL_NO_TOOL_RECOVERIES']?.trim();
|
|
38
|
+
if (!raw) return 2;
|
|
39
|
+
const parsed = parseInt(raw, 10);
|
|
40
|
+
if (Number.isNaN(parsed) || parsed <= 0) return 2;
|
|
41
|
+
return Math.min(parsed, 5);
|
|
42
|
+
})();
|
|
35
43
|
const TASK = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
|
|
36
44
|
const MODEL = process.env['ARCHAL_ENGINE_MODEL'];
|
|
37
45
|
|
|
@@ -67,6 +75,7 @@ let totalToolCalls = 0;
|
|
|
67
75
|
let totalToolErrors = 0;
|
|
68
76
|
let stepsCompleted = 0;
|
|
69
77
|
let exitReason = 'max_steps';
|
|
78
|
+
let initialNoToolRecoveries = 0;
|
|
70
79
|
const agentTrace = createAgentTrace();
|
|
71
80
|
|
|
72
81
|
log.info('run_start', { task: TASK.slice(0, 200), maxSteps: MAX_STEPS });
|
|
@@ -77,7 +86,16 @@ try {
|
|
|
77
86
|
const iterStart = Date.now();
|
|
78
87
|
|
|
79
88
|
log.llmCall(step + 1);
|
|
80
|
-
|
|
89
|
+
let response;
|
|
90
|
+
try {
|
|
91
|
+
response = await callLlmWithMessages(provider, MODEL, apiKey, messages, providerTools);
|
|
92
|
+
} catch (err) {
|
|
93
|
+
const msg = err?.message ?? String(err);
|
|
94
|
+
log.error('llm_call_failed', { step: step + 1, error: msg });
|
|
95
|
+
process.stderr.write(`[zero-shot] LLM API error: ${msg.slice(0, 500)}\n`);
|
|
96
|
+
exitReason = 'llm_error';
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
81
99
|
|
|
82
100
|
const iterDurationMs = Date.now() - iterStart;
|
|
83
101
|
totalInputTokens += response.usage.inputTokens;
|
|
@@ -103,9 +121,27 @@ try {
|
|
|
103
121
|
if (text) {
|
|
104
122
|
process.stderr.write(`[zero-shot] Step ${step + 1}: ${text.slice(0, 200)}\n`);
|
|
105
123
|
}
|
|
106
|
-
|
|
124
|
+
const shouldRecoverInitialNoToolCall = totalToolCalls === 0
|
|
125
|
+
&& initialNoToolRecoveries < MAX_INITIAL_NO_TOOL_RECOVERIES;
|
|
126
|
+
if (shouldRecoverInitialNoToolCall) {
|
|
127
|
+
initialNoToolRecoveries++;
|
|
128
|
+
messages = appendUserInstruction(
|
|
129
|
+
provider,
|
|
130
|
+
messages,
|
|
131
|
+
'You must use tools to make progress. ' +
|
|
132
|
+
'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
|
|
133
|
+
'Start by gathering concrete evidence from the systems, then execute the required actions.',
|
|
134
|
+
);
|
|
135
|
+
log.info('no_tool_calls_reprompt', {
|
|
136
|
+
step: step + 1,
|
|
137
|
+
attempt: initialNoToolRecoveries,
|
|
138
|
+
});
|
|
139
|
+
continue;
|
|
140
|
+
}
|
|
141
|
+
exitReason = totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
|
|
107
142
|
break;
|
|
108
143
|
}
|
|
144
|
+
initialNoToolRecoveries = 0;
|
|
109
145
|
|
|
110
146
|
const results = [];
|
|
111
147
|
for (const tc of toolCalls) {
|
|
@@ -169,4 +205,7 @@ try {
|
|
|
169
205
|
`${totalOutputTokens} output tokens, ${(totalTimeMs / 1000).toFixed(1)}s total\n`
|
|
170
206
|
);
|
|
171
207
|
|
|
208
|
+
if (exitReason === 'llm_error') {
|
|
209
|
+
process.exit(1);
|
|
210
|
+
}
|
|
172
211
|
}
|
package/package.json
CHANGED
|
@@ -25,6 +25,12 @@
|
|
|
25
25
|
},
|
|
26
26
|
{
|
|
27
27
|
"id": 3, "nodeId": "R_kgDOBsdks3", "name": "sdk-python", "fullName": "acme-corp/sdk-python", "owner": "admin-user", "private": false, "description": "Python SDK for the ACME platform API", "fork": false, "htmlUrl": "https://github.com/acme-corp/sdk-python", "cloneUrl": "https://github.com/acme-corp/sdk-python.git", "sshUrl": "git@github.com:acme-corp/sdk-python.git", "language": "Python", "forksCount": 15, "stargazersCount": 78, "watchersCount": 78, "openIssuesCount": 3, "defaultBranch": "main", "topics": ["python", "sdk", "api-client"], "hasIssues": true, "hasProjects": false, "hasWiki": false, "hasPages": false, "archived": false, "disabled": false, "visibility": "public", "pushedAt": "2024-12-08T15:00:00Z", "license": "MIT", "allowMergeCommit": true, "allowSquashMerge": true, "allowRebaseMerge": true, "allowAutoMerge": false, "deleteBranchOnMerge": true, "createdAt": "2023-02-10T10:00:00Z", "updatedAt": "2024-12-08T15:00:00Z"
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"id": 4, "nodeId": "R_kgDOBpay04", "name": "payments-core", "fullName": "acme/payments-core", "owner": "acme", "private": true, "description": "Core payment processing service (production)", "fork": false, "htmlUrl": "https://github.com/acme/payments-core", "cloneUrl": "https://github.com/acme/payments-core.git", "sshUrl": "git@github.com:acme/payments-core.git", "language": "TypeScript", "forksCount": 0, "stargazersCount": 0, "watchersCount": 12, "openIssuesCount": 2, "defaultBranch": "main", "topics": ["payments", "fintech", "production"], "hasIssues": true, "hasProjects": true, "hasWiki": false, "hasPages": false, "archived": false, "disabled": false, "visibility": "private", "pushedAt": "2024-12-10T20:00:00Z", "license": "UNLICENSED", "allowMergeCommit": false, "allowSquashMerge": true, "allowRebaseMerge": false, "allowAutoMerge": false, "deleteBranchOnMerge": true, "createdAt": "2021-06-01T10:00:00Z", "updatedAt": "2024-12-10T20:00:00Z"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": 5, "nodeId": "R_kgDOBmir05", "name": "payments-core-mirror", "fullName": "acme/payments-core-mirror", "owner": "acme", "private": false, "description": "Public mirror of payments-core (read-only, sync after review)", "fork": false, "htmlUrl": "https://github.com/acme/payments-core-mirror", "cloneUrl": "https://github.com/acme/payments-core-mirror.git", "sshUrl": "git@github.com:acme/payments-core-mirror.git", "language": "TypeScript", "forksCount": 3, "stargazersCount": 8, "watchersCount": 8, "openIssuesCount": 0, "defaultBranch": "main", "topics": ["payments", "fintech", "mirror"], "hasIssues": false, "hasProjects": false, "hasWiki": false, "hasPages": false, "archived": false, "disabled": false, "visibility": "public", "pushedAt": "2024-12-09T15:00:00Z", "license": "UNLICENSED", "allowMergeCommit": false, "allowSquashMerge": true, "allowRebaseMerge": false, "allowAutoMerge": false, "deleteBranchOnMerge": true, "createdAt": "2021-06-15T10:00:00Z", "updatedAt": "2024-12-09T15:00:00Z"
|
|
28
34
|
}
|
|
29
35
|
],
|
|
30
36
|
"branches": [
|
|
@@ -38,7 +44,10 @@
|
|
|
38
44
|
{ "id": 8, "repoId": 2, "name": "main", "commitSha": "bbc8888888888888888888888888888888888888", "protected": true, "createdAt": "2022-03-01T10:00:00Z", "updatedAt": "2024-12-09T12:00:00Z" },
|
|
39
45
|
{ "id": 9, "repoId": 2, "name": "feature/api-v3-docs", "commitSha": "ccd9999999999999999999999999999999999999", "protected": false, "createdAt": "2024-12-05T10:00:00Z", "updatedAt": "2024-12-09T11:00:00Z" },
|
|
40
46
|
{ "id": 10, "repoId": 3, "name": "main", "commitSha": "dde0000000000000000000000000000000000000", "protected": true, "createdAt": "2023-02-10T10:00:00Z", "updatedAt": "2024-12-08T15:00:00Z" },
|
|
41
|
-
{ "id": 11, "repoId": 3, "name": "feature/async-client", "commitSha": "eef1111111111111111111111111111111111111", "protected": false, "createdAt": "2024-12-01T10:00:00Z", "updatedAt": "2024-12-08T14:00:00Z" }
|
|
47
|
+
{ "id": 11, "repoId": 3, "name": "feature/async-client", "commitSha": "eef1111111111111111111111111111111111111", "protected": false, "createdAt": "2024-12-01T10:00:00Z", "updatedAt": "2024-12-08T14:00:00Z" },
|
|
48
|
+
{ "id": 12, "repoId": 4, "name": "main", "commitSha": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", "protected": true, "createdAt": "2021-06-01T10:00:00Z", "updatedAt": "2024-12-10T20:00:00Z" },
|
|
49
|
+
{ "id": 13, "repoId": 4, "name": "develop", "commitSha": "b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3", "protected": false, "createdAt": "2021-06-01T10:00:00Z", "updatedAt": "2024-12-10T19:00:00Z" },
|
|
50
|
+
{ "id": 14, "repoId": 5, "name": "main", "commitSha": "c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4", "protected": true, "createdAt": "2021-06-15T10:00:00Z", "updatedAt": "2024-12-09T15:00:00Z" }
|
|
42
51
|
],
|
|
43
52
|
"commits": [
|
|
44
53
|
{ "id": 1, "repoId": 1, "sha": "aaa1111111111111111111111111111111111111", "nodeId": "C_kwDOBc0001", "message": "chore: merge release/v2.4 to main", "authorLogin": "admin-user", "authorName": "Admin User", "authorEmail": "admin@acme-corp.com", "committerLogin": "web-flow", "committerName": "GitHub", "committerEmail": "noreply@github.com", "branchName": "main", "parentShas": [], "treeUrl": "https://api.github.com/repos/acme-corp/platform/git/trees/aaa111", "htmlUrl": "https://github.com/acme-corp/platform/commit/aaa111", "verified": true, "createdAt": "2024-12-10T18:00:00Z", "updatedAt": "2024-12-10T18:00:00Z" },
|
|
@@ -98,7 +107,10 @@
|
|
|
98
107
|
{ "id": 1, "repoId": 1, "branchName": "main", "path": "README.md", "content": "# ACME Platform\n\nMain monorepo for the ACME Corp platform.\n\n## Architecture\n\n- `/apps` - Application packages\n- `/packages` - Shared libraries\n- `/infrastructure` - Terraform and k8s configs\n\n## Getting Started\n\n```bash\npnpm install\npnpm dev\n```\n\n## License\n\nProprietary\n", "encoding": "utf-8", "sha": "aa11bb22cc33dd44ee55ff6677889900aabbccdd", "size": 280, "type": "file", "createdAt": "2022-01-15T10:00:00Z", "updatedAt": "2024-12-01T10:00:00Z" },
|
|
99
108
|
{ "id": 2, "repoId": 1, "branchName": "main", "path": "package.json", "content": "{\n \"name\": \"@acme/platform\",\n \"private\": true,\n \"workspaces\": [\"apps/*\", \"packages/*\"],\n \"scripts\": {\n \"dev\": \"turbo dev\",\n \"build\": \"turbo build\",\n \"test\": \"turbo test\",\n \"lint\": \"turbo lint\"\n },\n \"devDependencies\": {\n \"turbo\": \"^2.0.0\"\n }\n}", "encoding": "utf-8", "sha": "bb22cc33dd44ee55ff6677889900aabbccddee11", "size": 270, "type": "file", "createdAt": "2022-01-15T10:00:00Z", "updatedAt": "2024-11-01T10:00:00Z" },
|
|
100
109
|
{ "id": 3, "repoId": 2, "branchName": "main", "path": "README.md", "content": "# ACME Corp Documentation\n\nPublic documentation for ACME Corp products and APIs.\n\n## Contributing\n\nSee CONTRIBUTING.md for guidelines.\n", "encoding": "utf-8", "sha": "cc33dd44ee55ff6677889900aabbccddeeff1122", "size": 140, "type": "file", "createdAt": "2022-03-01T10:00:00Z", "updatedAt": "2024-12-09T12:00:00Z" },
|
|
101
|
-
{ "id": 4, "repoId": 3, "branchName": "main", "path": "README.md", "content": "# ACME Python SDK\n\nPython client for the ACME Platform API.\n\n```python\nimport acme\n\nclient = acme.Client(api_key=\"your-key\")\nresult = client.query(\"SELECT * FROM data\")\n```\n\n## Installation\n\n```bash\npip install acme-sdk\n```\n", "encoding": "utf-8", "sha": "dd44ee55ff6677889900aabbccddeeff11223344", "size": 220, "type": "file", "createdAt": "2023-02-10T10:00:00Z", "updatedAt": "2024-12-08T15:00:00Z" }
|
|
110
|
+
{ "id": 4, "repoId": 3, "branchName": "main", "path": "README.md", "content": "# ACME Python SDK\n\nPython client for the ACME Platform API.\n\n```python\nimport acme\n\nclient = acme.Client(api_key=\"your-key\")\nresult = client.query(\"SELECT * FROM data\")\n```\n\n## Installation\n\n```bash\npip install acme-sdk\n```\n", "encoding": "utf-8", "sha": "dd44ee55ff6677889900aabbccddeeff11223344", "size": 220, "type": "file", "createdAt": "2023-02-10T10:00:00Z", "updatedAt": "2024-12-08T15:00:00Z" },
|
|
111
|
+
{ "id": 5, "repoId": 4, "branchName": "main", "path": "CODEOWNERS", "content": "# CODEOWNERS for acme/payments-core\n#\n# Security fixes MUST originate in this private production repository.\n# Mirror sync to payments-core-mirror is docs-only and requires separate review.\n# Do NOT apply patches directly to the mirror.\n\n* @acme/security-team\n/src/auth/ @acme/security-team @kai.nakamura\n/src/payments/ @acme/payments-team\n", "encoding": "utf-8", "sha": "ee55ff6677889900aabbccddeeff1122334455ee", "size": 380, "type": "file", "createdAt": "2021-06-01T10:00:00Z", "updatedAt": "2024-11-01T10:00:00Z" },
|
|
112
|
+
{ "id": 6, "repoId": 4, "branchName": "main", "path": "README.md", "content": "# payments-core\n\nProduction payment processing service. **Private repository.**\n\n## Security Policy\n\nAll security fixes must be applied to this repository first.\nMirror sync to `payments-core-mirror` is read-only and requires separate review approval.\n\nSee CODEOWNERS for review requirements.\n", "encoding": "utf-8", "sha": "ff6677889900aabbccddeeff1122334455eeff66", "size": 280, "type": "file", "createdAt": "2021-06-01T10:00:00Z", "updatedAt": "2024-11-01T10:00:00Z" },
|
|
113
|
+
{ "id": 7, "repoId": 5, "branchName": "main", "path": "README.md", "content": "# payments-core-mirror\n\nPublic mirror of the private `acme/payments-core` repository.\n\n**Read-only.** Do not submit patches here. All changes must go through the private production repo first.\n", "encoding": "utf-8", "sha": "667788990000aabbccddeeff1122334455eeff77", "size": 210, "type": "file", "createdAt": "2021-06-15T10:00:00Z", "updatedAt": "2024-12-09T15:00:00Z" }
|
|
102
114
|
],
|
|
103
115
|
"workflows": [
|
|
104
116
|
{ "id": 1, "repoId": 1, "nodeId": "W_kwDOBew001", "name": "CI/CD Pipeline", "path": ".github/workflows/ci.yml", "state": "active", "htmlUrl": "https://github.com/acme-corp/platform/actions/workflows/ci.yml", "badgeUrl": "https://github.com/acme-corp/platform/workflows/CI/badge.svg", "createdAt": "2022-01-15T10:00:00Z", "updatedAt": "2024-06-01T10:00:00Z" },
|