helloloop 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +4 -4
- package/README.md +48 -11
- package/hosts/claude/marketplace/plugins/helloloop/.claude-plugin/plugin.json +1 -1
- package/hosts/claude/marketplace/plugins/helloloop/commands/helloloop.md +5 -4
- package/hosts/claude/marketplace/plugins/helloloop/skills/helloloop/SKILL.md +4 -2
- package/hosts/gemini/extension/GEMINI.md +3 -1
- package/hosts/gemini/extension/commands/helloloop.toml +5 -4
- package/hosts/gemini/extension/gemini-extension.json +1 -1
- package/package.json +1 -1
- package/skills/helloloop/SKILL.md +4 -3
- package/src/config.mjs +9 -8
- package/src/discovery.mjs +21 -2
- package/src/email_notification.mjs +343 -0
- package/src/engine_process_support.mjs +294 -0
- package/src/engine_selection_settings.mjs +75 -9
- package/src/global_config.mjs +21 -0
- package/src/install_shared.mjs +50 -2
- package/src/process.mjs +452 -428
- package/src/runner_execute_task.mjs +20 -66
- package/src/runner_execution_support.mjs +0 -9
- package/src/runtime_recovery.mjs +61 -60
- package/templates/policy.template.json +3 -5
package/src/process.mjs
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { spawn } from "node:child_process";
|
|
4
3
|
|
|
5
4
|
import { ensureDir, nowIso, tailText, writeJson, writeText } from "./common.mjs";
|
|
6
5
|
import { getEngineDisplayName, normalizeEngineName } from "./engine_metadata.mjs";
|
|
7
|
-
import { resolveCliInvocation, resolveCodexInvocation, resolveVerifyShellInvocation } from "./shell_invocation.mjs";
|
|
8
6
|
import {
|
|
7
|
+
buildClaudeArgs,
|
|
8
|
+
buildCodexArgs,
|
|
9
|
+
buildGeminiArgs,
|
|
10
|
+
resolveEngineInvocation,
|
|
11
|
+
resolveVerifyInvocation,
|
|
12
|
+
runChild,
|
|
13
|
+
} from "./engine_process_support.mjs";
|
|
14
|
+
import { sendRuntimeStopNotification } from "./email_notification.mjs";
|
|
15
|
+
import { loadGlobalConfig } from "./global_config.mjs";
|
|
16
|
+
import {
|
|
17
|
+
buildEngineHealthProbePrompt,
|
|
9
18
|
buildRuntimeRecoveryPrompt,
|
|
10
19
|
classifyRuntimeRecoveryFailure,
|
|
11
20
|
renderRuntimeRecoverySummary,
|
|
@@ -30,154 +39,6 @@ function createRuntimeStatusWriter(runtimeStatusFile, baseState) {
|
|
|
30
39
|
};
|
|
31
40
|
}
|
|
32
41
|
|
|
33
|
-
/**
 * Spawn a child process, optionally feed it stdin, and resolve with its captured output.
 *
 * The returned promise never rejects: spawn failures and non-zero exits are
 * reported through the resolved object (`ok: false`).
 *
 * @param {string} command - Executable (or shell command when `options.shell` is truthy).
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Extra environment variables layered over `process.env`.
 * @param {boolean} [options.shell] - Run the command through a shell.
 * @param {string} [options.stdin] - Text written to the child's stdin before it is closed.
 * @param {number} [options.heartbeatIntervalMs] - Heartbeat period; clamped to at least 100 ms.
 * @param {number} [options.stallWarningMs] - Idle time before a one-off "suspected_stall" heartbeat (0 disables).
 * @param {number} [options.maxIdleMs] - Idle time before the watchdog kills the child (0 disables).
 * @param {number} [options.killGraceMs] - Delay between the first kill and SIGKILL; clamped to at least 100 ms, default 1000.
 * @param {(payload: object) => void} [options.onHeartbeat] - Receives status snapshots.
 * @returns {Promise<object>} `{ ok, code, stdout, stderr, signal, startedAt, finishedAt,
 *   idleTimeout, watchdogTriggered, watchdogReason }`.
 */
function runChild(command, args, options = {}) {
  return new Promise((resolve) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: {
        ...process.env,
        ...(options.env || {}),
      },
      stdio: ["pipe", "pipe", "pipe"],
      shell: Boolean(options.shell),
    });

    let stdout = "";
    let stderr = "";
    let stdoutBytes = 0;
    let stderrBytes = 0;
    const startedAt = Date.now();
    let lastOutputAt = startedAt;
    let watchdogTriggered = false;
    let watchdogReason = "";
    let stallWarned = false;
    let killTimer = null;
    // Both "error" and "close" can fire for the same child; only the first one may settle.
    let settled = false;

    const emitHeartbeat = (status, extra = {}) => {
      options.onHeartbeat?.({
        status,
        pid: child.pid ?? null,
        startedAt: new Date(startedAt).toISOString(),
        lastOutputAt: new Date(lastOutputAt).toISOString(),
        stdoutBytes,
        stderrBytes,
        idleSeconds: Math.max(0, Math.floor((Date.now() - lastOutputAt) / 1000)),
        watchdogTriggered,
        watchdogReason,
        ...extra,
      });
    };

    const heartbeatIntervalMs = Math.max(100, Number(options.heartbeatIntervalMs || 0));
    const stallWarningMs = Math.max(0, Number(options.stallWarningMs || 0));
    const maxIdleMs = Math.max(0, Number(options.maxIdleMs || 0));
    const killGraceMs = Math.max(100, Number(options.killGraceMs || 1000));

    // The interval is clamped to >= 100 ms above, so the timer is always created.
    const heartbeatTimer = setInterval(() => {
      const idleMs = Date.now() - lastOutputAt;
      if (stallWarningMs > 0 && idleMs >= stallWarningMs && !stallWarned) {
        stallWarned = true;
        emitHeartbeat("suspected_stall", {
          message: `当前子进程已连续 ${Math.floor(idleMs / 1000)} 秒没有可见输出,继续观察。`,
        });
      }

      if (maxIdleMs > 0 && idleMs >= maxIdleMs && !watchdogTriggered) {
        watchdogTriggered = true;
        watchdogReason = `当前子进程已连续 ${Math.floor(idleMs / 1000)} 秒没有可见输出。`;
        stderr = [
          stderr.trim(),
          `[HelloLoop watchdog] ${watchdogReason}`,
        ].filter(Boolean).join("\n");
        emitHeartbeat("watchdog_terminating", {
          message: "已达到无人值守恢复阈值,准备终止当前子进程并发起同引擎恢复。",
        });
        child.kill();
        // Escalate if the child ignores the default termination signal.
        killTimer = setTimeout(() => {
          child.kill("SIGKILL");
        }, killGraceMs);
        return;
      }

      emitHeartbeat(watchdogTriggered ? "watchdog_waiting" : "running");
    }, heartbeatIntervalMs);

    // Stop the timers, emit the terminal heartbeat and resolve exactly once.
    const settle = (status, outcome) => {
      if (settled) {
        return;
      }
      settled = true;
      clearInterval(heartbeatTimer);
      if (killTimer) {
        clearTimeout(killTimer);
      }
      emitHeartbeat(status, {
        code: outcome.code,
        signal: outcome.signal,
      });
      resolve({
        ok: outcome.ok,
        code: outcome.code,
        stdout,
        stderr: outcome.stderr,
        signal: outcome.signal,
        startedAt: new Date(startedAt).toISOString(),
        finishedAt: nowIso(),
        idleTimeout: watchdogTriggered,
        watchdogTriggered,
        watchdogReason,
      });
    };

    emitHeartbeat("running");

    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is not turned into replacement characters.
    // The stdio streams can be missing when the spawn itself failed, hence `?.`.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");

    child.stdout?.on("data", (chunk) => {
      stdout += chunk;
      stdoutBytes += Buffer.byteLength(chunk, "utf8");
      lastOutputAt = Date.now();
      stallWarned = false;
      emitHeartbeat("running");
    });
    child.stderr?.on("data", (chunk) => {
      stderr += chunk;
      stderrBytes += Buffer.byteLength(chunk, "utf8");
      lastOutputAt = Date.now();
      stallWarned = false;
      emitHeartbeat("running");
    });

    // A child that exits (or never starts) before reading its input makes the
    // write fail with EPIPE; without a listener that would be an unhandled
    // "error" event in the parent. The failure itself is still reported through
    // the child's "error"/"close" events below.
    child.stdin?.on("error", () => {});
    if (options.stdin) {
      child.stdin?.write(options.stdin);
    }
    child.stdin?.end();

    child.on("error", (error) => {
      settle("failed", {
        ok: false,
        code: 1,
        stderr: String(error?.stack || error || ""),
        signal: "",
      });
    });

    child.on("close", (code, signal) => {
      settle(code === 0 ? "completed" : "failed", {
        ok: code === 0,
        code: code ?? 1,
        stderr,
        signal: signal || "",
      });
    });
  });
}
|
|
180
|
-
|
|
181
42
|
function writeEngineRunArtifacts(runDir, prefix, result, finalMessage) {
|
|
182
43
|
writeText(path.join(runDir, `${prefix}-stdout.log`), result.stdout);
|
|
183
44
|
writeText(path.join(runDir, `${prefix}-stderr.log`), result.stderr);
|
|
@@ -190,151 +51,258 @@ function writeEngineRunArtifacts(runDir, prefix, result, finalMessage) {
|
|
|
190
51
|
].join("\n"));
|
|
191
52
|
}
|
|
192
53
|
|
|
193
|
-
function
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
54
|
+
/**
 * Pick the per-engine section out of a policy object.
 *
 * @param {object|null} [policy] - Policy object that may carry `codex`, `claude` and `gemini` sections.
 * @param {string} engine - Engine name; only "codex", "claude" and "gemini" are recognised.
 * @returns {object} The matching section, or an empty object when the engine is
 *   not recognised or the section is missing/falsy.
 */
function resolveEnginePolicy(policy = {}, engine) {
  const knownEngines = ["codex", "claude", "gemini"];
  if (!knownEngines.includes(engine)) {
    return {};
  }
  // `policy = {}` only covers `undefined`; `?.` also tolerates an explicit null policy.
  return policy?.[engine] || {};
}
|
|
198
66
|
|
|
199
|
-
function
|
|
200
|
-
|
|
201
|
-
|
|
67
|
+
/**
 * Build the engine-specific CLI argument list by delegating to the matching builder.
 *
 * In probe mode the output schema is blanked; Codex additionally gets JSON
 * output turned off, while Claude and Gemini are forced into "execute" mode.
 * Any engine other than "codex" or "claude" is routed to the Gemini builder.
 *
 * @param {object} params
 * @param {string} params.engine - Engine name.
 * @param {object} params.context - Passed through to the Codex builder.
 * @param {object} params.resolvedPolicy - Per-engine policy section.
 * @param {string} params.executionMode - Requested execution mode.
 * @param {string} params.outputSchemaFile - Output schema path (ignored in probe mode).
 * @param {boolean} params.ephemeral - Passed through to the Codex builder.
 * @param {boolean} params.skipGitRepoCheck - Passed through to the Codex builder.
 * @param {string} params.lastMessageFile - Passed through to the Codex builder.
 * @param {boolean} [params.probeMode=false] - Whether this invocation is a health probe.
 * @returns {*} Whatever the selected builder returns.
 */
function buildEngineArgs({
  engine,
  context,
  resolvedPolicy,
  executionMode,
  outputSchemaFile,
  ephemeral,
  skipGitRepoCheck,
  lastMessageFile,
  probeMode = false,
}) {
  const effectiveSchemaFile = probeMode ? "" : outputSchemaFile;
  const effectiveMode = probeMode ? "execute" : executionMode;

  switch (engine) {
    case "codex": {
      const wantsJson = probeMode ? false : (resolvedPolicy.jsonOutput !== false);
      return buildCodexArgs({
        context,
        model: resolvedPolicy.model,
        sandbox: resolvedPolicy.sandbox,
        dangerouslyBypassSandbox: resolvedPolicy.dangerouslyBypassSandbox,
        jsonOutput: wantsJson,
        outputSchemaFile: effectiveSchemaFile,
        ephemeral,
        skipGitRepoCheck,
        lastMessageFile,
      });
    }
    case "claude":
      return buildClaudeArgs({
        model: resolvedPolicy.model,
        outputSchemaFile: effectiveSchemaFile,
        executionMode: effectiveMode,
        policy: resolvedPolicy,
      });
    default:
      return buildGeminiArgs({
        model: resolvedPolicy.model,
        executionMode: effectiveMode,
        policy: resolvedPolicy,
      });
  }
}
|
|
232
107
|
|
|
233
|
-
function
|
|
108
|
+
/**
 * Obtain the final message of an engine run.
 *
 * For "codex" the message is read from `lastMessageFile`; for every other
 * engine it is the trimmed stdout of the run.
 *
 * @param {string} engine - Engine name.
 * @param {string} lastMessageFile - Path of the last-message file (only read for "codex").
 * @param {object|null} result - Run result; only `stdout` is consulted.
 * @returns {string} Trimmed message, or "" when nothing is available.
 * @throws Re-throws any read error other than the file having disappeared.
 */
function readEngineFinalMessage(engine, lastMessageFile, result) {
  if (engine !== "codex") {
    // Tolerate a missing result object instead of throwing on `.stdout`.
    return String(result?.stdout || "").trim();
  }

  if (!fs.existsSync(lastMessageFile)) {
    return "";
  }

  try {
    return fs.readFileSync(lastMessageFile, "utf8").trim();
  } catch (error) {
    // The file can vanish between the existence check and the read; treat
    // that the same as "no message" rather than failing the whole run.
    if (error?.code === "ENOENT") {
      return "";
    }
    throw error;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Run one engine invocation (task attempt or health probe) and persist its artifacts.
 *
 * When `invocation.error` is set no child is started: a synthetic failed
 * result carrying that error as stderr is written out and returned. Otherwise
 * the engine CLI is run with the prompt on stdin, runtime status is updated on
 * every heartbeat, and the prompt plus run artifacts are written afterwards.
 *
 * @param {object} params
 * @param {string} params.engine - Engine name.
 * @param {object} params.invocation - Resolved invocation (`command`, `argsPrefix`, `shell`, optional `error`).
 * @param {object} params.context - Run context; `repoRoot` is used as the child's cwd.
 * @param {string} params.prompt - Prompt text sent on stdin and saved to disk.
 * @param {string} params.runDir - Directory receiving the artifacts.
 * @param {string} params.attemptPrefix - File-name prefix for this attempt's artifacts.
 * @param {object} params.resolvedPolicy - Per-engine policy section.
 * @param {string} params.executionMode - Requested execution mode.
 * @param {string} params.outputSchemaFile - Output schema path.
 * @param {object} params.env - Extra environment for the child.
 * @param {object} params.recoveryPolicy - Supplies the heartbeat/stall/idle/kill timings in seconds.
 * @param {Function} params.writeRuntimeStatus - Status sink `(status, details)`.
 * @param {number} params.recoveryCount - Recovery counter reported with each status.
 * @param {Array} params.recoveryHistory - Recovery records reported with each status.
 * @param {boolean} [params.ephemeral=false]
 * @param {boolean} [params.skipGitRepoCheck=false]
 * @param {boolean} [params.probeMode=false] - Whether this attempt is a health probe.
 * @returns {Promise<{result: object, finalMessage: string, attemptPrefix: string}>}
 */
async function runEngineAttempt({
  engine,
  invocation,
  context,
  prompt,
  runDir,
  attemptPrefix,
  resolvedPolicy,
  executionMode,
  outputSchemaFile,
  env,
  recoveryPolicy,
  writeRuntimeStatus,
  recoveryCount,
  recoveryHistory,
  ephemeral = false,
  skipGitRepoCheck = false,
  probeMode = false,
}) {
  const promptPath = path.join(runDir, `${attemptPrefix}-prompt.md`);
  const lastMessagePath = path.join(runDir, `${attemptPrefix}-last-message.txt`);

  // The engine could not be resolved: record a failed attempt without spawning anything.
  if (invocation.error) {
    const unresolved = {
      ok: false,
      code: 1,
      stdout: "",
      stderr: invocation.error,
      signal: "",
      startedAt: nowIso(),
      finishedAt: nowIso(),
      idleTimeout: false,
      watchdogTriggered: false,
      watchdogReason: "",
    };
    writeText(promptPath, prompt);
    writeEngineRunArtifacts(runDir, attemptPrefix, unresolved, "");
    return { result: unresolved, finalMessage: "", attemptPrefix };
  }

  const engineArgs = buildEngineArgs({
    engine,
    context,
    resolvedPolicy,
    executionMode,
    outputSchemaFile,
    ephemeral,
    skipGitRepoCheck,
    lastMessageFile: lastMessagePath,
    probeMode,
  });
  const finalArgs = [...invocation.argsPrefix, ...engineArgs];

  let initialStatus;
  if (probeMode) {
    initialStatus = "probe_running";
  } else if (recoveryCount > 0) {
    initialStatus = "recovering";
  } else {
    initialStatus = "running";
  }
  writeRuntimeStatus(initialStatus, { attemptPrefix, recoveryCount, recoveryHistory });

  const childOptions = {
    cwd: context.repoRoot,
    stdin: prompt,
    env,
    shell: invocation.shell,
    // The policy expresses its timings in seconds; the child runner expects milliseconds.
    heartbeatIntervalMs: recoveryPolicy.heartbeatIntervalSeconds * 1000,
    stallWarningMs: recoveryPolicy.stallWarningSeconds * 1000,
    maxIdleMs: recoveryPolicy.maxIdleSeconds * 1000,
    killGraceMs: recoveryPolicy.killGraceSeconds * 1000,
    onHeartbeat(payload) {
      writeRuntimeStatus(payload.status, {
        attemptPrefix,
        recoveryCount,
        recoveryHistory,
        heartbeat: payload,
      });
    },
  };
  const result = await runChild(invocation.command, finalArgs, childOptions);
  const finalMessage = readEngineFinalMessage(engine, lastMessagePath, result);

  writeText(promptPath, prompt);
  writeEngineRunArtifacts(runDir, attemptPrefix, result, finalMessage);

  return { result, finalMessage, attemptPrefix };
}
|
|
301
211
|
|
|
302
|
-
function
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
212
|
+
/**
 * Run a short health-probe invocation against the engine and classify its outcome.
 *
 * The probe reuses the attempt runner in probe mode with "execute" mode, no
 * output schema, `ephemeral` and `skipGitRepoCheck` enabled, and with the idle
 * limit replaced by `recoveryPolicy.healthProbeTimeoutSeconds`.
 *
 * @param {object} params
 * @param {string} params.engine - Engine name; also used in the artifact prefix.
 * @param {object} params.invocation - Resolved engine invocation.
 * @param {object} params.context - Run context.
 * @param {string} params.runDir - Directory receiving the probe artifacts.
 * @param {object} params.resolvedPolicy - Per-engine policy section.
 * @param {object} params.recoveryPolicy - Recovery policy.
 * @param {Function} params.writeRuntimeStatus - Status sink `(status, details)`.
 * @param {number} params.recoveryCount - Recovery counter reported with each status.
 * @param {Array} params.recoveryHistory - Recovery records reported with each status.
 * @param {object} params.env - Extra environment for the child.
 * @param {number} params.probeIndex - Probe ordinal, zero-padded to two digits in the prefix.
 * @returns {Promise<object>} The attempt (`result`, `finalMessage`, `attemptPrefix`) plus a `failure` classification.
 */
async function runEngineHealthProbe({
  engine,
  invocation,
  context,
  runDir,
  resolvedPolicy,
  recoveryPolicy,
  writeRuntimeStatus,
  recoveryCount,
  recoveryHistory,
  env,
  probeIndex,
}) {
  const probePrompt = buildEngineHealthProbePrompt(engine);
  const paddedIndex = String(probeIndex).padStart(2, "0");
  const attemptPrefix = `${engine}-probe-${paddedIndex}`;

  writeRuntimeStatus("probe_waiting", { attemptPrefix, recoveryCount, recoveryHistory });

  // Same policy as the task, but idle time is capped by the probe timeout.
  const probeRecoveryPolicy = {
    ...recoveryPolicy,
    maxIdleSeconds: recoveryPolicy.healthProbeTimeoutSeconds,
  };
  const attempt = await runEngineAttempt({
    engine,
    invocation,
    context,
    prompt: probePrompt,
    runDir,
    attemptPrefix,
    resolvedPolicy,
    executionMode: "execute",
    outputSchemaFile: "",
    env,
    recoveryPolicy: probeRecoveryPolicy,
    writeRuntimeStatus,
    recoveryCount,
    recoveryHistory,
    ephemeral: true,
    skipGitRepoCheck: true,
    probeMode: true,
  });

  const failure = classifyRuntimeRecoveryFailure({
    result: {
      ...attempt.result,
      finalMessage: attempt.finalMessage,
    },
  });

  return { ...attempt, failure };
}
|
|
323
265
|
|
|
324
|
-
|
|
266
|
+
/**
 * Send the "runtime stopped" notification, converting any thrown error into a result object.
 *
 * @param {object} params
 * @param {object} params.context - Run context forwarded to the notifier.
 * @param {string} params.runDir - Run directory forwarded to the notifier.
 * @param {string} params.engine - Engine name; its display name is what gets forwarded.
 * @param {string} params.executionMode - "analyze" selects the analysis/review phase label, anything else the execution label.
 * @param {object} params.failure - Failure classification forwarded to the notifier.
 * @param {object} params.result - Run result forwarded to the notifier.
 * @param {Array} params.recoveryHistory - Recovery records forwarded to the notifier.
 * @returns {Promise<object>} The notifier's result, or
 *   `{ attempted: true, delivered: false, reason }` when loading the config or sending throws.
 */
async function maybeSendStopNotification({
  context,
  runDir,
  engine,
  executionMode,
  failure,
  result,
  recoveryHistory,
}) {
  const phase = executionMode === "analyze" ? "分析/复核" : "执行";

  try {
    // Config loading stays inside the try so a broken config is reported
    // as a failed notification rather than aborting the caller.
    const payload = {
      globalConfig: loadGlobalConfig(),
      context,
      engine: getEngineDisplayName(engine),
      phase,
      failure,
      result,
      recoveryHistory,
      runDir,
    };
    const outcome = await sendRuntimeStopNotification(payload);
    return outcome;
  } catch (error) {
    const reason = String(error?.message || error || "邮件发送失败。");
    return { attempted: true, delivered: false, reason };
  }
}
|
|
326
294
|
|
|
327
|
-
function
|
|
328
|
-
if (
|
|
329
|
-
return
|
|
295
|
+
/**
 * Render a one-line, user-facing note describing the outcome of the alert e-mail.
 *
 * @param {object|null|undefined} notificationResult - Result object with optional
 *   `delivered`, `attempted`, `recipients` and `reason` fields.
 * @returns {string} "" when there is no result; otherwise a line stating that the
 *   mail was sent (with recipients), that sending failed, or that nothing was sent.
 */
function buildNotificationNote(notificationResult) {
  if (!notificationResult) {
    return "";
  }

  if (notificationResult.delivered) {
    // Accept a single address given as a string as well as a list, so a
    // non-array value cannot make `.join` throw.
    const rawRecipients = notificationResult.recipients;
    let recipients = [];
    if (Array.isArray(rawRecipients)) {
      recipients = rawRecipients;
    } else if (rawRecipients) {
      recipients = [rawRecipients];
    }
    return `告警邮件已发送:${recipients.join(", ")}`;
  }

  if (notificationResult.attempted) {
    return `告警邮件发送失败:${notificationResult.reason || "未知原因"}`;
  }

  return `未发送告警邮件:${notificationResult.reason || "未启用"}`;
}
|
|
339
307
|
|
|
340
308
|
export async function runEngineTask({
|
|
@@ -363,203 +331,259 @@ export async function runEngineTask({
|
|
|
363
331
|
engineDisplayName: getEngineDisplayName(normalizedEngine),
|
|
364
332
|
phase: executionMode,
|
|
365
333
|
outputPrefix: prefix,
|
|
366
|
-
|
|
334
|
+
hardRetryBudget: recoveryPolicy.hardRetryDelaysSeconds.length,
|
|
335
|
+
softRetryBudget: recoveryPolicy.softRetryDelaysSeconds.length,
|
|
367
336
|
});
|
|
368
337
|
|
|
369
|
-
let args = [];
|
|
370
|
-
if (normalizedEngine === "claude") {
|
|
371
|
-
args = buildClaudeArgs({
|
|
372
|
-
model: resolvedPolicy.model,
|
|
373
|
-
outputSchemaFile,
|
|
374
|
-
executionMode,
|
|
375
|
-
policy: resolvedPolicy,
|
|
376
|
-
});
|
|
377
|
-
} else if (normalizedEngine === "gemini") {
|
|
378
|
-
args = buildGeminiArgs({
|
|
379
|
-
model: resolvedPolicy.model,
|
|
380
|
-
executionMode,
|
|
381
|
-
policy: resolvedPolicy,
|
|
382
|
-
});
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
if (invocation.error) {
|
|
386
|
-
const result = {
|
|
387
|
-
ok: false,
|
|
388
|
-
code: 1,
|
|
389
|
-
stdout: "",
|
|
390
|
-
stderr: invocation.error,
|
|
391
|
-
};
|
|
392
|
-
writeText(path.join(runDir, `${prefix}-prompt.md`), prompt);
|
|
393
|
-
writeEngineRunArtifacts(runDir, prefix, result, "");
|
|
394
|
-
writeRuntimeStatus("failed", {
|
|
395
|
-
code: result.code,
|
|
396
|
-
message: invocation.error,
|
|
397
|
-
recoveryCount: 0,
|
|
398
|
-
recoveryHistory: [],
|
|
399
|
-
});
|
|
400
|
-
return { ...result, finalMessage: "" };
|
|
401
|
-
}
|
|
402
|
-
|
|
403
338
|
const recoveryHistory = [];
|
|
404
339
|
let currentPrompt = prompt;
|
|
405
340
|
let currentRecoveryCount = 0;
|
|
341
|
+
let activeFailure = null;
|
|
406
342
|
|
|
407
343
|
while (true) {
|
|
408
344
|
const attemptPrefix = currentRecoveryCount === 0
|
|
409
345
|
? prefix
|
|
410
346
|
: `${prefix}-recovery-${String(currentRecoveryCount).padStart(2, "0")}`;
|
|
411
|
-
const
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
context,
|
|
418
|
-
model: resolvedPolicy.model,
|
|
419
|
-
sandbox: resolvedPolicy.sandbox,
|
|
420
|
-
dangerouslyBypassSandbox: resolvedPolicy.dangerouslyBypassSandbox,
|
|
421
|
-
jsonOutput: resolvedPolicy.jsonOutput !== false,
|
|
422
|
-
outputSchemaFile,
|
|
423
|
-
ephemeral,
|
|
424
|
-
skipGitRepoCheck,
|
|
425
|
-
lastMessageFile: attemptLastMessageFile,
|
|
426
|
-
}),
|
|
427
|
-
]
|
|
428
|
-
: [...invocation.argsPrefix, ...args];
|
|
429
|
-
|
|
430
|
-
writeRuntimeStatus(currentRecoveryCount > 0 ? "recovering" : "running", {
|
|
347
|
+
const taskAttempt = await runEngineAttempt({
|
|
348
|
+
engine: normalizedEngine,
|
|
349
|
+
invocation,
|
|
350
|
+
context,
|
|
351
|
+
prompt: currentPrompt,
|
|
352
|
+
runDir,
|
|
431
353
|
attemptPrefix,
|
|
354
|
+
resolvedPolicy,
|
|
355
|
+
executionMode,
|
|
356
|
+
outputSchemaFile,
|
|
357
|
+
env,
|
|
358
|
+
recoveryPolicy,
|
|
359
|
+
writeRuntimeStatus,
|
|
432
360
|
recoveryCount: currentRecoveryCount,
|
|
433
361
|
recoveryHistory,
|
|
362
|
+
ephemeral,
|
|
363
|
+
skipGitRepoCheck,
|
|
364
|
+
probeMode: false,
|
|
434
365
|
});
|
|
435
366
|
|
|
436
|
-
const
|
|
437
|
-
cwd: context.repoRoot,
|
|
438
|
-
stdin: currentPrompt,
|
|
439
|
-
env,
|
|
440
|
-
shell: invocation.shell,
|
|
441
|
-
heartbeatIntervalMs: recoveryPolicy.heartbeatIntervalSeconds * 1000,
|
|
442
|
-
stallWarningMs: recoveryPolicy.stallWarningSeconds * 1000,
|
|
443
|
-
maxIdleMs: recoveryPolicy.maxIdleSeconds * 1000,
|
|
444
|
-
killGraceMs: recoveryPolicy.killGraceSeconds * 1000,
|
|
445
|
-
onHeartbeat(payload) {
|
|
446
|
-
writeRuntimeStatus(payload.status, {
|
|
447
|
-
attemptPrefix,
|
|
448
|
-
recoveryCount: currentRecoveryCount,
|
|
449
|
-
recoveryHistory,
|
|
450
|
-
heartbeat: payload,
|
|
451
|
-
});
|
|
452
|
-
},
|
|
453
|
-
});
|
|
454
|
-
const finalMessage = normalizedEngine === "codex"
|
|
455
|
-
? (fs.existsSync(attemptLastMessageFile) ? fs.readFileSync(attemptLastMessageFile, "utf8").trim() : "")
|
|
456
|
-
: String(result.stdout || "").trim();
|
|
457
|
-
|
|
458
|
-
writeText(attemptPromptFile, currentPrompt);
|
|
459
|
-
writeEngineRunArtifacts(runDir, attemptPrefix, result, finalMessage);
|
|
460
|
-
|
|
461
|
-
const failure = classifyRuntimeRecoveryFailure({
|
|
367
|
+
const taskFailure = classifyRuntimeRecoveryFailure({
|
|
462
368
|
result: {
|
|
463
|
-
...result,
|
|
464
|
-
finalMessage,
|
|
369
|
+
...taskAttempt.result,
|
|
370
|
+
finalMessage: taskAttempt.finalMessage,
|
|
465
371
|
},
|
|
466
|
-
recoveryPolicy,
|
|
467
|
-
recoveryCount: currentRecoveryCount,
|
|
468
372
|
});
|
|
469
373
|
|
|
470
|
-
if (
|
|
471
|
-
result.ok
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
374
|
+
if (taskAttempt.result.ok || !recoveryPolicy.enabled) {
|
|
375
|
+
const finalRecoverySummary = taskAttempt.result.ok
|
|
376
|
+
? ""
|
|
377
|
+
: renderRuntimeRecoverySummary(recoveryHistory, taskFailure);
|
|
378
|
+
const notification = taskAttempt.result.ok
|
|
379
|
+
? null
|
|
380
|
+
: await maybeSendStopNotification({
|
|
381
|
+
context,
|
|
382
|
+
runDir,
|
|
383
|
+
engine: normalizedEngine,
|
|
384
|
+
executionMode,
|
|
385
|
+
failure: taskFailure,
|
|
386
|
+
result: taskAttempt.result,
|
|
387
|
+
recoveryHistory,
|
|
388
|
+
});
|
|
389
|
+
const notificationNote = taskAttempt.result.ok ? "" : buildNotificationNote(notification);
|
|
390
|
+
const finalizedResult = taskAttempt.result.ok
|
|
391
|
+
? taskAttempt.result
|
|
479
392
|
: {
|
|
480
|
-
...result,
|
|
481
|
-
stderr: [
|
|
393
|
+
...taskAttempt.result,
|
|
394
|
+
stderr: [
|
|
395
|
+
taskAttempt.result.stderr,
|
|
396
|
+
"",
|
|
397
|
+
finalRecoverySummary,
|
|
398
|
+
notificationNote,
|
|
399
|
+
].filter(Boolean).join("\n").trim(),
|
|
482
400
|
};
|
|
483
401
|
|
|
484
402
|
writeText(path.join(runDir, `${prefix}-prompt.md`), currentPrompt);
|
|
485
|
-
writeEngineRunArtifacts(runDir, prefix, finalizedResult, finalMessage);
|
|
486
|
-
if (normalizedEngine === "codex" && finalMessage) {
|
|
487
|
-
writeText(path.join(runDir, `${prefix}-last-message.txt`), finalMessage);
|
|
403
|
+
writeEngineRunArtifacts(runDir, prefix, finalizedResult, taskAttempt.finalMessage);
|
|
404
|
+
if (normalizedEngine === "codex" && taskAttempt.finalMessage) {
|
|
405
|
+
writeText(path.join(runDir, `${prefix}-last-message.txt`), taskAttempt.finalMessage);
|
|
488
406
|
}
|
|
489
|
-
writeRuntimeStatus(result.ok ? "completed" : "
|
|
407
|
+
writeRuntimeStatus(taskAttempt.result.ok ? "completed" : "paused_manual", {
|
|
490
408
|
attemptPrefix,
|
|
491
|
-
recoveryCount:
|
|
409
|
+
recoveryCount: recoveryHistory.length,
|
|
492
410
|
recoveryHistory,
|
|
493
411
|
recoverySummary: finalRecoverySummary,
|
|
494
|
-
finalMessage,
|
|
412
|
+
finalMessage: taskAttempt.finalMessage,
|
|
495
413
|
code: finalizedResult.code,
|
|
496
|
-
failureCode:
|
|
497
|
-
|
|
414
|
+
failureCode: taskFailure.code,
|
|
415
|
+
failureFamily: taskFailure.family,
|
|
416
|
+
failureReason: taskFailure.reason,
|
|
417
|
+
notification,
|
|
498
418
|
});
|
|
499
419
|
|
|
500
420
|
return {
|
|
501
421
|
...finalizedResult,
|
|
502
|
-
finalMessage,
|
|
503
|
-
recoveryCount:
|
|
422
|
+
finalMessage: taskAttempt.finalMessage,
|
|
423
|
+
recoveryCount: recoveryHistory.length,
|
|
504
424
|
recoveryHistory,
|
|
505
425
|
recoverySummary: finalRecoverySummary,
|
|
506
|
-
recoveryFailure:
|
|
426
|
+
recoveryFailure: taskAttempt.result.ok
|
|
427
|
+
? null
|
|
428
|
+
: {
|
|
429
|
+
...taskFailure,
|
|
430
|
+
shouldStopTask: true,
|
|
431
|
+
exhausted: true,
|
|
432
|
+
},
|
|
433
|
+
notification,
|
|
507
434
|
};
|
|
508
435
|
}
|
|
509
436
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
437
|
+
activeFailure = taskFailure;
|
|
438
|
+
while (true) {
|
|
439
|
+
const nextRecoveryIndex = recoveryHistory.length + 1;
|
|
440
|
+
const recoveryPrompt = buildRuntimeRecoveryPrompt({
|
|
441
|
+
basePrompt: prompt,
|
|
442
|
+
engine: normalizedEngine,
|
|
443
|
+
phaseLabel: executionMode === "analyze" ? "分析/复核" : "执行",
|
|
444
|
+
failure: activeFailure,
|
|
445
|
+
result: {
|
|
446
|
+
...taskAttempt.result,
|
|
447
|
+
finalMessage: taskAttempt.finalMessage,
|
|
448
|
+
},
|
|
449
|
+
nextRecoveryIndex,
|
|
450
|
+
maxRecoveries: recoveryPolicy[activeFailure.family === "hard" ? "hardRetryDelaysSeconds" : "softRetryDelaysSeconds"].length,
|
|
451
|
+
});
|
|
452
|
+
writeText(
|
|
453
|
+
path.join(runDir, `${prefix}-auto-recovery-${String(nextRecoveryIndex).padStart(2, "0")}-prompt.md`),
|
|
454
|
+
recoveryPrompt,
|
|
455
|
+
);
|
|
456
|
+
const delayMs = selectRuntimeRecoveryDelayMs(recoveryPolicy, activeFailure.family, nextRecoveryIndex);
|
|
457
|
+
if (delayMs < 0) {
|
|
458
|
+
const finalRecoverySummary = renderRuntimeRecoverySummary(recoveryHistory, activeFailure);
|
|
459
|
+
const notification = await maybeSendStopNotification({
|
|
460
|
+
context,
|
|
461
|
+
runDir,
|
|
462
|
+
engine: normalizedEngine,
|
|
463
|
+
executionMode,
|
|
464
|
+
failure: activeFailure,
|
|
465
|
+
result: taskAttempt.result,
|
|
466
|
+
recoveryHistory,
|
|
467
|
+
});
|
|
468
|
+
const notificationNote = buildNotificationNote(notification);
|
|
469
|
+
const finalizedResult = {
|
|
470
|
+
...taskAttempt.result,
|
|
471
|
+
stderr: [
|
|
472
|
+
taskAttempt.result.stderr,
|
|
473
|
+
"",
|
|
474
|
+
finalRecoverySummary,
|
|
475
|
+
notificationNote,
|
|
476
|
+
].filter(Boolean).join("\n").trim(),
|
|
477
|
+
};
|
|
478
|
+
|
|
479
|
+
writeText(path.join(runDir, `${prefix}-prompt.md`), currentPrompt);
|
|
480
|
+
writeEngineRunArtifacts(runDir, prefix, finalizedResult, taskAttempt.finalMessage);
|
|
481
|
+
writeRuntimeStatus("paused_manual", {
|
|
482
|
+
attemptPrefix,
|
|
483
|
+
recoveryCount: recoveryHistory.length,
|
|
484
|
+
recoveryHistory,
|
|
485
|
+
recoverySummary: finalRecoverySummary,
|
|
486
|
+
finalMessage: taskAttempt.finalMessage,
|
|
487
|
+
code: finalizedResult.code,
|
|
488
|
+
failureCode: activeFailure.code,
|
|
489
|
+
failureFamily: activeFailure.family,
|
|
490
|
+
failureReason: activeFailure.reason,
|
|
491
|
+
notification,
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
return {
|
|
495
|
+
...finalizedResult,
|
|
496
|
+
finalMessage: taskAttempt.finalMessage,
|
|
497
|
+
recoveryCount: recoveryHistory.length,
|
|
498
|
+
recoveryHistory,
|
|
499
|
+
recoverySummary: finalRecoverySummary,
|
|
500
|
+
recoveryFailure: {
|
|
501
|
+
...activeFailure,
|
|
502
|
+
shouldStopTask: true,
|
|
503
|
+
exhausted: true,
|
|
504
|
+
},
|
|
505
|
+
notification,
|
|
506
|
+
};
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
writeRuntimeStatus("retry_waiting", {
|
|
510
|
+
attemptPrefix,
|
|
511
|
+
recoveryCount: nextRecoveryIndex,
|
|
512
|
+
recoveryHistory,
|
|
513
|
+
nextRetryDelayMs: delayMs,
|
|
514
|
+
nextRetryAt: new Date(Date.now() + delayMs).toISOString(),
|
|
515
|
+
failureCode: activeFailure.code,
|
|
516
|
+
failureFamily: activeFailure.family,
|
|
517
|
+
failureReason: activeFailure.reason,
|
|
518
|
+
});
|
|
519
|
+
if (delayMs > 0) {
|
|
520
|
+
await sleep(delayMs);
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
const probeAttempt = await runEngineHealthProbe({
|
|
524
|
+
engine: normalizedEngine,
|
|
525
|
+
invocation,
|
|
526
|
+
context,
|
|
527
|
+
runDir,
|
|
528
|
+
resolvedPolicy,
|
|
529
|
+
recoveryPolicy,
|
|
530
|
+
writeRuntimeStatus,
|
|
531
|
+
recoveryCount: nextRecoveryIndex,
|
|
532
|
+
recoveryHistory,
|
|
533
|
+
env,
|
|
534
|
+
probeIndex: nextRecoveryIndex,
|
|
535
|
+
});
|
|
536
|
+
const recoveryRecord = {
|
|
537
|
+
recoveryIndex: nextRecoveryIndex,
|
|
538
|
+
family: activeFailure.family,
|
|
539
|
+
code: activeFailure.code,
|
|
540
|
+
reason: activeFailure.reason,
|
|
541
|
+
delaySeconds: Math.floor(delayMs / 1000),
|
|
542
|
+
taskStatus: "failed",
|
|
543
|
+
taskCode: taskAttempt.result.code,
|
|
544
|
+
taskAttemptPrefix: attemptPrefix,
|
|
545
|
+
probeStatus: probeAttempt.result.ok ? "ok" : "failed",
|
|
546
|
+
probeCode: probeAttempt.result.code,
|
|
547
|
+
probeAttemptPrefix: probeAttempt.attemptPrefix,
|
|
548
|
+
probeFailureCode: probeAttempt.failure?.code || "",
|
|
549
|
+
probeFailureFamily: probeAttempt.failure?.family || "",
|
|
550
|
+
probeFailureReason: probeAttempt.failure?.reason || "",
|
|
551
|
+
watchdogTriggered: taskAttempt.result.watchdogTriggered === true || probeAttempt.result.watchdogTriggered === true,
|
|
552
|
+
};
|
|
553
|
+
recoveryHistory.push(recoveryRecord);
|
|
554
|
+
writeJson(path.join(
|
|
555
|
+
runDir,
|
|
556
|
+
`${prefix}-auto-recovery-${String(nextRecoveryIndex).padStart(2, "0")}.json`,
|
|
557
|
+
), {
|
|
558
|
+
...recoveryRecord,
|
|
559
|
+
engine: normalizedEngine,
|
|
560
|
+
phase: executionMode,
|
|
561
|
+
stdoutTail: tailText(taskAttempt.result.stdout, 20),
|
|
562
|
+
stderrTail: tailText(taskAttempt.result.stderr, 20),
|
|
563
|
+
finalMessageTail: tailText(taskAttempt.finalMessage, 20),
|
|
564
|
+
probeStdoutTail: tailText(probeAttempt.result.stdout, 20),
|
|
565
|
+
probeStderrTail: tailText(probeAttempt.result.stderr, 20),
|
|
566
|
+
probeFinalMessageTail: tailText(probeAttempt.finalMessage, 20),
|
|
567
|
+
createdAt: nowIso(),
|
|
568
|
+
});
|
|
569
|
+
|
|
570
|
+
if (!probeAttempt.result.ok) {
|
|
571
|
+
activeFailure = probeAttempt.failure;
|
|
572
|
+
writeRuntimeStatus("probe_failed", {
|
|
573
|
+
attemptPrefix: probeAttempt.attemptPrefix,
|
|
574
|
+
recoveryCount: nextRecoveryIndex,
|
|
575
|
+
recoveryHistory,
|
|
576
|
+
failureCode: activeFailure.code,
|
|
577
|
+
failureFamily: activeFailure.family,
|
|
578
|
+
failureReason: activeFailure.reason,
|
|
579
|
+
});
|
|
580
|
+
continue;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
currentPrompt = recoveryPrompt;
|
|
584
|
+
currentRecoveryCount = nextRecoveryIndex;
|
|
585
|
+
break;
|
|
560
586
|
}
|
|
561
|
-
currentPrompt = recoveryPrompt;
|
|
562
|
-
currentRecoveryCount = nextRecoveryIndex;
|
|
563
587
|
}
|
|
564
588
|
}
|
|
565
589
|
|
|
@@ -595,7 +619,7 @@ export async function runEngineExec({ engine, context, prompt, runDir, policy })
|
|
|
595
619
|
}
|
|
596
620
|
|
|
597
621
|
export async function runShellCommand(context, commandLine, runDir, index) {
|
|
598
|
-
const shellInvocation =
|
|
622
|
+
const shellInvocation = resolveVerifyInvocation();
|
|
599
623
|
if (shellInvocation.error) {
|
|
600
624
|
const result = {
|
|
601
625
|
command: commandLine,
|
|
@@ -638,10 +662,10 @@ export async function runVerifyCommands(context, commands, runDir) {
|
|
|
638
662
|
ok: false,
|
|
639
663
|
results,
|
|
640
664
|
failed: result,
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
665
|
+
summary: [
|
|
666
|
+
`验证失败:${result.command}`,
|
|
667
|
+
"",
|
|
668
|
+
"stdout 尾部:",
|
|
645
669
|
tailText(result.stdout, 40),
|
|
646
670
|
"",
|
|
647
671
|
"stderr 尾部:",
|