agentopia 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.claude/settings.local.json +28 -0
  2. package/dist/app.d.ts +10 -0
  3. package/dist/app.d.ts.map +1 -0
  4. package/dist/app.js +121 -0
  5. package/dist/app.js.map +1 -0
  6. package/dist/config.d.ts +9 -0
  7. package/dist/config.d.ts.map +1 -0
  8. package/dist/config.js +19 -0
  9. package/dist/config.js.map +1 -0
  10. package/dist/db/database.d.ts +5 -0
  11. package/dist/db/database.d.ts.map +1 -0
  12. package/dist/db/database.js +39 -0
  13. package/dist/db/database.js.map +1 -0
  14. package/dist/db/schema.d.ts +3 -0
  15. package/dist/db/schema.d.ts.map +1 -0
  16. package/dist/db/schema.js +621 -0
  17. package/dist/db/schema.js.map +1 -0
  18. package/dist/index.d.ts +2 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +49 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/logger.d.ts +4 -0
  23. package/dist/logger.d.ts.map +1 -0
  24. package/dist/logger.js +9 -0
  25. package/dist/logger.js.map +1 -0
  26. package/dist/middleware/auth.d.ts +13 -0
  27. package/dist/middleware/auth.d.ts.map +1 -0
  28. package/dist/middleware/auth.js +733 -0
  29. package/dist/middleware/auth.js.map +1 -0
  30. package/dist/routes/agents.d.ts +3 -0
  31. package/dist/routes/agents.d.ts.map +1 -0
  32. package/dist/routes/agents.js +1058 -0
  33. package/dist/routes/agents.js.map +1 -0
  34. package/dist/routes/issues.d.ts +4 -0
  35. package/dist/routes/issues.d.ts.map +1 -0
  36. package/dist/routes/issues.js +946 -0
  37. package/dist/routes/issues.js.map +1 -0
  38. package/dist/routes/knowledge.d.ts +3 -0
  39. package/dist/routes/knowledge.d.ts.map +1 -0
  40. package/dist/routes/knowledge.js +117 -0
  41. package/dist/routes/knowledge.js.map +1 -0
  42. package/dist/routes/memories.d.ts +3 -0
  43. package/dist/routes/memories.d.ts.map +1 -0
  44. package/dist/routes/memories.js +115 -0
  45. package/dist/routes/memories.js.map +1 -0
  46. package/dist/routes/messages.d.ts +3 -0
  47. package/dist/routes/messages.d.ts.map +1 -0
  48. package/dist/routes/messages.js +130 -0
  49. package/dist/routes/messages.js.map +1 -0
  50. package/dist/routes/projects.d.ts +3 -0
  51. package/dist/routes/projects.d.ts.map +1 -0
  52. package/dist/routes/projects.js +754 -0
  53. package/dist/routes/projects.js.map +1 -0
  54. package/dist/routes/templates.d.ts +3 -0
  55. package/dist/routes/templates.d.ts.map +1 -0
  56. package/dist/routes/templates.js +117 -0
  57. package/dist/routes/templates.js.map +1 -0
  58. package/dist/routes/ui.d.ts +3 -0
  59. package/dist/routes/ui.d.ts.map +1 -0
  60. package/dist/routes/ui.js +38 -0
  61. package/dist/routes/ui.js.map +1 -0
  62. package/dist/services/agent-hierarchy.d.ts +14 -0
  63. package/dist/services/agent-hierarchy.d.ts.map +1 -0
  64. package/dist/services/agent-hierarchy.js +58 -0
  65. package/dist/services/agent-hierarchy.js.map +1 -0
  66. package/dist/services/agent-issue-batch.d.ts +17 -0
  67. package/dist/services/agent-issue-batch.d.ts.map +1 -0
  68. package/dist/services/agent-issue-batch.js +57 -0
  69. package/dist/services/agent-issue-batch.js.map +1 -0
  70. package/dist/services/controller.d.ts +4 -0
  71. package/dist/services/controller.d.ts.map +1 -0
  72. package/dist/services/controller.js +237 -0
  73. package/dist/services/controller.js.map +1 -0
  74. package/dist/services/langgraph-runner.d.ts +33 -0
  75. package/dist/services/langgraph-runner.d.ts.map +1 -0
  76. package/dist/services/langgraph-runner.js +478 -0
  77. package/dist/services/langgraph-runner.js.map +1 -0
  78. package/dist/services/orchestrator.d.ts +9 -0
  79. package/dist/services/orchestrator.d.ts.map +1 -0
  80. package/dist/services/orchestrator.js +116 -0
  81. package/dist/services/orchestrator.js.map +1 -0
  82. package/dist/services/pre-controller.d.ts +7 -0
  83. package/dist/services/pre-controller.d.ts.map +1 -0
  84. package/dist/services/pre-controller.js +101 -0
  85. package/dist/services/pre-controller.js.map +1 -0
  86. package/dist/services/process-manager.d.ts +67 -0
  87. package/dist/services/process-manager.d.ts.map +1 -0
  88. package/dist/services/process-manager.js +938 -0
  89. package/dist/services/process-manager.js.map +1 -0
  90. package/dist/services/project-permissions.d.ts +84 -0
  91. package/dist/services/project-permissions.d.ts.map +1 -0
  92. package/dist/services/project-permissions.js +129 -0
  93. package/dist/services/project-permissions.js.map +1 -0
  94. package/dist/services/scheduler.d.ts +6 -0
  95. package/dist/services/scheduler.d.ts.map +1 -0
  96. package/dist/services/scheduler.js +300 -0
  97. package/dist/services/scheduler.js.map +1 -0
  98. package/dist/services/system-prompt.d.ts +3 -0
  99. package/dist/services/system-prompt.d.ts.map +1 -0
  100. package/dist/services/system-prompt.js +285 -0
  101. package/dist/services/system-prompt.js.map +1 -0
  102. package/dist/services/terminal.d.ts +18 -0
  103. package/dist/services/terminal.d.ts.map +1 -0
  104. package/dist/services/terminal.js +222 -0
  105. package/dist/services/terminal.js.map +1 -0
  106. package/dist/services/websocket.d.ts +15 -0
  107. package/dist/services/websocket.d.ts.map +1 -0
  108. package/dist/services/websocket.js +204 -0
  109. package/dist/services/websocket.js.map +1 -0
  110. package/dist/types.d.ts +108 -0
  111. package/dist/types.d.ts.map +1 -0
  112. package/dist/types.js +3 -0
  113. package/dist/types.js.map +1 -0
  114. package/env.ini +18 -0
  115. package/package.json +38 -0
  116. package/project_id +0 -0
  117. package/public/admin-users.html +188 -0
  118. package/public/agent.html +199 -0
  119. package/public/css/issues.css +275 -0
  120. package/public/css/style.css +1299 -0
  121. package/public/index.html +166 -0
  122. package/public/issue.html +76 -0
  123. package/public/js/agent.js +19 -0
  124. package/public/js/common.js +735 -0
  125. package/public/js/dashboard.js +772 -0
  126. package/public/js/files-panel.js +703 -0
  127. package/public/js/interactive-terminal.js +201 -0
  128. package/public/js/issue-renderer.js +559 -0
  129. package/public/js/issue.js +57 -0
  130. package/public/js/project.js +2425 -0
  131. package/public/js/terminal.js +564 -0
  132. package/public/project.html +430 -0
  133. package/public/terminal.html +67 -0
  134. package/public/vendor/marked.js +74 -0
  135. package/public/vendor/xterm-addon-fit.js +2 -0
  136. package/public/vendor/xterm.css +209 -0
  137. package/public/vendor/xterm.js +2 -0
  138. package/send_message_and_update_issue.js +65 -0
  139. package/tsconfig.json +19 -0
  140. package/update_round2_and_create_round3.js +284 -0
@@ -0,0 +1,938 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.MAX_CONSECUTIVE_LOW_OUTPUT = exports.RESTART_COOLDOWN_MS = exports.FINAL_RESULT_KILL_DELAY_MS = exports.DEFAULT_IDLE_TIMEOUT_MS = void 0;
7
+ exports.setOnAgentFinish = setOnAgentFinish;
8
+ exports.classifyAgentExitStatus = classifyAgentExitStatus;
9
+ exports.startAgentProcess = startAgentProcess;
10
+ exports.checkChildCpuActivity = checkChildCpuActivity;
11
+ exports.clearCpuSnapshot = clearCpuSnapshot;
12
+ exports.stopAgentProcess = stopAgentProcess;
13
+ exports.isAgentRunning = isAgentRunning;
14
+ exports.getAgentIdleMs = getAgentIdleMs;
15
+ exports.resetAgentActivity = resetAgentActivity;
16
+ exports.isAgentInCooldown = isAgentInCooldown;
17
+ exports.markAgentIssuesCompleted = markAgentIssuesCompleted;
18
+ exports.shouldSkipAutoRestart = shouldSkipAutoRestart;
19
+ exports.resetAutoRestartSkip = resetAutoRestartSkip;
20
+ exports.getAgentFinalResultAge = getAgentFinalResultAge;
21
+ exports.getRunningAgentIds = getRunningAgentIds;
22
+ exports.stopAllProcesses = stopAllProcesses;
23
+ const child_process_1 = require("child_process");
24
+ const uuid_1 = require("uuid");
25
+ const fs_1 = __importDefault(require("fs"));
26
+ const path_1 = __importDefault(require("path"));
27
+ const os_1 = __importDefault(require("os"));
28
+ const database_1 = require("../db/database");
29
+ const websocket_1 = require("./websocket");
30
+ const logger_1 = __importDefault(require("../logger"));
31
+ const runningProcesses = new Map(); // agent id -> spawned child process; set in startAgentProcess, deleted in its 'close' handler
32
+ const PROMPT_DIR = path_1.default.join(os_1.default.tmpdir(), 'argus-prompts'); // directory under the OS temp dir where writePromptFile stores `<runId>.txt`
33
+ // Last activity (output) time per agent, stored as Date.now() milliseconds — used by watchdog to detect stuck agents
34
+ const lastActivityTime = new Map();
35
+ const DEFAULT_IDLE_TIMEOUT_MS = 60 * 60 * 1000; // 60 minutes with no output = stuck
36
+ exports.DEFAULT_IDLE_TIMEOUT_MS = DEFAULT_IDLE_TIMEOUT_MS;
37
+ // Track consecutive error count per agent for session invalidation
38
+ const agentErrorCount = new Map();
39
+ const MAX_CONSECUTIVE_ERRORS = 3; // NOTE(review): presumably the error streak that invalidates a session — the code using it is outside this view
40
+ const childCpuSnapshots = new Map(); // entry for an agent is deleted when its child closes; NOTE(review): presumably filled by checkChildCpuActivity — confirm
41
+ const CPU_STALE_THRESHOLD = 3; // 3 consecutive unchanged scans (= 15 min at 5-min interval) → stuck
42
+ // Track API connection errors for auto-retry
43
+ const agentApiConnectErrorCount = new Map();
44
+ const agentLastErrorWasApiConnect = new Map(); // set to true when an output chunk contains 'Unable to connect to API'; read and cleared in the 'close' handler
45
+ const pendingRetryTimers = new Map(); // NOTE(review): presumably agent id -> scheduled retry timer — confirm against the retry path
46
+ // Track orphaned timers from stopAgentProcess so they can be cancelled during shutdown
47
+ const pendingStopTimers = new Set();
48
+ // Shutdown flag — when true, all async callbacks skip DB access and timer creation
49
+ let shuttingDown = false;
50
+ // Track when agent received its final result — used by watchdog to kill stuck post-completion processes
51
+ const agentFinalResultTime = new Map(); // agent id -> Date.now() recorded when a stream-json 'result' event is parsed
52
+ const FINAL_RESULT_KILL_DELAY_MS = 2 * 60 * 1000; // 2 minutes after Final Result → force kill
53
+ exports.FINAL_RESULT_KILL_DELAY_MS = FINAL_RESULT_KILL_DELAY_MS;
54
+ // Track when agent last finished — used to prevent rapid restarts that produce low-output tail requests
55
+ const agentLastFinishTime = new Map();
56
+ const RESTART_COOLDOWN_MS = 3 * 60 * 1000; // 3 minutes cooldown after finish before auto-restart
57
+ exports.RESTART_COOLDOWN_MS = RESTART_COOLDOWN_MS;
58
+ // Track consecutive low-output runs per agent — auto-extend cooldown to avoid tail requests
59
+ const agentLastOutputTokens = new Map(); // agent id -> usage.output_tokens of the latest 'result' event (0 when the field is missing)
60
+ const agentConsecutiveLowOutput = new Map();
61
+ const LOW_OUTPUT_THRESHOLD = 100; // output tokens below this = "low output"
62
+ const EXTENDED_COOLDOWN_MS = 10 * 60 * 1000; // 10 minutes cooldown after consecutive low-output runs
63
+ const MAX_CONSECUTIVE_LOW_OUTPUT = 2; // after 2+ consecutive low-output runs, block auto-restart entirely
64
+ exports.MAX_CONSECUTIVE_LOW_OUTPUT = MAX_CONSECUTIVE_LOW_OUTPUT;
65
+ // Track whether agent completed all its assigned issues during the current/last run.
66
+ // Set by markAgentIssuesCompleted() called from issue routes; cleared on next meaningful run.
67
+ const agentCompletedAllIssues = new Set();
68
+ // Intra-session low-output detection: track consecutive small assistant messages
69
+ // with no tool calls within a single run. When an agent produces multiple such
70
+ // messages, it's doing useless "confirmation" tail rounds that waste tokens.
71
+ const agentIntraSessionLowStreak = new Map();
72
+ const INTRA_LOW_OUTPUT_KILL_THRESHOLD = 2; // 2 consecutive low-output assistant turns → terminate (a threshold of 1 is used instead once the agent is in agentCompletedAllIssues)
73
+ const INTRA_LOW_OUTPUT_CHAR_LIMIT = 200; // assistant text below this = "low output"
74
+ const TOOL_INPUT_LOG_CHAR_LIMIT = 4000; // large enough for expandable UI without storing unbounded payloads
75
+ const RESUME_MISSING_FILE_RE = /no such file or directory/i; // matched against the captured stderr sample to detect a failed session resume
76
+ const CLOSED_STDIN_SESSION_RE = /stdin is closed for this session|write_stdin failed/i; // tested against stderr chunks; a match makes the run count as an error unless a final result was seen
77
/**
 * Persist a run's prompt text to its own file inside PROMPT_DIR.
 *
 * The directory is created on demand (missing parents included). The file is
 * named `<runId>.txt`; an existing file with that name is overwritten.
 *
 * @param {string} runId - Run identifier, used as the file's base name.
 * @param {string} prompt - Prompt text, written with UTF-8 encoding.
 * @returns {string} Path of the file that was written.
 * @throws {Error} Any error raised by the synchronous fs calls propagates.
 */
function writePromptFile(runId, prompt) {
    const promptDirMissing = !fs_1.default.existsSync(PROMPT_DIR);
    if (promptDirMissing) {
        fs_1.default.mkdirSync(PROMPT_DIR, { recursive: true });
    }
    const targetPath = path_1.default.join(PROMPT_DIR, `${runId}.txt`);
    fs_1.default.writeFileSync(targetPath, prompt, 'utf-8');
    return targetPath;
}
84
/**
 * Best-effort removal of a prompt file; never throws.
 *
 * A falsy path or a path that does not exist is a no-op. A failure to delete
 * is reported through the logger, except ENOENT, which only means the file
 * vanished between the existence check and the unlink.
 *
 * @param {string|null|undefined} fp - Path of the prompt file to delete.
 * @returns {void}
 */
function cleanupPromptFile(fp) {
    if (!fp) {
        return;
    }
    try {
        if (fs_1.default.existsSync(fp)) {
            fs_1.default.unlinkSync(fp);
        }
    }
    catch (err) {
        const alreadyGone = err?.code === 'ENOENT';
        if (!alreadyGone) {
            logger_1.default.error(err, 'Failed to cleanup prompt file %s', fp);
        }
    }
}
96
/**
 * Release this process's hold on a child process and its stdio streams.
 *
 * For each of stdin/stdout/stderr that is present, the stream is unref'ed (if
 * it has an `unref` method) and then destroyed (if it is not already destroyed
 * and has a `destroy` method). Finally the child itself is unref'ed. Each step
 * is attempted independently and any exception is deliberately discarded, so
 * one failing step never prevents the remaining ones.
 *
 * @param {object|null|undefined} child - Child-process-like object; falsy is a no-op.
 * @returns {void}
 */
function detachChildProcessIo(child) {
    if (!child) {
        return;
    }
    // Deliberate best-effort: errors from any single step are swallowed.
    const attempt = (step) => {
        try {
            step();
        }
        catch { }
    };
    const pipes = [child.stdin, child.stdout, child.stderr].filter(Boolean);
    pipes.forEach((pipe) => {
        attempt(() => {
            if (typeof pipe.unref === 'function') {
                pipe.unref();
            }
        });
        attempt(() => {
            const canDestroy = !pipe.destroyed && typeof pipe.destroy === 'function';
            if (canDestroy) {
                pipe.destroy();
            }
        });
    });
    attempt(() => {
        child.unref();
    });
}
120
// Module-level callback slot; null until setOnAgentFinish stores something.
// NOTE(review): the code that invokes this callback is outside the visible range.
let onAgentFinish = null;
/**
 * Store `cb` in the module-level `onAgentFinish` slot, replacing any previous value.
 *
 * @param {Function|null} cb - Callback to keep; passing null clears the slot.
 * @returns {void}
 */
function setOnAgentFinish(cb) {
    onAgentFinish = cb;
}
124
/**
 * Decide which status an agent should get after its process has exited.
 *
 * Precedence, highest first:
 *   1. `currentStatus === 'stopped'`  -> 'stopped' (an explicit stop is preserved)
 *   2. `hadFinalResult`               -> 'idle'
 *   3. non-zero exit code (including null/undefined) or a closed-stdin
 *      session error                  -> 'error'
 *   4. a completion signal was required but never seen -> 'error'
 *   5. otherwise                      -> 'idle'
 *
 * @param {object} input
 * @param {string} [input.currentStatus] - Status currently stored for the agent.
 * @param {boolean} [input.hadFinalResult] - Whether a final result was recorded for the run.
 * @param {number|null} [input.exitCode] - Exit code of the process.
 * @param {boolean} [input.sawClosedStdinSessionError] - Whether a closed-stdin error was seen.
 * @param {boolean} [input.requiresCompletionSignal] - Whether the tool must emit a completion event.
 * @param {boolean} [input.sawCompletionSignal] - Whether that completion event was seen.
 * @returns {'stopped'|'idle'|'error'} The status to record.
 */
function classifyAgentExitStatus(input) {
    const {
        currentStatus,
        hadFinalResult,
        exitCode,
        sawClosedStdinSessionError,
        requiresCompletionSignal,
        sawCompletionSignal,
    } = input;
    if (currentStatus === 'stopped') {
        return 'stopped';
    }
    if (hadFinalResult) {
        return 'idle';
    }
    const exitedCleanly = exitCode === 0 && !sawClosedStdinSessionError;
    const missingRequiredSignal = requiresCompletionSignal && !sawCompletionSignal;
    return exitedCleanly && !missingRequiredSignal ? 'idle' : 'error';
}
135
+ function startAgentProcess(agent, prompt, commandTemplate, systemPrompt) {
136
+ const db = (0, database_1.getDatabase)();
137
+ const runId = (0, uuid_1.v4)();
138
+ // Write prompt to temp file (written later after fullPrompt is determined)
139
+ let promptFile;
140
+ // commandTemplate is the tool command name (e.g., "cld", "claude",
141
+ // "codex", "gemini"). For Claude Code / Codex / Gemini we append
142
+ // appropriate flags so they behave like non-interactive agents; for other
143
+ // CLIs we run the template as-is.
144
+ const toolPath = commandTemplate.trim() || 'claude';
145
+ // Session strategy: time-based timeout → cache token (preferred) → run count (fallback)
146
+ const resumeTimeout = agent.session_resume_timeout ?? 300; // default 5 minutes
147
+ const maxTokens = agent.session_max_tokens || 400000;
148
+ const maxRuns = agent.session_max_runs || 10;
149
+ const runCount = (agent.session_run_count || 0) + 1;
150
+ let shouldReset = false;
151
+ // Time-based reset: if last session ended more than resumeTimeout seconds ago, start fresh
152
+ if (resumeTimeout > 0 && agent.session_id && agent.finished_at) {
153
+ const finishedTime = new Date(agent.finished_at + (agent.finished_at.includes('Z') ? '' : 'Z')).getTime();
154
+ const elapsed = (Date.now() - finishedTime) / 1000;
155
+ if (elapsed > resumeTimeout) {
156
+ shouldReset = true;
157
+ logger_1.default.info(`Agent ${agent.id} session idle for ${Math.round(elapsed)}s (timeout=${resumeTimeout}s), starting new session`);
158
+ }
159
+ }
160
+ // If time check didn't trigger reset, fall back to token/run-count strategy
161
+ if (!shouldReset) {
162
+ if (maxTokens > 0 && agent.session_id) {
163
+ // Query the latest cost record for this agent to get cache token usage
164
+ const latestCost = db.prepare("SELECT content FROM conversation_logs WHERE agent_id = ? AND stream = 'cost' ORDER BY id DESC LIMIT 1").get(agent.id);
165
+ let cacheTokens = 0;
166
+ if (latestCost) {
167
+ try {
168
+ const data = JSON.parse(latestCost.content);
169
+ cacheTokens = (data.cache_read || 0) + (data.cache_creation || 0);
170
+ }
171
+ catch { }
172
+ }
173
+ shouldReset = cacheTokens >= maxTokens;
174
+ if (shouldReset) {
175
+ logger_1.default.info(`Agent ${agent.id} cache tokens (${cacheTokens}) >= max (${maxTokens}), resetting session`);
176
+ }
177
+ }
178
+ else {
179
+ shouldReset = runCount > maxRuns;
180
+ }
181
+ }
182
+ const existingSessionId = shouldReset ? null : agent.session_id;
183
+ let sessionId = existingSessionId || (0, uuid_1.v4)();
184
+ // Update run count (reset to 1 if new session)
185
+ db.prepare('UPDATE agents SET session_run_count = ? WHERE id = ?')
186
+ .run(shouldReset ? 1 : runCount, agent.id);
187
+ // Build command per tool. For Claude Code (cld/claude) we use stream-json
188
+ // and pass session flags; for Codex and Gemini we use their non-interactive
189
+ // JSON/stream-json modes; other commands are executed as-is.
190
+ const lowerTool = toolPath.toLowerCase();
191
+ let command;
192
+ let useStreamJson = false;
193
+ if (lowerTool.startsWith('cld') || lowerTool.startsWith('claude')) {
194
+ const sessionFlag = existingSessionId ? `--resume ${sessionId}` : `--session-id ${sessionId}`;
195
+ command = `${toolPath} -p --output-format stream-json --verbose ${sessionFlag} --dangerously-skip-permissions --allowedTools "Bash Edit Read Write Glob Grep NotebookEdit WebFetch WebSearch Agent"`;
196
+ useStreamJson = true;
197
+ }
198
+ else if (lowerTool === 'codex') {
199
+ // Codex CLI: non-interactive exec mode with JSONL output.
200
+ // Support resume like Claude: use existing session's thread_id to resume,
201
+ // otherwise start a new session. The resume subcommand uses
202
+ // --dangerously-bypass-approvals-and-sandbox instead of --sandbox.
203
+ if (existingSessionId) {
204
+ // Resume: prompt is read from stdin (using -)
205
+ command = `codex exec resume --json --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check ${sessionId} -`;
206
+ }
207
+ else {
208
+ command = 'codex exec --json --sandbox danger-full-access --skip-git-repo-check';
209
+ }
210
+ useStreamJson = true;
211
+ }
212
+ else if (lowerTool.startsWith('codex ')) {
213
+ // Allow advanced users to fully customize Codex invocation via
214
+ // command_template (e.g. "codex exec --json --sandbox workspace-write").
215
+ // We respect their flags and only enable JSON parsing if --json is
216
+ // explicitly requested.
217
+ command = toolPath;
218
+ useStreamJson = toolPath.includes('--json');
219
+ }
220
+ else if (lowerTool.startsWith('gemini')) {
221
+ // Gemini CLI: use stream-json output and sandboxed auto-approval so
222
+ // long-running agents can operate with minimal friction.
223
+ command = `${toolPath} --output-format stream-json --sandbox --approval-mode yolo`;
224
+ useStreamJson = true;
225
+ }
226
+ else {
227
+ // Other CLIs: execute as provided. We still send the prompt via stdin but
228
+ // do not assume any particular JSON schema.
229
+ command = toolPath;
230
+ useStreamJson = false;
231
+ }
232
+ // Resume session时跳过systemPrompt以节省token,只发送任务内容
233
+ const fullPrompt = (existingSessionId || !systemPrompt) ? prompt : systemPrompt + prompt;
234
+ // Write prompt file now that fullPrompt is determined
235
+ promptFile = writePromptFile(runId, fullPrompt);
236
+ if (existingSessionId && systemPrompt) {
237
+ logger_1.default.info(`Agent ${agent.id} resuming session ${sessionId}, skipping system prompt (saved ~${systemPrompt.length} chars)`);
238
+ }
239
+ // Update agent status
240
+ db.prepare(`
241
+ UPDATE agents SET status = 'running', last_prompt = ?, session_id = ?, started_at = datetime('now'), finished_at = NULL, pid = NULL
242
+ WHERE id = ?
243
+ `).run(fullPrompt, sessionId, agent.id);
244
+ (0, websocket_1.broadcastToProject)(agent.project_id, {
245
+ type: 'agent_status', projectId: agent.project_id,
246
+ data: { agentId: agent.id, status: 'running' },
247
+ });
248
+ let cwd = agent.working_directory || process.cwd();
249
+ if (cwd.startsWith('~/'))
250
+ cwd = path_1.default.join(os_1.default.homedir(), cwd.slice(2));
251
+ try {
252
+ if (!fs_1.default.existsSync(cwd) || !fs_1.default.statSync(cwd).isDirectory()) {
253
+ logger_1.default.warn(`Agent ${agent.id} working directory "${cwd}" is missing, falling back to process cwd "${process.cwd()}"`);
254
+ cwd = process.cwd();
255
+ }
256
+ }
257
+ catch {
258
+ logger_1.default.warn(`Agent ${agent.id} working directory "${cwd}" is invalid, falling back to process cwd "${process.cwd()}"`);
259
+ cwd = process.cwd();
260
+ }
261
+ // Use a login bash shell when available so agent wrappers like `cld`/`spc`
262
+ // resolve consistently even when Argus itself was started with a minimal PATH.
263
+ const shellPath = fs_1.default.existsSync('/bin/bash') ? '/bin/bash' : '/bin/sh';
264
+ const shellArgs = shellPath.endsWith('bash') ? ['-lc', 'exec ' + command] : ['-c', 'exec ' + command];
265
+ const childEnv = {
266
+ ...process.env,
267
+ no_proxy: [process.env.no_proxy, 'localhost', '127.0.0.1'].filter(Boolean).join(','),
268
+ NO_PROXY: [process.env.NO_PROXY, 'localhost', '127.0.0.1'].filter(Boolean).join(','),
269
+ ARGUS_PROMPT: fullPrompt,
270
+ ARGUS_PROMPT_FILE: promptFile,
271
+ ARGUS_SESSION_ID: sessionId,
272
+ ARGUS_AGENT_ID: agent.id,
273
+ ARGUS_RUN_ID: runId,
274
+ };
275
+ // nvm aborts shell init when npm_config_prefix is preset, which prevents
276
+ // login shells from restoring Node-based CLIs like `codex` into PATH.
277
+ delete childEnv.npm_config_prefix;
278
+ delete childEnv.NPM_CONFIG_PREFIX;
279
+ const child = (0, child_process_1.spawn)(shellPath, shellArgs, {
280
+ cwd,
281
+ env: childEnv,
282
+ stdio: ['pipe', 'pipe', 'pipe'],
283
+ });
284
+ // Feed prompt via stdin (so command template doesn't need to handle it)
285
+ if (child.stdin) {
286
+ child.stdin.write(fullPrompt);
287
+ child.stdin.end();
288
+ }
289
+ const pid = child.pid || 0;
290
+ runningProcesses.set(agent.id, child);
291
+ lastActivityTime.set(agent.id, Date.now());
292
+ db.prepare('UPDATE agents SET pid = ? WHERE id = ?').run(pid, agent.id);
293
+ const logStmt = db.prepare('INSERT INTO conversation_logs (agent_id, run_id, content, stream) VALUES (?, ?, ?, ?)');
294
+ // Log the input prompt
295
+ logStmt.run(agent.id, runId, fullPrompt, 'stdin');
296
+ // Use stream-json parser only for Claude Code / Codex; other tools are
297
+ // logged as plain text so we don't depend on their JSON schema.
298
+ const isStreamJson = useStreamJson;
299
+ const isCodex = lowerTool === 'codex' || (lowerTool.startsWith('codex ') && useStreamJson);
300
+ const requiresCompletionSignal = isStreamJson && (isCodex ||
301
+ lowerTool.startsWith('cld') ||
302
+ lowerTool.startsWith('claude') ||
303
+ lowerTool.startsWith('gemini'));
304
+ let stdoutBuffer = '';
305
+ let sawStdout = false;
306
+ let stderrSample = '';
307
+ let sawClosedStdinSessionError = false;
308
+ let sawCompletionSignal = false;
309
+ function logAndBroadcast(content, stream) {
310
+ if (!content.trim())
311
+ return;
312
+ if (shuttingDown || !(0, database_1.isDatabaseOpen)())
313
+ return;
314
+ if (stream === 'stdout') {
315
+ sawStdout = true;
316
+ }
317
+ else if (stream === 'stderr' && stderrSample.length < 2000) {
318
+ stderrSample += content.slice(0, 2000 - stderrSample.length);
319
+ }
320
+ lastActivityTime.set(agent.id, Date.now());
321
+ try {
322
+ logStmt.run(agent.id, runId, content, stream);
323
+ }
324
+ catch (e) {
325
+ logger_1.default.warn({ err: e }, `logAndBroadcast: failed to write log for agent ${agent.id}`);
326
+ return;
327
+ }
328
+ (0, websocket_1.broadcastToAgent)(agent.id, { type: 'output', stream, content, runId });
329
+ }
330
+ function parseStreamJsonLine(line) {
331
+ try {
332
+ const obj = JSON.parse(line);
333
+ let handled = false;
334
+ // --- Codex-specific events ---
335
+ if (isCodex) {
336
+ if (obj.type === 'thread.started' && obj.thread_id) {
337
+ // Capture Codex's thread_id as session_id for future resume
338
+ handled = true;
339
+ sessionId = obj.thread_id;
340
+ if ((0, database_1.isDatabaseOpen)()) {
341
+ db.prepare('UPDATE agents SET session_id = ? WHERE id = ?')
342
+ .run(sessionId, agent.id);
343
+ }
344
+ logger_1.default.info(`Agent ${agent.id} Codex thread started: ${sessionId}`);
345
+ }
346
+ else if (obj.type === 'item.completed' && obj.item) {
347
+ handled = true;
348
+ if (obj.item.type === 'agent_message' && obj.item.text) {
349
+ logAndBroadcast(obj.item.text + '\n', 'stdout');
350
+ }
351
+ else if (obj.item.type === 'tool_call') {
352
+ logAndBroadcast(`[Tool: ${obj.item.name || 'unknown'}] ${JSON.stringify(obj.item).slice(0, TOOL_INPUT_LOG_CHAR_LIMIT)}\n`, 'stdout');
353
+ }
354
+ else if (obj.item.type === 'tool_call_output') {
355
+ const output = (obj.item.output || obj.item.text || '').slice(0, 500);
356
+ logAndBroadcast(`[Result] ${output}\n`, 'stdout');
357
+ }
358
+ }
359
+ else if (obj.type === 'turn.completed' && obj.usage) {
360
+ handled = true;
361
+ sawCompletionSignal = true;
362
+ const input = obj.usage.input_tokens || 0;
363
+ const output = obj.usage.output_tokens || 0;
364
+ const cacheRead = obj.usage.cached_input_tokens || 0;
365
+ // Codex doesn't report cache_creation separately; estimate as input - cached
366
+ const cacheCreation = Math.max(0, input - cacheRead);
367
+ // Try to extract real cost from Codex event (may be at top-level or nested in usage)
368
+ const costUsd = obj.cost_usd || obj.total_cost_usd || obj.usage?.cost_usd || obj.usage?.cost || 0;
369
+ const now = new Date().toISOString().replace('T', ' ').slice(0, 19);
370
+ const costLabel = costUsd > 0 ? ` | Cost: $${costUsd.toFixed(4)}` : '';
371
+ logAndBroadcast(`\n--- [${now}] Tokens: ${input} in, ${output} out, ${cacheRead} cache${costLabel} ---\n`, 'stdout');
372
+ try {
373
+ db.prepare("INSERT INTO conversation_logs (agent_id, run_id, content, stream) VALUES (?, ?, ?, 'cost')")
374
+ .run(agent.id, runId, JSON.stringify({ cost_usd: costUsd, input_tokens: input, output_tokens: output, cache_read: cacheRead, cache_creation: cacheCreation }));
375
+ }
376
+ catch { }
377
+ }
378
+ else if (obj.type === 'turn.started') {
379
+ handled = true; // silently consume
380
+ }
381
+ }
382
+ // --- Claude Code events ---
383
+ if (!handled && obj.type === 'assistant' && obj.message?.content) {
384
+ handled = true;
385
+ let totalTextLen = 0;
386
+ let hasToolUse = false;
387
+ for (const block of obj.message.content) {
388
+ if (block.type === 'text' && block.text) {
389
+ totalTextLen += block.text.length;
390
+ logAndBroadcast(block.text + '\n', 'stdout');
391
+ }
392
+ else if (block.type === 'tool_use') {
393
+ hasToolUse = true;
394
+ logAndBroadcast(`[Tool: ${block.name}] ${JSON.stringify(block.input).slice(0, TOOL_INPUT_LOG_CHAR_LIMIT)}\n`, 'stdout');
395
+ }
396
+ }
397
+ // Intra-session tail detection: track consecutive low-output assistant turns
398
+ // that have no tool calls (pure "confirmation" text with little substance)
399
+ if (!hasToolUse && totalTextLen < INTRA_LOW_OUTPUT_CHAR_LIMIT) {
400
+ const streak = (agentIntraSessionLowStreak.get(agent.id) || 0) + 1;
401
+ agentIntraSessionLowStreak.set(agent.id, streak);
402
+ // More aggressive threshold when agent already completed all its issues
403
+ const killThreshold = agentCompletedAllIssues.has(agent.id) ? 1 : INTRA_LOW_OUTPUT_KILL_THRESHOLD;
404
+ if (streak >= killThreshold && isAgentRunning(agent.id)) {
405
+ logger_1.default.info(`Agent ${agent.id} hit ${streak} consecutive low-output assistant turns (< ${INTRA_LOW_OUTPUT_CHAR_LIMIT} chars, no tools, issuesDone=${agentCompletedAllIssues.has(agent.id)}), terminating session tail`);
406
+ logAndBroadcast(`\n[Argus] Session terminated: ${streak} consecutive low-output turns detected (tail request avoidance)\n`, 'stderr');
407
+ agentIntraSessionLowStreak.delete(agent.id);
408
+ // Set 'stopped' so close handler won't mark as error, preserving session for reuse
409
+ db.prepare("UPDATE agents SET status = 'stopped' WHERE id = ?").run(agent.id);
410
+ stopAgentProcess(agent.id);
411
+ }
412
+ }
413
+ else {
414
+ // Substantial output or tool use — reset streak
415
+ agentIntraSessionLowStreak.delete(agent.id);
416
+ }
417
+ }
418
+ else if (!handled && obj.type === 'user' && obj.tool_use_result !== undefined) {
419
+ handled = true;
420
+ const raw = obj.tool_use_result;
421
+ const result = (typeof raw === 'string' ? raw : JSON.stringify(raw)).slice(0, 500);
422
+ logAndBroadcast(`[Result] ${result}\n`, 'stdout');
423
+ }
424
+ else if (!handled && obj.type === 'result') {
425
+ handled = true;
426
+ sawCompletionSignal = true;
427
+ // Mark that this agent has produced its final result — watchdog will force-kill
428
+ // if the process doesn't exit within FINAL_RESULT_KILL_DELAY_MS (child curl stuck etc.)
429
+ agentFinalResultTime.set(agent.id, Date.now());
430
+ // Track output tokens for low-output detection (tail request avoidance)
431
+ agentLastOutputTokens.set(agent.id, obj.usage?.output_tokens || 0);
432
+ if (obj.result) {
433
+ logAndBroadcast('\n--- Final Result ---\n' + obj.result + '\n', 'stdout');
434
+ }
435
+ // Track cost/usage — only if there's actual data
436
+ if (obj.total_cost_usd > 0 || obj.usage?.input_tokens > 0 || obj.usage?.output_tokens > 0) {
437
+ const costUsd = obj.total_cost_usd || 0;
438
+ const input = obj.usage?.input_tokens || 0;
439
+ const output = obj.usage?.output_tokens || 0;
440
+ const cacheRead = obj.usage?.cache_read_input_tokens || 0;
441
+ const cacheCreation = obj.usage?.cache_creation_input_tokens || 0;
442
+ const now = new Date().toISOString().replace('T', ' ').slice(0, 19);
443
+ logAndBroadcast(`\n--- [${now}] Cost: $${costUsd.toFixed(4)} | Tokens: ${input} in, ${output} out, ${cacheRead} cache ---\n`, 'stdout');
444
+ try {
445
+ db.prepare("INSERT INTO conversation_logs (agent_id, run_id, content, stream) VALUES (?, ?, ?, 'cost')")
446
+ .run(agent.id, runId, JSON.stringify({ cost_usd: costUsd, input_tokens: input, output_tokens: output, cache_read: cacheRead, cache_creation: cacheCreation, duration_ms: obj.duration_ms }));
447
+ }
448
+ catch { }
449
+ }
450
+ }
451
+ // For unknown JSON shapes (e.g. other CLIs' stream-json), log the raw
452
+ // line so that output is still visible even if we don't understand the
453
+ // schema.
454
+ if (!handled) {
455
+ logAndBroadcast(line + '\n', 'stdout');
456
+ }
457
+ }
458
+ catch {
459
+ // Not JSON (proxychains noise etc), skip or log as-is
460
+ if (!line.includes('proxychains') && !line.includes('Executing through proxy') && !line.includes('Port 7897')) {
461
+ logAndBroadcast(line + '\n', 'stdout');
462
+ }
463
+ }
464
+ }
465
+ const handleData = (stream) => (data) => {
466
+ const raw = data.toString();
467
+ // Detect API connection failure in any output stream
468
+ if (raw.includes('Unable to connect to API')) {
469
+ agentLastErrorWasApiConnect.set(agent.id, true);
470
+ }
471
+ if (stream === 'stderr') {
472
+ if (CLOSED_STDIN_SESSION_RE.test(raw)) {
473
+ sawClosedStdinSessionError = true;
474
+ }
475
+ // Skip proxychains noise in stderr
476
+ if (!raw.includes('proxychains')) {
477
+ logAndBroadcast(raw, 'stderr');
478
+ }
479
+ return;
480
+ }
481
+ if (!isStreamJson) {
482
+ logAndBroadcast(raw, 'stdout');
483
+ return;
484
+ }
485
+ // Parse stream-json line by line
486
+ stdoutBuffer += raw;
487
+ const lines = stdoutBuffer.split('\n');
488
+ stdoutBuffer = lines.pop() || '';
489
+ for (const line of lines) {
490
+ if (line.trim())
491
+ parseStreamJsonLine(line.trim());
492
+ }
493
+ };
494
+ child.stdout?.on('data', handleData('stdout'));
495
+ child.stderr?.on('data', handleData('stderr'));
496
+ child.on('close', (code) => {
497
+ runningProcesses.delete(agent.id);
498
+ lastActivityTime.delete(agent.id);
499
+ childCpuSnapshots.delete(agent.id);
500
+ const hadFinalResult = agentFinalResultTime.has(agent.id);
501
+ agentFinalResultTime.delete(agent.id);
502
+ agentIntraSessionLowStreak.delete(agent.id);
503
+ cleanupPromptFile(promptFile);
504
+ // Skip DB writes if database is already closed (during shutdown)
505
+ if (!(0, database_1.isDatabaseOpen)()) {
506
+ logger_1.default.info(`Agent ${agent.id} close event after DB closed, skipping DB writes`);
507
+ return;
508
+ }
509
+ // During shutdown, skip all DB writes and timer creation (retry, callback, etc.)
510
+ if (shuttingDown) {
511
+ logger_1.default.info(`Agent ${agent.id} close event during shutdown, skipping DB writes`);
512
+ return;
513
+ }
514
+ // Check if this run had an API connection error
515
+ const wasApiConnectError = agentLastErrorWasApiConnect.get(agent.id) || false;
516
+ agentLastErrorWasApiConnect.delete(agent.id);
517
+ // If agent was explicitly stopped, preserve 'stopped' status
518
+ const currentAgent = db.prepare('SELECT status FROM agents WHERE id = ?').get(agent.id);
519
+ const status = classifyAgentExitStatus({
520
+ currentStatus: currentAgent?.status,
521
+ exitCode: code,
522
+ requiresCompletionSignal,
523
+ sawClosedStdinSessionError,
524
+ sawCompletionSignal,
525
+ hadFinalResult,
526
+ });
527
+ if (code === 0 && sawClosedStdinSessionError && !hadFinalResult) {
528
+ logger_1.default.info(`Agent ${agent.id} exited with code 0 but had closed-stdin tool session errors; marking run as error`);
529
+ }
530
+ if (code === 0 && requiresCompletionSignal && !sawClosedStdinSessionError && !sawCompletionSignal && !hadFinalResult) {
531
+ logger_1.default.info(`Agent ${agent.id} exited with code 0 but without a completion signal; marking run as error`);
532
+ logAndBroadcast('Argus: agent exited without emitting a completion event; marking this run as error\n', 'stderr');
533
+ }
534
+ if (status === 'error' && existingSessionId && !sawStdout && RESUME_MISSING_FILE_RE.test(stderrSample)) {
535
+ logger_1.default.info(`Agent ${agent.id} resume failed with missing file, retrying with a fresh session`);
536
+ logAndBroadcast('Argus: 旧 session 恢复失败,自动改为新 session 重试...\n', 'stderr');
537
+ db.prepare("UPDATE agents SET session_id = NULL, status = 'idle', pid = NULL WHERE id = ?").run(agent.id);
538
+ const freshAgent = { ...agent, session_id: null };
539
+ startAgentProcess(freshAgent, prompt, commandTemplate, systemPrompt);
540
+ return;
541
+ }
542
+ if (status === 'error') {
543
+ // Handle API connection error with auto-retry
544
+ if (wasApiConnectError) {
545
+ const apiErrCount = (agentApiConnectErrorCount.get(agent.id) || 0) + 1;
546
+ agentApiConnectErrorCount.set(agent.id, apiErrCount);
547
+ if (apiErrCount <= 1) {
548
+ // First API connection failure: auto-retry after 5 minutes to avoid wasting tokens
549
+ const retryDelayMs = 5 * 60 * 1000; // 5 minutes
550
+ logger_1.default.info(`Agent ${agent.id} API connection failed (attempt ${apiErrCount}), auto-retrying in 5 minutes`);
551
+ logAndBroadcast('Argus: API连接失败,5分钟后自动重试...\n', 'stderr');
552
+ // Set status to 'waiting' during retry delay — visible in UI, prevents scheduler re-trigger
553
+ db.prepare(`
554
+ UPDATE agents SET status = 'waiting', pid = NULL, finished_at = datetime('now') WHERE id = ?
555
+ `).run(agent.id);
556
+ (0, websocket_1.broadcastToProject)(agent.project_id, {
557
+ type: 'agent_status', projectId: agent.project_id,
558
+ data: { agentId: agent.id, status: 'waiting' },
559
+ });
560
+ const retryTimer = setTimeout(() => {
561
+ pendingRetryTimers.delete(agent.id);
562
+ if (shuttingDown || !(0, database_1.isDatabaseOpen)())
563
+ return;
564
+ const retryAgent = db.prepare('SELECT * FROM agents WHERE id = ?').get(agent.id);
565
+ if (retryAgent && (retryAgent.status === 'waiting' || retryAgent.status === 'running')) {
566
+ logger_1.default.info(`Agent ${agent.id} auto-retrying after 5-minute API wait`);
567
+ startAgentProcess(retryAgent, prompt, commandTemplate, systemPrompt);
568
+ }
569
+ }, retryDelayMs);
570
+ pendingRetryTimers.set(agent.id, retryTimer);
571
+ return;
572
+ }
573
+ else {
574
+ // Second+ API connection failure: give up, report error
575
+ logger_1.default.info(`Agent ${agent.id} API connection failed ${apiErrCount} times, giving up`);
576
+ logAndBroadcast('Argus: API连接持续失败,请检查网络/API配置后手动重启agent\n', 'stderr');
577
+ agentApiConnectErrorCount.delete(agent.id);
578
+ // Fall through to normal error handling below
579
+ }
580
+ }
581
+ else {
582
+ // Non-API error: reset API error count
583
+ agentApiConnectErrorCount.delete(agent.id);
584
+ }
585
+ // P1: Track consecutive errors — only clear session after MAX_CONSECUTIVE_ERRORS
586
+ const errorCount = (agentErrorCount.get(agent.id) || 0) + 1;
587
+ agentErrorCount.set(agent.id, errorCount);
588
+ if (errorCount >= MAX_CONSECUTIVE_ERRORS) {
589
+ logger_1.default.info(`Agent ${agent.id} hit ${errorCount} consecutive errors, clearing session`);
590
+ db.prepare(`
591
+ UPDATE agents SET status = ?, pid = NULL, finished_at = datetime('now'), session_id = NULL WHERE id = ?
592
+ `).run(status, agent.id);
593
+ agentErrorCount.delete(agent.id);
594
+ }
595
+ else {
596
+ logger_1.default.info(`Agent ${agent.id} error (${errorCount}/${MAX_CONSECUTIVE_ERRORS}), preserving session for reuse`);
597
+ db.prepare(`
598
+ UPDATE agents SET status = ?, pid = NULL, finished_at = datetime('now') WHERE id = ?
599
+ `).run(status, agent.id);
600
+ }
601
+ }
602
+ else {
603
+ // Success — reset error counts and record finish time for cooldown
604
+ agentErrorCount.delete(agent.id);
605
+ agentApiConnectErrorCount.delete(agent.id);
606
+ agentLastFinishTime.set(agent.id, Date.now());
607
+ // Track consecutive low-output runs to avoid tail request waste
608
+ const lastOutput = agentLastOutputTokens.get(agent.id) || 0;
609
+ agentLastOutputTokens.delete(agent.id);
610
+ if (lastOutput > 0 && lastOutput < LOW_OUTPUT_THRESHOLD) {
611
+ const count = (agentConsecutiveLowOutput.get(agent.id) || 0) + 1;
612
+ agentConsecutiveLowOutput.set(agent.id, count);
613
+ logger_1.default.info(`Agent ${agent.id} low-output run (${lastOutput} tokens), consecutive count: ${count}`);
614
+ }
615
+ else {
616
+ agentConsecutiveLowOutput.delete(agent.id);
617
+ }
618
+ db.prepare(`
619
+ UPDATE agents SET status = ?, pid = NULL, finished_at = datetime('now') WHERE id = ?
620
+ `).run(status, agent.id);
621
+ }
622
+ (0, websocket_1.broadcastToAgent)(agent.id, { type: 'exit', code, runId });
623
+ (0, websocket_1.broadcastToProject)(agent.project_id, {
624
+ type: 'agent_status', projectId: agent.project_id,
625
+ data: { agentId: agent.id, status },
626
+ });
627
+ // Fetch updated agent and trigger callback
628
+ if (onAgentFinish) {
629
+ const updated = db.prepare('SELECT * FROM agents WHERE id = ?').get(agent.id);
630
+ if (updated) {
631
+ onAgentFinish(updated, code);
632
+ }
633
+ }
634
+ });
635
+ child.on('error', (err) => {
636
+ logger_1.default.error(`Spawn error: ${err.message} code=${err.code} path=${err.path} syscall=${err.syscall} cwd=${cwd} shell=${shellPath}`);
637
+ runningProcesses.delete(agent.id);
638
+ lastActivityTime.delete(agent.id);
639
+ childCpuSnapshots.delete(agent.id);
640
+ agentFinalResultTime.delete(agent.id);
641
+ agentIntraSessionLowStreak.delete(agent.id);
642
+ cleanupPromptFile(promptFile);
643
+ if (shuttingDown || !(0, database_1.isDatabaseOpen)()) {
644
+ logger_1.default.info(`Agent ${agent.id} error event during shutdown/after DB closed, skipping DB writes`);
645
+ return;
646
+ }
647
+ // If resume failed, retry with a fresh session
648
+ if (existingSessionId && err.code === 'ENOENT') {
649
+ logger_1.default.info(`Retrying agent ${agent.id} with fresh session (resume failed)`);
650
+ const freshAgent = { ...agent, session_id: null };
651
+ db.prepare("UPDATE agents SET session_id = NULL, status = 'idle' WHERE id = ?").run(agent.id);
652
+ startAgentProcess(freshAgent, prompt, commandTemplate, systemPrompt);
653
+ return;
654
+ }
655
+ db.prepare(`
656
+ UPDATE agents SET status = 'error', pid = NULL, finished_at = datetime('now') WHERE id = ?
657
+ `).run(agent.id);
658
+ logStmt.run(agent.id, runId, `Process error: ${err.message}`, 'stderr');
659
+ (0, websocket_1.broadcastToAgent)(agent.id, { type: 'error', message: err.message, runId });
660
+ });
661
+ return { runId, pid };
662
+ }
663
/**
 * Collect the PIDs of every descendant (children, grandchildren, ...) of a
 * process by reading the /proc filesystem.
 *
 * /proc is listed and read exactly once to build a parent -> children index,
 * which is then walked breadth-first starting at `pid`. Entries that vanish
 * between the directory listing and the read are skipped.
 *
 * @param {number} pid - Root process ID; the root itself is not part of the result.
 * @returns {number[]} Descendant PIDs in breadth-first order (siblings in /proc
 *   listing order). Empty when there are none or when /proc cannot be listed.
 */
function getDescendantPids(pid) {
    let entries;
    try {
        entries = fs_1.default.readdirSync('/proc').filter((name) => /^\d+$/.test(name));
    }
    catch {
        // /proc is unavailable or unreadable: report no descendants.
        return [];
    }
    // Index every live process under its parent PID from a single snapshot.
    const childrenByParent = new Map();
    for (const entry of entries) {
        let stat;
        try {
            stat = fs_1.default.readFileSync(`/proc/${entry}/stat`, 'utf-8');
        }
        catch {
            // Process may have exited between readdir and read.
            continue;
        }
        // Format: "pid (comm) state ppid ...". comm may itself contain spaces
        // and parentheses, so anchor on the LAST ')' instead of the first.
        const commEnd = stat.lastIndexOf(')');
        if (commEnd === -1) {
            continue;
        }
        const match = /^\s*\S+\s+(\d+)/.exec(stat.slice(commEnd + 1));
        if (!match) {
            continue;
        }
        const parentPid = Number.parseInt(match[1], 10);
        const childPid = Number.parseInt(entry, 10);
        const siblings = childrenByParent.get(parentPid);
        if (siblings) {
            siblings.push(childPid);
        }
        else {
            childrenByParent.set(parentPid, [childPid]);
        }
    }
    // Breadth-first walk; `seen` guards against revisiting a PID.
    const descendants = [];
    const seen = new Set([pid]);
    const queue = [pid];
    for (let head = 0; head < queue.length; head++) {
        for (const childPid of childrenByParent.get(queue[head]) ?? []) {
            if (seen.has(childPid)) {
                continue;
            }
            seen.add(childPid);
            descendants.push(childPid);
            queue.push(childPid);
        }
    }
    return descendants;
}
691
/**
 * Read the cumulative CPU time (utime + stime) of one process from
 * /proc/<pid>/stat.
 *
 * @param {number} pid - Process ID to inspect.
 * @returns {number} utime + stime as reported in the stat file; 0 when the
 *   file cannot be read or the fields are missing/non-numeric.
 */
function getProcessCpuTime(pid) {
    // Positions of utime/stime counted from the field that follows "(comm) ":
    // [0]=state, [1]=ppid, ..., [11]=utime, [12]=stime.
    const UTIME_INDEX = 11;
    const STIME_INDEX = 12;
    try {
        const statLine = fs_1.default.readFileSync(`/proc/${pid}/stat`, 'utf-8');
        // comm (field 2) may contain spaces or parentheses, so cut after the
        // last ')' plus the separating space before splitting.
        const commEnd = statLine.lastIndexOf(')');
        const tail = statLine.slice(commEnd + 2).split(' ');
        const [userTicks, systemTicks] = [tail[UTIME_INDEX], tail[STIME_INDEX]]
            .map((value) => parseInt(value) || 0);
        return userTicks + systemTicks;
    }
    catch {
        return 0;
    }
}
709
/**
 * Classify the CPU activity of an agent's descendant processes for the watchdog.
 *
 * The summed CPU time of all descendants of `pid` is compared with the
 * snapshot stored for `agentId` in `childCpuSnapshots`, and the snapshot is
 * refreshed on every call that finds descendants.
 *
 * @param {*} agentId - Key under which the snapshot is stored.
 * @param {number} pid - Root process whose descendants are measured.
 * @returns {'active'|'stale'|'warming'|'no_children'}
 *   'no_children' — no descendant processes were found (snapshot untouched);
 *   'active'      — first observation, or CPU time grew since the last call;
 *   'stale'       — CPU time did not grow for >= CPU_STALE_THRESHOLD calls in a row;
 *   'warming'     — CPU time did not grow, but the threshold is not reached yet.
 */
function checkChildCpuActivity(agentId, pid) {
    const childPids = getDescendantPids(pid);
    if (childPids.length === 0) {
        return 'no_children';
    }
    const cpuNow = childPids.reduce((sum, childPid) => sum + getProcessCpuTime(childPid), 0);
    const previous = childCpuSnapshots.get(agentId);
    // Either there is no baseline yet or the counter moved forward: (re)start
    // the stale streak from zero.
    if (!previous || cpuNow > previous.totalCpuTime) {
        childCpuSnapshots.set(agentId, { totalCpuTime: cpuNow, staleCount: 0 });
        return 'active';
    }
    // No growth since the last call — extend the stale streak.
    const staleCount = previous.staleCount + 1;
    childCpuSnapshots.set(agentId, { totalCpuTime: cpuNow, staleCount });
    return staleCount >= CPU_STALE_THRESHOLD ? 'stale' : 'warming';
}
744
/**
 * Forget the CPU snapshot recorded for an agent so the next activity check
 * starts from a fresh baseline.
 *
 * @param {*} agentId - Key of the snapshot in `childCpuSnapshots`.
 * @returns {void}
 */
function clearCpuSnapshot(agentId) {
    childCpuSnapshots.delete(agentId);
}
748
/**
 * Ask a running agent process to stop, escalating if it does not exit.
 *
 * Steps:
 *   1. Cancel any pending API retry timer for the agent.
 *   2. Send SIGTERM to the tracked child process.
 *   3. After 5 s, if that same child is still tracked, send SIGKILL and detach
 *      its stdio.
 *   4. After 3 more seconds, if it is still tracked (grandchildren holding the
 *      stdio pipes can keep 'close' from firing), drop the in-memory tracking
 *      state for it.
 *
 * Both escalation timers are registered in `pendingStopTimers`.
 *
 * @param {*} agentId - Key of the agent in `runningProcesses`.
 * @returns {boolean} true if a tracked process was signalled, false if the
 *   agent has no tracked process.
 */
function stopAgentProcess(agentId) {
    // Cancel any pending API retry timer for this agent
    const retryTimer = pendingRetryTimers.get(agentId);
    if (retryTimer) {
        clearTimeout(retryTimer);
        pendingRetryTimers.delete(agentId);
    }
    const child = runningProcesses.get(agentId);
    if (!child)
        return false;
    // The agent may exit and be restarted under the same id before a timer
    // fires; only escalate while the entry still refers to THIS child, so a
    // newer process is never killed or untracked by a stale timer.
    const isStillThisChild = () => runningProcesses.get(agentId) === child;
    logger_1.default.info(`stopAgentProcess: sending SIGTERM to agent ${agentId} (pid=${child.pid})`);
    child.kill('SIGTERM');
    // Force kill after 5 seconds
    const killTimer = setTimeout(() => {
        pendingStopTimers.delete(killTimer);
        if (!isStillThisChild())
            return;
        logger_1.default.info(`stopAgentProcess: sending SIGKILL to agent ${agentId} (pid=${child.pid})`);
        child.kill('SIGKILL');
        detachChildProcessIo(child);
        // Force cleanup after 3 more seconds — grandchild processes may hold
        // stdio pipes open, preventing the 'close' event from ever firing.
        const cleanupTimer = setTimeout(() => {
            pendingStopTimers.delete(cleanupTimer);
            if (!isStillThisChild())
                return;
            logger_1.default.info(`Force cleanup: agent ${agentId} close event not fired after SIGKILL, cleaning up`);
            detachChildProcessIo(child);
            // Same per-agent state that stopAllProcesses drops in its own
            // force-cleanup path.
            runningProcesses.delete(agentId);
            lastActivityTime.delete(agentId);
            childCpuSnapshots.delete(agentId);
            agentFinalResultTime.delete(agentId);
        }, 3000);
        pendingStopTimers.add(cleanupTimer);
    }, 5000);
    pendingStopTimers.add(killTimer);
    return true;
}
784
/**
 * Tell whether a child process is currently tracked for the given agent.
 *
 * @param {*} agentId - Key to look up in `runningProcesses`.
 * @returns {boolean} true when `runningProcesses` has an entry for the agent.
 */
function isAgentRunning(agentId) {
    const tracked = runningProcesses.has(agentId);
    return tracked;
}
787
/**
 * Milliseconds elapsed since the agent's last recorded activity timestamp.
 *
 * @param {*} agentId - Key to look up in `lastActivityTime`.
 * @returns {number} Elapsed milliseconds, or -1 when no timestamp is tracked.
 */
function getAgentIdleMs(agentId) {
    const lastSeen = lastActivityTime.get(agentId);
    if (!lastSeen) {
        return -1;
    }
    return Date.now() - lastSeen;
}
792
/**
 * Stamp the agent's last-activity time with the current clock value.
 * Used by the watchdog when child processes are detected.
 *
 * @param {*} agentId - Key to update in `lastActivityTime`.
 * @returns {void}
 */
function resetAgentActivity(agentId) {
    const now = Date.now();
    lastActivityTime.set(agentId, now);
}
796
/**
 * Tell whether the agent finished too recently to be auto-restarted.
 *
 * The cooldown window is EXTENDED_COOLDOWN_MS when the agent has at least one
 * consecutive low-output run on record (a "tail request" that produced little
 * useful work), and RESTART_COOLDOWN_MS otherwise.
 *
 * @param {*} agentId - Key to look up in the finish-time and low-output maps.
 * @returns {boolean} true while the applicable cooldown has not yet elapsed;
 *   false when no finish time is recorded.
 */
function isAgentInCooldown(agentId) {
    const finishedAt = agentLastFinishTime.get(agentId);
    if (!finishedAt) {
        return false;
    }
    const lowOutputRuns = agentConsecutiveLowOutput.get(agentId) || 0;
    let cooldownMs = RESTART_COOLDOWN_MS;
    if (lowOutputRuns > 0) {
        cooldownMs = EXTENDED_COOLDOWN_MS;
    }
    const sinceFinish = Date.now() - finishedAt;
    return sinceFinish < cooldownMs;
}
808
/**
 * Record that an agent has finished every issue assigned to it.
 *
 * Called from the issue routes when a running agent marks all of its assigned
 * issues as done/closed. Once flagged, later low-output turns are treated as
 * likely tail requests.
 *
 * @param {*} agentId - Agent to add to `agentCompletedAllIssues`.
 * @returns {void}
 */
function markAgentIssuesCompleted(agentId) {
    const message = `Agent ${agentId} marked as having completed all assigned issues`;
    agentCompletedAllIssues.add(agentId);
    logger_1.default.info(message);
}
816
/**
 * Decide whether the scheduler/pre-controller should refrain from
 * auto-restarting an agent.
 *
 * Restart is blocked when either:
 *   1. the agent has MAX_CONSECUTIVE_LOW_OUTPUT or more consecutive low-output
 *      runs (it is only producing tail confirmations), or
 *   2. the agent is flagged as having completed all issues AND has at least
 *      one consecutive low-output run on record.
 * The state is cleared externally when new issues are assigned.
 *
 * @param {*} agentId - Agent to evaluate.
 * @returns {boolean} true when auto-restart should be skipped (the reason is
 *   logged), false otherwise.
 */
function shouldSkipAutoRestart(agentId) {
    const lowRuns = agentConsecutiveLowOutput.get(agentId) || 0;
    let reason = null;
    if (lowRuns >= MAX_CONSECUTIVE_LOW_OUTPUT) {
        reason = `${lowRuns} consecutive low-output runs`;
    }
    else if (agentCompletedAllIssues.has(agentId) && lowRuns > 0) {
        reason = 'completed all issues + low-output last run';
    }
    if (reason === null) {
        return false;
    }
    logger_1.default.info(`Skipping auto-restart for agent ${agentId}: ${reason}`);
    return true;
}
835
/**
 * Clear everything that makes `shouldSkipAutoRestart` block an agent: its
 * consecutive low-output counter and its "completed all issues" flag.
 * Called when new issues are assigned or the agent's state changes meaningfully.
 *
 * @param {*} agentId - Agent whose skip state is reset.
 * @returns {void}
 */
function resetAutoRestartSkip(agentId) {
    for (const store of [agentConsecutiveLowOutput, agentCompletedAllIssues]) {
        store.delete(agentId);
    }
}
843
/**
 * Milliseconds elapsed since the agent's final result was recorded.
 * The watchdog uses this to force-kill agents whose child processes are stuck
 * after completion.
 *
 * @param {*} agentId - Key to look up in `agentFinalResultTime`.
 * @returns {number} Elapsed milliseconds, or -1 when no final result is recorded.
 */
function getAgentFinalResultAge(agentId) {
    const resultAt = agentFinalResultTime.get(agentId);
    if (!resultAt) {
        return -1;
    }
    return Date.now() - resultAt;
}
851
/**
 * List the ids of all agents that currently have a tracked child process.
 *
 * @returns {Array} A new array of the keys of `runningProcesses`, in insertion order.
 */
function getRunningAgentIds() {
    return [...runningProcesses.keys()];
}
854
/**
 * Stop every tracked agent process as part of shutdown.
 *
 * Sets the module-wide `shuttingDown` flag, cancels all pending retry and stop
 * timers, then sends SIGTERM to each tracked child. Escalation:
 *   - after 3 s, every child still tracked is SIGKILLed together with its
 *     descendants (so stdio pipes close and 'close' can fire), and its stdio
 *     is detached;
 *   - after 6 s, tracking state for any child still present is dropped and the
 *     promise resolves even if 'close' never fired.
 *
 * @returns {Promise<void>} Resolves once none of the agents that were running
 *   at call time are tracked any more, or at the 6 s cleanup at the latest.
 *   Resolves immediately when nothing is running.
 */
function stopAllProcesses() {
    shuttingDown = true;
    // Cancels every pending API retry timer so nothing touches the DB after shutdown.
    const cancelRetryTimers = () => {
        for (const timer of pendingRetryTimers.values()) {
            clearTimeout(timer);
        }
        pendingRetryTimers.clear();
    };
    cancelRetryTimers();
    // Cancel any orphaned timers from previous stopAgentProcess calls
    for (const timer of pendingStopTimers) {
        clearTimeout(timer);
    }
    pendingStopTimers.clear();
    const agentIds = [...runningProcesses.keys()];
    if (agentIds.length === 0) {
        return Promise.resolve();
    }
    return new Promise((resolve) => {
        let forceKillTimer = null;
        let forceCleanupTimer = null;
        // Resolve as soon as none of the agents we set out to stop is tracked.
        const finishIfAllExited = () => {
            const someStillRunning = agentIds.some((id) => runningProcesses.has(id));
            if (someStillRunning) {
                return;
            }
            if (forceKillTimer) {
                clearTimeout(forceKillTimer);
            }
            if (forceCleanupTimer) {
                clearTimeout(forceCleanupTimer);
            }
            // Close handlers may have created retry timers in the meantime.
            cancelRetryTimers();
            resolve();
        };
        // Polite phase: SIGTERM each child and watch for it to close.
        for (const agentId of agentIds) {
            const child = runningProcesses.get(agentId);
            if (!child) {
                continue;
            }
            logger_1.default.info(`Killing agent ${agentId} (pid: ${child.pid})`);
            child.kill('SIGTERM');
            child.once('close', () => finishIfAllExited());
        }
        // Forced phase (3 s): SIGKILL survivors and their descendants so the
        // stdio pipes close and 'close' fires promptly.
        forceKillTimer = setTimeout(() => {
            for (const agentId of agentIds) {
                const child = runningProcesses.get(agentId);
                if (!child || !child.pid) {
                    continue;
                }
                const descendants = getDescendantPids(child.pid);
                logger_1.default.info(`Force killing agent ${agentId} (pid=${child.pid}) and ${descendants.length} descendants: [${descendants.join(',')}]`);
                for (const dpid of descendants) {
                    const isServerProcess = dpid === process.pid || dpid === process.ppid;
                    if (isServerProcess) {
                        logger_1.default.error(`stopAllProcesses: refusing to kill PID ${dpid} — it is the Argus server (pid=${process.pid}, ppid=${process.ppid})`);
                        continue;
                    }
                    try {
                        process.kill(dpid, 'SIGKILL');
                    }
                    catch { /* best effort: errors from kill are ignored */ }
                }
                child.kill('SIGKILL');
                detachChildProcessIo(child);
            }
        }, 3000);
        // Last resort (6 s): drop tracking state and resolve even if 'close'
        // events never fired.
        forceCleanupTimer = setTimeout(() => {
            for (const agentId of agentIds) {
                if (!runningProcesses.has(agentId)) {
                    continue;
                }
                detachChildProcessIo(runningProcesses.get(agentId));
                logger_1.default.info(`Force cleanup: agent ${agentId} close event not fired during stopAll, cleaning up`);
                runningProcesses.delete(agentId);
                lastActivityTime.delete(agentId);
                childCpuSnapshots.delete(agentId);
                agentFinalResultTime.delete(agentId);
            }
            cancelRetryTimers();
            resolve();
        }, 6000);
    });
}
938
+ //# sourceMappingURL=process-manager.js.map