@tjamescouch/niki 0.3.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/bin/niki +259 -125
- package/bin/niki.bak +665 -0
- package/package.json +1 -1
package/LICENSE
CHANGED
package/bin/niki
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* - Wall-clock timeout (kill if exceeded)
|
|
9
9
|
* - Tool-call rate limiting (kill if agent floods)
|
|
10
10
|
* - Diagnostics logging
|
|
11
|
+
* - Automatic restart on exit (optional)
|
|
11
12
|
*
|
|
12
13
|
* Usage:
|
|
13
14
|
* niki [options] -- <command> [args...]
|
|
@@ -42,14 +43,20 @@ Options:
|
|
|
42
43
|
--dead-air-timeout <m> Minutes of zero CPU + zero output before kill (default: 5, 0=disabled)
|
|
43
44
|
--max-nudges <n> Max stdin nudge attempts before kill on stall (default: 3)
|
|
44
45
|
--log <file> Write diagnostics log to file
|
|
46
|
+
--log-level <level> Minimum log level: debug, info, warn, error (default: info)
|
|
47
|
+
--log-json Emit logs as JSON lines (for machine parsing)
|
|
45
48
|
--state <file> Write state JSON on exit (budget used, reason, etc.)
|
|
46
49
|
--cooldown <seconds> Grace period after SIGTERM before SIGKILL (default: 5)
|
|
47
50
|
--abort-file <path> Poll this file for external abort signal
|
|
48
51
|
--poll-interval <ms> Base poll interval in ms for abort file (default: 1000)
|
|
52
|
+
--restart Restart the child process when it exits (default: off)
|
|
53
|
+
--max-restarts <n> Max restart attempts, 0=unlimited (default: 0)
|
|
54
|
+
--restart-delay <secs> Delay between restarts with ±30% jitter (default: 5)
|
|
49
55
|
|
|
50
56
|
Examples:
|
|
51
57
|
niki --budget 500000 -- claude -p "your prompt" --verbose
|
|
52
|
-
niki --timeout 1800 --max-sends 5 -- claude -p "..." --model sonnet --verbose
|
|
58
|
+
niki --timeout 1800 --max-sends 5 -- claude -p "..." --model sonnet --verbose
|
|
59
|
+
niki --restart --max-restarts 10 -- gro --model gpt-5.2 "your prompt"`);
|
|
53
60
|
process.exit(1);
|
|
54
61
|
}
|
|
55
62
|
|
|
@@ -69,10 +76,15 @@ const { values: opts } = parseArgs({
|
|
|
69
76
|
'dead-air-timeout': { type: 'string', default: '5' },
|
|
70
77
|
'max-nudges': { type: 'string', default: '3' },
|
|
71
78
|
log: { type: 'string' },
|
|
79
|
+
'log-level': { type: 'string', default: 'info' },
|
|
80
|
+
'log-json': { type: 'boolean', default: false },
|
|
72
81
|
state: { type: 'string' },
|
|
73
82
|
cooldown: { type: 'string', default: '5' },
|
|
74
83
|
'abort-file': { type: 'string' },
|
|
75
84
|
'poll-interval': { type: 'string', default: '1000' },
|
|
85
|
+
restart: { type: 'boolean', default: false },
|
|
86
|
+
'max-restarts': { type: 'string', default: '0' },
|
|
87
|
+
'restart-delay': { type: 'string', default: '5' },
|
|
76
88
|
},
|
|
77
89
|
});
|
|
78
90
|
|
|
@@ -89,7 +101,15 @@ const ABORT_FILE = opts['abort-file'] ? resolve(opts['abort-file']) : null;
|
|
|
89
101
|
const POLL_INTERVAL = parseInt(opts['poll-interval'], 10);
|
|
90
102
|
const LOG_FILE = opts.log;
|
|
91
103
|
const STATE_FILE = opts.state;
|
|
104
|
+
const RESTART = opts.restart;
|
|
105
|
+
const MAX_RESTARTS = parseInt(opts['max-restarts'], 10);
|
|
106
|
+
const RESTART_DELAY_S = parseFloat(opts['restart-delay']);
|
|
107
|
+
const LOG_JSON = opts['log-json'];
|
|
92
108
|
|
|
109
|
+
// --- Log levels ---
|
|
110
|
+
|
|
111
|
+
const LOG_LEVELS = { debug: 0, info: 1, warn: 2, error: 3 };
|
|
112
|
+
const LOG_LEVEL = LOG_LEVELS[opts['log-level']] ?? LOG_LEVELS.info;
|
|
93
113
|
// --- State ---
|
|
94
114
|
|
|
95
115
|
const state = {
|
|
@@ -109,12 +129,17 @@ const state = {
|
|
|
109
129
|
stallEvents: 0,
|
|
110
130
|
nudges: 0,
|
|
111
131
|
deadAirChecks: 0,
|
|
132
|
+
restarts: 0,
|
|
112
133
|
};
|
|
113
134
|
|
|
114
135
|
// Sliding window for per-minute rate limiting
|
|
115
136
|
const toolCallTimestamps = [];
|
|
116
137
|
const sendCallTimestamps = [];
|
|
117
138
|
|
|
139
|
+
// Budget threshold tracking — warn once at each level
|
|
140
|
+
const BUDGET_THRESHOLDS = [0.5, 0.75, 0.9];
|
|
141
|
+
const budgetWarned = new Set();
|
|
142
|
+
|
|
118
143
|
// --- Logging ---
|
|
119
144
|
|
|
120
145
|
let logStream = null;
|
|
@@ -123,11 +148,24 @@ if (LOG_FILE) {
|
|
|
123
148
|
logStream = createWriteStream(resolve(LOG_FILE), { flags: 'a' });
|
|
124
149
|
}
|
|
125
150
|
|
|
126
|
-
function log(msg) {
|
|
127
|
-
const
|
|
128
|
-
if (
|
|
129
|
-
|
|
130
|
-
|
|
151
|
+
/**
 * Emit one diagnostics record to stderr and, when a log file is open, to that file.
 *
 * Records whose level ranks below the configured minimum (LOG_LEVEL) are dropped.
 * An unrecognised level name is ranked like 'info'.
 *
 * @param {string} msg - Human-readable message.
 * @param {string} [level='info'] - One of the keys of LOG_LEVELS.
 * @param {object|null} [fields=null] - Extra properties merged into the JSON record
 *   (only used when JSON logging is enabled).
 */
function log(msg, level = 'info', fields = null) {
  const severity = LOG_LEVELS[level] ?? LOG_LEVELS.info;
  if (severity < LOG_LEVEL) {
    return;
  }

  const timestamp = new Date().toISOString();

  if (!LOG_JSON) {
    // Plain-text form: info lines carry no level tag, everything else is tagged.
    const tag = level === 'info' ? '' : `[${level.toUpperCase()}] `;
    const text = `[${timestamp}] ${tag}${msg}`;
    if (logStream) {
      logStream.write(text + '\n');
    }
    process.stderr.write(`[niki] ${text}\n`);
    return;
  }

  // JSON-lines form: every record carries a snapshot of the supervisor state,
  // with caller-supplied fields applied last so they win on key clashes.
  const record = { ts: timestamp, level, msg, ...state };
  if (fields) {
    Object.assign(record, fields);
  }
  const serialized = JSON.stringify(record);
  if (logStream) {
    logStream.write(serialized + '\n');
  }
  process.stderr.write(serialized + '\n');
}
|
|
132
170
|
|
|
133
171
|
function writeState() {
|
|
@@ -141,6 +179,20 @@ function writeState() {
|
|
|
141
179
|
}
|
|
142
180
|
}
|
|
143
181
|
|
|
182
|
+
// --- Budget threshold warnings ---
|
|
183
|
+
|
|
184
|
+
/**
 * Log a warning the first time token usage reaches each entry of BUDGET_THRESHOLDS.
 *
 * Thresholds that have already produced a warning are remembered in `budgetWarned`
 * and are not reported again. Does nothing when BUDGET is zero or negative.
 */
function checkBudgetThresholds() {
  if (BUDGET <= 0) {
    return;
  }

  const fractionUsed = state.tokensTotal / BUDGET;

  BUDGET_THRESHOLDS.forEach((level) => {
    const reached = fractionUsed >= level;
    if (!reached || budgetWarned.has(level)) {
      return;
    }

    budgetWarned.add(level);
    const used = state.tokensTotal;
    const remaining = BUDGET - used;
    log(`Budget ${Math.round(level * 100)}% — ${used.toLocaleString()}/${BUDGET.toLocaleString()} tokens used, ${remaining.toLocaleString()} remaining`, 'warn');
  });
}
|
|
144
196
|
// --- Token parsing from stderr ---
|
|
145
197
|
|
|
146
198
|
// Claude --verbose outputs token usage in stderr. Patterns vary by version.
|
|
@@ -163,6 +215,7 @@ const TOKEN_PATTERNS = [
|
|
|
163
215
|
];
|
|
164
216
|
|
|
165
217
|
function parseTokens(line) {
|
|
218
|
+
let changed = false;
|
|
166
219
|
for (const { regex, field } of TOKEN_PATTERNS) {
|
|
167
220
|
regex.lastIndex = 0;
|
|
168
221
|
let match;
|
|
@@ -170,30 +223,39 @@ function parseTokens(line) {
|
|
|
170
223
|
const count = parseInt(match[1], 10);
|
|
171
224
|
if (isNaN(count) || count <= 0) continue;
|
|
172
225
|
if (field === 'in') {
|
|
173
|
-
state.tokensIn
|
|
226
|
+
if (count > state.tokensIn) { state.tokensIn = count; changed = true; }
|
|
174
227
|
} else {
|
|
175
|
-
state.tokensOut
|
|
228
|
+
if (count > state.tokensOut) { state.tokensOut = count; changed = true; }
|
|
176
229
|
}
|
|
177
230
|
state.tokensTotal = state.tokensIn + state.tokensOut;
|
|
178
231
|
}
|
|
179
232
|
}
|
|
233
|
+
if (changed) {
|
|
234
|
+
log(`Tokens — in: ${state.tokensIn.toLocaleString()} out: ${state.tokensOut.toLocaleString()} total: ${state.tokensTotal.toLocaleString()}/${BUDGET.toLocaleString()} (${Math.round(state.tokensTotal / BUDGET * 100)}%)`, 'debug');
|
|
235
|
+
checkBudgetThresholds();
|
|
236
|
+
}
|
|
180
237
|
}
|
|
181
238
|
|
|
182
239
|
// --- Tool call detection from stderr ---
|
|
183
240
|
|
|
184
241
|
// Claude --verbose logs tool calls. We detect sends specifically.
|
|
185
|
-
const TOOL_CALL_PATTERN = /(?:Using tool|Tool call|tool_use)
|
|
242
|
+
const TOOL_CALL_PATTERN = /(?:Using tool|Tool call|tool_use)[:\s]*(\S+)/i;
|
|
186
243
|
const SEND_PATTERN = /agentchat_send/i;
|
|
187
244
|
|
|
188
245
|
/**
 * Inspect one stderr line for a tool-call marker and update the counters.
 *
 * Every match bumps the total tool-call counter and records a timestamp for the
 * rate window. Lines that also match SEND_PATTERN are counted as sends as well
 * and logged at 'info'; other tool calls are logged at 'debug'.
 *
 * @param {string} line - A single complete line of child stderr.
 */
function parseToolCall(line) {
  const hit = line.match(TOOL_CALL_PATTERN);
  if (!hit) {
    return;
  }

  const timestamp = Date.now();
  const toolName = hit[1] || 'unknown';
  state.toolCalls += 1;
  toolCallTimestamps.push(timestamp);

  const isSend = SEND_PATTERN.test(line);
  if (!isSend) {
    log(`Tool call #${state.toolCalls}: ${toolName} (${state.toolCallsThisMinute + 1}/${MAX_TOOL_CALLS}/min)`, 'debug');
    return;
  }

  state.sendCalls += 1;
  sendCallTimestamps.push(timestamp);
  log(`Tool call #${state.toolCalls}: ${toolName} (send #${state.sendCalls}, ${state.sendCallsThisMinute + 1}/${MAX_SENDS}/min)`, 'info');
}
|
|
@@ -214,6 +276,14 @@ function checkRateLimits() {
|
|
|
214
276
|
state.toolCallsThisMinute = toolCallTimestamps.length;
|
|
215
277
|
state.sendCallsThisMinute = sendCallTimestamps.length;
|
|
216
278
|
|
|
279
|
+
// Warn at 80% of rate limits
|
|
280
|
+
if (state.sendCallsThisMinute === Math.ceil(MAX_SENDS * 0.8)) {
|
|
281
|
+
log(`Rate warning — sends at ${state.sendCallsThisMinute}/${MAX_SENDS}/min (80% threshold)`, 'warn');
|
|
282
|
+
}
|
|
283
|
+
if (state.toolCallsThisMinute === Math.ceil(MAX_TOOL_CALLS * 0.8)) {
|
|
284
|
+
log(`Rate warning — tool calls at ${state.toolCallsThisMinute}/${MAX_TOOL_CALLS}/min (80% threshold)`, 'warn');
|
|
285
|
+
}
|
|
286
|
+
|
|
217
287
|
if (sendCallTimestamps.length > MAX_SENDS) {
|
|
218
288
|
return 'rate-sends';
|
|
219
289
|
}
|
|
@@ -222,7 +292,6 @@ function checkRateLimits() {
|
|
|
222
292
|
}
|
|
223
293
|
return null;
|
|
224
294
|
}
|
|
225
|
-
|
|
226
295
|
// --- Kill logic ---
|
|
227
296
|
|
|
228
297
|
let child = null;
|
|
@@ -232,7 +301,7 @@ function killChild(reason) {
|
|
|
232
301
|
if (killed || !child) return;
|
|
233
302
|
killed = true;
|
|
234
303
|
state.killedBy = reason;
|
|
235
|
-
log(`KILL — reason: ${reason} | tokens: ${state.tokensTotal}/${BUDGET} | sends: ${state.sendCallsThisMinute}/min | tools: ${state.toolCallsThisMinute}/min
|
|
304
|
+
log(`KILL — reason: ${reason} | tokens: ${state.tokensTotal}/${BUDGET} | sends: ${state.sendCallsThisMinute}/min | tools: ${state.toolCallsThisMinute}/min`, 'error');
|
|
236
305
|
|
|
237
306
|
child.kill('SIGTERM');
|
|
238
307
|
|
|
@@ -262,7 +331,7 @@ const PROMPT_PATTERNS = [
|
|
|
262
331
|
function checkForPrompts(text) {
|
|
263
332
|
for (const pattern of PROMPT_PATTERNS) {
|
|
264
333
|
if (pattern.test(text)) {
|
|
265
|
-
log(`PROMPT detected in stdout: ${text.trim().substring(0, 100)}
|
|
334
|
+
log(`PROMPT detected in stdout: ${text.trim().substring(0, 100)}`, 'warn');
|
|
266
335
|
state.stallEvents++;
|
|
267
336
|
// Close stdin to dismiss the prompt
|
|
268
337
|
closeStdin();
|
|
@@ -329,6 +398,13 @@ function hasConsumedCpu(pid) {
|
|
|
329
398
|
return cpuMs > prev;
|
|
330
399
|
}
|
|
331
400
|
|
|
401
|
+
// --- Jitter utility ---
|
|
402
|
+
|
|
403
|
+
/**
 * Return `base` randomly perturbed by up to ±`ratio` of itself.
 *
 * With the default ratio the result lies in [0.7 * base, 1.3 * base), matching
 * the "±30% jitter" advertised for the polling and restart delays.
 *
 * A base that is not a finite positive number (for example NaN from an
 * unparseable CLI value) yields 0 instead of propagating NaN or a negative
 * delay into setTimeout. The result is never negative, even for ratio > 1.
 *
 * @param {number} base - Nominal delay, in whatever unit the caller uses.
 * @param {number} [ratio=0.3] - Maximum relative deviation from `base`.
 * @returns {number} The jittered delay, always >= 0.
 */
function jitteredDelay(base, ratio = 0.3) {
  if (!Number.isFinite(base) || base <= 0) {
    return 0;
  }
  const spread = base * ratio;
  // Math.random() is in [0, 1), so the offset is in [-spread, +spread).
  const offset = Math.random() * 2 * spread - spread;
  return Math.max(0, base + offset);
}
|
|
332
408
|
// --- Dead air detection ---
|
|
333
409
|
|
|
334
410
|
let deadAirStart = null; // Timestamp when dead air began (null = not in dead air)
|
|
@@ -344,7 +420,7 @@ function checkDeadAir() {
|
|
|
344
420
|
if (cpuActive) {
|
|
345
421
|
// Process is working — reset dead air, let it cook
|
|
346
422
|
if (deadAirStart) {
|
|
347
|
-
log(`Dead air cleared — CPU active after ${Math.round((Date.now() - deadAirStart) / 1000)}s of silence
|
|
423
|
+
log(`Dead air cleared — CPU active after ${Math.round((Date.now() - deadAirStart) / 1000)}s of silence`, 'info');
|
|
348
424
|
deadAirStart = null;
|
|
349
425
|
}
|
|
350
426
|
return;
|
|
@@ -353,17 +429,17 @@ function checkDeadAir() {
|
|
|
353
429
|
// Zero CPU + zero output
|
|
354
430
|
if (!deadAirStart) {
|
|
355
431
|
deadAirStart = Date.now();
|
|
356
|
-
log(`Dead air started — zero CPU, ${silenceSec}s silence
|
|
432
|
+
log(`Dead air started — zero CPU, ${silenceSec}s silence`, 'warn');
|
|
357
433
|
}
|
|
358
434
|
|
|
359
435
|
const deadAirMin = (Date.now() - deadAirStart) / 60_000;
|
|
360
436
|
if (deadAirMin >= DEAD_AIR_TIMEOUT_M) {
|
|
361
|
-
log(`DEAD AIR — zero CPU + zero output for ${Math.round(deadAirMin)}min (threshold: ${DEAD_AIR_TIMEOUT_M}min)
|
|
437
|
+
log(`DEAD AIR — zero CPU + zero output for ${Math.round(deadAirMin)}min (threshold: ${DEAD_AIR_TIMEOUT_M}min)`, 'error');
|
|
362
438
|
killChild('dead-air');
|
|
363
439
|
return;
|
|
364
440
|
}
|
|
365
441
|
|
|
366
|
-
log(`Dead air check — zero CPU, ${Math.round(deadAirMin * 10) / 10}/${DEAD_AIR_TIMEOUT_M}min, ${silenceSec}s silence
|
|
442
|
+
log(`Dead air check — zero CPU, ${Math.round(deadAirMin * 10) / 10}/${DEAD_AIR_TIMEOUT_M}min, ${silenceSec}s silence`, 'debug');
|
|
367
443
|
}
|
|
368
444
|
|
|
369
445
|
// Poll interval: min(30s, threshold/3) — fast polls for short thresholds, 30s cap for production
|
|
@@ -391,12 +467,12 @@ function onChildOutput() {
|
|
|
391
467
|
lastOutputTime = Date.now();
|
|
392
468
|
// Reset dead air — got real output
|
|
393
469
|
if (deadAirStart) {
|
|
394
|
-
log(`Dead air cleared — received output after ${Math.round((Date.now() - deadAirStart) / 1000)}s
|
|
470
|
+
log(`Dead air cleared — received output after ${Math.round((Date.now() - deadAirStart) / 1000)}s`, 'info');
|
|
395
471
|
deadAirStart = null;
|
|
396
472
|
}
|
|
397
473
|
if (!gotFirstOutput) {
|
|
398
474
|
gotFirstOutput = true;
|
|
399
|
-
log(`First output received after ${Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000)}s — switching to stall-timeout=${STALL_TIMEOUT_S}s
|
|
475
|
+
log(`First output received after ${Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000)}s — switching to stall-timeout=${STALL_TIMEOUT_S}s`, 'info');
|
|
400
476
|
}
|
|
401
477
|
resetStallTimer();
|
|
402
478
|
}
|
|
@@ -418,14 +494,14 @@ function closeStdin() {
|
|
|
418
494
|
if (stdinClosed || !child) return;
|
|
419
495
|
stdinClosed = true;
|
|
420
496
|
try { child.stdin.end(); } catch { /* already closed */ }
|
|
421
|
-
log('Stdin: closed (EOF)');
|
|
497
|
+
log('Stdin: closed (EOF)', 'debug');
|
|
422
498
|
}
|
|
423
499
|
|
|
424
500
|
function onStallDetected() {
|
|
425
501
|
if (killed) return;
|
|
426
502
|
state.stallEvents++;
|
|
427
503
|
const silence = Math.round((Date.now() - lastOutputTime) / 1000);
|
|
428
|
-
log(`STALL — no output for ${silence}s (nudges: ${nudgeCount}/${MAX_NUDGES})
|
|
504
|
+
log(`STALL — no output for ${silence}s (nudges: ${nudgeCount}/${MAX_NUDGES})`, 'warn');
|
|
429
505
|
|
|
430
506
|
// Escalation: close stdin → nudge → check CPU → kill
|
|
431
507
|
if (!stdinClosed) {
|
|
@@ -437,7 +513,7 @@ function onStallDetected() {
|
|
|
437
513
|
if (nudgeCount < MAX_NUDGES && !child.stdin.writableEnded) {
|
|
438
514
|
nudgeCount++;
|
|
439
515
|
state.nudges = nudgeCount;
|
|
440
|
-
log(`Stall nudge #${nudgeCount}
|
|
516
|
+
log(`Stall nudge #${nudgeCount}`, 'info');
|
|
441
517
|
resetStallTimer();
|
|
442
518
|
return;
|
|
443
519
|
}
|
|
@@ -447,7 +523,7 @@ function onStallDetected() {
|
|
|
447
523
|
if (DEAD_AIR_TIMEOUT_M > 0 && child) {
|
|
448
524
|
const cpuActive = hasConsumedCpu(child.pid);
|
|
449
525
|
if (cpuActive) {
|
|
450
|
-
log(`Stall deferred — process has CPU activity, deferring to dead-air detection
|
|
526
|
+
log(`Stall deferred — process has CPU activity, deferring to dead-air detection`, 'info');
|
|
451
527
|
resetStallTimer();
|
|
452
528
|
return;
|
|
453
529
|
}
|
|
@@ -455,137 +531,195 @@ function onStallDetected() {
|
|
|
455
531
|
|
|
456
532
|
killChild('stall');
|
|
457
533
|
}
|
|
534
|
+
// --- Restart logic ---
|
|
535
|
+
|
|
536
|
+
// Reasons that should NOT trigger a restart (hard limits / operator intent)
|
|
537
|
+
const NO_RESTART_REASONS = new Set(['budget', 'rate-sends', 'rate-tools', 'abort']);
|
|
538
|
+
let nikiTerminated = false; // Set when niki itself receives SIGTERM/SIGINT
|
|
539
|
+
|
|
540
|
+
/**
 * Decide whether the child should be started again after it has exited.
 *
 * A restart is allowed only when restarting is enabled, niki itself has not
 * been asked to terminate, the child was not killed for one of the
 * NO_RESTART_REASONS, and the restart cap (if any) has not been reached.
 *
 * @param {number|null} code - Child exit code (currently not consulted).
 * @param {string|null} signal - Signal that ended the child (currently not consulted).
 * @returns {boolean} True when a restart should be scheduled.
 */
function shouldRestart(code, signal) {
  const hardKill = Boolean(state.killedBy) && NO_RESTART_REASONS.has(state.killedBy);
  const capReached = MAX_RESTARTS > 0 && state.restarts >= MAX_RESTARTS;
  const blocked = !RESTART || nikiTerminated || hardKill || capReached;
  return !blocked;
}
|
|
458
547
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
548
|
+
/**
 * Prepare the supervisor for a fresh child run.
 *
 * Only per-run flags, timers' bookkeeping and the sliding rate windows are
 * cleared; running totals held in `state` (token counts, tool/send totals,
 * restarts) are left untouched.
 */
function resetPerRunState() {
  // Kill / exit bookkeeping for the run that just ended.
  killed = false;
  Object.assign(state, {
    killedBy: null,
    exitCode: null,
    exitSignal: null,
    gotFirstOutput: undefined,
  });

  // Stall and dead-air detection start from scratch.
  nudgeCount = 0;
  stdinClosed = false;
  lastOutputTime = Date.now();
  gotFirstOutput = false;
  lastCpuMs = null;
  deadAirStart = null;

  // The new session gets empty per-minute rate windows.
  toolCallTimestamps.splice(0);
  sendCallTimestamps.splice(0);
  state.toolCallsThisMinute = 0;
  state.sendCallsThisMinute = 0;

  // Budget warnings may fire again during the new run.
  budgetWarned.clear();
}
|
|
468
573
|
|
|
469
|
-
|
|
574
|
+
// --- Abort file polling ---
|
|
470
575
|
|
|
471
|
-
|
|
472
|
-
// This prevents blocking on trust prompts, permission prompts, or stdin reads.
|
|
473
|
-
closeStdin();
|
|
576
|
+
let abortPollId = null;
|
|
474
577
|
|
|
475
|
-
|
|
578
|
+
/**
 * Arm a single jittered timer that checks for the abort file.
 *
 * When the file exists the child is killed with reason 'abort'; otherwise the
 * function re-arms itself. Nothing is scheduled when no abort file is
 * configured or a kill is already in progress.
 */
function scheduleAbortPoll() {
  const pollingDisabled = !ABORT_FILE || killed;
  if (pollingDisabled) {
    return;
  }

  const pollOnce = () => {
    if (killed) {
      return;
    }
    if (!existsSync(ABORT_FILE)) {
      scheduleAbortPoll();
      return;
    }
    log(`Abort file detected: ${ABORT_FILE}`, 'warn');
    killChild('abort');
  };

  abortPollId = setTimeout(pollOnce, jitteredDelay(POLL_INTERVAL));
}
|
|
476
590
|
|
|
477
|
-
|
|
478
|
-
// Forward to our stdout (preserves runner's | tee pipeline)
|
|
479
|
-
process.stdout.write(chunk);
|
|
480
|
-
onChildOutput();
|
|
481
|
-
checkForPrompts(chunk.toString());
|
|
482
|
-
});
|
|
591
|
+
// --- Spawn and monitor child ---
|
|
483
592
|
|
|
484
|
-
|
|
593
|
+
let timeoutId = null;
|
|
485
594
|
|
|
486
|
-
|
|
595
|
+
/**
 * Spawn the supervised command and wire up all monitoring for this run.
 *
 * Sets up, in order: stdout/stderr forwarding and parsing, stall detection,
 * dead-air detection, abort-file polling, the per-run wall-clock timeout and
 * the exit handler. The exit handler either schedules another call to
 * startChild() (when shouldRestart() allows it) or terminates niki with the
 * child's exit code (1 when the child was ended by a signal).
 */
function startChild() {
  log(`Starting: ${childCmd} ${childArgs.join(' ').substring(0, 100)}...`, 'info');
  log(`Budget: ${BUDGET.toLocaleString()} tokens | Timeout: ${TIMEOUT_S}s | Startup: ${STARTUP_TIMEOUT_S}s | Stall: ${STALL_TIMEOUT_S}s | Dead air: ${DEAD_AIR_TIMEOUT_M}min | Max sends: ${MAX_SENDS}/min | Max tools: ${MAX_TOOL_CALLS}/min`, 'info');
  if (RESTART) {
    log(`Restart: enabled | max: ${MAX_RESTARTS || 'unlimited'} | delay: ${RESTART_DELAY_S}s ±30% | restarts so far: ${state.restarts}`, 'info');
  }

  child = spawn(childCmd, childArgs, {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: process.env,
  });

  state.pid = child.pid;
  log(`Child PID: ${child.pid}`, 'debug');

  // Close stdin immediately — claude -p should never need interactive input.
  closeStdin();

  // --- Monitor stdout ---

  child.stdout.on('data', (chunk) => {
    // Forward verbatim first, then treat it as a sign of life and scan for prompts.
    process.stdout.write(chunk);
    onChildOutput();
    checkForPrompts(chunk.toString());
  });

  // --- Monitor stderr ---

  // Holds the trailing partial line between 'data' events so that token and
  // tool-call patterns are only ever matched against complete lines.
  let stderrBuffer = '';

  child.stderr.on('data', (chunk) => {
    const text = chunk.toString();
    process.stderr.write(chunk);
    onChildOutput();

    stderrBuffer += text;
    const lines = stderrBuffer.split('\n');
    stderrBuffer = lines.pop();

    for (const line of lines) {
      parseTokens(line);
      parseToolCall(line);

      // Enforce hard limits after every line; stop scanning once a kill is issued.
      if (state.tokensTotal > BUDGET) {
        killChild('budget');
        return;
      }

      const rateViolation = checkRateLimits();
      if (rateViolation) {
        killChild(rateViolation);
        return;
      }
    }
  });

  // Start stall detection
  if (STALL_TIMEOUT_S > 0 || STARTUP_TIMEOUT_S > 0) {
    log(`Stall detection: startup-timeout=${STARTUP_TIMEOUT_S}s, stall-timeout=${STALL_TIMEOUT_S}s, max-nudges=${MAX_NUDGES}`, 'info');
    resetStallTimer();
  }

  // Start dead air detection
  if (DEAD_AIR_TIMEOUT_M > 0) {
    log(`Dead air detection: ${DEAD_AIR_TIMEOUT_M}min threshold, ${Math.round(DEAD_AIR_POLL_MS / 1000)}s poll interval`, 'info');
    scheduleDeadAirPoll();
  }

  // Abort file polling
  if (ABORT_FILE) {
    log(`Abort file: ${ABORT_FILE} (poll: ${POLL_INTERVAL}ms ±30% jitter)`, 'info');
    scheduleAbortPoll();
  }

  // Per-run timeout
  timeoutId = setTimeout(() => {
    killChild('timeout');
  }, TIMEOUT_S * 1000);

  // --- Exit handler ---

  child.on('exit', (code, signal) => {
    // Stop every timer that belongs to the run that just ended.
    clearTimeout(timeoutId);
    if (stallTimer) clearTimeout(stallTimer);
    if (abortPollId) clearTimeout(abortPollId);
    if (deadAirPollId) clearTimeout(deadAirPollId);

    state.exitCode = code;
    state.exitSignal = signal;
    state.duration = Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000);
    state.gotFirstOutput = gotFirstOutput;

    const level = state.killedBy ? 'error' : (code === 0 ? 'info' : 'warn');
    log(`Exit — code: ${code} signal: ${signal} | tokens: ${state.tokensTotal.toLocaleString()} | tools: ${state.toolCalls} | sends: ${state.sendCalls} | duration: ${state.duration}s | output: ${gotFirstOutput}${state.killedBy ? ` | killed: ${state.killedBy}` : ''} | restarts: ${state.restarts}`, level);
    writeState();

    if (shouldRestart(code, signal)) {
      state.restarts++;
      const delay = jitteredDelay(RESTART_DELAY_S * 1000);
      log(`RESTART — attempt ${state.restarts}${MAX_RESTARTS > 0 ? `/${MAX_RESTARTS}` : ''} in ${Math.round(delay / 1000)}s`, 'warn');
      resetPerRunState();
      setTimeout(() => {
        // A SIGINT/SIGTERM may have reached niki while it was waiting out the
        // restart delay. There was no live child to forward it to, so honour
        // it here instead of spawning a new child after termination was requested.
        if (nikiTerminated) {
          log('NOT RESTARTING — niki received signal', 'warn');
          if (logStream) logStream.end();
          process.exit(code ?? 1);
          return;
        }
        startChild();
      }, delay);
    } else {
      // shouldRestart() is already known to be false on this branch, so only
      // the reason needs to be worked out when restarting was requested.
      if (RESTART) {
        const reason = nikiTerminated ? 'niki received signal' :
          (state.killedBy && NO_RESTART_REASONS.has(state.killedBy)) ? `hard kill (${state.killedBy})` :
          (MAX_RESTARTS > 0 && state.restarts >= MAX_RESTARTS) ? `max restarts reached (${MAX_RESTARTS})` :
          'restart not enabled';
        log(`NOT RESTARTING — ${reason}`, 'warn');
      }
      if (logStream) logStream.end();
      process.exit(code ?? 1);
    }
  });
}
|
|
583
712
|
|
|
584
713
|
// --- Signal forwarding ---
|
|
585
714
|
|
|
586
715
|
// Relay termination signals to the child and remember that niki itself was
// asked to stop, so that the exit handler does not schedule a restart.
['SIGINT', 'SIGTERM'].forEach((signalName) => {
  const forward = () => {
    log(`Received ${signalName}, forwarding to child`, 'warn');
    nikiTerminated = true;
    if (child) {
      child.kill(signalName);
    }
  };
  process.on(signalName, forward);
});
|
|
722
|
+
|
|
723
|
+
// --- Start ---
|
|
724
|
+
|
|
725
|
+
startChild();
|
package/bin/niki.bak
ADDED
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* niki — Deterministic process supervisor for AI agents.
|
|
5
|
+
*
|
|
6
|
+
* Wraps a child command (e.g. `claude -p`) and enforces:
|
|
7
|
+
* - Token budget (kill if exceeded)
|
|
8
|
+
* - Wall-clock timeout (kill if exceeded)
|
|
9
|
+
* - Tool-call rate limiting (kill if agent floods)
|
|
10
|
+
* - Diagnostics logging
|
|
11
|
+
* - Automatic restart on exit (optional)
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* niki [options] -- <command> [args...]
|
|
15
|
+
* niki --budget 500000 --timeout 3600 -- claude -p "..." --verbose
|
|
16
|
+
*
|
|
17
|
+
* Security:
|
|
18
|
+
* - Never logs or exposes API tokens
|
|
19
|
+
* - Inherits env from parent (tokens stay in env, never in CLI args)
|
|
20
|
+
* - Diagnostics only contain counters, never message content
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { spawn, execSync } from 'node:child_process';
|
|
24
|
+
import { createWriteStream, writeFileSync, mkdirSync, existsSync, readFileSync } from 'node:fs';
|
|
25
|
+
import { dirname, resolve } from 'node:path';
|
|
26
|
+
import { parseArgs } from 'node:util';
|
|
27
|
+
|
|
28
|
+
// --- Argument parsing ---
|
|
29
|
+
|
|
30
|
+
const SEPARATOR = process.argv.indexOf('--');
|
|
31
|
+
if (SEPARATOR === -1 || SEPARATOR === process.argv.length - 1) {
|
|
32
|
+
console.error(`niki — deterministic agent supervisor
|
|
33
|
+
|
|
34
|
+
Usage: niki [options] -- <command> [args...]
|
|
35
|
+
|
|
36
|
+
Options:
|
|
37
|
+
--budget <tokens> Max total tokens (input+output) before SIGTERM (default: 1000000)
|
|
38
|
+
--timeout <seconds> Max wall-clock runtime before SIGTERM (default: 3600)
|
|
39
|
+
--max-sends <n> Max agentchat_send calls per minute (default: 10)
|
|
40
|
+
--max-tool-calls <n> Max total tool calls per minute (default: 30)
|
|
41
|
+
--stall-timeout <secs> Kill after N seconds of no output (default: 60, 0=disabled)
|
|
42
|
+
--startup-timeout <s> Longer stall timeout until first output (default: 180, 0=use stall-timeout)
|
|
43
|
+
--dead-air-timeout <m> Minutes of zero CPU + zero output before kill (default: 5, 0=disabled)
|
|
44
|
+
--max-nudges <n> Max stdin nudge attempts before kill on stall (default: 3)
|
|
45
|
+
--log <file> Write diagnostics log to file
|
|
46
|
+
--state <file> Write state JSON on exit (budget used, reason, etc.)
|
|
47
|
+
--cooldown <seconds> Grace period after SIGTERM before SIGKILL (default: 5)
|
|
48
|
+
--abort-file <path> Poll this file for external abort signal
|
|
49
|
+
--poll-interval <ms> Base poll interval in ms for abort file (default: 1000)
|
|
50
|
+
--restart Restart the child process when it exits (default: off)
|
|
51
|
+
--max-restarts <n> Max restart attempts, 0=unlimited (default: 0)
|
|
52
|
+
--restart-delay <secs> Delay between restarts with ±30% jitter (default: 5)
|
|
53
|
+
|
|
54
|
+
Examples:
|
|
55
|
+
niki --budget 500000 -- claude -p "your prompt" --verbose
|
|
56
|
+
niki --timeout 1800 --max-sends 5 -- claude -p "..." --model sonnet --verbose
|
|
57
|
+
niki --restart --max-restarts 10 -- gro --model gpt-5.2 "your prompt"`);
|
|
58
|
+
process.exit(1);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const nikiArgs = process.argv.slice(2, SEPARATOR);
|
|
62
|
+
const childCmd = process.argv[SEPARATOR + 1];
|
|
63
|
+
const childArgs = process.argv.slice(SEPARATOR + 2);
|
|
64
|
+
|
|
65
|
+
const { values: opts } = parseArgs({
|
|
66
|
+
args: nikiArgs,
|
|
67
|
+
options: {
|
|
68
|
+
budget: { type: 'string', default: '1000000' },
|
|
69
|
+
timeout: { type: 'string', default: '3600' },
|
|
70
|
+
'max-sends': { type: 'string', default: '10' },
|
|
71
|
+
'max-tool-calls': { type: 'string', default: '30' },
|
|
72
|
+
'stall-timeout': { type: 'string', default: '60' },
|
|
73
|
+
'startup-timeout': { type: 'string', default: '180' },
|
|
74
|
+
'dead-air-timeout': { type: 'string', default: '5' },
|
|
75
|
+
'max-nudges': { type: 'string', default: '3' },
|
|
76
|
+
log: { type: 'string' },
|
|
77
|
+
state: { type: 'string' },
|
|
78
|
+
cooldown: { type: 'string', default: '5' },
|
|
79
|
+
'abort-file': { type: 'string' },
|
|
80
|
+
'poll-interval': { type: 'string', default: '1000' },
|
|
81
|
+
restart: { type: 'boolean', default: false },
|
|
82
|
+
'max-restarts': { type: 'string', default: '0' },
|
|
83
|
+
'restart-delay': { type: 'string', default: '5' },
|
|
84
|
+
},
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
/**
 * Read the numeric CLI option `--<name>` from `opts`.
 *
 * NaN compares false against every number, so a limit that silently parsed to
 * NaN would never trigger. Unparseable input is therefore rejected up front
 * with an error message and exit status 1.
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {(raw: string) => number} parse - Converter applied to the raw string.
 * @returns {number} The parsed value (never NaN).
 */
function readNumericOption(name, parse) {
  const raw = opts[name];
  const value = parse(raw);
  if (Number.isNaN(value)) {
    console.error(`niki: invalid value for --${name}: "${raw}" (expected a number)`);
    process.exit(1);
  }
  return value;
}

const readIntOption = (name) => readNumericOption(name, (raw) => Number.parseInt(raw, 10));
const readFloatOption = (name) => readNumericOption(name, (raw) => Number.parseFloat(raw));

// Limits enforced on the child.
const BUDGET = readIntOption('budget');
const TIMEOUT_S = readIntOption('timeout');
const MAX_SENDS = readIntOption('max-sends');
const MAX_TOOL_CALLS = readIntOption('max-tool-calls');

// Stall / dead-air detection (dead-air is given in minutes and may be fractional).
const STALL_TIMEOUT_S = readIntOption('stall-timeout');
const STARTUP_TIMEOUT_S = readIntOption('startup-timeout');
const DEAD_AIR_TIMEOUT_M = readFloatOption('dead-air-timeout');
const MAX_NUDGES = readIntOption('max-nudges');

// Kill grace period and external abort signalling.
const COOLDOWN_S = readIntOption('cooldown');
const ABORT_FILE = opts['abort-file'] ? resolve(opts['abort-file']) : null;
const POLL_INTERVAL = readIntOption('poll-interval');

// Optional output files (undefined when the flag was not given).
const LOG_FILE = opts.log;
const STATE_FILE = opts.state;

// Restart behaviour (delay is in seconds and may be fractional).
const RESTART = opts.restart;
const MAX_RESTARTS = readIntOption('max-restarts');
const RESTART_DELAY_S = readFloatOption('restart-delay');
|
|
103
|
+
|
|
104
|
+
// --- State ---
|
|
105
|
+
|
|
106
|
+
// Counters and outcome fields for the supervised process. This object is what
// writeState() serialises, so key order here is the key order in the state file.
const state = {
  // Identity of the supervision session
  startedAt: new Date().toISOString(),
  pid: null,

  // Token usage parsed from the child's stderr
  tokensIn: 0,
  tokensOut: 0,
  tokensTotal: 0,

  // Tool activity: lifetime totals, then the current one-minute window
  toolCalls: 0,
  sendCalls: 0,
  toolCallsThisMinute: 0,
  sendCallsThisMinute: 0,

  // How the child ended
  exitCode: null,
  exitSignal: null,
  killedBy: null, // 'budget' | 'timeout' | 'rate-sends' | 'rate-tools' | 'abort' | 'stall' | 'dead-air' | null
  duration: 0,

  // Liveness bookkeeping
  stallEvents: 0,
  nudges: 0,
  deadAirChecks: 0,
  restarts: 0,
};

// Timestamps (ms since epoch) of recent tool calls and sends; these back the
// per-minute rate limits and are trimmed by pruneWindow().
const toolCallTimestamps = [];
const sendCallTimestamps = [];
|
|
129
|
+
|
|
130
|
+
// --- Logging ---
|
|
131
|
+
|
|
132
|
+
// Append-mode stream for the diagnostics log; stays null when no log file was requested.
let logStream = null;
if (LOG_FILE) {
  const logPath = resolve(LOG_FILE);
  // Create the containing directory first so opening the stream cannot fail on a missing folder.
  mkdirSync(dirname(logPath), { recursive: true });
  logStream = createWriteStream(logPath, { flags: 'a' });
}

/**
 * Emits one timestamped diagnostic line.
 *
 * The line goes to the log file (when one is open) and always to stderr with a
 * `[niki]` prefix, so whatever launched niki can capture it.
 *
 * @param {string} msg - Message text, without timestamp or trailing newline.
 */
function log(msg) {
  const stamped = `[${new Date().toISOString()}] ${msg}`;
  logStream?.write(`${stamped}\n`);
  process.stderr.write(`[niki] ${stamped}\n`);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Persists the current `state` object as pretty-printed JSON to the state file.
 *
 * Does nothing when no state file was configured. Failures (unwritable path,
 * serialisation error) are deliberately ignored: the state file is a
 * best-effort artefact and must never take the supervisor down.
 */
function writeState() {
  if (STATE_FILE) {
    try {
      const target = resolve(STATE_FILE);
      mkdirSync(dirname(target), { recursive: true });
      // Never include env, tokens, or message content — only counters
      const payload = `${JSON.stringify(state, null, 2)}\n`;
      writeFileSync(target, payload);
    } catch {
      // Best effort
    }
  }
}
|
|
155
|
+
|
|
156
|
+
// --- Token parsing from stderr ---
|
|
157
|
+
|
|
158
|
+
// Claude --verbose outputs token usage in stderr. Patterns vary by version.
|
|
159
|
+
// We look for common patterns and extract numbers.
|
|
160
|
+
//
|
|
161
|
+
// Known patterns:
|
|
162
|
+
// "input_tokens": 1234
|
|
163
|
+
// "output_tokens": 567
|
|
164
|
+
// tokens: { input: 1234, output: 567 }
|
|
165
|
+
// Input tokens: 1234
|
|
166
|
+
// Output tokens: 567
|
|
167
|
+
|
|
168
|
+
// Each entry pairs a global regex (capture group 1 = the count) with the
// counter it feeds: 'in' for input tokens, 'out' for output tokens.
const TOKEN_PATTERNS = [
  // JSON-style: "input_tokens": 1234
  { regex: /"input_tokens"\s*:\s*(\d+)/g, field: 'in' },
  { regex: /"output_tokens"\s*:\s*(\d+)/g, field: 'out' },
  // Human-readable: Input tokens: 1234
  { regex: /Input tokens:\s*(\d+)/gi, field: 'in' },
  { regex: /Output tokens:\s*(\d+)/gi, field: 'out' },
];

/**
 * Scans one line of child output for token counts and folds them into `state`.
 *
 * Every match of every pattern is considered. A counter only ever moves up
 * (the largest value seen so far is kept), and `state.tokensTotal` is
 * refreshed after each accepted match. Zero counts are ignored.
 *
 * @param {string} line - A single line of text to scan.
 */
function parseTokens(line) {
  for (const { regex, field } of TOKEN_PATTERNS) {
    // matchAll works on a private copy of the regex, so the shared pattern's
    // lastIndex is never advanced between calls.
    for (const [, digits] of line.matchAll(regex)) {
      const count = Number.parseInt(digits, 10);
      if (!(count > 0)) continue;

      const key = field === 'in' ? 'tokensIn' : 'tokensOut';
      state[key] = Math.max(state[key], count);
      state.tokensTotal = state.tokensIn + state.tokensOut;
    }
  }
}
|
|
193
|
+
|
|
194
|
+
// --- Tool call detection from stderr ---
|
|
195
|
+
|
|
196
|
+
// Claude --verbose logs tool calls. We detect sends specifically.
|
|
197
|
+
// A line counts as a tool call when it mentions one of these markers followed
// by at least one word character.
const TOOL_CALL_PATTERN = /(?:Using tool|Tool call|tool_use).*?(\w+)/i;
// Tool-call lines that also mention this name are counted as sends.
const SEND_PATTERN = /agentchat_send/i;

/**
 * Inspects one line of child output and records a tool call if it is one.
 *
 * Bumps the lifetime tool-call counter and appends the current time to the
 * tool-call window. When the same line also names the send tool, the send
 * counter and send window are updated with the same timestamp.
 *
 * @param {string} line - A single line of text to inspect.
 */
function parseToolCall(line) {
  if (!TOOL_CALL_PATTERN.test(line)) return;

  const seenAt = Date.now();
  state.toolCalls += 1;
  toolCallTimestamps.push(seenAt);

  if (!SEND_PATTERN.test(line)) return;

  state.sendCalls += 1;
  sendCallTimestamps.push(seenAt);
}
|
|
212
|
+
|
|
213
|
+
// --- Rate limit checking ---
|
|
214
|
+
|
|
215
|
+
/**
 * Drops, in place, the leading entries of `timestamps` that are more than one
 * minute old.
 *
 * Only a leading run is removed: trimming stops at the first entry that is
 * still inside the window, which is sufficient because entries are appended in
 * chronological order.
 *
 * @param {number[]} timestamps - Millisecond timestamps, oldest first. Mutated.
 */
function pruneWindow(timestamps) {
  const oldestAllowed = Date.now() - 60_000; // 1 minute window
  const firstFresh = timestamps.findIndex((stamp) => !(stamp < oldestAllowed));
  const staleCount = firstFresh === -1 ? timestamps.length : firstFresh;
  timestamps.splice(0, staleCount);
}
|
|
221
|
+
|
|
222
|
+
/**
 * Refreshes the one-minute windows and reports the first rate limit exceeded.
 *
 * Side effect: `state.toolCallsThisMinute` and `state.sendCallsThisMinute`
 * are updated to the current window sizes.
 *
 * @returns {'rate-sends' | 'rate-tools' | null} The violated limit (sends are
 *   checked before tools), or null when both are within bounds.
 */
function checkRateLimits() {
  for (const window of [toolCallTimestamps, sendCallTimestamps]) {
    pruneWindow(window);
  }

  const toolsInWindow = toolCallTimestamps.length;
  const sendsInWindow = sendCallTimestamps.length;
  state.toolCallsThisMinute = toolsInWindow;
  state.sendCallsThisMinute = sendsInWindow;

  if (sendsInWindow > MAX_SENDS) return 'rate-sends';
  return toolsInWindow > MAX_TOOL_CALLS ? 'rate-tools' : null;
}
|
|
237
|
+
|
|
238
|
+
// --- Kill logic ---
|
|
239
|
+
|
|
240
|
+
// The currently supervised process (reassigned on every restart) and whether a
// kill has already been issued for the current run.
let child = null;
let killed = false;

/**
 * Terminates the current child: SIGTERM now, SIGKILL after the cooldown if it
 * is still alive.
 *
 * Idempotent per run — once a kill has been issued (or when there is no
 * child), further calls do nothing. Records the reason in `state.killedBy`.
 *
 * @param {string} reason - Why the child is being killed (e.g. 'budget',
 *   'timeout', 'stall'); stored in state and logged.
 */
function killChild(reason) {
  if (killed || !child) return;
  killed = true;
  state.killedBy = reason;
  log(`KILL — reason: ${reason} | tokens: ${state.tokensTotal}/${BUDGET} | sends: ${state.sendCallsThisMinute}/min | tools: ${state.toolCallsThisMinute}/min`);

  // Pin the process being terminated. The module-level `child` is reassigned
  // when a restart happens, and a restart can land inside the cooldown window;
  // the escalation below must never hit the replacement process.
  const target = child;
  target.kill('SIGTERM');

  // Grace period, then SIGKILL
  setTimeout(() => {
    // exitCode/signalCode stay null while the process is running; once either
    // is set the process is gone and there is nothing left to escalate.
    if (target.exitCode != null || target.signalCode != null) return;
    try {
      target.kill('SIGKILL');
    } catch {
      // Already dead
    }
  }, COOLDOWN_S * 1000);
}
|
|
260
|
+
|
|
261
|
+
// --- Prompt pattern detection ---
|
|
262
|
+
|
|
263
|
+
// Text fragments that indicate the child is waiting for interactive input.
const PROMPT_PATTERNS = [
  /\(y\/n\)/i,
  /\[Y\/n\]/i,
  /\[y\/N\]/i,
  /\(yes\/no\)/i,
  /Do you want to trust/i,
  /Do you want to allow/i,
  /Press Enter to continue/i,
  /Are you sure/i,
];

/**
 * Checks a chunk of child stdout for an interactive prompt.
 *
 * On a hit the prompt is logged (first 100 characters of the trimmed chunk),
 * counted as a stall event, and stdin is closed so the prompt receives EOF.
 *
 * @param {string} text - Decoded stdout chunk.
 * @returns {boolean} True when a prompt pattern matched.
 */
function checkForPrompts(text) {
  const looksLikePrompt = PROMPT_PATTERNS.some((pattern) => pattern.test(text));
  if (!looksLikePrompt) return false;

  log(`PROMPT detected in stdout: ${text.trim().substring(0, 100)}`);
  state.stallEvents++;
  // Close stdin to dismiss the prompt
  closeStdin();
  return true;
}
|
|
286
|
+
|
|
287
|
+
// --- CPU liveness sampling ---
|
|
288
|
+
|
|
289
|
+
// Reads cumulative CPU time (user+system) for a process tree.
|
|
290
|
+
// Returns total CPU milliseconds, or -1 if unavailable.
|
|
291
|
+
// On Linux: reads /proc/<pid>/stat (works in containers).
|
|
292
|
+
// On macOS: uses ps command as fallback.
|
|
293
|
+
let lastCpuMs = null; // null = no prior sample taken yet

/**
 * Converts `ps -o cputime=` output to milliseconds.
 *
 * Accepts `[DD-]HH:MM:SS` as well as `MM:SS` with an optional fractional
 * seconds part. Anything else yields -1 so that callers treat the sample as
 * "unavailable" instead of receiving NaN.
 *
 * @param {string} text - Raw cputime column value.
 * @returns {number} CPU time in milliseconds, or -1 when unparseable.
 */
function parsePsCpuTime(text) {
  const match = /^(?:(\d+)-)?(?:(\d+):)?(\d+):(\d+(?:\.\d+)?)$/.exec(text.trim());
  if (!match) return -1;

  const [, days = '0', hours = '0', minutes, seconds] = match;
  const totalSec =
    Number(days) * 86_400 + Number(hours) * 3_600 + Number(minutes) * 60 + Number(seconds);
  return Number.isFinite(totalSec) ? totalSec * 1000 : -1;
}

/**
 * Samples the cumulative CPU time of a process.
 *
 * Reads `/proc/<pid>/stat` when it exists (user + system time of the process
 * plus the times of its waited-for children); otherwise falls back to `ps`.
 *
 * @param {number} pid - Process id to sample.
 * @returns {number} Cumulative CPU milliseconds, or -1 if unavailable.
 */
function sampleCpuMs(pid) {
  // A missing pid (e.g. failed spawn) can never be sampled, and only a plain
  // positive integer may be interpolated into the shell command below.
  if (!Number.isInteger(pid) || pid <= 0) return -1;

  try {
    // Linux: /proc/<pid>/stat fields 14 (utime) and 15 (stime) in clock ticks
    const statPath = `/proc/${pid}/stat`;
    if (existsSync(statPath)) {
      const stat = readFileSync(statPath, 'utf8');
      // Fields are space-separated, but comm (field 2) can contain spaces/parens.
      // Find the closing paren, then split the rest.
      const afterComm = stat.substring(stat.lastIndexOf(')') + 2);
      const fields = afterComm.split(' ');
      // afterComm starts at field 3 (state), so field N sits at index N - 3:
      // utime (14) → 11, stime (15) → 12, cutime (16) → 13, cstime (17) → 14.
      const utime = parseInt(fields[11], 10) || 0;
      const stime = parseInt(fields[12], 10) || 0;
      const cutime = parseInt(fields[13], 10) || 0;
      const cstime = parseInt(fields[14], 10) || 0;
      // Convert clock ticks to ms (typically 100 ticks/sec on Linux)
      const ticksPerSec = 100;
      return ((utime + stime + cutime + cstime) / ticksPerSec) * 1000;
    }

    // macOS / fallback: use ps to get cumulative CPU time
    const output = execSync(`ps -o cputime= -p ${pid} 2>/dev/null`, { encoding: 'utf8', timeout: 3000 }).trim();
    if (!output) return -1;
    return parsePsCpuTime(output);
  } catch {
    // Process vanished, ps missing or timed out — report "unavailable".
    return -1;
  }
}
|
|
328
|
+
|
|
329
|
+
// Returns true if the child process has consumed CPU since the last sample.
|
|
330
|
+
/**
 * Reports whether the process used any CPU since the previous call.
 *
 * Errs on the side of "alive": returns true when CPU time cannot be measured
 * and on the very first measurable sample (there is nothing to compare with).
 * An unmeasurable sample leaves the stored baseline untouched.
 *
 * @param {number} pid - Process id to sample.
 * @returns {boolean} True if CPU time grew, or if that cannot be determined.
 */
function hasConsumedCpu(pid) {
  const sample = sampleCpuMs(pid);
  if (sample < 0) return true; // Can't measure → assume alive (safe default)

  const previous = lastCpuMs;
  lastCpuMs = sample;

  // No baseline yet → assume alive; otherwise any increase counts as work.
  return previous === null ? true : sample > previous;
}
|
|
343
|
+
|
|
344
|
+
// --- Jitter utility ---
|
|
345
|
+
|
|
346
|
+
/**
 * Randomises a delay by up to ±30% so repeated timers do not fire in lockstep.
 *
 * @param {number} base - Nominal delay (any unit).
 * @returns {number} A value between 0.7 × base and 1.3 × base.
 */
function jitteredDelay(base) {
  const spread = base * 0.3;
  const offset = Math.random() * 2 * spread - spread;
  return base + offset;
}
|
|
351
|
+
|
|
352
|
+
// --- Dead air detection ---
|
|
353
|
+
|
|
354
|
+
let deadAirStart = null; // Timestamp when dead air began (null = not in dead air)
let deadAirPollId = null; // Handle of the pending dead-air poll timer

/**
 * One dead-air poll: kills the child once it has shown neither CPU activity
 * nor output for the configured number of minutes.
 *
 * A poll that observes CPU activity ends any dead-air period in progress.
 * A poll without CPU activity starts the period if none is running, then
 * either kills with reason 'dead-air' (threshold reached) or logs progress.
 * Does nothing when already killed, when there is no child, or when the
 * threshold is zero or negative.
 */
function checkDeadAir() {
  const disabled = DEAD_AIR_TIMEOUT_M <= 0;
  if (killed || !child || disabled) return;

  state.deadAirChecks++;
  const working = hasConsumedCpu(child.pid);
  const quietSec = Math.round((Date.now() - lastOutputTime) / 1000);

  if (working) {
    // Process is working — reset dead air, let it cook
    if (deadAirStart) {
      log(`Dead air cleared — CPU active after ${Math.round((Date.now() - deadAirStart) / 1000)}s of silence`);
      deadAirStart = null;
    }
    return;
  }

  // Zero CPU + zero output
  if (!deadAirStart) {
    deadAirStart = Date.now();
    log(`Dead air started — zero CPU, ${quietSec}s silence`);
  }

  const elapsedMin = (Date.now() - deadAirStart) / 60_000;
  if (elapsedMin >= DEAD_AIR_TIMEOUT_M) {
    log(`DEAD AIR — zero CPU + zero output for ${Math.round(elapsedMin)}min (threshold: ${DEAD_AIR_TIMEOUT_M}min)`);
    killChild('dead-air');
  } else {
    log(`Dead air check — zero CPU, ${Math.round(elapsedMin * 10) / 10}/${DEAD_AIR_TIMEOUT_M}min, ${quietSec}s silence`);
  }
}
|
|
388
|
+
|
|
389
|
+
// Base interval between dead-air polls: one third of the threshold, clamped to
// the range 2s–30s (short thresholds poll quickly, long ones poll every 30s).
// Falls back to 30s when dead-air detection is disabled.
const DEAD_AIR_POLL_MS = (() => {
  if (!(DEAD_AIR_TIMEOUT_M > 0)) return 30_000;
  const thirdOfThresholdMs = (DEAD_AIR_TIMEOUT_M * 60_000) / 3;
  return Math.min(30_000, Math.max(2_000, thirdOfThresholdMs));
})();
|
|
393
|
+
|
|
394
|
+
/**
 * Arms the next dead-air poll after a jittered interval.
 *
 * Each poll runs checkDeadAir() and then re-arms itself, so the chain keeps
 * going until a kill has been issued. Nothing is scheduled when a kill is
 * already in progress or when the dead-air threshold is zero or negative.
 */
function scheduleDeadAirPoll() {
  if (killed || DEAD_AIR_TIMEOUT_M <= 0) return;

  const runPoll = () => {
    checkDeadAir();
    if (!killed) scheduleDeadAirPoll();
  };
  deadAirPollId = setTimeout(runPoll, jitteredDelay(DEAD_AIR_POLL_MS));
}
|
|
401
|
+
|
|
402
|
+
// --- Stall detection ---
|
|
403
|
+
|
|
404
|
+
// Stall-detection bookkeeping for the current run.
let stallTimer = null; // pending stall timeout handle
let nudgeCount = 0; // nudges issued so far
let stdinClosed = false; // whether the child's stdin has been ended
let lastOutputTime = Date.now(); // when the child last produced output
let gotFirstOutput = false; // flips once the child has produced anything

/**
 * Records that the child just produced output (on stdout or stderr).
 *
 * Refreshes the last-output timestamp, ends any dead-air period in progress,
 * notes the first output of the run, and restarts the stall timer.
 */
function onChildOutput() {
  const now = Date.now();
  lastOutputTime = now;

  // Reset dead air — got real output
  if (deadAirStart) {
    log(`Dead air cleared — received output after ${Math.round((Date.now() - deadAirStart) / 1000)}s`);
    deadAirStart = null;
  }

  const isFirstOutput = !gotFirstOutput;
  if (isFirstOutput) {
    gotFirstOutput = true;
    const sinceStartSec = Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000);
    log(`First output received after ${sinceStartSec}s — switching to stall-timeout=${STALL_TIMEOUT_S}s`);
  }

  resetStallTimer();
}
|
|
423
|
+
|
|
424
|
+
/**
 * Picks the silence threshold that applies right now.
 *
 * @returns {number} The startup timeout while the child has not produced any
 *   output yet (provided a positive startup timeout is configured); the
 *   regular stall timeout otherwise. Seconds.
 */
function currentStallTimeout() {
  const stillStartingUp = !gotFirstOutput && STARTUP_TIMEOUT_S > 0;
  return stillStartingUp ? STARTUP_TIMEOUT_S : STALL_TIMEOUT_S;
}
|
|
429
|
+
|
|
430
|
+
/**
 * Restarts the stall countdown.
 *
 * Any pending stall timer is cancelled first. A new one is armed with the
 * currently applicable timeout unless a kill is already in progress or that
 * timeout is zero or negative (stall detection off).
 */
function resetStallTimer() {
  if (stallTimer) clearTimeout(stallTimer);

  const seconds = currentStallTimeout();
  const skip = killed || seconds <= 0;
  if (!skip) {
    stallTimer = setTimeout(onStallDetected, seconds * 1000);
  }
}
|
|
436
|
+
|
|
437
|
+
/**
 * Ends the child's stdin so it sees EOF instead of waiting for input.
 *
 * Runs at most once per child: later calls (or calls made before a child
 * exists) are no-ops. An error from ending the stream is ignored.
 */
function closeStdin() {
  const nothingToDo = stdinClosed || !child;
  if (nothingToDo) return;

  stdinClosed = true;
  try {
    child.stdin.end();
  } catch {
    /* already closed */
  }
  log('Stdin: closed (EOF)');
}
|
|
443
|
+
|
|
444
|
+
/**
 * Stall-timer callback: the child has been silent for the current timeout.
 *
 * Escalates one step per invocation, re-arming the stall timer between steps:
 *   1. close stdin, if that has not happened yet;
 *   2. count a nudge, while the nudge budget lasts and stdin is still writable;
 *   3. when dead-air detection is enabled and the process is using CPU, defer
 *      to the dead-air poller instead of killing;
 *   4. otherwise kill with reason 'stall'.
 */
function onStallDetected() {
  if (killed) return;

  state.stallEvents++;
  const quietSec = Math.round((Date.now() - lastOutputTime) / 1000);
  log(`STALL — no output for ${quietSec}s (nudges: ${nudgeCount}/${MAX_NUDGES})`);

  // Step 1: make sure the child is not simply waiting on stdin.
  if (!stdinClosed) {
    closeStdin();
    resetStallTimer();
    return;
  }

  // Step 2: spend a nudge. A nudge only records the attempt and grants the
  // child another timeout period; nothing is written to stdin.
  const canNudge = nudgeCount < MAX_NUDGES && !child.stdin.writableEnded;
  if (canNudge) {
    nudgeCount++;
    state.nudges = nudgeCount;
    log(`Stall nudge #${nudgeCount}`);
    resetStallTimer();
    return;
  }

  // Step 3: if dead air detection is enabled, a silent but busy process is
  // left to the dead air poller. Only stall-kill on zero CPU, or when dead
  // air detection is disabled.
  const deadAirEnabled = DEAD_AIR_TIMEOUT_M > 0 && child;
  if (deadAirEnabled && hasConsumedCpu(child.pid)) {
    log(`Stall deferred — process has CPU activity, deferring to dead-air detection`);
    resetStallTimer();
    return;
  }

  // Step 4: silent and idle — give up on it.
  killChild('stall');
}
|
|
478
|
+
|
|
479
|
+
// --- Restart logic ---
|
|
480
|
+
|
|
481
|
+
// Reasons that should NOT trigger a restart (hard limits / operator intent)
|
|
482
|
+
// Kill reasons after which the child is never restarted.
const NO_RESTART_REASONS = new Set(['budget', 'rate-sends', 'rate-tools', 'abort']);
let nikiTerminated = false; // Set when niki itself receives SIGTERM/SIGINT

/**
 * Decides whether an exited child should be started again.
 *
 * A restart requires all of: restarts enabled, niki itself not told to stop,
 * the child not killed for a no-restart reason, and the restart cap (when
 * positive) not yet reached.
 *
 * @param {number|null} code - Child exit code (not used in the decision).
 * @param {string|null} signal - Terminating signal (not used in the decision).
 * @returns {boolean} True when the child should be restarted.
 */
function shouldRestart(code, signal) {
  const hardStop = Boolean(state.killedBy) && NO_RESTART_REASONS.has(state.killedBy);
  const capReached = MAX_RESTARTS > 0 && state.restarts >= MAX_RESTARTS;
  return Boolean(RESTART) && !nikiTerminated && !hardStop && !capReached;
}
|
|
492
|
+
|
|
493
|
+
/**
 * Clears everything that describes a single run, ready for the next child.
 *
 * Lifetime counters in `state` (token totals, tool/send totals, stall events,
 * restarts, ...) are left alone; only the outcome fields, the stall and
 * dead-air trackers and the per-minute rate windows start over.
 */
function resetPerRunState() {
  // Outcome of the run that just ended
  killed = false;
  Object.assign(state, {
    killedBy: null,
    exitCode: null,
    exitSignal: null,
    gotFirstOutput: undefined,
  });

  // Stall / dead-air trackers
  nudgeCount = 0;
  stdinClosed = false;
  gotFirstOutput = false;
  lastCpuMs = null;
  deadAirStart = null;
  lastOutputTime = Date.now();

  // Rate-limit windows: the new process starts with an empty minute
  toolCallTimestamps.splice(0);
  sendCallTimestamps.splice(0);
  state.toolCallsThisMinute = 0;
  state.sendCallsThisMinute = 0;
}
|
|
515
|
+
|
|
516
|
+
// --- Abort file polling ---
|
|
517
|
+
|
|
518
|
+
let abortPollId = null; // Handle of the pending abort-file poll timer

/**
 * Arms the next check for the external abort file.
 *
 * After a jittered interval the abort path is tested: if it exists the child
 * is killed with reason 'abort' and polling stops; otherwise the next check
 * is armed. Nothing happens when no abort file is configured or a kill is
 * already in progress.
 */
function scheduleAbortPoll() {
  if (!ABORT_FILE || killed) return;

  const poll = () => {
    if (killed) return;
    if (!existsSync(ABORT_FILE)) {
      scheduleAbortPoll();
      return;
    }
    log(`Abort file detected: ${ABORT_FILE}`);
    killChild('abort');
  };
  abortPollId = setTimeout(poll, jitteredDelay(POLL_INTERVAL));
}
|
|
532
|
+
|
|
533
|
+
// --- Spawn and monitor child ---
|
|
534
|
+
|
|
535
|
+
let timeoutId = null; // Handle of the per-run wall-clock timeout

/**
 * Spawns the supervised command and wires up every monitor for this run.
 *
 * Sets the module-level `child`, closes its stdin, relays stdout/stderr to
 * niki's own streams while parsing stderr for token usage and tool calls,
 * and arms stall, dead-air, abort-file and wall-clock timers. When the child
 * exits, all timers are cancelled and the state file is written; then either
 * a restart is scheduled (per shouldRestart) or niki exits with the child's
 * exit code (1 if it was terminated by a signal).
 *
 * If the command cannot be spawned at all, the failure is logged, state is
 * written and niki exits with code 1.
 */
function startChild() {
  log(`Starting: ${childCmd} ${childArgs.join(' ').substring(0, 100)}...`);
  log(`Budget: ${BUDGET} tokens | Timeout: ${TIMEOUT_S}s | Startup: ${STARTUP_TIMEOUT_S}s | Stall: ${STALL_TIMEOUT_S}s | Dead air: ${DEAD_AIR_TIMEOUT_M}min | Max sends: ${MAX_SENDS}/min | Max tools: ${MAX_TOOL_CALLS}/min`);
  if (RESTART) {
    log(`Restart: enabled | max: ${MAX_RESTARTS || 'unlimited'} | delay: ${RESTART_DELAY_S}s ±30% | restarts so far: ${state.restarts}`);
  }

  // Listeners are attached to this local handle so they always refer to the
  // process started by this call, even after `child` is reassigned by a restart.
  const proc = spawn(childCmd, childArgs, {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: process.env,
  });
  child = proc;
  state.pid = proc.pid;

  // Cancels every timer armed for this run.
  const clearRunTimers = () => {
    clearTimeout(timeoutId);
    if (stallTimer) clearTimeout(stallTimer);
    if (abortPollId) clearTimeout(abortPollId);
    if (deadAirPollId) clearTimeout(deadAirPollId);
  };

  // --- Spawn / process errors ---

  // Without this listener a failed spawn (e.g. command not found) surfaces as
  // an unhandled 'error' event and crashes niki before state is written.
  proc.on('error', (err) => {
    log(`Child process error: ${err.message}`);

    // A pid means the process did start; the error concerns a later operation
    // on it (such as a failed kill), so supervision continues.
    if (proc.pid !== undefined) return;

    clearRunTimers();
    state.duration = Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000);
    state.gotFirstOutput = gotFirstOutput;
    writeState();
    if (logStream) logStream.end();
    process.exit(1);
  });

  // Close stdin immediately — claude -p should never need interactive input.
  closeStdin();

  // --- Monitor stdout ---

  proc.stdout.on('data', (chunk) => {
    process.stdout.write(chunk);
    onChildOutput();
    checkForPrompts(chunk.toString());
  });

  // --- Monitor stderr ---

  // Holds the trailing partial line between chunks so patterns are only
  // matched against complete lines.
  let stderrBuffer = '';

  proc.stderr.on('data', (chunk) => {
    const text = chunk.toString();
    process.stderr.write(chunk);
    onChildOutput();

    stderrBuffer += text;
    const lines = stderrBuffer.split('\n');
    stderrBuffer = lines.pop();

    for (const line of lines) {
      parseTokens(line);
      parseToolCall(line);

      if (state.tokensTotal > BUDGET) {
        killChild('budget');
        return;
      }

      const rateViolation = checkRateLimits();
      if (rateViolation) {
        killChild(rateViolation);
        return;
      }
    }
  });

  // Start stall detection
  if (STALL_TIMEOUT_S > 0 || STARTUP_TIMEOUT_S > 0) {
    log(`Stall detection: startup-timeout=${STARTUP_TIMEOUT_S}s, stall-timeout=${STALL_TIMEOUT_S}s, max-nudges=${MAX_NUDGES}`);
    resetStallTimer();
  }

  // Start dead air detection
  if (DEAD_AIR_TIMEOUT_M > 0) {
    log(`Dead air detection: ${DEAD_AIR_TIMEOUT_M}min threshold, ${Math.round(DEAD_AIR_POLL_MS / 1000)}s poll interval`);
    scheduleDeadAirPoll();
  }

  // Abort file polling
  if (ABORT_FILE) {
    log(`Abort file: ${ABORT_FILE} (poll: ${POLL_INTERVAL}ms ±30% jitter)`);
    scheduleAbortPoll();
  }

  // Per-run timeout
  timeoutId = setTimeout(() => {
    killChild('timeout');
  }, TIMEOUT_S * 1000);

  // --- Exit handler ---

  proc.on('exit', (code, signal) => {
    clearRunTimers();

    state.exitCode = code;
    state.exitSignal = signal;
    state.duration = Math.round((Date.now() - new Date(state.startedAt).getTime()) / 1000);
    state.gotFirstOutput = gotFirstOutput;

    log(`Exit — code: ${code} signal: ${signal} | tokens: ${state.tokensTotal} | tools: ${state.toolCalls} | sends: ${state.sendCalls} | duration: ${state.duration}s | output: ${gotFirstOutput}${state.killedBy ? ` | killed: ${state.killedBy}` : ''} | restarts: ${state.restarts}`);
    writeState();

    if (shouldRestart(code, signal)) {
      state.restarts++;
      const delay = jitteredDelay(RESTART_DELAY_S * 1000);
      log(`RESTART — attempt ${state.restarts}${MAX_RESTARTS > 0 ? `/${MAX_RESTARTS}` : ''} in ${Math.round(delay / 1000)}s`);
      resetPerRunState();
      setTimeout(() => {
        startChild();
      }, delay);
      return;
    }

    // Not restarting. When restarts were requested, say why this exit is final.
    if (RESTART) {
      const reason = nikiTerminated ? 'niki received signal' :
        (state.killedBy && NO_RESTART_REASONS.has(state.killedBy)) ? `hard kill (${state.killedBy})` :
        (MAX_RESTARTS > 0 && state.restarts >= MAX_RESTARTS) ? `max restarts reached (${MAX_RESTARTS})` :
        'restart not enabled';
      log(`NOT RESTARTING — ${reason}`);
    }
    if (logStream) logStream.end();
    // A signal-terminated child has a null exit code; report that as failure.
    process.exit(code ?? 1);
  });
}
|
|
652
|
+
|
|
653
|
+
// --- Signal forwarding ---
|
|
654
|
+
|
|
655
|
+
// SIGINT/SIGTERM sent to niki mark the session as terminated (which disables
// restarts) and are passed on to the child. If no child is running at that
// moment — i.e. the previous one has exited and a restart is pending — there
// is nothing to forward to, so niki exits right away instead of letting the
// pending restart spawn a new process.
for (const sig of ['SIGINT', 'SIGTERM']) {
  process.on(sig, () => {
    nikiTerminated = true;

    // exitCode/signalCode are both null only while the process is alive.
    const childRunning = child != null && child.exitCode == null && child.signalCode == null;
    if (childRunning) {
      log(`Received ${sig}, forwarding to child`);
      child.kill(sig);
      return;
    }

    log(`Received ${sig} with no running child, exiting`);
    if (logStream) logStream.end();
    // Conventional 128 + signal number: SIGINT (2) → 130, SIGTERM (15) → 143.
    process.exit(sig === 'SIGINT' ? 130 : 143);
  });
}

// --- Start ---

startChild();
|