groove-dev 0.27.115 → 0.27.117
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/TRAINING_DATA_v4.md +6 -3
- package/moe-training/client/domain-tagger.js +20 -0
- package/moe-training/client/trajectory-capture.js +36 -7
- package/moe-training/test/client/trajectory-capture.test.js +182 -1
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/cli/src/commands/team.js +27 -12
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/api.js +3 -2
- package/node_modules/@groove-dev/daemon/src/process.js +283 -211
- package/node_modules/@groove-dev/daemon/src/teams.js +53 -24
- package/node_modules/@groove-dev/daemon/src/tunnel-manager.js +21 -3
- package/node_modules/@groove-dev/gui/dist/assets/{index-D4Q72afD.css → index-DdN9RVnC.css} +1 -1
- package/node_modules/@groove-dev/gui/dist/assets/{index-BKCiOUDb.js → index-fq--PD7_.js} +1724 -1724
- package/node_modules/@groove-dev/gui/dist/index.html +2 -2
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/node_modules/@groove-dev/gui/src/components/teams/team-removal-dialog.jsx +156 -0
- package/node_modules/@groove-dev/gui/src/stores/groove.js +15 -4
- package/node_modules/@groove-dev/gui/src/views/agents.jsx +10 -19
- package/node_modules/@groove-dev/gui/src/views/teams.jsx +17 -41
- package/node_modules/moe-training/client/domain-tagger.js +20 -0
- package/node_modules/moe-training/client/trajectory-capture.js +36 -7
- package/node_modules/moe-training/test/client/trajectory-capture.test.js +182 -1
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/cli/src/commands/team.js +27 -12
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/api.js +3 -2
- package/packages/daemon/src/process.js +283 -211
- package/packages/daemon/src/teams.js +53 -24
- package/packages/daemon/src/tunnel-manager.js +21 -3
- package/packages/gui/dist/assets/{index-D4Q72afD.css → index-DdN9RVnC.css} +1 -1
- package/packages/gui/dist/assets/{index-BKCiOUDb.js → index-fq--PD7_.js} +1724 -1724
- package/packages/gui/dist/index.html +2 -2
- package/packages/gui/package.json +1 -1
- package/packages/gui/src/components/teams/team-removal-dialog.jsx +156 -0
- package/packages/gui/src/stores/groove.js +15 -4
- package/packages/gui/src/views/agents.jsx +10 -19
- package/packages/gui/src/views/teams.jsx +17 -41
|
@@ -329,6 +329,8 @@ export class ProcessManager {
|
|
|
329
329
|
this._streamThrottle = new Map(); // agentId -> { timer, pending }
|
|
330
330
|
this._rotatingAgents = new Set(); // agentIds currently being rotated (rotator wrote handoff)
|
|
331
331
|
this._stalledAgents = new Set(); // agentIds already flagged as stalled (avoids duplicate broadcasts)
|
|
332
|
+
this._exitHandled = new Set();
|
|
333
|
+
this._resultReceived = new Set();
|
|
332
334
|
|
|
333
335
|
this._stallWatchdog = setInterval(() => this._checkStalls(), STALL_CHECK_INTERVAL_MS);
|
|
334
336
|
if (this._stallWatchdog.unref) this._stallWatchdog.unref();
|
|
@@ -366,6 +368,251 @@ export class ProcessManager {
|
|
|
366
368
|
});
|
|
367
369
|
console.warn(`[Groove] Agent ${agent.name} (${agentId}) silent for ${Math.round(silentMs / 1000)}s — possible stalled API stream`);
|
|
368
370
|
}
|
|
371
|
+
|
|
372
|
+
// Defense in depth: detect zombie handles where PID is no longer alive
|
|
373
|
+
const ZOMBIE_THRESHOLD_MS = 10 * 60_000;
|
|
374
|
+
for (const [agentId, handle] of this.handles.entries()) {
|
|
375
|
+
const agent = registry.get(agentId);
|
|
376
|
+
if (!agent) continue;
|
|
377
|
+
const lastActivity = agent.lastActivity ? new Date(agent.lastActivity).getTime() : now;
|
|
378
|
+
if (now - lastActivity < ZOMBIE_THRESHOLD_MS) continue;
|
|
379
|
+
const pid = handle.proc?.pid;
|
|
380
|
+
if (!pid) continue;
|
|
381
|
+
try {
|
|
382
|
+
process.kill(pid, 0);
|
|
383
|
+
} catch {
|
|
384
|
+
console.warn(`[Groove] Agent ${agent.name} (${agentId}) PID ${pid} no longer alive — force-cleaning handle`);
|
|
385
|
+
if (handle.logStream && !handle.logStream.destroyed) {
|
|
386
|
+
handle.logStream.write(`[${new Date().toISOString()}] Force-cleaned: PID ${pid} no longer alive\n`);
|
|
387
|
+
handle.logStream.end();
|
|
388
|
+
}
|
|
389
|
+
this.handles.delete(agentId);
|
|
390
|
+
this._exitHandled.add(agentId);
|
|
391
|
+
setTimeout(() => this._exitHandled.delete(agentId), 30_000);
|
|
392
|
+
this._stalledAgents.delete(agentId);
|
|
393
|
+
this._resultReceived.delete(agentId);
|
|
394
|
+
const throttle = this._streamThrottle.get(agentId);
|
|
395
|
+
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
396
|
+
this._streamThrottle.delete(agentId);
|
|
397
|
+
this.peakContextUsage.delete(agentId);
|
|
398
|
+
this.pendingMessages.delete(agentId);
|
|
399
|
+
if (this.daemon.locks) this.daemon.locks.release(agentId);
|
|
400
|
+
registry.update(agentId, { status: 'completed', pid: null });
|
|
401
|
+
this.daemon.broadcast({ type: 'agent:exit', agentId, code: 0, signal: null, status: 'completed' });
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
_handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath) {
|
|
407
|
+
if (this._exitHandled.has(agent.id)) return;
|
|
408
|
+
this._exitHandled.add(agent.id);
|
|
409
|
+
setTimeout(() => this._exitHandled.delete(agent.id), 30_000);
|
|
410
|
+
|
|
411
|
+
const { registry } = this.daemon;
|
|
412
|
+
|
|
413
|
+
if (!logStream.destroyed) {
|
|
414
|
+
logStream.write(`[${new Date().toISOString()}] Process exited: code=${code} signal=${signal}\n`);
|
|
415
|
+
logStream.end();
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
this.handles.delete(agent.id);
|
|
419
|
+
|
|
420
|
+
const throttle = this._streamThrottle.get(agent.id);
|
|
421
|
+
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
422
|
+
this._streamThrottle.delete(agent.id);
|
|
423
|
+
|
|
424
|
+
this.peakContextUsage.delete(agent.id);
|
|
425
|
+
this.pendingMessages.delete(agent.id);
|
|
426
|
+
this._stalledAgents.delete(agent.id);
|
|
427
|
+
|
|
428
|
+
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
429
|
+
|
|
430
|
+
const hadResult = this._resultReceived.has(agent.id);
|
|
431
|
+
this._resultReceived.delete(agent.id);
|
|
432
|
+
|
|
433
|
+
const finalStatus = hadResult
|
|
434
|
+
? 'completed'
|
|
435
|
+
: signal === 'SIGTERM' || signal === 'SIGKILL'
|
|
436
|
+
? 'killed'
|
|
437
|
+
: code === 0
|
|
438
|
+
? 'completed'
|
|
439
|
+
: 'crashed';
|
|
440
|
+
|
|
441
|
+
const crashError = finalStatus === 'crashed' ? stderrBuf.join('').trim().slice(-500) : null;
|
|
442
|
+
|
|
443
|
+
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
444
|
+
|
|
445
|
+
if (this.daemon.timeline) {
|
|
446
|
+
const agentData = registry.get(agent.id);
|
|
447
|
+
this.daemon.timeline.recordEvent(finalStatus === 'completed' ? 'complete' : finalStatus === 'crashed' ? 'crash' : 'kill', {
|
|
448
|
+
agentId: agent.id, agentName: agent.name, role: agent.role,
|
|
449
|
+
finalTokens: agentData?.tokensUsed || 0, costUsd: agentData?.costUsd || 0,
|
|
450
|
+
exitCode: code,
|
|
451
|
+
});
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
if (this.daemon.trajectoryCapture) {
|
|
455
|
+
try {
|
|
456
|
+
if (finalStatus === 'completed') {
|
|
457
|
+
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
458
|
+
status: 'SUCCESS', exit_code: code, signal,
|
|
459
|
+
});
|
|
460
|
+
} else {
|
|
461
|
+
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
462
|
+
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
463
|
+
);
|
|
464
|
+
}
|
|
465
|
+
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
466
|
+
this.daemon.state.set('training_sessions_captured', count);
|
|
467
|
+
} catch (e) { /* fail silent */ }
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
this.daemon.broadcast({
|
|
471
|
+
type: 'agent:exit',
|
|
472
|
+
agentId: agent.id,
|
|
473
|
+
code,
|
|
474
|
+
signal,
|
|
475
|
+
status: finalStatus,
|
|
476
|
+
error: crashError || undefined,
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
if (this.daemon.integrations) {
|
|
480
|
+
this.daemon.integrations.refreshMcpJson();
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
if (finalStatus === 'completed' && agent.role === 'planner') {
|
|
484
|
+
this._extractRecommendedTeam(agent, logPath);
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
if (finalStatus === 'completed') {
|
|
488
|
+
const pending = this.consumePendingMessage(agent.id);
|
|
489
|
+
if (pending) {
|
|
490
|
+
const agentData = registry.get(agent.id);
|
|
491
|
+
if (agentData?.sessionId) {
|
|
492
|
+
this.resume(agent.id, pending.message).catch((err) => {
|
|
493
|
+
console.error(`[Groove] Auto-resume with queued message failed for ${agent.name}: ${err.message}`);
|
|
494
|
+
});
|
|
495
|
+
return;
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
501
|
+
const a = registry.get(agent.id);
|
|
502
|
+
const turns = a?.turns || 0;
|
|
503
|
+
const tok = a?.tokensUsed || 0;
|
|
504
|
+
if (turns > 1 || tok >= 100) {
|
|
505
|
+
this.daemon.journalist.requestSynthesis('completion');
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
this._checkPhase2(agent.id);
|
|
510
|
+
|
|
511
|
+
if (agent.teamId) {
|
|
512
|
+
this._checkPreviewReady(agent.teamId);
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
if (finalStatus === 'completed') {
|
|
516
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
517
|
+
if (files.length > 0) this._triggerIdleQC(agent);
|
|
518
|
+
this._processHandoffs(agent);
|
|
519
|
+
if (this._rotatingAgents.has(agent.id)) {
|
|
520
|
+
this._rotatingAgents.delete(agent.id);
|
|
521
|
+
} else {
|
|
522
|
+
this._writeCompletionHandoff(agent).catch(err => console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
527
|
+
try {
|
|
528
|
+
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
529
|
+
const signals = events.length >= 6
|
|
530
|
+
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
531
|
+
: null;
|
|
532
|
+
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
533
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
534
|
+
this.daemon.memory.updateSpecialization(agent.id, {
|
|
535
|
+
role: agent.role,
|
|
536
|
+
qualityScore: score,
|
|
537
|
+
filesTouched: files,
|
|
538
|
+
signals,
|
|
539
|
+
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
540
|
+
});
|
|
541
|
+
} catch { /* best-effort */ }
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
_handleResumeProcessExit(agent, code, signal, logStream) {
|
|
546
|
+
if (this._exitHandled.has(agent.id)) return;
|
|
547
|
+
this._exitHandled.add(agent.id);
|
|
548
|
+
setTimeout(() => this._exitHandled.delete(agent.id), 30_000);
|
|
549
|
+
|
|
550
|
+
const { registry } = this.daemon;
|
|
551
|
+
|
|
552
|
+
if (!logStream.destroyed) {
|
|
553
|
+
logStream.write(`[${new Date().toISOString()}] Process exited: code=${code} signal=${signal}\n`);
|
|
554
|
+
logStream.end();
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
this.handles.delete(agent.id);
|
|
558
|
+
this._stalledAgents.delete(agent.id);
|
|
559
|
+
|
|
560
|
+
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
561
|
+
|
|
562
|
+
const hadResult = this._resultReceived.has(agent.id);
|
|
563
|
+
this._resultReceived.delete(agent.id);
|
|
564
|
+
|
|
565
|
+
const finalStatus = hadResult ? 'completed' : signal === 'SIGTERM' || signal === 'SIGKILL' ? 'killed' : code === 0 ? 'completed' : 'crashed';
|
|
566
|
+
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
567
|
+
|
|
568
|
+
if (this.daemon.trajectoryCapture) {
|
|
569
|
+
try {
|
|
570
|
+
if (finalStatus === 'completed') {
|
|
571
|
+
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
572
|
+
status: 'SUCCESS', exit_code: code, signal,
|
|
573
|
+
});
|
|
574
|
+
} else {
|
|
575
|
+
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
576
|
+
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
577
|
+
);
|
|
578
|
+
}
|
|
579
|
+
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
580
|
+
this.daemon.state.set('training_sessions_captured', count);
|
|
581
|
+
} catch (e) { /* fail silent */ }
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code, signal, status: finalStatus });
|
|
585
|
+
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
586
|
+
const a = registry.get(agent.id);
|
|
587
|
+
const turns = a?.turns || 0;
|
|
588
|
+
const tok = a?.tokensUsed || 0;
|
|
589
|
+
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
if (finalStatus === 'completed' && !this._rotatingAgents.has(agent.id)) {
|
|
593
|
+
this._writeCompletionHandoff(agent).catch(err =>
|
|
594
|
+
console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
595
|
+
}
|
|
596
|
+
if (this._rotatingAgents.has(agent.id)) {
|
|
597
|
+
this._rotatingAgents.delete(agent.id);
|
|
598
|
+
}
|
|
599
|
+
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
600
|
+
try {
|
|
601
|
+
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
602
|
+
const signals = events.length >= 6
|
|
603
|
+
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
604
|
+
: null;
|
|
605
|
+
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
606
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
607
|
+
this.daemon.memory.updateSpecialization(agent.id, {
|
|
608
|
+
role: agent.role,
|
|
609
|
+
qualityScore: score,
|
|
610
|
+
filesTouched: files,
|
|
611
|
+
signals,
|
|
612
|
+
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
613
|
+
});
|
|
614
|
+
} catch { /* best-effort */ }
|
|
615
|
+
}
|
|
369
616
|
}
|
|
370
617
|
|
|
371
618
|
async spawn(config) {
|
|
@@ -490,7 +737,7 @@ export class ProcessManager {
|
|
|
490
737
|
try {
|
|
491
738
|
const teamSize = registry.getAll().filter(a => a.status === 'active' || a.status === 'running' || a.status === 'starting').length;
|
|
492
739
|
this.daemon.trajectoryCapture.onAgentSpawn(
|
|
493
|
-
agent.id, providerName, config.model || null, config.role, teamSize
|
|
740
|
+
agent.id, providerName, config.model || null, config.role, teamSize, config.prompt
|
|
494
741
|
).catch(() => {});
|
|
495
742
|
} catch (e) { /* fail silent */ }
|
|
496
743
|
}
|
|
@@ -732,6 +979,8 @@ For normal file edits within your scope, proceed without review.
|
|
|
732
979
|
logStream.write(`[${new Date().toISOString()}] Agent loop exited: status=${status}\n`);
|
|
733
980
|
logStream.end();
|
|
734
981
|
this.handles.delete(agent.id);
|
|
982
|
+
this._stalledAgents.delete(agent.id);
|
|
983
|
+
this._resultReceived.delete(agent.id);
|
|
735
984
|
|
|
736
985
|
// Clean up stream throttle so pending timers don't fire for dead agents
|
|
737
986
|
const throttle = this._streamThrottle.get(agent.id);
|
|
@@ -775,8 +1024,9 @@ For normal file edits within your scope, proceed without review.
|
|
|
775
1024
|
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: code || 0, signal, status });
|
|
776
1025
|
if (this.daemon.integrations) this.daemon.integrations.refreshMcpJson();
|
|
777
1026
|
if (status === 'completed' && this.daemon.journalist) {
|
|
778
|
-
const
|
|
779
|
-
const
|
|
1027
|
+
const a = registry.get(agent.id);
|
|
1028
|
+
const turns = a?.turns || 0;
|
|
1029
|
+
const tok = a?.tokensUsed || 0;
|
|
780
1030
|
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
781
1031
|
}
|
|
782
1032
|
this._checkPhase2(agent.id);
|
|
@@ -862,6 +1112,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
862
1112
|
if (!logStream.destroyed) logStream.write(`[${new Date().toISOString()}] Spawn error: ${err.message}\n`);
|
|
863
1113
|
if (!logStream.destroyed) logStream.end();
|
|
864
1114
|
this.handles.delete(agent.id);
|
|
1115
|
+
this._exitHandled.add(agent.id);
|
|
865
1116
|
registry.update(agent.id, { status: 'crashed', pid: null });
|
|
866
1117
|
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: null, signal: null, status: 'crashed', error: err.message });
|
|
867
1118
|
});
|
|
@@ -906,154 +1157,13 @@ For normal file edits within your scope, proceed without review.
|
|
|
906
1157
|
while (stderrBuf.join('').length > 2048) stderrBuf.shift();
|
|
907
1158
|
});
|
|
908
1159
|
|
|
909
|
-
// Handle process exit
|
|
1160
|
+
// Handle process exit — cleanup extracted to _handleProcessExit with dedup
|
|
910
1161
|
proc.on('exit', (code, signal) => {
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
logStream.end();
|
|
914
|
-
|
|
915
|
-
this.handles.delete(agent.id);
|
|
916
|
-
|
|
917
|
-
// Clean up stream throttle so pending timers don't fire for dead agents
|
|
918
|
-
const throttle = this._streamThrottle.get(agent.id);
|
|
919
|
-
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
920
|
-
this._streamThrottle.delete(agent.id);
|
|
921
|
-
|
|
922
|
-
// Clean up per-agent maps to prevent unbounded growth in long sessions
|
|
923
|
-
this.peakContextUsage.delete(agent.id);
|
|
924
|
-
this.pendingMessages.delete(agent.id);
|
|
925
|
-
this._stalledAgents.delete(agent.id);
|
|
926
|
-
|
|
927
|
-
// Release file-scope locks so they don't persist after agent death
|
|
928
|
-
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
929
|
-
|
|
930
|
-
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL'
|
|
931
|
-
? 'killed'
|
|
932
|
-
: code === 0
|
|
933
|
-
? 'completed'
|
|
934
|
-
: 'crashed';
|
|
935
|
-
|
|
936
|
-
// Capture crash error from stderr for UI display
|
|
937
|
-
const crashError = finalStatus === 'crashed' ? stderrBuf.join('').trim().slice(-500) : null;
|
|
938
|
-
|
|
939
|
-
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
940
|
-
|
|
941
|
-
// Record lifecycle event for timeline
|
|
942
|
-
if (this.daemon.timeline) {
|
|
943
|
-
const agentData = registry.get(agent.id);
|
|
944
|
-
this.daemon.timeline.recordEvent(finalStatus === 'completed' ? 'complete' : finalStatus === 'crashed' ? 'crash' : 'kill', {
|
|
945
|
-
agentId: agent.id, agentName: agent.name, role: agent.role,
|
|
946
|
-
finalTokens: agentData?.tokensUsed || 0, costUsd: agentData?.costUsd || 0,
|
|
947
|
-
exitCode: code,
|
|
948
|
-
});
|
|
949
|
-
}
|
|
950
|
-
|
|
951
|
-
if (this.daemon.trajectoryCapture) {
|
|
952
|
-
try {
|
|
953
|
-
if (finalStatus === 'completed') {
|
|
954
|
-
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
955
|
-
status: 'SUCCESS', exit_code: code, signal,
|
|
956
|
-
});
|
|
957
|
-
} else {
|
|
958
|
-
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
959
|
-
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
960
|
-
);
|
|
961
|
-
}
|
|
962
|
-
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
963
|
-
this.daemon.state.set('training_sessions_captured', count);
|
|
964
|
-
} catch (e) { /* fail silent */ }
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
this.daemon.broadcast({
|
|
968
|
-
type: 'agent:exit',
|
|
969
|
-
agentId: agent.id,
|
|
970
|
-
code,
|
|
971
|
-
signal,
|
|
972
|
-
status: finalStatus,
|
|
973
|
-
error: crashError || undefined,
|
|
974
|
-
});
|
|
975
|
-
|
|
976
|
-
// Refresh MCP config — remove integrations no longer needed by running agents
|
|
977
|
-
if (this.daemon.integrations) {
|
|
978
|
-
this.daemon.integrations.refreshMcpJson();
|
|
979
|
-
}
|
|
980
|
-
|
|
981
|
-
// Extract recommended-team.json from planner text output if it wasn't written to disk.
|
|
982
|
-
// Non-Claude providers (Codex, Gemini) may embed the JSON in text rather than using Write.
|
|
983
|
-
if (finalStatus === 'completed' && agent.role === 'planner') {
|
|
984
|
-
this._extractRecommendedTeam(agent, logPath);
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
// Auto-resume with queued message: if the user sent a message while this
|
|
988
|
-
// CLI agent was still running, resume the session now that it's done.
|
|
989
|
-
if (finalStatus === 'completed') {
|
|
990
|
-
const pending = this.consumePendingMessage(agent.id);
|
|
991
|
-
if (pending) {
|
|
992
|
-
const agentData = registry.get(agent.id);
|
|
993
|
-
if (agentData?.sessionId) {
|
|
994
|
-
this.resume(agent.id, pending.message).catch((err) => {
|
|
995
|
-
console.error(`[Groove] Auto-resume with queued message failed for ${agent.name}: ${err.message}`);
|
|
996
|
-
});
|
|
997
|
-
return;
|
|
998
|
-
}
|
|
999
|
-
}
|
|
1000
|
-
}
|
|
1001
|
-
|
|
1002
|
-
// Trigger journalist synthesis on completion (event-driven, debounced).
|
|
1003
|
-
// Skip trivial sessions — a greeting-only completion (user never gave a task)
|
|
1004
|
-
// has nothing worth synthesizing and wastes a $0.04+ headless claude call.
|
|
1005
|
-
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
1006
|
-
const a = registry.get(agent.id);
|
|
1007
|
-
const turns = a?.turns || 0;
|
|
1008
|
-
const tok = a?.tokensUsed || 0;
|
|
1009
|
-
if (turns > 1 || tok >= 100) {
|
|
1010
|
-
this.daemon.journalist.requestSynthesis('completion');
|
|
1011
|
-
}
|
|
1012
|
-
}
|
|
1013
|
-
|
|
1014
|
-
// Phase 2 auto-spawn: check if all phase 1 agents for a team are done
|
|
1015
|
-
this._checkPhase2(agent.id);
|
|
1016
|
-
|
|
1017
|
-
// Preview launch: when every agent in this team is in a terminal state,
|
|
1018
|
-
// kick off the one-click preview (dev server or static serve) the planner
|
|
1019
|
-
// staged in the team plan. Fires once per team launch.
|
|
1020
|
-
// Fire on any terminal status so crashed QC agents don't block preview
|
|
1021
|
-
// when builders completed successfully.
|
|
1022
|
-
if (agent.teamId) {
|
|
1023
|
-
this._checkPreviewReady(agent.teamId);
|
|
1024
|
-
}
|
|
1025
|
-
|
|
1026
|
-
// Auto-trigger idle QC: if this agent modified files and there's an idle QC
|
|
1027
|
-
// in the same team, activate it to verify the changes
|
|
1028
|
-
if (finalStatus === 'completed') {
|
|
1029
|
-
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
1030
|
-
if (files.length > 0) this._triggerIdleQC(agent);
|
|
1031
|
-
this._processHandoffs(agent);
|
|
1032
|
-
if (this._rotatingAgents.has(agent.id)) {
|
|
1033
|
-
this._rotatingAgents.delete(agent.id);
|
|
1034
|
-
} else {
|
|
1035
|
-
this._writeCompletionHandoff(agent).catch(err => console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
1036
|
-
}
|
|
1037
|
-
}
|
|
1162
|
+
this._handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath);
|
|
1163
|
+
});
|
|
1038
1164
|
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
try {
|
|
1042
|
-
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
1043
|
-
const signals = events.length >= 6
|
|
1044
|
-
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
1045
|
-
: null;
|
|
1046
|
-
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
1047
|
-
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
1048
|
-
this.daemon.memory.updateSpecialization(agent.id, {
|
|
1049
|
-
role: agent.role,
|
|
1050
|
-
qualityScore: score,
|
|
1051
|
-
filesTouched: files,
|
|
1052
|
-
signals,
|
|
1053
|
-
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
1054
|
-
});
|
|
1055
|
-
} catch { /* best-effort */ }
|
|
1056
|
-
}
|
|
1165
|
+
proc.on('close', (code, signal) => {
|
|
1166
|
+
this._handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath);
|
|
1057
1167
|
});
|
|
1058
1168
|
|
|
1059
1169
|
proc.on('error', (err) => {
|
|
@@ -1061,6 +1171,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1061
1171
|
logStream.end();
|
|
1062
1172
|
|
|
1063
1173
|
this.handles.delete(agent.id);
|
|
1174
|
+
this._exitHandled.add(agent.id);
|
|
1064
1175
|
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
1065
1176
|
registry.update(agent.id, { status: 'crashed', pid: null });
|
|
1066
1177
|
this.daemon.broadcast({
|
|
@@ -1162,6 +1273,21 @@ For normal file edits within your scope, proceed without review.
|
|
|
1162
1273
|
if (output.cost) updates.costUsd = (agent.costUsd || 0) + output.cost;
|
|
1163
1274
|
if (output.duration) updates.durationMs = output.duration;
|
|
1164
1275
|
if (output.turns) updates.turns = output.turns;
|
|
1276
|
+
|
|
1277
|
+
// Claude Code sometimes hangs after emitting the result event — the
|
|
1278
|
+
// process stays alive instead of exiting. Record that the result
|
|
1279
|
+
// arrived so exit handlers know this was a successful completion even
|
|
1280
|
+
// if we have to SIGTERM the process. After a 5s grace period, force-
|
|
1281
|
+
// kill any process that hasn't exited on its own.
|
|
1282
|
+
this._resultReceived.add(agentId);
|
|
1283
|
+
const handle = this.handles.get(agentId);
|
|
1284
|
+
if (handle?.proc && typeof handle.proc.kill === 'function') {
|
|
1285
|
+
setTimeout(() => {
|
|
1286
|
+
if (this.handles.has(agentId) && this._resultReceived.has(agentId)) {
|
|
1287
|
+
try { handle.proc.kill('SIGTERM'); } catch {}
|
|
1288
|
+
}
|
|
1289
|
+
}, 5_000);
|
|
1290
|
+
}
|
|
1165
1291
|
}
|
|
1166
1292
|
|
|
1167
1293
|
// Context window usage (0-1 scale) — drives rotation threshold
|
|
@@ -1745,7 +1871,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1745
1871
|
try {
|
|
1746
1872
|
const teamSize = registry.getAll().filter(a => a.status === 'active' || a.status === 'running' || a.status === 'starting').length;
|
|
1747
1873
|
this.daemon.trajectoryCapture.onAgentSpawn(
|
|
1748
|
-
newAgent.id, config.provider, config.model || null, config.role, teamSize
|
|
1874
|
+
newAgent.id, config.provider, config.model || null, config.role, teamSize, config.prompt
|
|
1749
1875
|
).catch(() => {});
|
|
1750
1876
|
} catch (e) { /* fail silent */ }
|
|
1751
1877
|
}
|
|
@@ -1763,6 +1889,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1763
1889
|
if (!logStream.destroyed) logStream.write(`[${new Date().toISOString()}] Resume spawn error: ${err.message}\n`);
|
|
1764
1890
|
if (!logStream.destroyed) logStream.end();
|
|
1765
1891
|
this.handles.delete(newAgent.id);
|
|
1892
|
+
this._exitHandled.add(newAgent.id);
|
|
1766
1893
|
registry.update(newAgent.id, { status: 'crashed', pid: null });
|
|
1767
1894
|
this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code: null, signal: null, status: 'crashed', error: err.message });
|
|
1768
1895
|
});
|
|
@@ -1795,73 +1922,18 @@ For normal file edits within your scope, proceed without review.
|
|
|
1795
1922
|
proc.stderr.on('data', (chunk) => { logStream.write(`[stderr] ${chunk}`); });
|
|
1796
1923
|
|
|
1797
1924
|
proc.on('exit', (code, signal) => {
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
this.handles.delete(newAgent.id);
|
|
1801
|
-
this._stalledAgents.delete(newAgent.id);
|
|
1802
|
-
|
|
1803
|
-
// Release file-scope locks so they don't persist after agent death
|
|
1804
|
-
if (this.daemon.locks) this.daemon.locks.release(newAgent.id);
|
|
1805
|
-
|
|
1806
|
-
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL' ? 'killed' : code === 0 ? 'completed' : 'crashed';
|
|
1807
|
-
registry.update(newAgent.id, { status: finalStatus, pid: null });
|
|
1808
|
-
|
|
1809
|
-
if (this.daemon.trajectoryCapture) {
|
|
1810
|
-
try {
|
|
1811
|
-
if (finalStatus === 'completed') {
|
|
1812
|
-
this.daemon.trajectoryCapture.onAgentComplete(newAgent.id, {
|
|
1813
|
-
status: 'SUCCESS', exit_code: code, signal,
|
|
1814
|
-
});
|
|
1815
|
-
} else {
|
|
1816
|
-
this.daemon.trajectoryCapture.onAgentCrash(newAgent.id,
|
|
1817
|
-
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
1818
|
-
);
|
|
1819
|
-
}
|
|
1820
|
-
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
1821
|
-
this.daemon.state.set('training_sessions_captured', count);
|
|
1822
|
-
} catch (e) { /* fail silent */ }
|
|
1823
|
-
}
|
|
1824
|
-
|
|
1825
|
-
this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code, signal, status: finalStatus });
|
|
1826
|
-
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
1827
|
-
const a = registry.get(newAgent.id);
|
|
1828
|
-
const turns = a?.turns || 0;
|
|
1829
|
-
const tok = a?.tokensUsed || 0;
|
|
1830
|
-
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
1831
|
-
}
|
|
1925
|
+
this._handleResumeProcessExit(newAgent, code, signal, logStream);
|
|
1926
|
+
});
|
|
1832
1927
|
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
if (finalStatus === 'completed' && !this._rotatingAgents.has(newAgent.id)) {
|
|
1836
|
-
this._writeCompletionHandoff(newAgent).catch(err =>
|
|
1837
|
-
console.error(`[Groove] Completion handoff failed for ${newAgent.name}:`, err.message));
|
|
1838
|
-
}
|
|
1839
|
-
if (this._rotatingAgents.has(newAgent.id)) {
|
|
1840
|
-
this._rotatingAgents.delete(newAgent.id);
|
|
1841
|
-
}
|
|
1842
|
-
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
1843
|
-
try {
|
|
1844
|
-
const events = this.daemon.classifier?.agentWindows?.[newAgent.id] || [];
|
|
1845
|
-
const signals = events.length >= 6
|
|
1846
|
-
? this.daemon.adaptive.extractSignals(events, newAgent.scope)
|
|
1847
|
-
: null;
|
|
1848
|
-
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
1849
|
-
const files = this.daemon.journalist?.getAgentFiles(newAgent) || [];
|
|
1850
|
-
this.daemon.memory.updateSpecialization(newAgent.id, {
|
|
1851
|
-
role: newAgent.role,
|
|
1852
|
-
qualityScore: score,
|
|
1853
|
-
filesTouched: files,
|
|
1854
|
-
signals,
|
|
1855
|
-
threshold: this.daemon.adaptive?.getThreshold(newAgent.provider, newAgent.role),
|
|
1856
|
-
});
|
|
1857
|
-
} catch { /* best-effort */ }
|
|
1858
|
-
}
|
|
1928
|
+
proc.on('close', (code, signal) => {
|
|
1929
|
+
this._handleResumeProcessExit(newAgent, code, signal, logStream);
|
|
1859
1930
|
});
|
|
1860
1931
|
|
|
1861
1932
|
proc.on('error', (err) => {
|
|
1862
1933
|
logStream.write(`[error] ${err.message}\n`);
|
|
1863
1934
|
logStream.end();
|
|
1864
1935
|
this.handles.delete(newAgent.id);
|
|
1936
|
+
this._exitHandled.add(newAgent.id);
|
|
1865
1937
|
this._stalledAgents.delete(newAgent.id);
|
|
1866
1938
|
registry.update(newAgent.id, { status: 'crashed', pid: null });
|
|
1867
1939
|
});
|
|
@@ -1941,7 +2013,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1941
2013
|
try {
|
|
1942
2014
|
const teamSize = registry.getAll().filter(a => a.status === 'active' || a.status === 'running' || a.status === 'starting').length;
|
|
1943
2015
|
this.daemon.trajectoryCapture.onAgentSpawn(
|
|
1944
|
-
newAgent.id, config.provider, loopConfig.model || config.model || null, config.role, teamSize
|
|
2016
|
+
newAgent.id, config.provider, loopConfig.model || config.model || null, config.role, teamSize, config.prompt
|
|
1945
2017
|
).catch(() => {});
|
|
1946
2018
|
} catch (e) { /* fail silent */ }
|
|
1947
2019
|
}
|