groove-dev 0.27.115 → 0.27.116
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/TRAINING_DATA_v4.md +6 -3
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/cli/src/commands/team.js +27 -12
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/api.js +3 -2
- package/node_modules/@groove-dev/daemon/src/process.js +254 -208
- package/node_modules/@groove-dev/daemon/src/teams.js +53 -24
- package/node_modules/@groove-dev/daemon/src/tunnel-manager.js +3 -2
- package/node_modules/@groove-dev/gui/dist/assets/{index-D4Q72afD.css → index-DdN9RVnC.css} +1 -1
- package/node_modules/@groove-dev/gui/dist/assets/{index-BKCiOUDb.js → index-fq--PD7_.js} +1724 -1724
- package/node_modules/@groove-dev/gui/dist/index.html +2 -2
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/node_modules/@groove-dev/gui/src/components/teams/team-removal-dialog.jsx +156 -0
- package/node_modules/@groove-dev/gui/src/stores/groove.js +15 -4
- package/node_modules/@groove-dev/gui/src/views/agents.jsx +10 -19
- package/node_modules/@groove-dev/gui/src/views/teams.jsx +17 -41
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/cli/src/commands/team.js +27 -12
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/api.js +3 -2
- package/packages/daemon/src/process.js +254 -208
- package/packages/daemon/src/teams.js +53 -24
- package/packages/daemon/src/tunnel-manager.js +3 -2
- package/packages/gui/dist/assets/{index-D4Q72afD.css → index-DdN9RVnC.css} +1 -1
- package/packages/gui/dist/assets/{index-BKCiOUDb.js → index-fq--PD7_.js} +1724 -1724
- package/packages/gui/dist/index.html +2 -2
- package/packages/gui/package.json +1 -1
- package/packages/gui/src/components/teams/team-removal-dialog.jsx +156 -0
- package/packages/gui/src/stores/groove.js +15 -4
- package/packages/gui/src/views/agents.jsx +10 -19
- package/packages/gui/src/views/teams.jsx +17 -41
|
@@ -329,6 +329,7 @@ export class ProcessManager {
|
|
|
329
329
|
this._streamThrottle = new Map(); // agentId -> { timer, pending }
|
|
330
330
|
this._rotatingAgents = new Set(); // agentIds currently being rotated (rotator wrote handoff)
|
|
331
331
|
this._stalledAgents = new Set(); // agentIds already flagged as stalled (avoids duplicate broadcasts)
|
|
332
|
+
this._exitHandled = new Set();
|
|
332
333
|
|
|
333
334
|
this._stallWatchdog = setInterval(() => this._checkStalls(), STALL_CHECK_INTERVAL_MS);
|
|
334
335
|
if (this._stallWatchdog.unref) this._stallWatchdog.unref();
|
|
@@ -366,6 +367,242 @@ export class ProcessManager {
|
|
|
366
367
|
});
|
|
367
368
|
console.warn(`[Groove] Agent ${agent.name} (${agentId}) silent for ${Math.round(silentMs / 1000)}s — possible stalled API stream`);
|
|
368
369
|
}
|
|
370
|
+
|
|
371
|
+
// Defense in depth: detect zombie handles where PID is no longer alive
|
|
372
|
+
const ZOMBIE_THRESHOLD_MS = 10 * 60_000;
|
|
373
|
+
for (const [agentId, handle] of this.handles.entries()) {
|
|
374
|
+
const agent = registry.get(agentId);
|
|
375
|
+
if (!agent) continue;
|
|
376
|
+
const lastActivity = agent.lastActivity ? new Date(agent.lastActivity).getTime() : now;
|
|
377
|
+
if (now - lastActivity < ZOMBIE_THRESHOLD_MS) continue;
|
|
378
|
+
const pid = handle.proc?.pid;
|
|
379
|
+
if (!pid) continue;
|
|
380
|
+
try {
|
|
381
|
+
process.kill(pid, 0);
|
|
382
|
+
} catch {
|
|
383
|
+
console.warn(`[Groove] Agent ${agent.name} (${agentId}) PID ${pid} no longer alive — force-cleaning handle`);
|
|
384
|
+
if (handle.logStream && !handle.logStream.destroyed) {
|
|
385
|
+
handle.logStream.write(`[${new Date().toISOString()}] Force-cleaned: PID ${pid} no longer alive\n`);
|
|
386
|
+
handle.logStream.end();
|
|
387
|
+
}
|
|
388
|
+
this.handles.delete(agentId);
|
|
389
|
+
this._exitHandled.add(agentId);
|
|
390
|
+
setTimeout(() => this._exitHandled.delete(agentId), 30_000);
|
|
391
|
+
this._stalledAgents.delete(agentId);
|
|
392
|
+
const throttle = this._streamThrottle.get(agentId);
|
|
393
|
+
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
394
|
+
this._streamThrottle.delete(agentId);
|
|
395
|
+
this.peakContextUsage.delete(agentId);
|
|
396
|
+
this.pendingMessages.delete(agentId);
|
|
397
|
+
if (this.daemon.locks) this.daemon.locks.release(agentId);
|
|
398
|
+
registry.update(agentId, { status: 'completed', pid: null });
|
|
399
|
+
this.daemon.broadcast({ type: 'agent:exit', agentId, code: 0, signal: null, status: 'completed' });
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
_handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath) {
|
|
405
|
+
if (this._exitHandled.has(agent.id)) return;
|
|
406
|
+
this._exitHandled.add(agent.id);
|
|
407
|
+
setTimeout(() => this._exitHandled.delete(agent.id), 30_000);
|
|
408
|
+
|
|
409
|
+
const { registry } = this.daemon;
|
|
410
|
+
|
|
411
|
+
if (!logStream.destroyed) {
|
|
412
|
+
logStream.write(`[${new Date().toISOString()}] Process exited: code=${code} signal=${signal}\n`);
|
|
413
|
+
logStream.end();
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
this.handles.delete(agent.id);
|
|
417
|
+
|
|
418
|
+
const throttle = this._streamThrottle.get(agent.id);
|
|
419
|
+
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
420
|
+
this._streamThrottle.delete(agent.id);
|
|
421
|
+
|
|
422
|
+
this.peakContextUsage.delete(agent.id);
|
|
423
|
+
this.pendingMessages.delete(agent.id);
|
|
424
|
+
this._stalledAgents.delete(agent.id);
|
|
425
|
+
|
|
426
|
+
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
427
|
+
|
|
428
|
+
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL'
|
|
429
|
+
? 'killed'
|
|
430
|
+
: code === 0
|
|
431
|
+
? 'completed'
|
|
432
|
+
: 'crashed';
|
|
433
|
+
|
|
434
|
+
const crashError = finalStatus === 'crashed' ? stderrBuf.join('').trim().slice(-500) : null;
|
|
435
|
+
|
|
436
|
+
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
437
|
+
|
|
438
|
+
if (this.daemon.timeline) {
|
|
439
|
+
const agentData = registry.get(agent.id);
|
|
440
|
+
this.daemon.timeline.recordEvent(finalStatus === 'completed' ? 'complete' : finalStatus === 'crashed' ? 'crash' : 'kill', {
|
|
441
|
+
agentId: agent.id, agentName: agent.name, role: agent.role,
|
|
442
|
+
finalTokens: agentData?.tokensUsed || 0, costUsd: agentData?.costUsd || 0,
|
|
443
|
+
exitCode: code,
|
|
444
|
+
});
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
if (this.daemon.trajectoryCapture) {
|
|
448
|
+
try {
|
|
449
|
+
if (finalStatus === 'completed') {
|
|
450
|
+
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
451
|
+
status: 'SUCCESS', exit_code: code, signal,
|
|
452
|
+
});
|
|
453
|
+
} else {
|
|
454
|
+
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
455
|
+
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
456
|
+
);
|
|
457
|
+
}
|
|
458
|
+
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
459
|
+
this.daemon.state.set('training_sessions_captured', count);
|
|
460
|
+
} catch (e) { /* fail silent */ }
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
this.daemon.broadcast({
|
|
464
|
+
type: 'agent:exit',
|
|
465
|
+
agentId: agent.id,
|
|
466
|
+
code,
|
|
467
|
+
signal,
|
|
468
|
+
status: finalStatus,
|
|
469
|
+
error: crashError || undefined,
|
|
470
|
+
});
|
|
471
|
+
|
|
472
|
+
if (this.daemon.integrations) {
|
|
473
|
+
this.daemon.integrations.refreshMcpJson();
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
if (finalStatus === 'completed' && agent.role === 'planner') {
|
|
477
|
+
this._extractRecommendedTeam(agent, logPath);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
if (finalStatus === 'completed') {
|
|
481
|
+
const pending = this.consumePendingMessage(agent.id);
|
|
482
|
+
if (pending) {
|
|
483
|
+
const agentData = registry.get(agent.id);
|
|
484
|
+
if (agentData?.sessionId) {
|
|
485
|
+
this.resume(agent.id, pending.message).catch((err) => {
|
|
486
|
+
console.error(`[Groove] Auto-resume with queued message failed for ${agent.name}: ${err.message}`);
|
|
487
|
+
});
|
|
488
|
+
return;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
494
|
+
const a = registry.get(agent.id);
|
|
495
|
+
const turns = a?.turns || 0;
|
|
496
|
+
const tok = a?.tokensUsed || 0;
|
|
497
|
+
if (turns > 1 || tok >= 100) {
|
|
498
|
+
this.daemon.journalist.requestSynthesis('completion');
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
this._checkPhase2(agent.id);
|
|
503
|
+
|
|
504
|
+
if (agent.teamId) {
|
|
505
|
+
this._checkPreviewReady(agent.teamId);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
if (finalStatus === 'completed') {
|
|
509
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
510
|
+
if (files.length > 0) this._triggerIdleQC(agent);
|
|
511
|
+
this._processHandoffs(agent);
|
|
512
|
+
if (this._rotatingAgents.has(agent.id)) {
|
|
513
|
+
this._rotatingAgents.delete(agent.id);
|
|
514
|
+
} else {
|
|
515
|
+
this._writeCompletionHandoff(agent).catch(err => console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
520
|
+
try {
|
|
521
|
+
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
522
|
+
const signals = events.length >= 6
|
|
523
|
+
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
524
|
+
: null;
|
|
525
|
+
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
526
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
527
|
+
this.daemon.memory.updateSpecialization(agent.id, {
|
|
528
|
+
role: agent.role,
|
|
529
|
+
qualityScore: score,
|
|
530
|
+
filesTouched: files,
|
|
531
|
+
signals,
|
|
532
|
+
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
533
|
+
});
|
|
534
|
+
} catch { /* best-effort */ }
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
_handleResumeProcessExit(agent, code, signal, logStream) {
|
|
539
|
+
if (this._exitHandled.has(agent.id)) return;
|
|
540
|
+
this._exitHandled.add(agent.id);
|
|
541
|
+
setTimeout(() => this._exitHandled.delete(agent.id), 30_000);
|
|
542
|
+
|
|
543
|
+
const { registry } = this.daemon;
|
|
544
|
+
|
|
545
|
+
if (!logStream.destroyed) {
|
|
546
|
+
logStream.write(`[${new Date().toISOString()}] Process exited: code=${code} signal=${signal}\n`);
|
|
547
|
+
logStream.end();
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
this.handles.delete(agent.id);
|
|
551
|
+
this._stalledAgents.delete(agent.id);
|
|
552
|
+
|
|
553
|
+
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
554
|
+
|
|
555
|
+
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL' ? 'killed' : code === 0 ? 'completed' : 'crashed';
|
|
556
|
+
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
557
|
+
|
|
558
|
+
if (this.daemon.trajectoryCapture) {
|
|
559
|
+
try {
|
|
560
|
+
if (finalStatus === 'completed') {
|
|
561
|
+
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
562
|
+
status: 'SUCCESS', exit_code: code, signal,
|
|
563
|
+
});
|
|
564
|
+
} else {
|
|
565
|
+
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
566
|
+
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
567
|
+
);
|
|
568
|
+
}
|
|
569
|
+
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
570
|
+
this.daemon.state.set('training_sessions_captured', count);
|
|
571
|
+
} catch (e) { /* fail silent */ }
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code, signal, status: finalStatus });
|
|
575
|
+
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
576
|
+
const a = registry.get(agent.id);
|
|
577
|
+
const turns = a?.turns || 0;
|
|
578
|
+
const tok = a?.tokensUsed || 0;
|
|
579
|
+
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
if (finalStatus === 'completed' && !this._rotatingAgents.has(agent.id)) {
|
|
583
|
+
this._writeCompletionHandoff(agent).catch(err =>
|
|
584
|
+
console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
585
|
+
}
|
|
586
|
+
if (this._rotatingAgents.has(agent.id)) {
|
|
587
|
+
this._rotatingAgents.delete(agent.id);
|
|
588
|
+
}
|
|
589
|
+
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
590
|
+
try {
|
|
591
|
+
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
592
|
+
const signals = events.length >= 6
|
|
593
|
+
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
594
|
+
: null;
|
|
595
|
+
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
596
|
+
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
597
|
+
this.daemon.memory.updateSpecialization(agent.id, {
|
|
598
|
+
role: agent.role,
|
|
599
|
+
qualityScore: score,
|
|
600
|
+
filesTouched: files,
|
|
601
|
+
signals,
|
|
602
|
+
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
603
|
+
});
|
|
604
|
+
} catch { /* best-effort */ }
|
|
605
|
+
}
|
|
369
606
|
}
|
|
370
607
|
|
|
371
608
|
async spawn(config) {
|
|
@@ -732,6 +969,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
732
969
|
logStream.write(`[${new Date().toISOString()}] Agent loop exited: status=${status}\n`);
|
|
733
970
|
logStream.end();
|
|
734
971
|
this.handles.delete(agent.id);
|
|
972
|
+
this._stalledAgents.delete(agent.id);
|
|
735
973
|
|
|
736
974
|
// Clean up stream throttle so pending timers don't fire for dead agents
|
|
737
975
|
const throttle = this._streamThrottle.get(agent.id);
|
|
@@ -775,8 +1013,9 @@ For normal file edits within your scope, proceed without review.
|
|
|
775
1013
|
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: code || 0, signal, status });
|
|
776
1014
|
if (this.daemon.integrations) this.daemon.integrations.refreshMcpJson();
|
|
777
1015
|
if (status === 'completed' && this.daemon.journalist) {
|
|
778
|
-
const
|
|
779
|
-
const
|
|
1016
|
+
const a = registry.get(agent.id);
|
|
1017
|
+
const turns = a?.turns || 0;
|
|
1018
|
+
const tok = a?.tokensUsed || 0;
|
|
780
1019
|
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
781
1020
|
}
|
|
782
1021
|
this._checkPhase2(agent.id);
|
|
@@ -862,6 +1101,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
862
1101
|
if (!logStream.destroyed) logStream.write(`[${new Date().toISOString()}] Spawn error: ${err.message}\n`);
|
|
863
1102
|
if (!logStream.destroyed) logStream.end();
|
|
864
1103
|
this.handles.delete(agent.id);
|
|
1104
|
+
this._exitHandled.add(agent.id);
|
|
865
1105
|
registry.update(agent.id, { status: 'crashed', pid: null });
|
|
866
1106
|
this.daemon.broadcast({ type: 'agent:exit', agentId: agent.id, code: null, signal: null, status: 'crashed', error: err.message });
|
|
867
1107
|
});
|
|
@@ -906,154 +1146,13 @@ For normal file edits within your scope, proceed without review.
|
|
|
906
1146
|
while (stderrBuf.join('').length > 2048) stderrBuf.shift();
|
|
907
1147
|
});
|
|
908
1148
|
|
|
909
|
-
// Handle process exit
|
|
1149
|
+
// Handle process exit — cleanup extracted to _handleProcessExit with dedup
|
|
910
1150
|
proc.on('exit', (code, signal) => {
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
logStream.end();
|
|
914
|
-
|
|
915
|
-
this.handles.delete(agent.id);
|
|
916
|
-
|
|
917
|
-
// Clean up stream throttle so pending timers don't fire for dead agents
|
|
918
|
-
const throttle = this._streamThrottle.get(agent.id);
|
|
919
|
-
if (throttle?.timer) clearTimeout(throttle.timer);
|
|
920
|
-
this._streamThrottle.delete(agent.id);
|
|
921
|
-
|
|
922
|
-
// Clean up per-agent maps to prevent unbounded growth in long sessions
|
|
923
|
-
this.peakContextUsage.delete(agent.id);
|
|
924
|
-
this.pendingMessages.delete(agent.id);
|
|
925
|
-
this._stalledAgents.delete(agent.id);
|
|
926
|
-
|
|
927
|
-
// Release file-scope locks so they don't persist after agent death
|
|
928
|
-
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
929
|
-
|
|
930
|
-
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL'
|
|
931
|
-
? 'killed'
|
|
932
|
-
: code === 0
|
|
933
|
-
? 'completed'
|
|
934
|
-
: 'crashed';
|
|
935
|
-
|
|
936
|
-
// Capture crash error from stderr for UI display
|
|
937
|
-
const crashError = finalStatus === 'crashed' ? stderrBuf.join('').trim().slice(-500) : null;
|
|
938
|
-
|
|
939
|
-
registry.update(agent.id, { status: finalStatus, pid: null });
|
|
940
|
-
|
|
941
|
-
// Record lifecycle event for timeline
|
|
942
|
-
if (this.daemon.timeline) {
|
|
943
|
-
const agentData = registry.get(agent.id);
|
|
944
|
-
this.daemon.timeline.recordEvent(finalStatus === 'completed' ? 'complete' : finalStatus === 'crashed' ? 'crash' : 'kill', {
|
|
945
|
-
agentId: agent.id, agentName: agent.name, role: agent.role,
|
|
946
|
-
finalTokens: agentData?.tokensUsed || 0, costUsd: agentData?.costUsd || 0,
|
|
947
|
-
exitCode: code,
|
|
948
|
-
});
|
|
949
|
-
}
|
|
950
|
-
|
|
951
|
-
if (this.daemon.trajectoryCapture) {
|
|
952
|
-
try {
|
|
953
|
-
if (finalStatus === 'completed') {
|
|
954
|
-
this.daemon.trajectoryCapture.onAgentComplete(agent.id, {
|
|
955
|
-
status: 'SUCCESS', exit_code: code, signal,
|
|
956
|
-
});
|
|
957
|
-
} else {
|
|
958
|
-
this.daemon.trajectoryCapture.onAgentCrash(agent.id,
|
|
959
|
-
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
960
|
-
);
|
|
961
|
-
}
|
|
962
|
-
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
963
|
-
this.daemon.state.set('training_sessions_captured', count);
|
|
964
|
-
} catch (e) { /* fail silent */ }
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
this.daemon.broadcast({
|
|
968
|
-
type: 'agent:exit',
|
|
969
|
-
agentId: agent.id,
|
|
970
|
-
code,
|
|
971
|
-
signal,
|
|
972
|
-
status: finalStatus,
|
|
973
|
-
error: crashError || undefined,
|
|
974
|
-
});
|
|
975
|
-
|
|
976
|
-
// Refresh MCP config — remove integrations no longer needed by running agents
|
|
977
|
-
if (this.daemon.integrations) {
|
|
978
|
-
this.daemon.integrations.refreshMcpJson();
|
|
979
|
-
}
|
|
980
|
-
|
|
981
|
-
// Extract recommended-team.json from planner text output if it wasn't written to disk.
|
|
982
|
-
// Non-Claude providers (Codex, Gemini) may embed the JSON in text rather than using Write.
|
|
983
|
-
if (finalStatus === 'completed' && agent.role === 'planner') {
|
|
984
|
-
this._extractRecommendedTeam(agent, logPath);
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
// Auto-resume with queued message: if the user sent a message while this
|
|
988
|
-
// CLI agent was still running, resume the session now that it's done.
|
|
989
|
-
if (finalStatus === 'completed') {
|
|
990
|
-
const pending = this.consumePendingMessage(agent.id);
|
|
991
|
-
if (pending) {
|
|
992
|
-
const agentData = registry.get(agent.id);
|
|
993
|
-
if (agentData?.sessionId) {
|
|
994
|
-
this.resume(agent.id, pending.message).catch((err) => {
|
|
995
|
-
console.error(`[Groove] Auto-resume with queued message failed for ${agent.name}: ${err.message}`);
|
|
996
|
-
});
|
|
997
|
-
return;
|
|
998
|
-
}
|
|
999
|
-
}
|
|
1000
|
-
}
|
|
1001
|
-
|
|
1002
|
-
// Trigger journalist synthesis on completion (event-driven, debounced).
|
|
1003
|
-
// Skip trivial sessions — a greeting-only completion (user never gave a task)
|
|
1004
|
-
// has nothing worth synthesizing and wastes a $0.04+ headless claude call.
|
|
1005
|
-
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
1006
|
-
const a = registry.get(agent.id);
|
|
1007
|
-
const turns = a?.turns || 0;
|
|
1008
|
-
const tok = a?.tokensUsed || 0;
|
|
1009
|
-
if (turns > 1 || tok >= 100) {
|
|
1010
|
-
this.daemon.journalist.requestSynthesis('completion');
|
|
1011
|
-
}
|
|
1012
|
-
}
|
|
1013
|
-
|
|
1014
|
-
// Phase 2 auto-spawn: check if all phase 1 agents for a team are done
|
|
1015
|
-
this._checkPhase2(agent.id);
|
|
1016
|
-
|
|
1017
|
-
// Preview launch: when every agent in this team is in a terminal state,
|
|
1018
|
-
// kick off the one-click preview (dev server or static serve) the planner
|
|
1019
|
-
// staged in the team plan. Fires once per team launch.
|
|
1020
|
-
// Fire on any terminal status so crashed QC agents don't block preview
|
|
1021
|
-
// when builders completed successfully.
|
|
1022
|
-
if (agent.teamId) {
|
|
1023
|
-
this._checkPreviewReady(agent.teamId);
|
|
1024
|
-
}
|
|
1025
|
-
|
|
1026
|
-
// Auto-trigger idle QC: if this agent modified files and there's an idle QC
|
|
1027
|
-
// in the same team, activate it to verify the changes
|
|
1028
|
-
if (finalStatus === 'completed') {
|
|
1029
|
-
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
1030
|
-
if (files.length > 0) this._triggerIdleQC(agent);
|
|
1031
|
-
this._processHandoffs(agent);
|
|
1032
|
-
if (this._rotatingAgents.has(agent.id)) {
|
|
1033
|
-
this._rotatingAgents.delete(agent.id);
|
|
1034
|
-
} else {
|
|
1035
|
-
this._writeCompletionHandoff(agent).catch(err => console.error(`[Groove] Completion handoff failed for ${agent.name}:`, err.message));
|
|
1036
|
-
}
|
|
1037
|
-
}
|
|
1151
|
+
this._handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath);
|
|
1152
|
+
});
|
|
1038
1153
|
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
try {
|
|
1042
|
-
const events = this.daemon.classifier?.agentWindows?.[agent.id] || [];
|
|
1043
|
-
const signals = events.length >= 6
|
|
1044
|
-
? this.daemon.adaptive.extractSignals(events, agent.scope)
|
|
1045
|
-
: null;
|
|
1046
|
-
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
1047
|
-
const files = this.daemon.journalist?.getAgentFiles(agent) || [];
|
|
1048
|
-
this.daemon.memory.updateSpecialization(agent.id, {
|
|
1049
|
-
role: agent.role,
|
|
1050
|
-
qualityScore: score,
|
|
1051
|
-
filesTouched: files,
|
|
1052
|
-
signals,
|
|
1053
|
-
threshold: this.daemon.adaptive?.getThreshold(agent.provider, agent.role),
|
|
1054
|
-
});
|
|
1055
|
-
} catch { /* best-effort */ }
|
|
1056
|
-
}
|
|
1154
|
+
proc.on('close', (code, signal) => {
|
|
1155
|
+
this._handleProcessExit(agent, code, signal, logStream, stderrBuf, logPath);
|
|
1057
1156
|
});
|
|
1058
1157
|
|
|
1059
1158
|
proc.on('error', (err) => {
|
|
@@ -1061,6 +1160,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1061
1160
|
logStream.end();
|
|
1062
1161
|
|
|
1063
1162
|
this.handles.delete(agent.id);
|
|
1163
|
+
this._exitHandled.add(agent.id);
|
|
1064
1164
|
if (this.daemon.locks) this.daemon.locks.release(agent.id);
|
|
1065
1165
|
registry.update(agent.id, { status: 'crashed', pid: null });
|
|
1066
1166
|
this.daemon.broadcast({
|
|
@@ -1763,6 +1863,7 @@ For normal file edits within your scope, proceed without review.
|
|
|
1763
1863
|
if (!logStream.destroyed) logStream.write(`[${new Date().toISOString()}] Resume spawn error: ${err.message}\n`);
|
|
1764
1864
|
if (!logStream.destroyed) logStream.end();
|
|
1765
1865
|
this.handles.delete(newAgent.id);
|
|
1866
|
+
this._exitHandled.add(newAgent.id);
|
|
1766
1867
|
registry.update(newAgent.id, { status: 'crashed', pid: null });
|
|
1767
1868
|
this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code: null, signal: null, status: 'crashed', error: err.message });
|
|
1768
1869
|
});
|
|
@@ -1795,73 +1896,18 @@ For normal file edits within your scope, proceed without review.
|
|
|
1795
1896
|
proc.stderr.on('data', (chunk) => { logStream.write(`[stderr] ${chunk}`); });
|
|
1796
1897
|
|
|
1797
1898
|
proc.on('exit', (code, signal) => {
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
this.handles.delete(newAgent.id);
|
|
1801
|
-
this._stalledAgents.delete(newAgent.id);
|
|
1802
|
-
|
|
1803
|
-
// Release file-scope locks so they don't persist after agent death
|
|
1804
|
-
if (this.daemon.locks) this.daemon.locks.release(newAgent.id);
|
|
1805
|
-
|
|
1806
|
-
const finalStatus = signal === 'SIGTERM' || signal === 'SIGKILL' ? 'killed' : code === 0 ? 'completed' : 'crashed';
|
|
1807
|
-
registry.update(newAgent.id, { status: finalStatus, pid: null });
|
|
1808
|
-
|
|
1809
|
-
if (this.daemon.trajectoryCapture) {
|
|
1810
|
-
try {
|
|
1811
|
-
if (finalStatus === 'completed') {
|
|
1812
|
-
this.daemon.trajectoryCapture.onAgentComplete(newAgent.id, {
|
|
1813
|
-
status: 'SUCCESS', exit_code: code, signal,
|
|
1814
|
-
});
|
|
1815
|
-
} else {
|
|
1816
|
-
this.daemon.trajectoryCapture.onAgentCrash(newAgent.id,
|
|
1817
|
-
signal ? 'Killed by signal ' + signal : 'Exit code ' + code
|
|
1818
|
-
);
|
|
1819
|
-
}
|
|
1820
|
-
const count = (this.daemon.state.get('training_sessions_captured') || 0) + 1;
|
|
1821
|
-
this.daemon.state.set('training_sessions_captured', count);
|
|
1822
|
-
} catch (e) { /* fail silent */ }
|
|
1823
|
-
}
|
|
1824
|
-
|
|
1825
|
-
this.daemon.broadcast({ type: 'agent:exit', agentId: newAgent.id, code, signal, status: finalStatus });
|
|
1826
|
-
if (finalStatus === 'completed' && this.daemon.journalist) {
|
|
1827
|
-
const a = registry.get(newAgent.id);
|
|
1828
|
-
const turns = a?.turns || 0;
|
|
1829
|
-
const tok = a?.tokensUsed || 0;
|
|
1830
|
-
if (turns > 1 || tok >= 100) this.daemon.journalist.requestSynthesis('completion');
|
|
1831
|
-
}
|
|
1899
|
+
this._handleResumeProcessExit(newAgent, code, signal, logStream);
|
|
1900
|
+
});
|
|
1832
1901
|
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
if (finalStatus === 'completed' && !this._rotatingAgents.has(newAgent.id)) {
|
|
1836
|
-
this._writeCompletionHandoff(newAgent).catch(err =>
|
|
1837
|
-
console.error(`[Groove] Completion handoff failed for ${newAgent.name}:`, err.message));
|
|
1838
|
-
}
|
|
1839
|
-
if (this._rotatingAgents.has(newAgent.id)) {
|
|
1840
|
-
this._rotatingAgents.delete(newAgent.id);
|
|
1841
|
-
}
|
|
1842
|
-
if (this.daemon.memory && (finalStatus === 'completed' || finalStatus === 'crashed')) {
|
|
1843
|
-
try {
|
|
1844
|
-
const events = this.daemon.classifier?.agentWindows?.[newAgent.id] || [];
|
|
1845
|
-
const signals = events.length >= 6
|
|
1846
|
-
? this.daemon.adaptive.extractSignals(events, newAgent.scope)
|
|
1847
|
-
: null;
|
|
1848
|
-
const score = signals ? this.daemon.adaptive.scoreSession(signals) : null;
|
|
1849
|
-
const files = this.daemon.journalist?.getAgentFiles(newAgent) || [];
|
|
1850
|
-
this.daemon.memory.updateSpecialization(newAgent.id, {
|
|
1851
|
-
role: newAgent.role,
|
|
1852
|
-
qualityScore: score,
|
|
1853
|
-
filesTouched: files,
|
|
1854
|
-
signals,
|
|
1855
|
-
threshold: this.daemon.adaptive?.getThreshold(newAgent.provider, newAgent.role),
|
|
1856
|
-
});
|
|
1857
|
-
} catch { /* best-effort */ }
|
|
1858
|
-
}
|
|
1902
|
+
proc.on('close', (code, signal) => {
|
|
1903
|
+
this._handleResumeProcessExit(newAgent, code, signal, logStream);
|
|
1859
1904
|
});
|
|
1860
1905
|
|
|
1861
1906
|
proc.on('error', (err) => {
|
|
1862
1907
|
logStream.write(`[error] ${err.message}\n`);
|
|
1863
1908
|
logStream.end();
|
|
1864
1909
|
this.handles.delete(newAgent.id);
|
|
1910
|
+
this._exitHandled.add(newAgent.id);
|
|
1865
1911
|
this._stalledAgents.delete(newAgent.id);
|
|
1866
1912
|
registry.update(newAgent.id, { status: 'crashed', pid: null });
|
|
1867
1913
|
});
|
|
@@ -141,28 +141,15 @@ export class Teams {
|
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
/**
|
|
144
|
-
*
|
|
145
|
-
*
|
|
146
|
-
* can wipe accumulated state and keep working without restarting the daemon.
|
|
144
|
+
* Archive a team — kills its agents, moves its directory to archived-teams/,
|
|
145
|
+
* stores metadata.json for later restore.
|
|
147
146
|
*/
|
|
148
|
-
|
|
147
|
+
archive(id) {
|
|
149
148
|
const team = this.teams.get(id);
|
|
150
149
|
if (!team) throw new Error('Team not found');
|
|
151
150
|
|
|
152
|
-
|
|
153
|
-
const agents = this.daemon.registry.getAll().filter((a) => a.teamId === id);
|
|
154
|
-
for (const agent of agents) {
|
|
155
|
-
if (agent.status === 'running' || agent.status === 'starting') {
|
|
156
|
-
try { this.daemon.processes.kill(agent.id); } catch { /* ignore */ }
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
// Remove agents from registry
|
|
161
|
-
for (const agent of agents) {
|
|
162
|
-
this.daemon.registry.remove(agent.id);
|
|
163
|
-
}
|
|
151
|
+
const agents = this._killAndRemoveAgents(id);
|
|
164
152
|
|
|
165
|
-
// Archive the team's working directory instead of deleting it
|
|
166
153
|
if (
|
|
167
154
|
team.workingDir &&
|
|
168
155
|
team.workingDir !== this.daemon.projectDir &&
|
|
@@ -191,6 +178,7 @@ export class Teams {
|
|
|
191
178
|
originalId: team.id,
|
|
192
179
|
deletedAt: new Date().toISOString(),
|
|
193
180
|
agentCount: agents.length,
|
|
181
|
+
originalWorkingDir: team.workingDir,
|
|
194
182
|
};
|
|
195
183
|
writeFileSync(resolve(archivePath, 'metadata.json'), JSON.stringify(metadata, null, 2));
|
|
196
184
|
} catch (err) {
|
|
@@ -198,21 +186,63 @@ export class Teams {
|
|
|
198
186
|
}
|
|
199
187
|
}
|
|
200
188
|
|
|
189
|
+
this._removeTeamAndCleanup(team, id);
|
|
190
|
+
return true;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Delete a team — kills its agents, removes its directory permanently.
|
|
195
|
+
* If permanent is false (default), delegates to archive() instead.
|
|
196
|
+
*/
|
|
197
|
+
delete(id, { permanent = false } = {}) {
|
|
198
|
+
if (!permanent) return this.archive(id);
|
|
199
|
+
|
|
200
|
+
const team = this.teams.get(id);
|
|
201
|
+
if (!team) throw new Error('Team not found');
|
|
202
|
+
|
|
203
|
+
this._killAndRemoveAgents(id);
|
|
204
|
+
|
|
205
|
+
if (
|
|
206
|
+
team.workingDir &&
|
|
207
|
+
team.workingDir !== this.daemon.projectDir &&
|
|
208
|
+
existsSync(team.workingDir)
|
|
209
|
+
) {
|
|
210
|
+
try {
|
|
211
|
+
rmSync(team.workingDir, { recursive: true, force: true });
|
|
212
|
+
} catch (err) {
|
|
213
|
+
console.log(`[Groove:Teams] Failed to delete directory: ${err.message}`);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
this._removeTeamAndCleanup(team, id);
|
|
218
|
+
return true;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
_killAndRemoveAgents(teamId) {
|
|
222
|
+
const agents = this.daemon.registry.getAll().filter((a) => a.teamId === teamId);
|
|
223
|
+
for (const agent of agents) {
|
|
224
|
+
if (agent.status === 'running' || agent.status === 'starting') {
|
|
225
|
+
try { this.daemon.processes.kill(agent.id); } catch { /* ignore */ }
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
for (const agent of agents) {
|
|
229
|
+
this.daemon.registry.remove(agent.id);
|
|
230
|
+
}
|
|
231
|
+
return agents;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
_removeTeamAndCleanup(team, id) {
|
|
201
235
|
this.teams.delete(id);
|
|
202
236
|
this._save();
|
|
203
237
|
this.daemon.broadcast({ type: 'team:deleted', teamId: id });
|
|
204
238
|
|
|
205
|
-
// Always keep a default team available — regenerate one with a clean folder
|
|
206
239
|
if (team.isDefault) {
|
|
207
240
|
this._ensureDefault();
|
|
208
241
|
const fresh = this.getDefault();
|
|
209
242
|
if (fresh) this.daemon.broadcast({ type: 'team:created', team: fresh });
|
|
210
243
|
}
|
|
211
244
|
|
|
212
|
-
// Clean up orphaned logs immediately — don't wait for the 24h GC cycle
|
|
213
245
|
try { this.daemon._gc(); } catch { /* gc should never block deletion */ }
|
|
214
|
-
|
|
215
|
-
return true;
|
|
216
246
|
}
|
|
217
247
|
|
|
218
248
|
listArchived() {
|
|
@@ -243,11 +273,10 @@ export class Teams {
|
|
|
243
273
|
try { meta = JSON.parse(readFileSync(metaPath, 'utf8')); } catch { /* use defaults */ }
|
|
244
274
|
|
|
245
275
|
const name = meta.originalName || archivedId;
|
|
246
|
-
|
|
247
|
-
let workingDir = resolve(this.daemon.projectDir, dirName);
|
|
276
|
+
let workingDir = meta.originalWorkingDir || resolve(this.daemon.projectDir, slugify(name));
|
|
248
277
|
|
|
249
278
|
if (existsSync(workingDir)) {
|
|
250
|
-
workingDir = resolve(this.daemon.projectDir, `${
|
|
279
|
+
workingDir = resolve(this.daemon.projectDir, `${slugify(name)}-${Date.now()}`);
|
|
251
280
|
}
|
|
252
281
|
|
|
253
282
|
try {
|
|
@@ -577,8 +577,9 @@ export class TunnelManager {
|
|
|
577
577
|
}
|
|
578
578
|
} catch (err) {
|
|
579
579
|
if (err.message.includes('Remote daemon failed')) throw err;
|
|
580
|
-
const output = err.stdout?.toString() || err.stderr?.toString() ||
|
|
581
|
-
|
|
580
|
+
const output = err.stdout?.toString() || err.stderr?.toString() || '';
|
|
581
|
+
if (output.includes('__DAEMON_OK__')) return;
|
|
582
|
+
throw new Error(`Failed to start remote daemon: ${(output || err.message).slice(-300)}`);
|
|
582
583
|
}
|
|
583
584
|
}
|
|
584
585
|
|