@agent-relay/wrapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/dist/__fixtures__/claude-outputs.d.ts +49 -0
  2. package/dist/__fixtures__/claude-outputs.d.ts.map +1 -0
  3. package/dist/__fixtures__/claude-outputs.js +443 -0
  4. package/dist/__fixtures__/claude-outputs.js.map +1 -0
  5. package/dist/__fixtures__/codex-outputs.d.ts +9 -0
  6. package/dist/__fixtures__/codex-outputs.d.ts.map +1 -0
  7. package/dist/__fixtures__/codex-outputs.js +94 -0
  8. package/dist/__fixtures__/codex-outputs.js.map +1 -0
  9. package/dist/__fixtures__/gemini-outputs.d.ts +19 -0
  10. package/dist/__fixtures__/gemini-outputs.d.ts.map +1 -0
  11. package/dist/__fixtures__/gemini-outputs.js +144 -0
  12. package/dist/__fixtures__/gemini-outputs.js.map +1 -0
  13. package/dist/__fixtures__/index.d.ts +68 -0
  14. package/dist/__fixtures__/index.d.ts.map +1 -0
  15. package/dist/__fixtures__/index.js +44 -0
  16. package/dist/__fixtures__/index.js.map +1 -0
  17. package/dist/auth-detection.d.ts +49 -0
  18. package/dist/auth-detection.d.ts.map +1 -0
  19. package/dist/auth-detection.js +199 -0
  20. package/dist/auth-detection.js.map +1 -0
  21. package/dist/base-wrapper.d.ts +225 -0
  22. package/dist/base-wrapper.d.ts.map +1 -0
  23. package/dist/base-wrapper.js +572 -0
  24. package/dist/base-wrapper.js.map +1 -0
  25. package/dist/client.d.ts +254 -0
  26. package/dist/client.d.ts.map +1 -0
  27. package/dist/client.js +801 -0
  28. package/dist/client.js.map +1 -0
  29. package/dist/id-generator.d.ts +35 -0
  30. package/dist/id-generator.d.ts.map +1 -0
  31. package/dist/id-generator.js +60 -0
  32. package/dist/id-generator.js.map +1 -0
  33. package/dist/idle-detector.d.ts +110 -0
  34. package/dist/idle-detector.d.ts.map +1 -0
  35. package/dist/idle-detector.js +304 -0
  36. package/dist/idle-detector.js.map +1 -0
  37. package/dist/inbox.d.ts +37 -0
  38. package/dist/inbox.d.ts.map +1 -0
  39. package/dist/inbox.js +73 -0
  40. package/dist/inbox.js.map +1 -0
  41. package/dist/index.d.ts +37 -0
  42. package/dist/index.d.ts.map +1 -0
  43. package/dist/index.js +47 -0
  44. package/dist/index.js.map +1 -0
  45. package/dist/parser.d.ts +236 -0
  46. package/dist/parser.d.ts.map +1 -0
  47. package/dist/parser.js +1238 -0
  48. package/dist/parser.js.map +1 -0
  49. package/dist/prompt-composer.d.ts +67 -0
  50. package/dist/prompt-composer.d.ts.map +1 -0
  51. package/dist/prompt-composer.js +168 -0
  52. package/dist/prompt-composer.js.map +1 -0
  53. package/dist/relay-pty-orchestrator.d.ts +407 -0
  54. package/dist/relay-pty-orchestrator.d.ts.map +1 -0
  55. package/dist/relay-pty-orchestrator.js +1885 -0
  56. package/dist/relay-pty-orchestrator.js.map +1 -0
  57. package/dist/shared.d.ts +201 -0
  58. package/dist/shared.d.ts.map +1 -0
  59. package/dist/shared.js +341 -0
  60. package/dist/shared.js.map +1 -0
  61. package/dist/stuck-detector.d.ts +161 -0
  62. package/dist/stuck-detector.d.ts.map +1 -0
  63. package/dist/stuck-detector.js +402 -0
  64. package/dist/stuck-detector.js.map +1 -0
  65. package/dist/tmux-resolver.d.ts +55 -0
  66. package/dist/tmux-resolver.d.ts.map +1 -0
  67. package/dist/tmux-resolver.js +175 -0
  68. package/dist/tmux-resolver.js.map +1 -0
  69. package/dist/tmux-wrapper.d.ts +345 -0
  70. package/dist/tmux-wrapper.d.ts.map +1 -0
  71. package/dist/tmux-wrapper.js +1747 -0
  72. package/dist/tmux-wrapper.js.map +1 -0
  73. package/dist/trajectory-integration.d.ts +292 -0
  74. package/dist/trajectory-integration.d.ts.map +1 -0
  75. package/dist/trajectory-integration.js +979 -0
  76. package/dist/trajectory-integration.js.map +1 -0
  77. package/dist/wrapper-types.d.ts +41 -0
  78. package/dist/wrapper-types.d.ts.map +1 -0
  79. package/dist/wrapper-types.js +7 -0
  80. package/dist/wrapper-types.js.map +1 -0
  81. package/package.json +63 -0
  82. package/src/__fixtures__/claude-outputs.ts +471 -0
  83. package/src/__fixtures__/codex-outputs.ts +99 -0
  84. package/src/__fixtures__/gemini-outputs.ts +151 -0
  85. package/src/__fixtures__/index.ts +47 -0
  86. package/src/auth-detection.ts +244 -0
  87. package/src/base-wrapper.test.ts +540 -0
  88. package/src/base-wrapper.ts +741 -0
  89. package/src/client.test.ts +262 -0
  90. package/src/client.ts +984 -0
  91. package/src/id-generator.test.ts +71 -0
  92. package/src/id-generator.ts +69 -0
  93. package/src/idle-detector.test.ts +390 -0
  94. package/src/idle-detector.ts +370 -0
  95. package/src/inbox.test.ts +233 -0
  96. package/src/inbox.ts +89 -0
  97. package/src/index.ts +170 -0
  98. package/src/parser.regression.test.ts +251 -0
  99. package/src/parser.test.ts +1359 -0
  100. package/src/parser.ts +1477 -0
  101. package/src/prompt-composer.test.ts +219 -0
  102. package/src/prompt-composer.ts +231 -0
  103. package/src/relay-pty-orchestrator.test.ts +1027 -0
  104. package/src/relay-pty-orchestrator.ts +2270 -0
  105. package/src/shared.test.ts +221 -0
  106. package/src/shared.ts +454 -0
  107. package/src/stuck-detector.test.ts +303 -0
  108. package/src/stuck-detector.ts +511 -0
  109. package/src/tmux-resolver.test.ts +104 -0
  110. package/src/tmux-resolver.ts +207 -0
  111. package/src/tmux-wrapper.test.ts +316 -0
  112. package/src/tmux-wrapper.ts +2010 -0
  113. package/src/trajectory-detection.test.ts +151 -0
  114. package/src/trajectory-integration.ts +1261 -0
  115. package/src/wrapper-types.ts +45 -0
@@ -0,0 +1,2270 @@
1
+ /**
2
+ * RelayPtyOrchestrator - Orchestrates the relay-pty Rust binary
3
+ *
4
+ * This wrapper spawns the relay-pty binary and communicates via Unix socket.
5
+ * It provides the same interface as PtyWrapper but with improved latency
6
+ * (~550ms vs ~1700ms) by using direct PTY writes instead of tmux send-keys.
7
+ *
8
+ * Architecture:
9
+ * 1. Spawn relay-pty --name {agentName} -- {command} as child process
10
+ * 2. Connect to socket for injection:
11
+ * - With WORKSPACE_ID: /tmp/relay/{workspaceId}/sockets/{agentName}.sock
12
+ * - Without: {dataDir}/sockets/{agentName}.sock (dataDir comes from getProjectPaths)
13
+ * 3. Parse stdout for relay commands (relay-pty echoes all output)
14
+ * 4. Translate SEND envelopes → inject messages via socket
15
+ *
16
+ * @see docs/RUST_WRAPPER_DESIGN.md for protocol details
17
+ */
18
+
19
+ import { spawn, ChildProcess } from 'node:child_process';
20
+ import { createConnection, Socket } from 'node:net';
21
+ import { createHash } from 'node:crypto';
22
+ import { join, dirname } from 'node:path';
23
+ import { existsSync, unlinkSync, mkdirSync, symlinkSync, lstatSync, rmSync, watch, readdirSync } from 'node:fs';
24
+ import type { FSWatcher } from 'node:fs';
25
+ import { getProjectPaths } from '@agent-relay/config/project-namespace';
26
+ import { getAgentOutboxTemplate } from '@agent-relay/config/relay-file-writer';
27
+ import { fileURLToPath } from 'node:url';
28
+
29
+ // Get the directory where this module is located
30
+ const __filename = fileURLToPath(import.meta.url);
31
+ const __dirname = dirname(__filename);
32
+ import { BaseWrapper, type BaseWrapperConfig } from './base-wrapper.js';
33
+ import { parseSummaryWithDetails, parseSessionEndFromOutput } from './parser.js';
34
+ import type { SendPayload, SendMeta } from '@agent-relay/protocol/types';
35
+ import {
36
+ type QueuedMessage,
37
+ stripAnsi,
38
+ sleep,
39
+ buildInjectionString,
40
+ verifyInjection,
41
+ INJECTION_CONSTANTS,
42
+ AdaptiveThrottle,
43
+ } from './shared.js';
44
+ import {
45
+ getMemoryMonitor,
46
+ type AgentMemoryMonitor,
47
+ type MemoryAlert,
48
+ formatBytes,
49
+ } from '@agent-relay/resiliency';
50
+
51
+ // ============================================================================
52
+ // Types for relay-pty socket protocol
53
+ // ============================================================================
54
+
55
/**
 * Longest Unix-domain socket path, in bytes and excluding the trailing NUL,
 * that fits in `sockaddr_un.sun_path`.
 *
 * Linux reserves 108 bytes for `sun_path`; macOS and the BSDs reserve only 104,
 * so the usable maximum there is 103. Every other platform keeps the Linux limit.
 */
const MAX_SOCKET_PATH_LENGTH =
  ['darwin', 'freebsd', 'openbsd', 'netbsd'].includes(process.platform) ? 103 : 107;
56
+
57
/**
 * Derives a short, stable identifier from a workspace ID:
 * the first 12 hex digits of its SHA-256 digest.
 */
function hashWorkspaceId(workspaceId: string): string {
  const hasher = createHash('sha256');
  hasher.update(workspaceId);
  const hexDigest = hasher.digest('hex');
  return hexDigest.substring(0, 12);
}
60
+
61
+ /**
62
+ * Request types sent to relay-pty socket.
+ * Every request carries a literal `type` tag; RelayPtyRequest (below) is the
+ * union of the three shapes.
63
+ */
64
+ interface InjectRequest {
65
+ type: 'inject';
66
+ id: string; // presumably echoed back as InjectResultResponse.id - confirm against relay-pty
67
+ from: string; // presumably the sending agent's name - confirm against caller
68
+ body: string; // text to inject
69
+ priority: number; // ordering semantics are not visible in this file - TODO confirm
70
+ }
71
+
72
+ interface StatusRequest { // no payload beyond the tag
73
+ type: 'status';
74
+ }
75
+
76
+ interface ShutdownRequest { // stop() sends this before it closes the socket
77
+ type: 'shutdown';
78
+ }
79
+
80
+ type RelayPtyRequest = InjectRequest | StatusRequest | ShutdownRequest;
81
+
82
+ /**
83
+ * Response types received from relay-pty socket.
+ * Every response carries a literal `type` tag; RelayPtyResponse (below) is the
+ * union of the five shapes. Field names are snake_case as declared here.
84
+ */
85
+ interface InjectResultResponse {
86
+ type: 'inject_result';
87
+ id: string; // presumably the id of the InjectRequest this answers - confirm
88
+ status: 'queued' | 'injecting' | 'delivered' | 'failed';
89
+ timestamp: number; // unit/epoch not shown in this file - TODO confirm
90
+ error?: string; // optional; presumably set when status is 'failed' - confirm
91
+ }
92
+
93
+ interface StatusResponse {
94
+ type: 'status';
95
+ agent_idle: boolean;
96
+ queue_length: number;
97
+ cursor_position?: [number, number]; // tuple order (row/col vs col/row) not shown here - TODO confirm
98
+ last_output_ms: number; // presumably milliseconds since the last output - confirm
99
+ }
100
+
101
+ interface BackpressureResponse {
102
+ type: 'backpressure';
103
+ queue_length: number;
104
+ accept: boolean; // presumably whether relay-pty still accepts new injections - confirm
105
+ }
106
+
107
+ interface ErrorResponse {
108
+ type: 'error';
109
+ message: string;
110
+ }
111
+
112
+ interface ShutdownAckResponse { // presumably the reply to a ShutdownRequest - confirm
113
+ type: 'shutdown_ack';
114
+ }
115
+
116
+ type RelayPtyResponse =
117
+ | InjectResultResponse
118
+ | StatusResponse
119
+ | BackpressureResponse
120
+ | ErrorResponse
121
+ | ShutdownAckResponse;
122
+
123
+ /**
124
+ * Configuration for RelayPtyOrchestrator.
+ * Extends BaseWrapperConfig; the fields below are the relay-pty specific additions.
125
+ */
126
+ export interface RelayPtyOrchestratorConfig extends BaseWrapperConfig {
127
+ /** Path to relay-pty binary. Used only when the file exists; otherwise the orchestrator falls back to its default search locations. */
128
+ relayPtyPath?: string;
129
+ /** Socket connect timeout in ms (default: 5000) */
130
+ socketConnectTimeoutMs?: number;
131
+ /** Socket reconnect attempts (default: 3) */
132
+ socketReconnectAttempts?: number;
133
+ /** Callback when agent exits. Receives the exit code; when the process died from a signal the code is 137 for SIGKILL and 1 otherwise. */
134
+ onExit?: (code: number) => void;
135
+ /** Callback when injection fails after retries */
136
+ onInjectionFailed?: (messageId: string, error: string) => void;
137
+ /** Enable debug logging (default: false). Gates both the debug and the error log lines written by the orchestrator. */
138
+ debug?: boolean;
139
+ /** Force headless mode (use pipes instead of inheriting TTY). When not set, the TTY is inherited only if process.stdin is a TTY. */
140
+ headless?: boolean;
141
+ }
142
+
143
+ /**
144
+ * Events emitted by RelayPtyOrchestrator.
+ * Keys are event names; values are the listener signatures.
145
+ */
146
+ export interface RelayPtyOrchestratorEvents {
147
+ output: (data: string) => void; // headless mode: a stdout chunk, with the unread-message indicator appended when one is due
148
+ exit: (code: number) => void; // emitted from the child's 'exit' handler, just before config.onExit is called
149
+ error: (error: Error) => void; // re-emitted from the child process 'error' event
150
+ 'injection-failed': (event: { messageId: string; from: string; error: string }) => void;
151
+ 'backpressure': (event: { queueLength: number; accept: boolean }) => void;
152
+ 'summary': (event: { agentName: string; summary: unknown }) => void;
153
+ 'session-end': (event: { agentName: string; marker: unknown }) => void;
154
+ }
155
+
156
+ /**
157
+ * Orchestrator for relay-pty Rust binary
158
+ *
159
+ * Extends BaseWrapper to provide the same interface as PtyWrapper
160
+ * but uses the relay-pty binary for improved injection reliability.
161
+ */
162
+ export class RelayPtyOrchestrator extends BaseWrapper {
163
+ protected override config: RelayPtyOrchestratorConfig;
164
+
165
+ // Process management
166
+ private relayPtyProcess?: ChildProcess;
167
+ private socketPath: string;
168
+ private _logPath: string;
169
+ private _outboxPath: string;
170
+ private _legacyOutboxPath: string; // Legacy /tmp/relay-outbox path for backwards compat
171
+ private _canonicalOutboxPath: string; // Canonical ~/.agent-relay/outbox path (agents write here)
172
+ private _workspaceId?: string; // For symlink setup
173
+ private socket?: Socket;
174
+ private socketConnected = false;
175
+
176
+ // Output buffering
177
+ private outputBuffer = '';
178
+ private rawBuffer = '';
179
+ private lastParsedLength = 0;
180
+
181
+ // Interactive mode (show output to terminal)
182
+ private isInteractive = false;
183
+
184
+ // Injection state
185
+ private pendingInjections: Map<string, {
186
+ resolve: (success: boolean) => void;
187
+ reject: (error: Error) => void;
188
+ timeout: NodeJS.Timeout;
189
+ from: string; // For verification pattern matching
190
+ shortId: string; // First 8 chars of messageId for verification
191
+ retryCount: number; // Track retry attempts
192
+ originalBody: string; // Original injection content for retries
193
+ }> = new Map();
194
+ private backpressureActive = false;
195
+ private readyForMessages = false;
196
+
197
+ // Adaptive throttle for message queue - adjusts delay based on success/failure
198
+ private throttle = new AdaptiveThrottle();
199
+
200
+ // Unread message indicator state
201
+ private lastUnreadIndicatorTime = 0;
202
+ private readonly UNREAD_INDICATOR_COOLDOWN_MS = 5000; // Don't spam indicators
203
+
204
+ // Track whether any output has been received from the CLI
205
+ private hasReceivedOutput = false;
206
+
207
+ // Queue monitor for stuck message detection
208
+ private queueMonitorTimer?: NodeJS.Timeout;
209
+ private readonly QUEUE_MONITOR_INTERVAL_MS = 30000; // Check every 30 seconds
210
+
211
+ // Protocol monitor for detecting agent mistakes (e.g., empty AGENT_RELAY_NAME)
212
+ private protocolWatcher?: FSWatcher;
213
+ private protocolReminderCooldown = 0; // Prevent spam
214
+ private readonly PROTOCOL_REMINDER_COOLDOWN_MS = 30000; // 30 second cooldown between reminders
215
+
216
+ // Periodic protocol reminder for long sessions (agents sometimes forget the protocol)
217
+ private periodicReminderTimer?: NodeJS.Timeout;
218
+ private readonly PERIODIC_REMINDER_INTERVAL_MS = 45 * 60 * 1000; // 45 minutes
219
+ private sessionStartTime = 0;
220
+
221
+ // Track if agent is being gracefully stopped (vs crashed)
222
+ private isGracefulStop = false;
223
+
224
+ // Memory/CPU monitoring
225
+ private memoryMonitor: AgentMemoryMonitor;
226
+ private memoryAlertHandler: ((alert: MemoryAlert) => void) | null = null;
227
+
228
+ // Note: sessionEndProcessed and lastSummaryRawContent are inherited from BaseWrapper
229
+
230
/**
 * Resolves every filesystem path the orchestrator needs (socket, outbox,
 * legacy outbox, log file) and decides between interactive and headless mode.
 * Nothing is created on disk here; that happens in start().
 *
 * Workspace mode (WORKSPACE_ID in `config.env` or `process.env`): socket and
 * outbox live under `/tmp/relay/{workspaceId}/`. If that socket path is too
 * long, the workspace ID is replaced by its 12-hex-digit hash.
 * Local mode: socket and outbox live under the project data directory.
 *
 * @param config Orchestrator configuration; `name`, `cwd`, `env` and `headless` are read here.
 * @throws Error when the socket path still exceeds MAX_SOCKET_PATH_LENGTH.
 */
constructor(config: RelayPtyOrchestratorConfig) {
  super(config);
  this.config = config;

  // Get project paths (used for logs and local mode)
  const projectPaths = getProjectPaths(config.cwd);

  // Canonical outbox path - agents ALWAYS write here (transparent symlink in workspace mode),
  // so they never need to know about workspace IDs.
  this._canonicalOutboxPath = join(projectPaths.dataDir, 'outbox', config.name);

  // WORKSPACE_ID can be in process.env or passed via config.env (multi-tenant cloud deployment)
  const workspaceId = config.env?.WORKSPACE_ID || process.env.WORKSPACE_ID;
  this._workspaceId = workspaceId;

  // The kernel limit applies to bytes, not UTF-16 code units, so a name with
  // non-ASCII characters must be measured after UTF-8 encoding.
  const socketPathBytes = (socketPath: string): number => Buffer.byteLength(socketPath, 'utf8');

  if (workspaceId) {
    // Workspace mode: relay-pty watches the actual workspace path;
    // the canonical path is symlinked to it in start().
    const getWorkspacePaths = (id: string) => {
      const workspaceDir = `/tmp/relay/${id}`;
      return {
        workspaceDir,
        socketPath: `${workspaceDir}/sockets/${config.name}.sock`,
        outboxPath: `${workspaceDir}/outbox/${config.name}`,
      };
    };

    let paths = getWorkspacePaths(workspaceId);
    const unhashedLength = socketPathBytes(paths.socketPath);
    if (unhashedLength > MAX_SOCKET_PATH_LENGTH) {
      const hashedWorkspaceId = hashWorkspaceId(workspaceId);
      console.warn(
        `[relay-pty-orchestrator:${config.name}] Socket path too long (${unhashedLength} chars); using hashed workspace id ${hashedWorkspaceId}`
      );
      paths = getWorkspacePaths(hashedWorkspaceId);
    }

    this.socketPath = paths.socketPath;
    // relay-pty watches the actual workspace path
    this._outboxPath = paths.outboxPath;
    // Legacy path for backwards compat (older agents might still use /tmp/relay-outbox)
    this._legacyOutboxPath = `/tmp/relay-outbox/${config.name}`;
  } else {
    // Local mode: use the project data directory directly (no symlinks needed)
    this._outboxPath = this._canonicalOutboxPath;
    this.socketPath = join(projectPaths.dataDir, 'sockets', `${config.name}.sock`);
    // No legacy path needed for local mode
    this._legacyOutboxPath = this._outboxPath;
  }

  // Single final guard for both modes (the hashed workspace path can still be too long
  // when the agent name itself is long).
  const finalLength = socketPathBytes(this.socketPath);
  if (finalLength > MAX_SOCKET_PATH_LENGTH) {
    throw new Error(`Socket path exceeds ${MAX_SOCKET_PATH_LENGTH} chars: ${finalLength}`);
  }

  // Generate log path using project paths
  this._logPath = join(projectPaths.teamDir, 'worker-logs', `${config.name}.log`);

  // Interactive only when stdin is a TTY and headless mode was not forced via config
  this.isInteractive = config.headless ? false : (process.stdin.isTTY === true);

  // Initialize memory monitor (shared singleton, 10s polling interval)
  this.memoryMonitor = getMemoryMonitor({ checkIntervalMs: 10_000 });
}
299
+
300
/**
 * Writes a debug line to stdout, prefixed with this orchestrator's agent name.
 * Does nothing unless `config.debug` is truthy.
 */
private log(message: string): void {
  if (!this.config.debug) return;

  const prefix = `[relay-pty-orchestrator:${this.config.name}]`;
  console.log(`${prefix} ${message}`);
}
308
+
309
/**
 * Writes an error line to stderr, prefixed with this orchestrator's agent name.
 * Like log(), it is gated on `config.debug`: with debug disabled, errors are
 * NOT printed.
 */
private logError(message: string): void {
  if (!this.config.debug) return;

  const prefix = `[relay-pty-orchestrator:${this.config.name}]`;
  console.error(`${prefix} ERROR: ${message}`);
}
317
+
318
+ /**
319
+ * Get the outbox path for this agent (for documentation purposes).
+ * This is the directory handed to relay-pty via --outbox: the
+ * /tmp/relay/{workspaceId}/outbox/{name} path in workspace mode, or the
+ * canonical outbox path in local mode (both are set in the constructor).
320
+ */
321
+ get outboxPath(): string {
322
+ return this._outboxPath;
323
+ }
324
+
325
+ // =========================================================================
326
+ // Abstract method implementations (required by BaseWrapper)
327
+ // =========================================================================
328
+
329
/**
 * Start the relay-pty process and connect to its socket.
 *
 * Steps, in order:
 *  1. create the socket directory and remove a stale socket file;
 *  2. create the outbox directory and, in workspace mode, symlink the canonical
 *     and legacy outbox paths to it;
 *  3. locate the relay-pty binary;
 *  4. connect the relay client (a failure is logged, not thrown);
 *  5. spawn relay-pty and connect to its socket;
 *  6. start the monitors and flush queued messages.
 *
 * Filesystem preparation is best-effort: failures are logged through logError
 * and startup continues. Calling start() while already running is a no-op.
 *
 * @throws Error when the relay-pty binary cannot be found, or when spawning
 *         or the socket connection fails.
 */
override async start(): Promise<void> {
  if (this.running) return;

  this.log(` Starting...`);

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Ensure socket directory exists (for workspace-namespaced paths)
  const socketDir = dirname(this.socketPath);
  try {
    if (!existsSync(socketDir)) {
      mkdirSync(socketDir, { recursive: true });
      this.log(` Created socket directory: ${socketDir}`);
    }
  } catch (err: unknown) {
    this.logError(` Failed to create socket directory: ${describeError(err)}`);
  }

  // Clean up any stale socket from previous crashed process
  try {
    if (existsSync(this.socketPath)) {
      this.log(` Removing stale socket: ${this.socketPath}`);
      unlinkSync(this.socketPath);
    }
  } catch (err: unknown) {
    this.logError(` Failed to clean up socket: ${describeError(err)}`);
  }

  // Set up outbox directory structure
  // - Workspace mode:
  //   1. Create actual workspace path /tmp/relay/{workspaceId}/outbox/{name}
  //   2. Symlink canonical outbox path -> workspace path
  //   3. Symlink /tmp/relay-outbox/{name} -> workspace path (backwards compat)
  // - Local mode: just create the outbox directory directly
  try {
    // Ensure the actual outbox directory exists (where relay-pty watches)
    const outboxDir = dirname(this._outboxPath);
    if (!existsSync(outboxDir)) {
      mkdirSync(outboxDir, { recursive: true });
    }
    if (!existsSync(this._outboxPath)) {
      mkdirSync(this._outboxPath, { recursive: true });
    }
    this.log(` Created outbox directory: ${this._outboxPath}`);

    // In workspace mode, create symlinks so agents can use canonical path
    if (this._workspaceId) {
      // Replaces whatever sits at linkPath with a symlink to targetPath.
      // Failures are logged per link so one bad link does not block the other.
      const createSymlinkSafe = (linkPath: string, targetPath: string): void => {
        try {
          const linkParent = dirname(linkPath);
          if (!existsSync(linkParent)) {
            mkdirSync(linkParent, { recursive: true });
          }

          // lstat (not existsSync) so that a dangling symlink is detected too:
          // existsSync follows the link and reports false for it, which would
          // leave the old link in place and make symlinkSync fail with EEXIST.
          const existing = (() => {
            try {
              return lstatSync(linkPath);
            } catch {
              return undefined; // nothing at linkPath
            }
          })();

          if (existing) {
            try {
              if (existing.isSymbolicLink()) {
                unlinkSync(linkPath);
              } else {
                // Directory or stray regular file: remove it so the link can be created
                rmSync(linkPath, { recursive: true, force: true });
              }
            } catch {
              // Ignore cleanup errors; symlinkSync below reports the real problem
            }
          }

          symlinkSync(targetPath, linkPath);
          this.log(` Created symlink: ${linkPath} -> ${targetPath}`);
        } catch (err: unknown) {
          this.logError(` Failed to set up outbox: ${describeError(err)}`);
        }
      };

      // PRIMARY symlink: agents write to the canonical path, relay-pty watches the workspace path
      if (this._canonicalOutboxPath !== this._outboxPath) {
        createSymlinkSafe(this._canonicalOutboxPath, this._outboxPath);
      }

      // Legacy /tmp/relay-outbox symlink for backwards compat with older agents
      if (this._legacyOutboxPath !== this._outboxPath && this._legacyOutboxPath !== this._canonicalOutboxPath) {
        createSymlinkSafe(this._legacyOutboxPath, this._outboxPath);
      }
    }
  } catch (err: unknown) {
    this.logError(` Failed to set up outbox: ${describeError(err)}`);
  }

  // Find relay-pty binary
  const binaryPath = this.findRelayPtyBinary();
  if (!binaryPath) {
    throw new Error('relay-pty binary not found. Build with: cd relay-pty && cargo build --release');
  }

  this.log(` Using binary: ${binaryPath}`);

  // Connect to relay daemon first; a failure here is logged and startup continues
  try {
    await this.client.connect();
    this.log(` Relay daemon connected`);
  } catch (err: unknown) {
    this.logError(` Relay connect failed: ${describeError(err)}`);
  }

  // Spawn relay-pty process
  await this.spawnRelayPty(binaryPath);

  // Wait for socket to become available and connect
  await this.connectToSocket();

  this.running = true;
  this.readyForMessages = true;
  this.startStuckDetection();
  this.startQueueMonitor();
  this.startProtocolMonitor();
  this.startPeriodicReminder();

  this.log(` Ready for messages`);
  this.log(` Socket connected: ${this.socketConnected}`);
  this.log(` Relay client state: ${this.client.state}`);

  // Process any queued messages
  this.processMessageQueue();
}
450
+
451
/**
 * Stop the relay-pty process gracefully.
 *
 * Marks the stop as graceful (so the exit handler does not broadcast a crash),
 * stops all monitors, unregisters from the memory monitor, asks relay-pty to
 * shut down over the socket, closes the socket, then terminates the child:
 * SIGTERM first, SIGKILL if it has not exited after 5 seconds. Finally the
 * relay client is destroyed and the socket file removed.
 *
 * Calling stop() while not running is a no-op.
 */
override async stop(): Promise<void> {
  if (!this.running) return;
  this.isGracefulStop = true; // Mark as graceful to prevent crash broadcast
  this.running = false;
  this.stopStuckDetection();
  this.stopQueueMonitor();
  this.stopProtocolMonitor();
  this.stopPeriodicReminder();

  // Unregister from memory monitor
  this.memoryMonitor.unregister(this.config.name);
  if (this.memoryAlertHandler) {
    this.memoryMonitor.off('alert', this.memoryAlertHandler);
    this.memoryAlertHandler = null;
  }

  this.log(` Stopping...`);

  // Send shutdown command via socket
  if (this.socket && this.socketConnected) {
    try {
      await this.sendSocketRequest({ type: 'shutdown' });
    } catch {
      // Ignore errors during shutdown
    }
  }

  // Close socket
  this.disconnectSocket();

  // Terminate the child if it is still alive.
  // `ChildProcess.killed` only records that a signal was *sent*, so it cannot be
  // used to tell whether the process has exited; exitCode/signalCode stay null
  // until it actually has.
  const proc = this.relayPtyProcess;
  const stillRunning = (): boolean =>
    proc !== undefined && proc.exitCode === null && proc.signalCode === null;

  if (proc && stillRunning()) {
    await new Promise<void>((resolve) => {
      // Force kill after timeout; resolve right away as the original race did
      const forceKillTimer = setTimeout(() => {
        if (stillRunning()) {
          proc.kill('SIGKILL');
        }
        resolve();
      }, 5000);

      // Register before signalling so a fast exit cannot be missed
      proc.once('exit', () => {
        clearTimeout(forceKillTimer);
        resolve();
      });

      proc.kill('SIGTERM');
    });
  }

  // Cleanup relay client
  this.destroyClient();

  // Clean up socket file
  try {
    if (existsSync(this.socketPath)) {
      unlinkSync(this.socketPath);
      this.log(` Cleaned up socket: ${this.socketPath}`);
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    this.logError(` Failed to clean up socket: ${reason}`);
  }

  this.log(` Stopped`);
}
516
+
517
+ /**
518
+ * Required by BaseWrapper, but intentionally unsupported in this class:
+ * it always rejects with an Error telling the caller to use injectMessage().
+ * The argument is ignored.
519
+ */
520
+ protected async performInjection(_content: string): Promise<void> {
521
+ // This is called by BaseWrapper but we handle injection differently
522
+ // via the socket protocol in processMessageQueue
523
+ throw new Error('Use injectMessage() instead of performInjection()');
524
+ }
525
+
526
/**
 * Returns everything captured in the raw output buffer so far,
 * with ANSI escape sequences removed, ready for parsing.
 */
protected getCleanOutput(): string {
  const captured = this.rawBuffer;
  return stripAnsi(captured);
}
532
+
533
+ // =========================================================================
534
+ // Process management
535
+ // =========================================================================
536
+
537
/**
 * Find the relay-pty binary.
 *
 * Lookup order (first existing path wins):
 *  1. `config.relayPtyPath`, if set and present on disk;
 *  2. `bin/relay-pty` under the project root (three levels above this module);
 *  3. local Rust builds (release, then debug) under the project root, then under the cwd;
 *  4. `/usr/local/bin/relay-pty`;
 *  5. `node_modules/agent-relay/bin` and `node_modules/.bin` under the cwd;
 *  6. every directory listed in the PATH environment variable.
 *
 * Only existence is checked, not executability.
 *
 * @returns The path of the first candidate that exists, or null when none does.
 */
private findRelayPtyBinary(): string | null {
  // Check config path first
  const configuredPath = this.config.relayPtyPath;
  if (configuredPath && existsSync(configuredPath)) {
    return configuredPath;
  }

  // packages/wrapper/dist/ -> packages/wrapper -> packages -> project root
  const projectRoot = join(__dirname, '..', '..', '..');
  const cwd = process.cwd();

  // PATH is searched last so every pre-existing location keeps its priority.
  // ':' is used directly: the other candidates and the socket paths in this
  // file are Unix-style, so a Unix PATH separator is assumed.
  const pathDirs = (process.env.PATH ?? '').split(':').filter((dir) => dir !== '');

  // Check common locations (ordered by priority)
  const candidates = [
    // Primary: installed by postinstall from platform-specific binary
    join(projectRoot, 'bin', 'relay-pty'),
    // Development: local Rust build
    join(projectRoot, 'relay-pty', 'target', 'release', 'relay-pty'),
    join(projectRoot, 'relay-pty', 'target', 'debug', 'relay-pty'),
    // Local build in cwd (for development)
    join(cwd, 'relay-pty', 'target', 'release', 'relay-pty'),
    join(cwd, 'relay-pty', 'target', 'debug', 'relay-pty'),
    // Installed globally
    '/usr/local/bin/relay-pty',
    // In node_modules (when installed as dependency)
    join(cwd, 'node_modules', 'agent-relay', 'bin', 'relay-pty'),
    join(cwd, 'node_modules', '.bin', 'relay-pty'),
    // Anywhere on PATH
    ...pathDirs.map((dir) => join(dir, 'relay-pty')),
  ];

  return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
575
+
576
/**
 * Spawn the relay-pty process and wire up its streams and lifecycle handlers.
 *
 * - Interactive mode: stdin/stdout are inherited by relay-pty; only stderr is piped.
 * - Headless mode: all three streams are piped and stdout is fed to handleOutput().
 *
 * The 'exit' handler unregisters the process from the memory monitor,
 * broadcasts a crash notification when the exit was neither graceful nor
 * clean, then emits 'exit' and calls `config.onExit`.
 *
 * After a 500 ms grace period the process is registered with the memory
 * monitor and resource alerts are forwarded to '@users'.
 *
 * @param binaryPath Path of the relay-pty executable to run.
 * @throws Error when the process could not be spawned, or exited or was
 *         killed within the 500 ms grace period.
 */
private async spawnRelayPty(binaryPath: string): Promise<void> {
  // Get terminal dimensions for proper rendering
  const rows = process.stdout.rows || 24;
  const cols = process.stdout.columns || 80;

  const args = [
    '--name', this.config.name,
    '--socket', this.socketPath,
    '--idle-timeout', String(this.config.idleBeforeInjectMs ?? 500),
    '--json-output', // Enable Rust parsing output
    '--rows', String(rows),
    '--cols', String(cols),
    '--log-level', 'warn', // Only show warnings and errors
    '--log-file', this._logPath, // Enable output logging
    '--outbox', this._outboxPath, // Enable file-based relay messages
    '--', this.config.command,
    ...(this.config.args ?? []),
  ];

  this.log(` Spawning: ${binaryPath} ${args.join(' ')}`);

  // Interactive: relay-pty inherits the terminal directly (more robust than
  // forwarding through pipes); stderr is still piped to capture JSON output.
  const stdio: ('inherit' | 'pipe')[] = this.isInteractive
    ? ['inherit', 'inherit', 'pipe'] // Rust handles terminal directly
    : ['pipe', 'pipe', 'pipe']; // Headless mode - we handle I/O

  const proc = spawn(binaryPath, args, {
    cwd: this.config.cwd ?? process.cwd(),
    env: {
      ...process.env,
      ...this.config.env,
      AGENT_RELAY_NAME: this.config.name,
      AGENT_RELAY_OUTBOX: this._canonicalOutboxPath, // Agents use this for outbox path
      TERM: 'xterm-256color',
    },
    stdio,
  });
  this.relayPtyProcess = proc;

  // Decode through the stream's own decoder so a multi-byte UTF-8 character
  // split across two chunks is not turned into replacement characters.

  // Handle stdout (agent output) - only in headless mode
  if (!this.isInteractive && proc.stdout) {
    proc.stdout.setEncoding('utf8');
    proc.stdout.on('data', (text: string) => {
      this.handleOutput(text);
    });
  }

  // Handle stderr (relay-pty logs and JSON output) - always needed
  if (proc.stderr) {
    proc.stderr.setEncoding('utf8');
    proc.stderr.on('data', (text: string) => {
      this.handleStderr(text);
    });
  }

  // Handle exit
  proc.on('exit', (code, signal) => {
    const exitCode = code ?? (signal === 'SIGKILL' ? 137 : 1);
    this.log(` Process exited: code=${exitCode} signal=${signal}`);
    this.running = false;

    // Get crash context before unregistering from memory monitor
    const crashContext = this.memoryMonitor.getCrashContext(this.config.name);

    // Unregister from memory monitor
    this.memoryMonitor.unregister(this.config.name);
    if (this.memoryAlertHandler) {
      this.memoryMonitor.off('alert', this.memoryAlertHandler);
      this.memoryAlertHandler = null;
    }

    // Broadcast crash notification if not a graceful stop
    if (!this.isGracefulStop && this.client.state === 'READY') {
      const canBroadcast = typeof (this.client as any).broadcast === 'function';
      const isNormalExit = exitCode === 0;
      const wasKilled = signal === 'SIGKILL' || signal === 'SIGTERM' || exitCode === 137;

      if (!isNormalExit) {
        const reason = wasKilled
          ? `killed by signal ${signal || 'SIGKILL'}`
          : `exit code ${exitCode}`;

        // Include crash context analysis if available
        const contextInfo = crashContext.likelyCause !== 'unknown'
          ? ` Likely cause: ${crashContext.likelyCause}. ${crashContext.analysisNotes.slice(0, 2).join('. ')}`
          : '';

        const message = `AGENT CRASHED: "${this.config.name}" has died unexpectedly (${reason}).${contextInfo}`;

        this.log(` Broadcasting crash notification: ${message}`);
        if (canBroadcast) {
          this.client.broadcast(message, 'message', {
            isSystemMessage: true,
            agentName: this.config.name,
            exitCode,
            signal: signal || undefined,
            crashType: 'unexpected_exit',
            crashContext: {
              likelyCause: crashContext.likelyCause,
              peakMemory: crashContext.peakMemory,
              averageMemory: crashContext.averageMemory,
              memoryTrend: crashContext.memoryTrend,
            },
          });
        } else {
          this.log(' broadcast skipped: client.broadcast not available');
        }
      }
    }

    this.emit('exit', exitCode);
    this.config.onExit?.(exitCode);
  });

  // Handle error. The first error is remembered so a failed spawn (e.g. ENOENT,
  // EACCES) can be reported below instead of surfacing later as a socket timeout.
  const startup: { error?: Error } = {};
  proc.on('error', (err) => {
    if (!startup.error) {
      startup.error = err;
    }
    this.logError(` Process error: ${err.message}`);
    this.emit('error', err);
  });

  // Wait for process to start
  await sleep(500);

  if (startup.error) {
    throw new Error(`relay-pty failed to start: ${startup.error.message}`);
  }
  if (proc.exitCode !== null) {
    throw new Error(`relay-pty exited immediately with code ${proc.exitCode}`);
  }
  // A process killed by a signal has exitCode === null but signalCode set
  if (proc.signalCode !== null) {
    throw new Error(`relay-pty was killed immediately by signal ${proc.signalCode}`);
  }

  // Register for memory/CPU monitoring
  if (proc.pid) {
    this.memoryMonitor.register(this.config.name, proc.pid);
    this.memoryMonitor.start(); // Idempotent - starts if not already running

    // Forward resource alerts for this agent to the dashboard only (not other agents)
    this.memoryAlertHandler = (alert: MemoryAlert) => {
      if (alert.agentName !== this.config.name) return;
      if (this.client.state !== 'READY') return;

      const message = alert.type === 'recovered'
        ? `AGENT RECOVERED: "${this.config.name}" memory usage returned to normal.`
        : `AGENT RESOURCE ALERT: "${this.config.name}" - ${alert.message} (${formatBytes(alert.currentRss)})`;

      this.log(` Sending resource alert to users: ${message}`);
      // Send to all human users - agents don't need to know about each other's resource usage
      this.client.sendMessage('@users', message, 'message', {
        isSystemMessage: true,
        agentName: this.config.name,
        alertType: alert.type,
        currentMemory: alert.currentRss,
        threshold: alert.threshold,
        recommendation: alert.recommendation,
      });
    };
    this.memoryMonitor.on('alert', this.memoryAlertHandler);
  }
}
736
+
737
/**
 * Process one chunk of relay-pty stdout (headless mode only).
 * In interactive mode stdout is inherited by the terminal and never reaches this method.
 *
 * @param data Raw stdout text as received from the child process.
 */
private handleOutput(data: string): void {
  // Once the agent has stopped, drop late output (prevents ghost messages after release)
  if (this.running) {
    // Accumulate into both buffers and remember that the agent has produced output
    this.rawBuffer += data;
    this.outputBuffer += data;
    this.hasReceivedOutput = true;

    // Let the idle detector see the fresh output
    this.feedIdleDetectorOutput(data);

    // Append the unread-message notice when one is due
    const unreadNotice = this.formatUnreadIndicator();
    const decorated = unreadNotice ? data + unreadNotice : data;

    // Notify listeners (notice included when present)
    this.emit('output', decorated);

    // Forward to the daemon unless log streaming was explicitly disabled
    const streamingEnabled = this.config.streamLogs !== false;
    if (streamingEnabled && this.client.state === 'READY') {
      this.client.sendLog(decorated);
    }

    // Look for relay commands in the accumulated output
    this.parseRelayCommands();

    // Summary / session-end detection runs on the ANSI-stripped raw buffer
    const plainText = stripAnsi(this.rawBuffer);
    this.checkForSummary(plainText);
    this.checkForSessionEnd(plainText);
  }
}
774
+
775
/**
 * Build the "unread messages" notice appended to agent output.
 *
 * Returns an empty string when the queue is empty or when the previous notice
 * was produced less than UNREAD_INDICATOR_COOLDOWN_MS ago. Producing a notice
 * restarts the cooldown.
 *
 * Example output:
 * ───────────────────────────
 * 📬 2 unread messages (from: Alice, Bob)
 */
private formatUnreadIndicator(): string {
  const pendingCount = this.messageQueue.length;
  if (pendingCount === 0) return '';

  // Cooldown keeps the notice from being repeated on every output chunk
  const timestamp = Date.now();
  const sinceLast = timestamp - this.lastUnreadIndicatorTime;
  if (sinceLast < this.UNREAD_INDICATOR_COOLDOWN_MS) return '';
  this.lastUnreadIndicatorTime = timestamp;

  // De-duplicated sender names; at most three are listed by name
  const uniqueSenders = Array.from(new Set(this.messageQueue.map(queued => queued.from)));
  const shown = uniqueSenders.slice(0, 3).join(', ');
  const overflow = uniqueSenders.length - 3;
  const overflowSuffix = overflow > 0 ? ` +${overflow} more` : '';

  const separator = '─'.repeat(27);
  const noun = pendingCount === 1 ? 'message' : 'messages';

  return `\n${separator}\n📬 ${pendingCount} unread ${noun} (from: ${shown}${overflowSuffix})\n`;
}
806
+
807
/**
 * Handle stderr from relay-pty (logs and JSON parsed commands).
 *
 * relay-pty emits one JSON object per line on stderr when run with --json-output.
 * Lines that are not JSON (or fail to parse) are plain log output and are echoed
 * only in debug mode. Parsed `relay_command` objects are dispatched to
 * handleRustParsedCommand.
 *
 * Parsing and dispatch are guarded separately so that an exception thrown while
 * handling a valid command is reported through logError instead of being
 * mistaken for "not JSON" and hidden outside debug mode.
 *
 * @param data Raw stderr text; may contain several newline-separated lines.
 */
private handleStderr(data: string): void {
  // Skip processing if agent is no longer running (prevents ghost messages after release)
  if (!this.running) {
    return;
  }

  // Plain stderr text is only surfaced in debug mode (avoids TUI pollution)
  const debugEcho = (text: string): void => {
    if (this.config.debug) {
      console.error(`[relay-pty:${this.config.name}] ${text}`);
    }
  };

  for (const line of data.split('\n')) {
    if (!line.trim()) {
      continue;
    }

    if (!line.startsWith('{')) {
      debugEcho(line);
      continue;
    }

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped JSON; shape is checked below
    let parsed: any;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Looked like JSON but was not - treat as ordinary log output
      debugEcho(line);
      continue;
    }

    if (parsed.type !== 'relay_command' || !parsed.kind) {
      continue;
    }

    try {
      // Log parsed commands (only in debug mode to avoid TUI pollution)
      if (parsed.kind === 'spawn' || parsed.kind === 'release') {
        this.log(`Rust parsed [${parsed.kind}]: ${JSON.stringify({
          spawn_name: parsed.spawn_name,
          spawn_cli: parsed.spawn_cli,
          spawn_task: parsed.spawn_task?.substring(0, 50),
          release_name: parsed.release_name,
        })}`);
      } else {
        this.log(`Rust parsed [${parsed.kind}]: ${parsed.from} -> ${parsed.to}`);
      }
      this.handleRustParsedCommand(parsed);
    } catch (err: unknown) {
      // A failure here is a handling error, not a parse error - always report it
      const reason = err instanceof Error ? err.message : String(err);
      this.logError(`Failed to handle relay command [${parsed.kind}]: ${reason}`);
    }
  }
}
851
+
852
/**
 * Handle a parsed command from Rust relay-pty.
 * Rust outputs structured JSON with a 'kind' field: "message", "spawn", "release".
 *
 * - `spawn` requires `spawn_name` and `spawn_cli`; `spawn_task` defaults to ''.
 * - `release` requires `release_name`.
 * - `message` and any other kind are forwarded as a relay message.
 *
 * Commands missing their required fields are logged as errors and dropped.
 *
 * @param parsed The decoded JSON object emitted by relay-pty.
 */
private handleRustParsedCommand(parsed: {
  type: string;
  kind: string;
  from: string;
  to: string;
  body: string;
  raw: string;
  thread?: string;
  spawn_name?: string;
  spawn_cli?: string;
  spawn_task?: string;
  release_name?: string;
}): void {
  switch (parsed.kind) {
    case 'spawn':
      if (parsed.spawn_name && parsed.spawn_cli) {
        this.log(` Spawn detected: ${parsed.spawn_name} (${parsed.spawn_cli})`);
        this.handleSpawnCommand(parsed.spawn_name, parsed.spawn_cli, parsed.spawn_task || '');
      } else {
        // Mirror the release branch: a malformed spawn must not vanish silently
        this.logError(`Missing spawn_name or spawn_cli in parsed command: ${JSON.stringify(parsed)}`);
      }
      break;

    case 'release':
      if (parsed.release_name) {
        this.log(`Release: ${parsed.release_name}`);
        this.handleReleaseCommand(parsed.release_name);
      } else {
        this.logError(`Missing release_name in parsed command: ${JSON.stringify(parsed)}`);
      }
      break;

    case 'message':
    default:
      // Unknown kinds are deliberately treated as plain messages
      this.sendRelayCommand({
        to: parsed.to,
        kind: 'message',
        body: parsed.body,
        thread: parsed.thread,
        raw: parsed.raw,
      });
      break;
  }
}
898
+
899
/**
 * Handle spawn command (from Rust stderr JSON parsing).
 *
 * Each `name`/`cli` pair is processed at most once. The dashboard API is tried
 * first when `dashboardPort` is configured; the `onSpawn` callback is used as a
 * fallback (or directly when no dashboard port is set).
 *
 * Note: We do NOT send the initial task message here because the spawner
 * now handles it after waitUntilCliReady(). Sending it here would cause
 * duplicate task delivery.
 *
 * @param name Name of the agent to spawn.
 * @param cli  CLI the new agent should run.
 * @param task Initial task text, passed through to the spawner.
 */
private handleSpawnCommand(name: string, cli: string, task: string): void {
  const key = `spawn:${name}:${cli}`;
  if (this.processedSpawnCommands.has(key)) {
    this.log(`Spawn already processed: ${key}`);
    return;
  }
  this.processedSpawnCommands.add(key);

  // Log spawn attempts (only in debug mode to avoid TUI pollution)
  this.log(`SPAWN REQUEST: ${name} (${cli})`);
  this.log(` dashboardPort=${this.config.dashboardPort}, onSpawn=${!!this.config.onSpawn}`);

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // Invoke the onSpawn callback, catching both a synchronous throw and an
  // asynchronous rejection so neither can surface as an unhandled error.
  const runCallback = (): void => {
    if (!this.config.onSpawn) {
      return;
    }
    try {
      Promise.resolve(this.config.onSpawn(name, cli, task))
        .catch((e: unknown) => this.logError(`SPAWN CALLBACK FAILED: ${describe(e)}`));
    } catch (e: unknown) {
      this.logError(`SPAWN CALLBACK FAILED: ${describe(e)}`);
    }
  };

  // Try dashboard API first, fall back to callback
  // The spawner will send the task after waitUntilCliReady()
  if (this.config.dashboardPort) {
    this.log(`Calling dashboard API at port ${this.config.dashboardPort}`);
    this.spawnViaDashboardApi(name, cli, task)
      .then(() => {
        this.log(`SPAWN SUCCESS: ${name} via dashboard API`);
      })
      .catch((err: unknown) => {
        this.logError(`SPAWN FAILED: ${name} - ${describe(err)}`);
        if (this.config.onSpawn) {
          this.log(`Falling back to onSpawn callback`);
          runCallback();
        }
      });
  } else if (this.config.onSpawn) {
    this.log(`Using onSpawn callback directly`);
    runCallback();
  } else {
    this.logError(`SPAWN FAILED: No spawn mechanism available! (dashboardPort=${this.config.dashboardPort}, onSpawn=${!!this.config.onSpawn})`);
  }
}
942
+
943
/**
 * Handle release command.
 *
 * Each agent name is processed at most once. The dashboard API is tried first
 * when `dashboardPort` is configured; the `onRelease` callback is used as a
 * fallback (or directly when no dashboard port is set). Callback failures,
 * synchronous or asynchronous, are logged rather than left unhandled, matching
 * how spawn callbacks are treated.
 *
 * @param name Name of the agent to release.
 */
private handleReleaseCommand(name: string): void {
  const key = `release:${name}`;
  if (this.processedReleaseCommands.has(key)) {
    return;
  }
  this.processedReleaseCommands.add(key);

  this.log(` Release: ${name}`);

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // Invoke the onRelease callback (if any) with sync and async errors contained
  const runCallback = (): void => {
    if (!this.config.onRelease) {
      return;
    }
    try {
      Promise.resolve(this.config.onRelease(name))
        .catch((e: unknown) => this.logError(`RELEASE CALLBACK FAILED: ${describe(e)}`));
    } catch (e: unknown) {
      this.logError(`RELEASE CALLBACK FAILED: ${describe(e)}`);
    }
  };

  // Try dashboard API first, fall back to callback
  if (this.config.dashboardPort) {
    this.releaseViaDashboardApi(name).catch((err: unknown) => {
      this.logError(` Dashboard release failed: ${describe(err)}`);
      runCallback();
    });
  } else {
    runCallback();
  }
}
965
+
966
/**
 * Spawn agent via dashboard API.
 *
 * POSTs `{ name, cli, task, spawnerName }` as JSON to the local dashboard's
 * `/api/spawn` endpoint.
 *
 * @param name Name of the agent to spawn.
 * @param cli  CLI the new agent should run.
 * @param task Initial task text.
 * @throws Error when the dashboard is unreachable, responds with a non-2xx
 *         status, or responds with `{ success: false }`.
 */
private async spawnViaDashboardApi(name: string, cli: string, task: string): Promise<void> {
  const url = `http://localhost:${this.config.dashboardPort}/api/spawn`;
  const body = {
    name,
    cli,
    task,
    spawnerName: this.config.name, // Include spawner name so task appears from correct agent
  };

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      const errorBody = await response.text().catch(() => 'unknown');
      throw new Error(`HTTP ${response.status}: ${errorBody}`);
    }

    // A body that is not valid JSON is treated as an empty (successful) result
    const result = await response.json().catch(() => ({})) as { success?: boolean; error?: string };
    if (result.success === false) {
      throw new Error(result.error || 'Spawn failed without specific error');
    }
  } catch (err: unknown) {
    // Enhance error with context. Node's fetch rejects with a generic
    // TypeError('fetch failed') and puts the system error (with its `code`)
    // on `cause`, so look there as well as on the error itself.
    const failure = err as { code?: string; cause?: { code?: string } } | null | undefined;
    const code = failure?.code ?? failure?.cause?.code;
    if (code === 'ECONNREFUSED') {
      throw new Error(`Dashboard not reachable at ${url} (connection refused)`);
    }
    throw err;
  }
}
1002
+
1003
/**
 * Release an agent through the dashboard API by issuing a DELETE to
 * `/api/spawned/<name>` on the local dashboard.
 *
 * @param name Name of the agent to release (URL-encoded before use).
 * @throws Error with the HTTP status and the server-provided `error` text when
 *         the response is not OK.
 */
private async releaseViaDashboardApi(name: string): Promise<void> {
  const endpoint = `http://localhost:${this.config.dashboardPort}/api/spawned/${encodeURIComponent(name)}`;
  const response = await fetch(endpoint, { method: 'DELETE' });

  if (response.ok) {
    this.log(`Released ${name} via dashboard API`);
    return;
  }

  // Non-JSON error bodies fall back to a generic error payload
  const payload = await response.json().catch(() => ({ error: 'Unknown' })) as { error?: string };
  throw new Error(`HTTP ${response.status}: ${payload.error || 'Unknown error'}`);
}
1016
+
1017
+ // =========================================================================
1018
+ // Socket communication
1019
+ // =========================================================================
1020
+
1021
/**
 * Connect to the relay-pty socket, retrying on failure.
 *
 * Uses `socketConnectTimeoutMs` (default 5000) as the per-attempt timeout and
 * `socketReconnectAttempts` (default 3) as the attempt limit. Between failed
 * attempts it waits `1000 * attempt` ms, i.e. the delay grows linearly.
 *
 * @throws Error once every attempt has failed.
 */
private async connectToSocket(): Promise<void> {
  const connectTimeoutMs = this.config.socketConnectTimeoutMs ?? 5000;
  const attemptLimit = this.config.socketReconnectAttempts ?? 3;

  let attempt = 0;
  while (attempt < attemptLimit) {
    attempt += 1;
    try {
      await this.attemptSocketConnection(connectTimeoutMs);
      this.log(` Socket connected`);
      return;
    } catch (err: any) {
      this.logError(` Socket connect attempt ${attempt}/${attemptLimit} failed: ${err.message}`);
      const attemptsRemain = attempt < attemptLimit;
      if (attemptsRemain) {
        // Linearly increasing pause before the next attempt
        await sleep(1000 * attempt);
      }
    }
  }

  throw new Error(`Failed to connect to socket after ${attemptLimit} attempts`);
}
1043
+
1044
/**
 * Attempt a single socket connection.
 *
 * Resolves once the socket is connected. Rejects on a socket error or when the
 * connection has not been established within `timeout` ms; on timeout the
 * half-open socket is destroyed so it cannot connect later or leak.
 *
 * Incoming data is decoded as UTF-8 by the socket (so multi-byte characters
 * split across chunks are reassembled), split into newline-terminated lines,
 * and each non-blank line is passed to handleSocketResponse.
 *
 * @param timeout Maximum time to wait for the connection, in milliseconds.
 */
private attemptSocketConnection(timeout: number): Promise<void> {
  return new Promise((resolve, reject) => {
    const socket = createConnection(this.socketPath);
    this.socket = socket;

    // Only the socket currently stored on the instance may change connection
    // state; handlers of a superseded socket must not clobber a newer one.
    const isCurrent = (): boolean => this.socket === socket;

    const timer = setTimeout(() => {
      // Tear the socket down so a late 'connect' cannot flip state after we rejected
      socket.destroy();
      reject(new Error('Socket connection timeout'));
    }, timeout);

    socket.once('connect', () => {
      clearTimeout(timer);
      if (isCurrent()) {
        this.socketConnected = true;
      }
      resolve();
    });

    socket.on('error', (err) => {
      clearTimeout(timer);
      if (isCurrent()) {
        this.socketConnected = false;
      }
      // No-op if the promise has already settled
      reject(err);
    });

    socket.on('close', () => {
      if (isCurrent()) {
        this.socketConnected = false;
      }
      this.log(` Socket closed`);
    });

    // Handle incoming data (responses)
    socket.setEncoding('utf8');
    let buffer = '';
    socket.on('data', (chunk: Buffer | string) => {
      buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');

      // Process complete lines
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? ''; // Keep incomplete line in buffer

      for (const line of lines) {
        if (line.trim()) {
          this.handleSocketResponse(line);
        }
      }
    });
  });
}
1087
+
1088
/**
 * Tear down the socket connection (if any) and fail every injection that is
 * still waiting for a result.
 */
private disconnectSocket(): void {
  const activeSocket = this.socket;
  if (activeSocket) {
    activeSocket.destroy();
    this.socket = undefined;
    this.socketConnected = false;
  }

  // Nothing can complete without a socket: cancel timers and reject all waiters
  this.pendingInjections.forEach((entry) => {
    clearTimeout(entry.timeout);
    entry.reject(new Error('Socket disconnected'));
  });
  this.pendingInjections.clear();
}
1105
+
1106
/**
 * Write a request to the socket as a single newline-terminated JSON line.
 *
 * The returned promise settles when the write callback fires; it does not wait
 * for relay-pty's response (responses arrive through handleSocketResponse).
 *
 * @param request The request object to serialize and send.
 * @returns A promise that rejects when the socket is not connected or the write fails.
 */
private sendSocketRequest(request: RelayPtyRequest): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const socket = this.socket;
    const usable = socket && this.socketConnected;
    if (!socket || !usable) {
      reject(new Error('Socket not connected'));
      return;
    }

    const wireLine = `${JSON.stringify(request)}\n`;
    socket.write(wireLine, (err) => (err ? reject(err) : resolve()));
  });
}
1126
+
1127
/**
 * Decode one JSON line received from the socket and route it by `type`.
 * Lines that fail to parse are logged as errors; unrecognised types are ignored.
 *
 * @param line A single complete line read from the socket.
 */
private handleSocketResponse(line: string): void {
  try {
    const response = JSON.parse(line) as RelayPtyResponse;

    if (response.type === 'inject_result') {
      // handleInjectResult is async (it performs verification); it is not awaited,
      // so a rejection is caught and logged here.
      this.handleInjectResult(response).catch((err: Error) => {
        this.logError(` Error handling inject result: ${err.message}`);
      });
    } else if (response.type === 'status') {
      // Status responses are typically requested explicitly
      this.log(` Status: idle=${response.agent_idle} queue=${response.queue_length}`);
    } else if (response.type === 'backpressure') {
      this.handleBackpressure(response);
    } else if (response.type === 'error') {
      this.logError(` Socket error: ${response.message}`);
    } else if (response.type === 'shutdown_ack') {
      this.log(` Shutdown acknowledged`);
    }
  } catch (err: any) {
    this.logError(` Failed to parse socket response: ${err.message}`);
  }
}
1164
+
1165
/**
 * Handle injection result response.
 *
 * - `failed`: the pending injection is resolved `false`, counted as failed, and
 *   an `injection-failed` event is emitted.
 * - `delivered`: in interactive mode the status is trusted as-is. Otherwise the
 *   captured output is checked for the message; if it is missing, the injection
 *   is re-sent (prefixed with `[RETRY]`) until INJECTION_CONSTANTS.MAX_RETRIES
 *   attempts have been made.
 * - any other status (queued / injecting) is intermediate and ignored.
 *
 * Verification is asynchronous. While it runs, the pending entry can be settled
 * by something else (its timeout firing, a duplicate response, a disconnect).
 * After every await on a still-registered entry this method therefore re-checks
 * that the entry it started with is still the live one and stops if it is not,
 * so an injection is never settled, counted, or retried twice.
 *
 * @param response The `inject_result` message received from relay-pty.
 */
private async handleInjectResult(response: InjectResultResponse): Promise<void> {
  this.log(` handleInjectResult: id=${response.id.substring(0, 8)} status=${response.status}`);

  const pending = this.pendingInjections.get(response.id);
  if (!pending) {
    // Response for unknown message - might be from a previous session
    this.log(` No pending injection found for ${response.id.substring(0, 8)}`);
    return;
  }

  // True while `pending` is still the registered entry for this message id
  const stillPending = (): boolean => this.pendingInjections.get(response.id) === pending;

  // Settle as success: stop the timer, unregister, update metrics, resolve
  const settleSuccess = (): void => {
    clearTimeout(pending.timeout);
    this.pendingInjections.delete(response.id);
    // Metrics are split by whether a retry was needed (0 = first try)
    if (pending.retryCount === 0) {
      this.injectionMetrics.successFirstTry++;
    } else {
      this.injectionMetrics.successWithRetry++;
    }
    this.injectionMetrics.total++;
    pending.resolve(true);
  };

  // Settle as failure: stop the timer, unregister, update metrics, resolve, notify
  const settleFailure = (error: string): void => {
    clearTimeout(pending.timeout);
    this.pendingInjections.delete(response.id);
    this.injectionMetrics.failed++;
    this.injectionMetrics.total++;
    pending.resolve(false);
    this.emit('injection-failed', {
      messageId: response.id,
      from: pending.from,
      error,
    });
  };

  if (response.status === 'failed') {
    this.logError(` Message ${pending.shortId} failed: ${response.error}`);
    settleFailure(response.error ?? 'Unknown error');
    return;
  }

  if (response.status !== 'delivered') {
    // queued/injecting are intermediate states - wait for final status
    return;
  }

  // Rust says it sent the message + Enter key
  // Now verify the message actually appeared in the terminal output
  this.log(` Message ${pending.shortId} marked delivered by Rust, verifying in output...`);

  // In interactive mode, we can't verify because stdout goes directly to terminal
  // Trust Rust's "delivered" status in this case
  if (this.isInteractive) {
    this.log(` Interactive mode - trusting Rust delivery status`);
    settleSuccess();
    this.log(` Message ${pending.shortId} delivered (interactive mode) ✓`);
    return;
  }

  // Give a brief moment for output to be captured
  await sleep(100);

  // Verify the message pattern appears in captured output
  const verified = await verifyInjection(
    pending.shortId,
    pending.from,
    async () => this.getCleanOutput()
  );

  if (!stillPending()) {
    // Settled elsewhere while we were verifying (timeout, duplicate response, disconnect)
    this.log(` Message ${pending.shortId} was settled during verification, ignoring result`);
    return;
  }

  if (verified) {
    settleSuccess();
    if (pending.retryCount > 0) {
      this.log(` Message ${pending.shortId} succeeded on attempt ${pending.retryCount + 1}`);
    }
    this.log(` Message ${pending.shortId} verified in output ✓`);
    return;
  }

  // Message was "delivered" but not found in output
  // This is the bug case - Enter key may not have been processed
  this.log(` Message ${pending.shortId} NOT found in output after delivery`);

  if (pending.retryCount >= INJECTION_CONSTANTS.MAX_RETRIES - 1) {
    // Max retries exceeded
    this.logError(` Message ${pending.shortId} failed after ${INJECTION_CONSTANTS.MAX_RETRIES} attempts - NOT found in output`);
    settleFailure('Message delivered but not verified in output after max retries');
    return;
  }

  this.log(` Retrying injection (attempt ${pending.retryCount + 2}/${INJECTION_CONSTANTS.MAX_RETRIES})`);
  // From here until the retry is registered this invocation owns the promise:
  // the entry is unregistered and its timer stopped.
  clearTimeout(pending.timeout);
  this.pendingInjections.delete(response.id);

  // Wait before retry with backoff
  await sleep(INJECTION_CONSTANTS.RETRY_BACKOFF_MS * (pending.retryCount + 1));

  // IMPORTANT: Check again if message appeared (late verification / race condition fix)
  // The previous injection may have succeeded but verification timed out
  const lateVerified = await verifyInjection(
    pending.shortId,
    pending.from,
    async () => this.getCleanOutput()
  );
  if (lateVerified) {
    this.log(` Message ${pending.shortId} found on late verification, skipping retry`);
    settleSuccess();
    return;
  }

  // Re-inject by sending another socket request
  // The original promise will be resolved when this retry completes
  // Prepend [RETRY] to help agent notice this is a retry
  const retryBody = pending.originalBody.startsWith('[RETRY]')
    ? pending.originalBody
    : `[RETRY] ${pending.originalBody}`;
  const retryRequest: InjectRequest = {
    type: 'inject',
    id: response.id,
    from: pending.from,
    body: retryBody,
    priority: 1, // Higher priority for retries
  };

  // Create new pending entry with incremented retry count
  const newTimeout = setTimeout(() => {
    this.logError(` Retry timeout for ${pending.shortId}`);
    this.pendingInjections.delete(response.id);
    pending.resolve(false);
  }, 30000);

  this.pendingInjections.set(response.id, {
    ...pending,
    timeout: newTimeout,
    retryCount: pending.retryCount + 1,
    originalBody: retryBody, // Use retry body for subsequent retries
  });

  this.sendSocketRequest(retryRequest).catch((err) => {
    this.logError(` Retry request failed: ${err.message}`);
    clearTimeout(newTimeout);
    this.pendingInjections.delete(response.id);
    pending.resolve(false);
  });
}
1322
+
1323
/**
 * React to a backpressure notification from relay-pty.
 *
 * Backpressure is considered active whenever the response says new messages are
 * not accepted. Only a change of state is logged and emitted as a
 * `backpressure` event; when the state changes to cleared, queue processing
 * is resumed.
 *
 * @param response The backpressure message (`accept` flag and `queue_length`).
 */
private handleBackpressure(response: BackpressureResponse): void {
  const previouslyActive = this.backpressureActive;
  this.backpressureActive = !response.accept;

  const unchanged = this.backpressureActive === previouslyActive;
  if (unchanged) {
    return;
  }

  const stateLabel = this.backpressureActive ? 'ACTIVE' : 'cleared';
  this.log(` Backpressure: ${stateLabel} (queue=${response.queue_length})`);
  this.emit('backpressure', { queueLength: response.queue_length, accept: response.accept });

  // Resume processing if backpressure cleared
  if (!this.backpressureActive) {
    this.processMessageQueue();
  }
}
1340
+
1341
+ // =========================================================================
1342
+ // Message handling
1343
+ // =========================================================================
1344
+
1345
/**
 * Send one queued message to the agent through the relay-pty socket.
 *
 * Registers a pending-injection entry keyed by the message id and sends an
 * `inject` request. The returned promise is settled by whoever handles that
 * entry later; this method itself resolves it `false` when the socket write
 * fails or when no result has arrived after 30 seconds.
 *
 * @param msg        The queued message to inject.
 * @param retryCount Number of earlier attempts for this message (0 = first try).
 * @returns `false` immediately when the socket is not connected; otherwise the
 *          outcome reported through the pending-injection entry.
 */
private async injectMessage(msg: QueuedMessage, retryCount = 0): Promise<boolean> {
  const { messageId, from } = msg;
  const shortId = messageId.substring(0, 8);
  this.log(` === INJECT START: ${shortId} from ${from} (attempt ${retryCount + 1}) ===`);

  if (!this.socket || !this.socketConnected) {
    this.logError(` Cannot inject - socket not connected`);
    return false;
  }

  // Text that will be typed into the agent
  const content = buildInjectionString(msg);
  this.log(` Injection content (${content.length} bytes): ${content.substring(0, 100)}...`);

  const request: InjectRequest = {
    type: 'inject',
    id: messageId,
    from,
    body: content,
    priority: msg.importance ?? 0,
  };

  this.log(` Sending inject request to socket...`);

  return new Promise<boolean>((resolve, reject) => {
    // Give up after 30 seconds without a result (timeout = failure)
    const timeout = setTimeout(() => {
      this.logError(` Inject timeout for ${shortId} after 30s`);
      this.pendingInjections.delete(messageId);
      resolve(false);
    }, 30000);

    // Register before sending so an immediate response finds its entry
    this.pendingInjections.set(messageId, {
      resolve,
      reject,
      timeout,
      from,
      shortId,
      retryCount,
      originalBody: content,
    });

    this.sendSocketRequest(request).then(
      () => {
        this.log(` Socket request sent for ${shortId}`);
      },
      (err) => {
        this.logError(` Socket request failed for ${shortId}: ${err.message}`);
        clearTimeout(timeout);
        this.pendingInjections.delete(messageId);
        resolve(false);
      },
    );
  });
}
1403
+
1404
/** Maximum retries for failed injections before giving up */
private static readonly MAX_INJECTION_RETRIES = 5;
/** Backoff delay multiplier (ms) for retries: delay = BASE * 2^retryCount */
private static readonly INJECTION_RETRY_BASE_MS = 2000;

/**
 * Process queued messages.
 *
 * Injects the message at the head of the queue, one at a time. Does nothing
 * while the agent is not ready, backpressure is active, another injection is
 * in flight, or the queue is empty. While the agent is in editor mode the
 * attempt is postponed by 2 seconds.
 *
 * A failed injection is put back at the head of the queue and retried after an
 * exponential backoff, up to MAX_INJECTION_RETRIES times; after that the
 * failure is reported through `onInjectionFailed` and an ERROR sync ack.
 * When a backoff retry has been scheduled, the usual "process next message"
 * timer is NOT armed as well - otherwise it would fire first and retry the
 * same message without waiting for the backoff.
 */
private async processMessageQueue(): Promise<void> {
  if (!this.readyForMessages || this.backpressureActive || this.isInjecting) {
    return;
  }

  if (this.messageQueue.length === 0) {
    return;
  }

  // Check if agent is in editor mode - delay injection if so
  const idleResult = this.idleDetector.checkIdle();
  if (idleResult.inEditorMode) {
    this.log(` Agent in editor mode, delaying injection (queue: ${this.messageQueue.length})`);
    // Check again in 2 seconds
    setTimeout(() => this.processMessageQueue(), 2000);
    return;
  }

  this.isInjecting = true;

  // The retry counter travels with the message object across re-queues
  const msg = this.messageQueue.shift()! as QueuedMessage & { _retryCount?: number };
  const retryCount = msg._retryCount ?? 0;
  const bodyPreview = msg.body.substring(0, 50).replace(/\n/g, '\\n');
  this.log(` Processing message from ${msg.from}: "${bodyPreview}..." (remaining=${this.messageQueue.length}, retry=${retryCount})`);

  // Set when this run has armed its own backoff timer for a retry
  let retryScheduled = false;

  try {
    const success = await this.injectMessage(msg);

    // Metrics are now tracked in handleInjectResult which knows about retries
    if (success) {
      // Record success for adaptive throttling
      this.throttle.recordSuccess();
      this.sendSyncAck(msg.messageId, msg.sync, 'OK');
      return;
    }

    // Record failure for adaptive throttling
    this.throttle.recordFailure();

    // Re-queue with backoff if under retry limit
    if (retryCount < RelayPtyOrchestrator.MAX_INJECTION_RETRIES) {
      const backoffMs = RelayPtyOrchestrator.INJECTION_RETRY_BASE_MS * Math.pow(2, retryCount);
      this.log(` Re-queuing message ${msg.messageId.substring(0, 8)} for retry ${retryCount + 1} in ${backoffMs}ms`);
      msg._retryCount = retryCount + 1;
      // Add to front of queue for priority
      this.messageQueue.unshift(msg);
      // Wait before retrying; `finally` releases isInjecting and skips its own timer
      retryScheduled = true;
      setTimeout(() => this.processMessageQueue(), backoffMs);
      return;
    }

    this.logError(` Injection failed for message ${msg.messageId.substring(0, 8)} after ${retryCount} retries`);
    this.config.onInjectionFailed?.(msg.messageId, 'Injection failed after max retries');
    this.sendSyncAck(msg.messageId, msg.sync, 'ERROR', { error: 'injection_failed_max_retries' });
  } catch (err: any) {
    this.logError(` Injection error: ${err.message}`);
    // Track metrics for exceptions (not handled by handleInjectResult)
    this.injectionMetrics.failed++;
    this.injectionMetrics.total++;
    // Record failure for adaptive throttling
    this.throttle.recordFailure();
    this.sendSyncAck(msg.messageId, msg.sync, 'ERROR', { error: err.message });
  } finally {
    this.isInjecting = false;

    // Process next message after adaptive delay (faster when healthy, slower under stress)
    if (!retryScheduled && this.messageQueue.length > 0 && !this.backpressureActive) {
      const delay = this.throttle.getDelay();
      setTimeout(() => this.processMessageQueue(), delay);
    }
  }
}
1484
+
1485
/**
 * Extends the base handler: logs the incoming message, delegates to the base
 * implementation, then starts queue processing.
 */
protected override handleIncomingMessage(
  from: string,
  payload: SendPayload,
  messageId: string,
  meta?: SendMeta,
  originalTo?: string
): void {
  const shortId = messageId.substring(0, 8);
  const preview = payload.body?.substring(0, 100) ?? '(no body)';

  this.log(` === MESSAGE RECEIVED: ${shortId} from ${from} ===`);
  this.log(` Body preview: ${preview}...`);

  super.handleIncomingMessage(from, payload, messageId, meta, originalTo);

  this.log(` Queue length after add: ${this.messageQueue.length}`);
  this.processMessageQueue();
}
1501
+
1502
+ // =========================================================================
1503
+ // Queue monitor - Detect and process stuck messages
1504
+ // =========================================================================
1505
+
1506
/**
 * Begin periodic checks for a stuck message queue.
 * Calls checkForStuckQueue every QUEUE_MONITOR_INTERVAL_MS so queued messages
 * are not orphaned while the agent is idle. Does nothing if already running.
 */
private startQueueMonitor(): void {
  if (!this.queueMonitorTimer) {
    const intervalMs = this.QUEUE_MONITOR_INTERVAL_MS;
    this.log(` Starting queue monitor (interval: ${intervalMs}ms)`);

    const timer = setInterval(() => {
      this.checkForStuckQueue();
    }, intervalMs);
    this.queueMonitorTimer = timer;

    // The monitor alone must not keep the process alive
    timer.unref?.();
  }
}
1524
+
1525
/**
 * Cancel the periodic stuck-queue check, if it is running.
 */
private stopQueueMonitor(): void {
  const timer = this.queueMonitorTimer;
  if (!timer) {
    return;
  }

  clearInterval(timer);
  this.queueMonitorTimer = undefined;
  this.log(` Queue monitor stopped`);
}
1535
+
1536
+ // =========================================================================
1537
+ // Protocol monitoring (detect agent mistakes like empty AGENT_RELAY_NAME)
1538
+ // =========================================================================
1539
+
1540
/**
 * Start watching for protocol issues in the outbox directory.
 * Detects common mistakes like:
 * - Empty AGENT_RELAY_NAME causing files at outbox//
 * - Files created directly in outbox/ instead of agent subdirectory
 *
 * Watches the parent of the agent's canonical outbox path and then performs a
 * one-off scan for issues that already exist. Does nothing if the watcher is
 * already running, so repeated calls cannot leak watchers. Failure to set up
 * the watcher is logged and does not prevent the initial scan.
 */
private startProtocolMonitor(): void {
  if (this.protocolWatcher) {
    return; // Already started
  }

  // Get the outbox parent directory (one level up from agent's outbox)
  const parentDir = dirname(this._canonicalOutboxPath);

  // Ensure parent directory exists
  try {
    if (!existsSync(parentDir)) {
      mkdirSync(parentDir, { recursive: true });
    }
  } catch {
    // Ignore - directory may already exist
  }

  try {
    this.protocolWatcher = watch(parentDir, (eventType, filename) => {
      // Only creations/removals with a known name are of interest
      if (eventType !== 'rename' || !filename) {
        return;
      }

      // Check for files directly in parent (not in agent subdirectory)
      // This happens when $AGENT_RELAY_NAME is empty
      const fullPath = join(parentDir, filename);
      try {
        // If it's a file (not directory) directly in the parent, that's an issue
        if (existsSync(fullPath) && !lstatSync(fullPath).isDirectory()) {
          this.handleProtocolIssue('file_in_root', filename);
        }
        // Check for the double-slash symptom of an empty agent name.
        // (An empty filename never reaches this point: it is rejected above.)
        if (filename.startsWith('/')) {
          this.handleProtocolIssue('empty_agent_name', filename);
        }
      } catch {
        // Ignore stat errors
      }
    });

    // Don't keep process alive just for protocol monitoring
    this.protocolWatcher.unref?.();
    this.log(` Protocol monitor started on ${parentDir}`);
  } catch (err: any) {
    // Don't fail start() if protocol monitoring fails
    this.logError(` Failed to start protocol monitor: ${err.message}`);
  }

  // Also do an initial scan for existing issues
  this.scanForProtocolIssues();
}
1591
+
1592
/**
 * Close the outbox watcher, if one is active.
 */
private stopProtocolMonitor(): void {
  const watcher = this.protocolWatcher;
  if (!watcher) {
    return;
  }

  watcher.close();
  this.protocolWatcher = undefined;
  this.log(` Protocol monitor stopped`);
}
1602
+
1603
/**
 * One-shot scan of the outbox parent directory for pre-existing issues.
 *
 * Reports `file_in_root` for the first entry that is not a directory and
 * then stops. All filesystem errors are ignored.
 */
private scanForProtocolIssues(): void {
  const outboxRoot = dirname(this._canonicalOutboxPath);
  try {
    if (!existsSync(outboxRoot)) return;

    // `some` stops at the first entry that was reported (only report once)
    readdirSync(outboxRoot).some((name) => {
      try {
        // Only directories are expected directly under the parent
        const isDir = lstatSync(join(outboxRoot, name)).isDirectory();
        if (!isDir) {
          this.handleProtocolIssue('file_in_root', name);
          return true;
        }
      } catch {
        // Ignore stat errors and move on to the next entry
      }
      return false;
    });
  } catch {
    // Ignore scan errors
  }
}
1628
+
1629
/**
 * React to a detected protocol issue by queueing an explanatory reminder.
 *
 * Calls arriving within `PROTOCOL_REMINDER_COOLDOWN_MS` of the previously
 * accepted one are dropped, regardless of the issue type.
 *
 * @param issue Which kind of mistake was detected
 * @param filename Name of the offending entry, used in the log and reminder text
 */
private handleProtocolIssue(issue: 'empty_agent_name' | 'file_in_root', filename: string): void {
  const timestamp = Date.now();

  // Still inside the cooldown window: stay quiet to avoid spamming
  const sinceLast = timestamp - this.protocolReminderCooldown;
  if (sinceLast < this.PROTOCOL_REMINDER_COOLDOWN_MS) {
    return;
  }
  this.protocolReminderCooldown = timestamp;

  this.log(` Protocol issue detected: ${issue} (${filename})`);

  // One text builder per issue type
  const builders: Record<string, () => string> = {
    empty_agent_name: () => `⚠️ **Protocol Issue Detected**

Your \`$AGENT_RELAY_NAME\` environment variable appears to be empty or unset.
Your agent name is: **${this.config.name}**

Correct outbox path: \`$AGENT_RELAY_OUTBOX\`

When writing relay files, use:
\`\`\`bash
cat > $AGENT_RELAY_OUTBOX/msg << 'EOF'
TO: TargetAgent

Your message here
EOF
\`\`\`
Then output: \`->relay-file:msg\``,

    file_in_root: () => `⚠️ **Protocol Issue Detected**

Found file "${filename}" directly in the outbox directory instead of in your agent's subdirectory.
Your agent name is: **${this.config.name}**

Correct outbox path: \`$AGENT_RELAY_OUTBOX\`

Files should be created in your agent's directory:
\`\`\`bash
cat > $AGENT_RELAY_OUTBOX/${filename} << 'EOF'
TO: TargetAgent

Your message here
EOF
\`\`\``,
  };

  const text = builders[issue]?.();
  if (text) {
    this.injectProtocolReminder(text);
  }
}
1683
+
1684
/**
 * Put a system-authored reminder at the head of the message queue and kick
 * off queue processing when nothing is being injected and the wrapper is
 * ready for messages.
 *
 * @param message Reminder text delivered as the message body
 */
private injectProtocolReminder(message: string): void {
  const reminder: QueuedMessage = {
    from: 'system',
    body: message,
    messageId: `protocol-reminder-${Date.now()}`,
    importance: 2, // Higher priority
  };

  // Front of the queue so it is the next message processed
  this.messageQueue.unshift(reminder);
  this.log(` Queued protocol reminder (queue size: ${this.messageQueue.length})`);

  // Only trigger processing when no injection is in progress and we are ready
  const canProcessNow = !this.isInjecting && this.readyForMessages;
  if (canProcessNow) {
    this.processMessageQueue();
  }
}
1703
+
1704
+ // =========================================================================
1705
+ // Periodic protocol reminders (for long sessions where agents forget protocol)
1706
+ // =========================================================================
1707
+
1708
/**
 * Start sending periodic protocol reminders.
 *
 * Records the session start time and schedules
 * `sendPeriodicProtocolReminder` every `PERIODIC_REMINDER_INTERVAL_MS`.
 * Calling this again restarts the schedule (and the session clock) instead
 * of stacking a second interval.
 */
private startPeriodicReminder(): void {
  // Clear any interval from a previous call; overwriting the handle without
  // clearing it would leave a timer that stopPeriodicReminder() cannot reach.
  if (this.periodicReminderTimer) {
    clearInterval(this.periodicReminderTimer);
    this.periodicReminderTimer = undefined;
  }

  this.sessionStartTime = Date.now();

  this.periodicReminderTimer = setInterval(() => {
    this.sendPeriodicProtocolReminder();
  }, this.PERIODIC_REMINDER_INTERVAL_MS);

  // Don't keep process alive just for reminders
  this.periodicReminderTimer.unref?.();

  const intervalMinutes = Math.round(this.PERIODIC_REMINDER_INTERVAL_MS / 60000);
  this.log(` Periodic protocol reminder started (interval: ${intervalMinutes} minutes)`);
}
1726
+
1727
/**
 * Cancel the periodic reminder interval, if one is scheduled, and forget
 * its handle. Does nothing when no interval is active.
 */
private stopPeriodicReminder(): void {
  const timer = this.periodicReminderTimer;
  if (!timer) {
    return;
  }

  clearInterval(timer);
  this.periodicReminderTimer = undefined;
  this.log(` Periodic protocol reminder stopped`);
}
1737
+
1738
/**
 * Build and queue the periodic protocol reminder.
 *
 * Skipped entirely unless the wrapper is both running and ready for
 * messages. The reminder includes the elapsed session time (rounded to
 * minutes) and the agent's configured name.
 */
private sendPeriodicProtocolReminder(): void {
  // Nothing to do while stopped or not yet ready
  if (!(this.running && this.readyForMessages)) {
    return;
  }

  const elapsedMs = Date.now() - this.sessionStartTime;
  const sessionDurationMinutes = Math.round(elapsedMs / 60000);

  const text = `📋 **Protocol Reminder** (Session: ${sessionDurationMinutes} minutes)

You are **${this.config.name}** in a multi-agent relay system. Here's how to communicate:

**Sending Messages:**
\`\`\`bash
cat > $AGENT_RELAY_OUTBOX/msg << 'EOF'
TO: *

Your message here
EOF
\`\`\`
Then output: \`->relay-file:msg\`

Use \`TO: *\` to broadcast to all agents, or \`TO: AgentName\` for a specific agent.

**Spawning Agents:**
\`\`\`bash
cat > $AGENT_RELAY_OUTBOX/spawn << 'EOF'
KIND: spawn
NAME: WorkerName
CLI: claude

Task description here
EOF
\`\`\`
Then output: \`->relay-file:spawn\`

**Protocol Tips:**
- Always ACK when you receive a task: "ACK: Brief description"
- Send DONE when complete: "DONE: What was accomplished"
- Keep your lead informed of progress

📖 See **AGENTS.md** in the project root for full protocol documentation.`;

  this.log(` Sending periodic protocol reminder (session: ${sessionDurationMinutes}m)`);
  this.injectProtocolReminder(text);
}
1788
+
1789
/**
 * Detect messages that are sitting in the queue while the agent is idle
 * and restart queue processing for them.
 *
 * Processing is only triggered when all of the following hold: the wrapper
 * is ready and running, the queue is non-empty, no injection is in
 * progress, backpressure is off, and the idle detector reports idle with
 * at least 2000ms of silence.
 */
private checkForStuckQueue(): void {
  // Any of these means there is nothing to do, or processing is already under way
  const notApplicable =
    !this.readyForMessages ||
    !this.running ||
    this.messageQueue.length === 0 ||
    this.isInjecting ||
    this.backpressureActive;
  if (notApplicable) {
    return;
  }

  // Agent still working? Let it finish.
  const idle = this.idleDetector.checkIdle({ minSilenceMs: 2000 });
  if (!idle.isIdle) {
    return;
  }

  // Queued messages + idle agent + no injection in flight = stuck queue
  const uniqueSenders = Array.from(new Set(this.messageQueue.map((queued) => queued.from)));
  const confidencePct = (idle.confidence * 100).toFixed(0);
  this.log(` ⚠️ Queue monitor: Found ${this.messageQueue.length} stuck message(s) from [${uniqueSenders.join(', ')}]`);
  this.log(` ⚠️ Agent is idle (confidence: ${confidencePct}%), triggering queue processing`);

  this.processMessageQueue();
}
1834
+
1835
+ // =========================================================================
1836
+ // Output parsing
1837
+ // =========================================================================
1838
+
1839
/**
 * Scan newly received output for relay commands.
 *
 * The ANSI-stripped buffer is compared against the length parsed so far;
 * when it has grown, the new tail plus up to 500 characters of lookback is
 * handed to the fenced, single-line and spawn/release parsers, in that order.
 */
private parseRelayCommands(): void {
  const plain = stripAnsi(this.rawBuffer);
  const alreadyParsed = this.lastParsedLength;

  // Nothing new since the last pass
  if (plain.length <= alreadyParsed) {
    return;
  }

  // Re-read a little of the old content so fenced messages that straddle
  // the previous boundary are still seen in full
  const from = Math.max(0, alreadyParsed - 500);
  const slice = plain.substring(from);

  this.parseFencedMessages(slice);
  this.parseSingleLineMessages(slice);
  this.parseSpawnReleaseCommands(slice);

  this.lastParsedLength = plain.length;
}
1864
+
1865
/**
 * Extract fenced multi-line messages of the form
 * `<prefix><target> [thread:<id>] <<< body >>>` and forward each one as a
 * `message` relay command.
 *
 * Matches with an empty body, or whose target is `spawn` or `release`,
 * are skipped.
 *
 * @param content Text to search
 */
private parseFencedMessages(content: string): void {
  // Escape the prefix so it is matched literally inside the pattern
  const literalPrefix = this.relayPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const fenced = new RegExp(
    `${literalPrefix}(\\S+)(?:\\s+\\[thread:([\\w-]+)\\])?\\s*<<<([\\s\\S]*?)>>>`,
    'g'
  );

  for (const found of content.matchAll(fenced)) {
    const [raw, to, thread, rawBody] = found;
    const body = rawBody.trim();

    const isReserved = to === 'spawn' || to === 'release';
    if (!body || isReserved) {
      continue;
    }

    this.sendRelayCommand({ to, kind: 'message', body, thread, raw });
  }
}
1894
+
1895
/**
 * Extract single-line messages of the form
 * `<prefix><target> [thread:<id>] body` and forward each one as a
 * `message` relay command.
 *
 * Lines containing `<<<` or `>>>` are left to the fenced parser. Matches
 * with an empty body, or whose target is `spawn` or `release`, are skipped.
 *
 * @param content Text to search, one candidate message per line
 */
private parseSingleLineMessages(content: string): void {
  // Split on \n and \r\n alike. In a JS regex `.` never matches a carriage
  // return and `$` (without the `m` flag) only matches at the very end of the
  // input, so a line that kept its trailing \r could never match the pattern.
  const lines = content.split(/\r?\n/);
  const escapedPrefix = this.relayPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`${escapedPrefix}(\\S+)(?:\\s+\\[thread:([\\w-]+)\\])?\\s+(.+)$`);

  for (const line of lines) {
    // Skip fenced messages
    if (line.includes('<<<') || line.includes('>>>')) {
      continue;
    }

    const match = line.match(pattern);
    if (!match) {
      continue;
    }

    const target = match[1];
    const thread = match[2];
    const body = match[3].trim();

    if (!body || target === 'spawn' || target === 'release') {
      continue;
    }

    this.sendRelayCommand({
      to: target,
      kind: 'message',
      body,
      thread,
      raw: line,
    });
  }
}
1931
+
1932
+ // =========================================================================
1933
+ // Summary and session end detection
1934
+ // =========================================================================
1935
+
1936
/**
 * Look for a [[SUMMARY]] block in the given content and emit a `summary`
 * event for it.
 *
 * Nothing is emitted when no valid summary is found, or when the raw
 * content is identical to the last summary that was emitted.
 *
 * @param content Output text to inspect
 */
private checkForSummary(content: string): void {
  const parsed = parseSummaryWithDetails(content);
  const usable = parsed.found && parsed.valid;
  if (!usable) {
    return;
  }

  // Same block as last time: already reported
  if (parsed.rawContent === this.lastSummaryRawContent) {
    return;
  }
  this.lastSummaryRawContent = parsed.rawContent ?? '';

  const payload = {
    agentName: this.config.name,
    summary: parsed.summary,
  };
  this.emit('summary', payload);
}
1955
+
1956
/**
 * Look for a [[SESSION_END]] block in the given content and emit a
 * `session-end` event the first time one is found. Once a marker has been
 * processed, later calls return immediately.
 *
 * @param content Output text to inspect
 */
private checkForSessionEnd(content: string): void {
  if (!this.sessionEndProcessed) {
    const marker = parseSessionEndFromOutput(content);
    if (marker) {
      // Latch before emitting so the event fires only once
      this.sessionEndProcessed = true;
      this.emit('session-end', {
        agentName: this.config.name,
        marker,
      });
    }
  }
}
1975
+
1976
+ // =========================================================================
1977
+ // Public API
1978
+ // =========================================================================
1979
+
1980
/**
 * Send a status request to relay-pty over the socket.
 *
 * The request is only sent while a socket exists and is connected, and
 * send errors are swallowed. This method does not wait for or return the
 * reply, so it currently always resolves to `null`.
 *
 * @returns Always `null`
 */
async queryStatus(): Promise<StatusResponse | null> {
  if (this.socket && this.socketConnected) {
    try {
      // The response is not matched to this request; it is expected to
      // arrive asynchronously through the socket response handling
      await this.sendSocketRequest({ type: 'status' });
    } catch {
      // A failed request yields the same result as an unanswered one
    }
  }
  return null;
}
1997
+
1998
/**
 * Wait for the CLI to be ready to receive messages.
 *
 * Non-interactive mode works in two phases that share one time budget:
 * 1. poll until the CLI has produced its first output, then
 * 2. wait for the idle state via `waitForIdleState`.
 *
 * Interactive mode cannot observe output, so it sleeps 500ms and reports
 * whether the process is still running.
 *
 * @param timeoutMs Maximum time to wait (default: 30s)
 * @param pollMs Polling interval (default: 100ms)
 * @returns true if CLI is ready, false if timeout
 */
async waitUntilCliReady(timeoutMs = 30000, pollMs = 100): Promise<boolean> {
  const startTime = Date.now();
  const deadline = startTime + timeoutMs;
  this.log(` Waiting for CLI to be ready (timeout: ${timeoutMs}ms)`);

  if (this.isInteractive) {
    // stdout is inherited rather than captured here, so hasReceivedOutput
    // never gets set; a short fixed pause stands in for output detection
    this.log(` Interactive mode - trusting process is ready`);
    await sleep(500);
    return this.running;
  }

  // Phase 1: first output means the CLI has started
  for (let now = Date.now(); now < deadline; now = Date.now()) {
    if (this.hasReceivedOutput) {
      this.log(` CLI has started producing output`);
      break;
    }
    await sleep(pollMs);
  }

  if (!this.hasReceivedOutput) {
    this.log(` Timeout waiting for CLI to produce output`);
    return false;
  }

  // Phase 2: spend whatever budget is left waiting for the idle state
  const budgetLeft = deadline - Date.now();
  if (budgetLeft <= 0) {
    return false;
  }

  const idle = await this.waitForIdleState(budgetLeft, pollMs);
  if (!idle.isIdle) {
    this.log(` Timeout waiting for CLI to become idle`);
    return false;
  }

  this.log(` CLI is idle and ready (confidence: ${idle.confidence.toFixed(2)})`);
  return true;
}
2058
+
2059
/**
 * Non-blocking check for whether the CLI has started.
 *
 * In interactive mode output is not captured, so the running flag is used
 * instead; otherwise the answer is whether any output has been received.
 */
hasCliStarted(): boolean {
  return this.isInteractive ? this.running : this.hasReceivedOutput;
}
2071
+
2072
/**
 * Report whether messages can currently be received and injected.
 *
 * True only when all three flags are set: `readyForMessages`, `running`
 * and `socketConnected`. Use this to verify that injection is possible,
 * not merely that the CLI is running.
 */
isReadyForMessages(): boolean {
  const { readyForMessages, running, socketConnected } = this;
  return readyForMessages && running && socketConnected;
}
2085
+
2086
/**
 * Wait until messages can be received and injected.
 *
 * First waits for the CLI via `waitUntilCliReady`, then polls
 * `isReadyForMessages()` until it is true. Both steps draw on the same
 * overall time budget.
 *
 * @param timeoutMs Maximum time to wait (default: 30s)
 * @param pollMs Polling interval (default: 100ms)
 * @returns true if ready, false if timeout
 */
async waitUntilReadyForMessages(timeoutMs = 30000, pollMs = 100): Promise<boolean> {
  const startTime = Date.now();
  const deadline = startTime + timeoutMs;
  this.log(` Waiting for orchestrator to be ready for messages (timeout: ${timeoutMs}ms)`);

  // Step 1: the CLI itself (output + idle)
  if (!(await this.waitUntilCliReady(timeoutMs, pollMs))) {
    this.log(` CLI not ready within timeout`);
    return false;
  }

  // Step 2: the readiness flags, within whatever budget is left
  if (deadline - Date.now() <= 0) {
    this.log(` No time remaining to wait for readyForMessages`);
    return this.isReadyForMessages();
  }

  for (let now = Date.now(); now < deadline; now = Date.now()) {
    if (this.isReadyForMessages()) {
      this.log(` Orchestrator is ready for messages`);
      return true;
    }
    await sleep(pollMs);
  }

  this.log(` Timeout waiting for orchestrator to be ready for messages`);
  return false;
}
2125
+
2126
/**
 * Return the current contents of the raw output buffer.
 */
getRawOutput(): string {
  const { rawBuffer } = this;
  return rawBuffer;
}
2132
+
2133
/**
 * Report the current value of the backpressure flag.
 */
isBackpressureActive(): boolean {
  const { backpressureActive } = this;
  return backpressureActive;
}
2139
+
2140
/**
 * Return the socket path held by this instance.
 */
getSocketPath(): string {
  const { socketPath } = this;
  return socketPath;
}
2146
+
2147
/**
 * PID of the relay-pty process, or `undefined` when there is no process.
 */
get pid(): number | undefined {
  const proc = this.relayPtyProcess;
  return proc?.pid;
}
2153
+
2154
/**
 * Log file path stored in `_logPath`; `undefined` when none is set.
 */
get logPath(): string | undefined {
  const { _logPath } = this;
  return _logPath;
}
2160
+
2161
/**
 * Forcefully terminate the relay-pty process and tear down connections.
 *
 * The stop is flagged as intentional first, then SIGKILL is sent unless the
 * process handle is missing or already marked killed. Afterwards the
 * running flag is cleared, the socket disconnected and the client destroyed.
 */
async kill(): Promise<void> {
  // Mark as intentional to prevent crash broadcast
  this.isGracefulStop = true;

  const proc = this.relayPtyProcess;
  const needsSignal = Boolean(proc) && !proc.killed;
  if (needsSignal) {
    proc.kill('SIGKILL');
  }

  this.running = false;
  this.disconnectSocket();
  this.destroyClient();
}
2173
+
2174
/**
 * Return the raw output split into lines (for compatibility with PtyWrapper).
 *
 * @param limit When positive, only the last `limit` lines are returned;
 *   otherwise all lines are returned
 */
getOutput(limit?: number): string[] {
  const allLines = this.rawBuffer.split('\n');
  const wantsTail = (limit ?? 0) > 0;
  return wantsTail ? allLines.slice(-limit) : allLines;
}
2185
+
2186
/**
 * Write data directly to the process stdin.
 *
 * The returned promise settles from the stream's write callback: it
 * resolves once the chunk has been handled and rejects if the write fails,
 * so failures are no longer silently dropped.
 *
 * @param data Data to write; strings are converted to a Buffer
 * @throws Error('Process not running') when there is no process or no stdin
 */
async write(data: string | Buffer): Promise<void> {
  const stdin = this.relayPtyProcess?.stdin;
  if (!stdin) {
    throw new Error('Process not running');
  }

  const buffer = typeof data === 'string' ? Buffer.from(data) : data;

  // Wrap the callback form of write() so callers see both completion and errors
  await new Promise<void>((resolve, reject) => {
    stdin.write(buffer, (err?: Error | null) => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
}
2197
+
2198
/**
 * Inject a task using the socket-based injection system with verification.
 * This is the preferred method for spawned agent task delivery.
 *
 * Without a connected socket the task is written to stdin instead, followed
 * by a newline. With a socket, an inject request is sent and tracked in
 * `pendingInjections`; the promise resolves `false` if the request cannot be
 * sent or is still pending after 30 seconds. The pending entry carries the
 * `resolve` callback, so the response handling elsewhere in the class is
 * expected to settle the successful case.
 *
 * @param task The task text to inject
 * @param from The sender name (default: "spawner")
 * @returns Promise resolving to true if injection succeeded, false otherwise
 */
async injectTask(task: string, from = 'spawner'): Promise<boolean> {
  if (!this.socket || !this.socketConnected) {
    this.log(` Socket not connected for task injection, falling back to stdin write`);
    // Fallback to direct write if socket not available
    try {
      await this.write(task + '\n');
      return true;
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logError(` Stdin write fallback failed: ${reason}`);
      return false;
    }
  }

  const suffix = Math.random().toString(36).substring(2, 8);
  const messageId = `task-${Date.now()}-${suffix}`;
  // The first 8 characters of messageId are "task-" plus the leading
  // timestamp digits, which are the same for every task; use the random
  // suffix so log lines for different injections can be told apart.
  const shortId = `task-${suffix}`;
  const timeoutMs = 30000;

  this.log(` Injecting task via socket: ${shortId}`);

  // Create request
  const request: InjectRequest = {
    type: 'inject',
    id: messageId,
    from,
    body: task,
    priority: 0, // High priority for initial task
  };

  // Send with timeout and verification
  return new Promise<boolean>((resolve) => {
    const timeout = setTimeout(() => {
      this.logError(` Task inject timeout for ${shortId} after ${timeoutMs / 1000}s`);
      this.pendingInjections.delete(messageId);
      resolve(false);
    }, timeoutMs);

    // Registered before sending so a fast response always finds its entry
    this.pendingInjections.set(messageId, {
      resolve,
      reject: () => resolve(false),
      timeout,
      from,
      shortId,
      retryCount: 0,
      originalBody: task,
    });

    this.sendSocketRequest(request)
      .then(() => {
        this.log(` Task inject request sent: ${shortId}`);
      })
      .catch((err: unknown) => {
        const reason = err instanceof Error ? err.message : String(err);
        this.logError(` Task inject socket request failed: ${reason}`);
        clearTimeout(timeout);
        this.pendingInjections.delete(messageId);
        resolve(false);
      });
  });
}
2263
+
2264
/**
 * Return the agent ID held by this instance, or `undefined` when none is set.
 */
getAgentId(): string | undefined {
  const { agentId } = this;
  return agentId;
}
2270
+ }