opencode-swarm-plugin 0.6.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/agent-mail.ts CHANGED
@@ -27,6 +27,25 @@ const AGENT_MAIL_URL = "http://127.0.0.1:8765";
 const DEFAULT_TTL_SECONDS = 3600; // 1 hour
 const MAX_INBOX_LIMIT = 5; // HARD CAP - never exceed this

+// Retry configuration
+const RETRY_CONFIG = {
+  maxRetries: parseInt(process.env.OPENCODE_AGENT_MAIL_MAX_RETRIES || "3"),
+  baseDelayMs: parseInt(process.env.OPENCODE_AGENT_MAIL_BASE_DELAY_MS || "100"),
+  maxDelayMs: parseInt(process.env.OPENCODE_AGENT_MAIL_MAX_DELAY_MS || "5000"),
+  timeoutMs: parseInt(process.env.OPENCODE_AGENT_MAIL_TIMEOUT_MS || "10000"),
+  jitterPercent: 20,
+};
+
+// Server recovery configuration
+const RECOVERY_CONFIG = {
+  /** Max consecutive failures before attempting restart */
+  failureThreshold: 2,
+  /** Cooldown between restart attempts (ms) */
+  restartCooldownMs: 30000,
+  /** Whether auto-restart is enabled */
+  enabled: process.env.OPENCODE_AGENT_MAIL_AUTO_RESTART !== "false",
+};
+
 // ============================================================================
 // Types
 // ============================================================================
@@ -176,6 +195,245 @@ export class RateLimitExceededError extends Error {
   }
 }

+// ============================================================================
+// Server Recovery
+// ============================================================================
+
+/** Track consecutive failures for recovery decisions */
+let consecutiveFailures = 0;
+let lastRestartAttempt = 0;
+let isRestarting = false;
+
+/**
+ * Check if the server is responding to health checks
+ */
+async function isServerHealthy(): Promise<boolean> {
+  try {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 3000);
+
+    const response = await fetch(`${AGENT_MAIL_URL}/health/liveness`, {
+      signal: controller.signal,
+    });
+    clearTimeout(timeout);
+
+    return response.ok;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Test if the server can handle a basic MCP call
+ * This catches cases where health is OK but MCP is broken
+ */
+async function isServerFunctional(): Promise<boolean> {
+  try {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 5000);
+
+    const response = await fetch(`${AGENT_MAIL_URL}/mcp/`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        jsonrpc: "2.0",
+        id: "health-test",
+        method: "tools/call",
+        params: { name: "health_check", arguments: {} },
+      }),
+      signal: controller.signal,
+    });
+    clearTimeout(timeout);
+
+    if (!response.ok) return false;
+
+    const json = (await response.json()) as { result?: { isError?: boolean } };
+    // Check if it's an error response
+    if (json.result?.isError) return false;
+
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Attempt to restart the Agent Mail server
+ *
+ * Finds the running process, kills it, and starts a new one.
+ * Returns true if restart was successful.
+ */
+async function restartServer(): Promise<boolean> {
+  if (!RECOVERY_CONFIG.enabled) {
+    console.warn(
+      "[agent-mail] Auto-restart disabled via OPENCODE_AGENT_MAIL_AUTO_RESTART=false",
+    );
+    return false;
+  }
+
+  // Prevent concurrent restart attempts
+  if (isRestarting) {
+    console.warn("[agent-mail] Restart already in progress");
+    return false;
+  }
+
+  // Respect cooldown
+  const now = Date.now();
+  if (now - lastRestartAttempt < RECOVERY_CONFIG.restartCooldownMs) {
+    const waitSec = Math.ceil(
+      (RECOVERY_CONFIG.restartCooldownMs - (now - lastRestartAttempt)) / 1000,
+    );
+    console.warn(`[agent-mail] Restart cooldown active, wait ${waitSec}s`);
+    return false;
+  }
+
+  isRestarting = true;
+  lastRestartAttempt = now;
+
+  try {
+    console.warn("[agent-mail] Attempting server restart...");
+
+    // Find the agent-mail process
+    const findProc = Bun.spawn(["lsof", "-i", ":8765", "-t"], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    const findOutput = await new Response(findProc.stdout).text();
+    await findProc.exited;
+
+    const pids = findOutput.trim().split("\n").filter(Boolean);
+
+    if (pids.length > 0) {
+      // Kill existing process(es)
+      for (const pid of pids) {
+        console.warn(`[agent-mail] Killing process ${pid}`);
+        Bun.spawn(["kill", pid]);
+      }
+
+      // Wait for process to die
+      await new Promise((resolve) => setTimeout(resolve, 2000));
+    }
+
+    // Find the agent-mail installation directory
+    // Try common locations
+    const possiblePaths = [
+      `${process.env.HOME}/Code/Dicklesworthstone/mcp_agent_mail`,
+      `${process.env.HOME}/.local/share/agent-mail`,
+      `${process.env.HOME}/mcp_agent_mail`,
+    ];
+
+    let serverDir: string | null = null;
+    for (const path of possiblePaths) {
+      try {
+        const stat = await Bun.file(`${path}/pyproject.toml`).exists();
+        if (stat) {
+          serverDir = path;
+          break;
+        }
+      } catch {
+        continue;
+      }
+    }
+
+    if (!serverDir) {
+      console.error(
+        "[agent-mail] Could not find agent-mail installation directory",
+      );
+      return false;
+    }
+
+    // Start the server
+    console.warn(`[agent-mail] Starting server from ${serverDir}`);
+    Bun.spawn(["python", "-m", "mcp_agent_mail.cli", "serve-http"], {
+      cwd: serverDir,
+      stdout: "ignore",
+      stderr: "ignore",
+      // Detach so it survives our process
+      detached: true,
+    });
+
+    // Wait for server to come up
+    for (let i = 0; i < 10; i++) {
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+      if (await isServerHealthy()) {
+        console.warn("[agent-mail] Server restarted successfully");
+        consecutiveFailures = 0;
+        return true;
+      }
+    }

+    console.error("[agent-mail] Server failed to start after restart");
+    return false;
+  } catch (error) {
+    console.error("[agent-mail] Restart failed:", error);
+    return false;
+  } finally {
+    isRestarting = false;
+  }
+}
+
+/**
+ * Reset recovery state (for testing)
+ */
+export function resetRecoveryState(): void {
+  consecutiveFailures = 0;
+  lastRestartAttempt = 0;
+  isRestarting = false;
+}
+
+// ============================================================================
+// Retry Logic
+// ============================================================================
+
+/**
+ * Calculate delay with exponential backoff + jitter
+ */
+function calculateBackoffDelay(attempt: number): number {
+  if (attempt === 0) return 0;
+
+  const exponentialDelay = RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt - 1);
+  const cappedDelay = Math.min(exponentialDelay, RETRY_CONFIG.maxDelayMs);
+
+  // Add jitter (±jitterPercent%)
+  const jitterRange = cappedDelay * (RETRY_CONFIG.jitterPercent / 100);
+  const jitter = (Math.random() * 2 - 1) * jitterRange;
+
+  return Math.round(cappedDelay + jitter);
+}
+
+/**
+ * Check if an error is retryable (transient network/server issues)
+ */
+function isRetryableError(error: unknown): boolean {
+  if (error instanceof Error) {
+    const message = error.message.toLowerCase();
+
+    // Network errors
+    if (
+      message.includes("econnrefused") ||
+      message.includes("econnreset") ||
+      message.includes("socket") ||
+      message.includes("network") ||
+      message.includes("timeout") ||
+      message.includes("aborted")
+    ) {
+      return true;
+    }
+
+    // Server errors (but not 500 which is usually a logic bug)
+    if (error instanceof AgentMailError && error.code) {
+      return error.code === 502 || error.code === 503 || error.code === 504;
+    }
+
+    // Generic "unexpected error" from server - might be recoverable with restart
+    if (message.includes("unexpected error")) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // ============================================================================
 // MCP Client
 // ============================================================================
@@ -277,7 +535,83 @@ export async function resetRateLimiterCache(): Promise<void> {
 }

 /**
- * Call an Agent Mail MCP tool
+ * Execute a single MCP call (no retry)
+ */
+async function mcpCallOnce<T>(
+  toolName: string,
+  args: Record<string, unknown>,
+): Promise<T> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), RETRY_CONFIG.timeoutMs);
+
+  try {
+    const response = await fetch(`${AGENT_MAIL_URL}/mcp/`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        jsonrpc: "2.0",
+        id: crypto.randomUUID(),
+        method: "tools/call",
+        params: { name: toolName, arguments: args },
+      }),
+      signal: controller.signal,
+    });
+
+    clearTimeout(timeout);
+
+    if (!response.ok) {
+      throw new AgentMailError(
+        `HTTP ${response.status}: ${response.statusText}`,
+        toolName,
+        response.status,
+      );
+    }
+
+    const json = (await response.json()) as MCPResponse<MCPToolResult<T> | T>;
+
+    if (json.error) {
+      throw new AgentMailError(
+        json.error.message,
+        toolName,
+        json.error.code,
+        json.error.data,
+      );
+    }
+
+    const result = json.result;
+
+    // Handle wrapped response format (real Agent Mail server)
+    // Check for isError first (error responses don't have structuredContent)
+    if (result && typeof result === "object") {
+      const wrapped = result as MCPToolResult<T>;
+
+      // Check for error response (has isError: true but no structuredContent)
+      if (wrapped.isError) {
+        const errorText = wrapped.content?.[0]?.text || "Unknown error";
+        throw new AgentMailError(errorText, toolName);
+      }
+
+      // Check for success response with structuredContent
+      if ("structuredContent" in wrapped) {
+        return wrapped.structuredContent as T;
+      }
+    }
+
+    // Handle direct response format (mock server)
+    return result as T;
+  } catch (error) {
+    clearTimeout(timeout);
+    throw error;
+  }
+}
+
+/**
+ * Call an Agent Mail MCP tool with retry and auto-restart
+ *
+ * Features:
+ * - Exponential backoff with jitter on retryable errors
+ * - Auto-restart server after consecutive failures
+ * - Timeout handling per request
 *
 * Handles both direct results (mock server) and wrapped results (real server).
 * Real Agent Mail returns: { content: [...], structuredContent: {...} }
@@ -286,56 +620,73 @@ export async function mcpCall<T>(
   toolName: string,
   args: Record<string, unknown>,
 ): Promise<T> {
-  const response = await fetch(`${AGENT_MAIL_URL}/mcp/`, {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({
-      jsonrpc: "2.0",
-      id: crypto.randomUUID(),
-      method: "tools/call",
-      params: { name: toolName, arguments: args },
-    }),
-  });
-
-  if (!response.ok) {
-    throw new AgentMailError(
-      `HTTP ${response.status}: ${response.statusText}`,
-      toolName,
-    );
-  }
-
-  const json = (await response.json()) as MCPResponse<MCPToolResult<T> | T>;
-
-  if (json.error) {
-    throw new AgentMailError(
-      json.error.message,
-      toolName,
-      json.error.code,
-      json.error.data,
-    );
-  }
+  let lastError: Error | null = null;
+
+  for (let attempt = 0; attempt <= RETRY_CONFIG.maxRetries; attempt++) {
+    // Apply backoff delay (except first attempt)
+    if (attempt > 0) {
+      const delay = calculateBackoffDelay(attempt);
+      console.warn(
+        `[agent-mail] Retry ${attempt}/${RETRY_CONFIG.maxRetries} for ${toolName} after ${delay}ms`,
+      );
+      await new Promise((resolve) => setTimeout(resolve, delay));
+    }

-  const result = json.result;
+    try {
+      const result = await mcpCallOnce<T>(toolName, args);

-  // Handle wrapped response format (real Agent Mail server)
-  // Check for isError first (error responses don't have structuredContent)
-  if (result && typeof result === "object") {
-    const wrapped = result as MCPToolResult<T>;
+      // Success - reset failure counter
+      consecutiveFailures = 0;
+      return result;
+    } catch (error) {
+      lastError = error instanceof Error ? error : new Error(String(error));
+
+      // Track consecutive failures
+      consecutiveFailures++;
+
+      // Check if we should attempt server restart
+      if (
+        consecutiveFailures >= RECOVERY_CONFIG.failureThreshold &&
+        RECOVERY_CONFIG.enabled
+      ) {
+        console.warn(
+          `[agent-mail] ${consecutiveFailures} consecutive failures, checking server health...`,
+        );
+
+        const healthy = await isServerFunctional();
+        if (!healthy) {
+          console.warn("[agent-mail] Server unhealthy, attempting restart...");
+          const restarted = await restartServer();
+          if (restarted) {
+            // Reset availability cache since server restarted
+            agentMailAvailable = null;
+            // Don't count this attempt against retries - try again
+            attempt--;
+            continue;
+          }
+        }
+      }

-    // Check for error response (has isError: true but no structuredContent)
-    if (wrapped.isError) {
-      const errorText = wrapped.content?.[0]?.text || "Unknown error";
-      throw new AgentMailError(errorText, toolName);
-    }
+      // Check if error is retryable
+      if (!isRetryableError(error)) {
+        console.warn(
+          `[agent-mail] Non-retryable error for ${toolName}: ${lastError.message}`,
+        );
+        throw lastError;
+      }

-    // Check for success response with structuredContent
-    if ("structuredContent" in wrapped) {
-      return wrapped.structuredContent as T;
+      // If this was the last retry, throw
+      if (attempt === RETRY_CONFIG.maxRetries) {
+        console.error(
+          `[agent-mail] All ${RETRY_CONFIG.maxRetries} retries exhausted for ${toolName}`,
+        );
+        throw lastError;
+      }
     }
   }

-  // Handle direct response format (mock server)
-  return result as T;
+  // Should never reach here, but TypeScript needs it
+  throw lastError || new Error("Unknown error in mcpCall");
 }

 /**
@@ -839,4 +1190,10 @@ export {
   sessionStates,
   AGENT_MAIL_URL,
   MAX_INBOX_LIMIT,
+  // Recovery/retry utilities (resetRecoveryState already exported at definition)
+  isServerHealthy,
+  isServerFunctional,
+  restartServer,
+  RETRY_CONFIG,
+  RECOVERY_CONFIG,
 };