@shipers-dev/multi 0.9.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/dist/index.js +150 -15
  2. package/package.json +1 -1
  3. package/src/index.ts +153 -12
package/dist/index.js CHANGED
@@ -5703,7 +5703,7 @@ import { join as join3, dirname as dirname2 } from "path";
5703
5703
  // package.json
5704
5704
  var package_default = {
5705
5705
  name: "@shipers-dev/multi",
5706
- version: "0.9.5",
5706
+ version: "0.10.0",
5707
5707
  type: "module",
5708
5708
  bin: {
5709
5709
  "multi-agent": "./dist/index.js"
@@ -6052,6 +6052,9 @@ async function cmdConnect(apiUrl, config) {
6052
6052
  if (t.issue_id) {
6053
6053
  postStream(apiUrl, t.issue_id, "queued", { queue_position: pos });
6054
6054
  }
6055
+ if (t.dispatch_id) {
6056
+ ackDispatch(apiUrl, t.dispatch_id, config.dispatchSecret);
6057
+ }
6055
6058
  queueMicrotask(() => schedule());
6056
6059
  return Response.json({ accepted: true, task_id: taskId }, { status: 202 });
6057
6060
  } catch (e) {
@@ -6144,24 +6147,25 @@ async function cmdConnect(apiUrl, config) {
6144
6147
  try {
6145
6148
  writeFileSync3(PORT_PATH, String(port));
6146
6149
  } catch {}
6147
- const cf = Bun.spawn(["cloudflared", "tunnel", "--no-autoupdate", "--url", `http://127.0.0.1:${port}`], {
6148
- stdout: "pipe",
6149
- stderr: "pipe",
6150
- stdin: "ignore"
6151
- });
6152
- const tunnelUrl = await parseTunnelUrl(cf.stderr);
6153
- if (!tunnelUrl) {
6150
+ let tunnel = await startTunnel(port);
6151
+ if (!tunnel) {
6154
6152
  log("\u274C cloudflared did not emit a tunnel URL \u2014 is `cloudflared` installed? (`brew install cloudflared`)");
6155
- try {
6156
- cf.kill();
6157
- } catch {}
6158
6153
  try {
6159
6154
  server.stop();
6160
6155
  } catch {}
6161
6156
  process.exit(1);
6162
6157
  }
6163
- log(`\u2601\uFE0F Tunnel up: ${tunnelUrl}`);
6164
- await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: "online", tunnel_url: tunnelUrl });
6158
+ log(`\u2601\uFE0F Tunnel up: ${tunnel.url}`);
6159
+ const heartbeat = async () => {
6160
+ const res = await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: "online", tunnel_url: tunnel?.url });
6161
+ return res.success && res.data?.pending_dispatches || 0;
6162
+ };
6163
+ {
6164
+ const pending = await heartbeat();
6165
+ if (pending > 0)
6166
+ drainOfflineDispatches(apiUrl, config.deviceId, config.dispatchSecret, db, () => schedule());
6167
+ }
6168
+ drainOfflineDispatches(apiUrl, config.deviceId, config.dispatchSecret, db, () => schedule());
6165
6169
  let alive = true;
6166
6170
  const shutdown = async (reason) => {
6167
6171
  if (!alive)
@@ -6172,7 +6176,7 @@ async function cmdConnect(apiUrl, config) {
6172
6176
  server.stop();
6173
6177
  } catch {}
6174
6178
  try {
6175
- cf.kill();
6179
+ tunnel?.child.kill();
6176
6180
  } catch {}
6177
6181
  try {
6178
6182
  await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: "offline", tunnel_url: null });
@@ -6190,19 +6194,106 @@ async function cmdConnect(apiUrl, config) {
6190
6194
  process.on("SIGINT", () => shutdown("SIGINT"));
6191
6195
  process.on("SIGTERM", () => shutdown("SIGTERM"));
6192
6196
  schedule();
6197
+ const restartTunnel = async (reason) => {
6198
+ if (!alive)
6199
+ return;
6200
+ log(`\uD83D\uDD01 Restarting tunnel (${reason})`);
6201
+ try {
6202
+ tunnel?.child.kill();
6203
+ } catch {}
6204
+ for (let attempt = 1;alive; attempt++) {
6205
+ const next = await startTunnel(port);
6206
+ if (next) {
6207
+ tunnel = next;
6208
+ log(`\u2601\uFE0F Tunnel up: ${tunnel.url}`);
6209
+ try {
6210
+ const pending = await heartbeat();
6211
+ if (pending > 0)
6212
+ drainOfflineDispatches(apiUrl, config.deviceId, config.dispatchSecret, db, () => schedule());
6213
+ } catch (e) {
6214
+ log(`heartbeat error after tunnel restart: ${String(e)}`);
6215
+ }
6216
+ return;
6217
+ }
6218
+ const wait = Math.min(30000, 2000 * attempt);
6219
+ log(`tunnel restart failed, retry in ${wait}ms`);
6220
+ await sleep(wait);
6221
+ }
6222
+ };
6223
+ (async () => {
6224
+ while (alive) {
6225
+ const t = tunnel;
6226
+ if (!t) {
6227
+ await sleep(1000);
6228
+ continue;
6229
+ }
6230
+ const code = await t.child.exited;
6231
+ if (!alive)
6232
+ return;
6233
+ if (tunnel === t)
6234
+ await restartTunnel(`cloudflared exited code=${code}`);
6235
+ }
6236
+ })();
6237
+ let probeFailures = 0;
6238
+ let tick = 0;
6239
+ const PROBE_EVERY = 6;
6193
6240
  while (alive) {
6194
6241
  await sleep(20000);
6195
6242
  if (existsSync3(STOP_PATH)) {
6196
6243
  await shutdown("stop flag");
6197
6244
  break;
6198
6245
  }
6246
+ tick++;
6247
+ const currentUrl = tunnel?.url;
6248
+ if (currentUrl && tick % PROBE_EVERY === 0) {
6249
+ const ok = await probeTunnel(currentUrl);
6250
+ if (!ok) {
6251
+ probeFailures++;
6252
+ log(`tunnel probe failed (${probeFailures}/2): ${currentUrl}`);
6253
+ if (probeFailures >= 2) {
6254
+ probeFailures = 0;
6255
+ await restartTunnel("probe failed 2x");
6256
+ continue;
6257
+ }
6258
+ } else {
6259
+ probeFailures = 0;
6260
+ }
6261
+ }
6199
6262
  try {
6200
- await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: "online", tunnel_url: tunnelUrl });
6263
+ const pending = await heartbeat();
6264
+ if (pending > 0)
6265
+ drainOfflineDispatches(apiUrl, config.deviceId, config.dispatchSecret, db, () => schedule());
6201
6266
  } catch (e) {
6202
6267
  log(`heartbeat error: ${String(e)}`);
6203
6268
  }
6204
6269
  }
6205
6270
  }
6271
/**
 * Launches a cloudflared quick tunnel pointing at the local server on `port`.
 *
 * Spawns `cloudflared tunnel --no-autoupdate --url http://127.0.0.1:<port>`
 * and hands the child's stderr to `parseTunnelUrl` to obtain the public URL.
 *
 * @param port Local port the tunnel should forward to.
 * @returns `{ child, url }` on success, or `null` when cloudflared could not be
 *   spawned, URL parsing threw, or no URL was produced. The child is killed
 *   before `null` is returned, so callers never own a half-started process.
 */
async function startTunnel(port) {
  let child;
  try {
    child = Bun.spawn(["cloudflared", "tunnel", "--no-autoupdate", "--url", `http://127.0.0.1:${port}`], {
      // NOTE(review): stdout is piped but nothing visible here reads it — confirm it cannot fill up.
      stdout: "pipe",
      stderr: "pipe",
      stdin: "ignore"
    });
  } catch (e) {
    // A spawn failure (e.g. executable not launchable) must surface as the
    // documented `null` result so callers can report it or retry with backoff.
    log(`cloudflared spawn failed: ${String(e)}`);
    return null;
  }
  try {
    const url = await parseTunnelUrl(child.stderr);
    if (url) {
      return { child, url };
    }
  } catch (e) {
    log(`cloudflared tunnel URL parse failed: ${String(e)}`);
  }
  // No usable URL: do not leave the cloudflared process running.
  try {
    child.kill();
  } catch {}
  return null;
}
6286
/**
 * Liveness probe: issues a GET against the public tunnel URL.
 *
 * @param url Public tunnel URL to probe.
 * @returns `true` when any HTTP response arrives within 8 seconds, `false`
 *   when the request rejects (network error, abort on timeout, invalid URL).
 *
 * NOTE(review): the tunnel provider's edge may itself answer with an HTTP error
 * status (e.g. 530) when the tunnel behind the hostname is gone; such a
 * response still counts as success here — confirm that is intended.
 */
async function probeTunnel(url) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), 8000);
  try {
    const res = await fetch(url, { method: "GET", signal: ctrl.signal });
    // Only the status matters; release the body so the connection is not held.
    try {
      await res.body?.cancel();
    } catch {}
    return res.status > 0;
  } catch {
    return false;
  } finally {
    // Always disarm the timeout, including when fetch rejects.
    clearTimeout(timer);
  }
}
6206
6297
  async function cmdConnectDetached(apiUrl) {
6207
6298
  if (existsSync3(PID_PATH)) {
6208
6299
  const pid = Number(readFileSync3(PID_PATH, "utf8").trim());
@@ -6985,6 +7076,50 @@ async function resolveAcpAdapter(agentType, detectedPath) {
6985
7076
  return [c];
6986
7077
  return null;
6987
7078
  }
7079
/**
 * Best-effort acknowledgement of a dispatch: POSTs to
 * `/api/issues/dispatches/<id>/ack` with the dispatch secret as a Bearer token.
 *
 * Never throws. Network failures and non-2xx responses are only logged, so
 * callers can fire and forget.
 *
 * @param apiUrl Base URL of the API.
 * @param dispatchId Identifier of the dispatch being acknowledged.
 * @param secret Dispatch secret sent in the `authorization` header.
 */
async function ackDispatch(apiUrl, dispatchId, secret) {
  try {
    // Encode the id so an unexpected value cannot change the request path
    // that the secret is sent to.
    const res = await fetch(`${apiUrl}/api/issues/dispatches/${encodeURIComponent(dispatchId)}/ack`, {
      method: "POST",
      headers: { authorization: `Bearer ${secret}` }
    });
    if (!res.ok) {
      // fetch resolves on HTTP errors; report a rejected ack instead of ignoring it.
      log(`ack dispatch ${dispatchId} rejected: ${res.status}`);
    }
    try {
      await res.body?.cancel();
    } catch {}
  } catch (e) {
    log(`ack dispatch ${dispatchId} failed: ${String(e)}`);
  }
}
7089
/**
 * Fetches the device's pending dispatches, claims each one and enqueues the
 * claimed task into the local `tasks` table with status 'queued'.
 *
 * For every successfully enqueued task a "queued" stream event is posted
 * (when the task has an issue id) and the dispatch is acked, both without
 * being awaited. Per-row failures are logged and do not stop the loop.
 *
 * @param apiUrl Base URL of the API.
 * @param deviceId Device whose pending dispatches are listed.
 * @param secret Dispatch secret used as Bearer token for claim/ack.
 * @param db Database handle exposing `run(sql, params)`.
 * @param onEnqueued Invoked once after the loop if at least one task was enqueued.
 */
async function drainOfflineDispatches(apiUrl, deviceId, secret, db, onEnqueued) {
  let list;
  try {
    list = await apiClient.get(`${apiUrl}/api/devices/${deviceId}/dispatches/pending`);
  } catch (e) {
    log(`drain list failed: ${String(e)}`);
    return;
  }
  // The response envelope is not pinned down here; accept the shapes the
  // original code accepted and bail out on anything that is not an array.
  const rows = list?.data?.results || list?.data || list?.results || list || [];
  if (!Array.isArray(rows) || rows.length === 0)
    return;
  log(`\uD83E\uDEA3 draining ${rows.length} offline dispatch(es)`);
  let enqueued = 0;
  // Strictly increasing timestamp so two dispatches for the same issue handled
  // within one millisecond still get distinct task ids.
  let stamp = 0;
  for (const r of rows) {
    const id = r?.id;
    if (id == null) {
      // Guard here: touching `r.id` inside the catch handler below would throw
      // again and reject the whole (un-awaited) drain.
      log("drain skipped a row without id");
      continue;
    }
    try {
      const res = await fetch(`${apiUrl}/api/issues/dispatches/${encodeURIComponent(id)}/claim`, {
        method: "POST",
        headers: { authorization: `Bearer ${secret}` }
      });
      if (!res.ok) {
        log(`claim ${id} skipped: ${res.status}`);
        try {
          await res.body?.cancel();
        } catch {}
        continue;
      }
      const body = await res.json();
      const task = body?.task;
      if (!task || typeof task !== "object") {
        log(`claim ${id} returned no task payload`);
        continue;
      }
      stamp = Math.max(Date.now(), stamp + 1);
      const taskId = task.issue_id ? `${task.issue_id}-${stamp}` : crypto.randomUUID();
      db.run("INSERT INTO tasks (id, status, payload, agent_id, issue_id) VALUES (?, 'queued', ?, ?, ?)", [taskId, JSON.stringify(task), task.agent_id ?? null, task.issue_id ?? null]);
      enqueued++;
      if (task.issue_id)
        postStream(apiUrl, task.issue_id, "queued", { drained: true });
      ackDispatch(apiUrl, id, secret);
    } catch (e) {
      log(`drain ${id} failed: ${String(e)}`);
    }
  }
  // Only notify when there is actually new work in the queue.
  if (enqueued > 0)
    onEnqueued();
}
6988
7123
  async function postStream(apiUrl, issueId, event_type, payload) {
6989
7124
  try {
6990
7125
  ensureDirs();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shipers-dev/multi",
3
- "version": "0.9.5",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "multi-agent": "./dist/index.js"
package/src/index.ts CHANGED
@@ -355,6 +355,11 @@ async function cmdConnect(apiUrl: string, config: Config) {
355
355
  if (t.issue_id) {
356
356
  void postStream(apiUrl, t.issue_id, 'queued', { queue_position: pos });
357
357
  }
358
+ // Ack dispatch back to worker so UI flips from queued → acked. Fire-and-forget:
359
+ // missing/failed ack is fine, stall-sweep covers it.
360
+ if (t.dispatch_id) {
361
+ void ackDispatch(apiUrl, t.dispatch_id, config.dispatchSecret!);
362
+ }
358
363
  queueMicrotask(() => schedule());
359
364
  return Response.json({ accepted: true, task_id: taskId }, { status: 202 });
360
365
  } catch (e) {
@@ -432,20 +437,26 @@ async function cmdConnect(apiUrl: string, config: Config) {
432
437
  log(`🌐 Local server: http://127.0.0.1:${port}`);
433
438
  try { writeFileSync(PORT_PATH, String(port)); } catch {}
434
439
 
435
- // Spawn cloudflared quick tunnel
436
- const cf = Bun.spawn(['cloudflared', 'tunnel', '--no-autoupdate', '--url', `http://127.0.0.1:${port}`], {
437
- stdout: 'pipe', stderr: 'pipe', stdin: 'ignore',
438
- });
439
- const tunnelUrl = await parseTunnelUrl(cf.stderr as ReadableStream<Uint8Array>);
440
- if (!tunnelUrl) {
440
+ // Spawn cloudflared quick tunnel (with self-healing on death)
441
+ let tunnel = await startTunnel(port);
442
+ if (!tunnel) {
441
443
  log('❌ cloudflared did not emit a tunnel URL — is `cloudflared` installed? (`brew install cloudflared`)');
442
- try { cf.kill(); } catch {}
443
444
  try { server.stop(); } catch {}
444
445
  process.exit(1);
445
446
  }
446
- log(`☁️ Tunnel up: ${tunnelUrl}`);
447
+ log(`☁️ Tunnel up: ${tunnel.url}`);
447
448
 
448
- await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: 'online', tunnel_url: tunnelUrl });
449
+ const heartbeat = async (): Promise<number> => {
450
+ const res = await apiClient.post<{ pending_dispatches?: number }>(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: 'online', tunnel_url: tunnel?.url });
451
+ return (res.success && res.data?.pending_dispatches) || 0;
452
+ };
453
+ {
454
+ const pending = await heartbeat();
455
+ if (pending > 0) void drainOfflineDispatches(apiUrl, config.deviceId!, config.dispatchSecret!, db, () => schedule());
456
+ }
457
+ // Always attempt one drain on fresh startup, covering the case where worker
458
+ // couldn't reach the previous daemon instance right before restart.
459
+ void drainOfflineDispatches(apiUrl, config.deviceId!, config.dispatchSecret!, db, () => schedule());
449
460
 
450
461
  let alive = true;
451
462
 
@@ -454,7 +465,7 @@ async function cmdConnect(apiUrl: string, config: Config) {
454
465
  alive = false;
455
466
  log(`🛑 Shutting down (${reason})`);
456
467
  try { server.stop(); } catch {}
457
- try { cf.kill(); } catch {}
468
+ try { tunnel?.child.kill(); } catch {}
458
469
  try { await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: 'offline', tunnel_url: null }); } catch {}
459
470
  if (existsSync(PID_PATH)) unlinkSync(PID_PATH);
460
471
  if (existsSync(STOP_PATH)) unlinkSync(STOP_PATH);
@@ -469,18 +480,99 @@ async function cmdConnect(apiUrl: string, config: Config) {
469
480
  // Kick the scheduler on startup to drain any leftover queued rows.
470
481
  schedule();
471
482
 
472
- // Heartbeat loop
483
+ // Tunnel self-heal: relaunch cloudflared if child exits or DNS stops resolving.
484
+ const restartTunnel = async (reason: string) => {
485
+ if (!alive) return;
486
+ log(`🔁 Restarting tunnel (${reason})`);
487
+ try { tunnel?.child.kill(); } catch {}
488
+ // Small backoff so we don't spam cloudflared edge under prolonged outage.
489
+ for (let attempt = 1; alive; attempt++) {
490
+ const next = await startTunnel(port);
491
+ if (next) {
492
+ tunnel = next;
493
+ log(`☁️ Tunnel up: ${tunnel.url}`);
494
+ try {
495
+ const pending = await heartbeat();
496
+ if (pending > 0) void drainOfflineDispatches(apiUrl, config.deviceId!, config.dispatchSecret!, db, () => schedule());
497
+ } catch (e) {
498
+ log(`heartbeat error after tunnel restart: ${String(e)}`);
499
+ }
500
+ return;
501
+ }
502
+ const wait = Math.min(30000, 2000 * attempt);
503
+ log(`tunnel restart failed, retry in ${wait}ms`);
504
+ await sleep(wait);
505
+ }
506
+ };
507
+
508
+ // Watch for cloudflared process exit.
509
+ (async () => {
510
+ while (alive) {
511
+ const t = tunnel;
512
+ if (!t) { await sleep(1000); continue; }
513
+ const code = await t.child.exited;
514
+ if (!alive) return;
515
+ if (tunnel === t) await restartTunnel(`cloudflared exited code=${code}`);
516
+ }
517
+ })();
518
+
519
+ // Heartbeat (20s) + tunnel liveness probe (every 6 ticks = ~2min).
520
+ let probeFailures = 0;
521
+ let tick = 0;
522
+ const PROBE_EVERY = 6;
473
523
  while (alive) {
474
524
  await sleep(20000);
475
525
  if (existsSync(STOP_PATH)) { await shutdown('stop flag'); break; }
526
+ tick++;
527
+ const currentUrl = tunnel?.url;
528
+ if (currentUrl && tick % PROBE_EVERY === 0) {
529
+ const ok = await probeTunnel(currentUrl);
530
+ if (!ok) {
531
+ probeFailures++;
532
+ log(`tunnel probe failed (${probeFailures}/2): ${currentUrl}`);
533
+ if (probeFailures >= 2) {
534
+ probeFailures = 0;
535
+ await restartTunnel('probe failed 2x');
536
+ continue;
537
+ }
538
+ } else {
539
+ probeFailures = 0;
540
+ }
541
+ }
476
542
  try {
477
- await apiClient.post(`${apiUrl}/api/devices/${config.deviceId}/heartbeat`, { status: 'online', tunnel_url: tunnelUrl });
543
+ const pending = await heartbeat();
544
+ if (pending > 0) void drainOfflineDispatches(apiUrl, config.deviceId!, config.dispatchSecret!, db, () => schedule());
478
545
  } catch (e) {
479
546
  log(`heartbeat error: ${String(e)}`);
480
547
  }
481
548
  }
482
549
  }
483
550
 
551
/**
 * Launches a cloudflared quick tunnel pointing at the local server on `port`.
 *
 * Spawns `cloudflared tunnel --no-autoupdate --url http://127.0.0.1:<port>`
 * and hands the child's stderr to `parseTunnelUrl` to obtain the public URL.
 *
 * @param port Local port the tunnel should forward to.
 * @returns `{ child, url }` on success, or `null` when cloudflared could not be
 *   spawned, URL parsing threw, or no URL was produced. The child is killed
 *   before `null` is returned, so callers never own a half-started process.
 */
async function startTunnel(port: number): Promise<{ child: ReturnType<typeof Bun.spawn>; url: string } | null> {
  let child: ReturnType<typeof Bun.spawn>;
  try {
    child = Bun.spawn(['cloudflared', 'tunnel', '--no-autoupdate', '--url', `http://127.0.0.1:${port}`], {
      // NOTE(review): stdout is piped but nothing visible here reads it — confirm it cannot fill up.
      stdout: 'pipe', stderr: 'pipe', stdin: 'ignore',
    });
  } catch (e) {
    // A spawn failure (e.g. executable not launchable) must surface as the
    // documented `null` result so callers can report it or retry with backoff.
    log(`cloudflared spawn failed: ${String(e)}`);
    return null;
  }
  try {
    const url = await parseTunnelUrl(child.stderr as ReadableStream<Uint8Array>);
    if (url) return { child, url };
  } catch (e) {
    log(`cloudflared tunnel URL parse failed: ${String(e)}`);
  }
  // No usable URL: do not leave the cloudflared process running.
  try { child.kill(); } catch {}
  return null;
}
562
+
563
/**
 * Liveness probe: issues a GET against the public tunnel URL.
 *
 * @param url Public tunnel URL to probe.
 * @returns `true` when any HTTP response — even a 404 — arrives within
 *   8 seconds, `false` when the request rejects (network error, abort on
 *   timeout, invalid URL).
 *
 * NOTE(review): the tunnel provider's edge may itself answer with an HTTP error
 * status (e.g. 530) when the tunnel behind the hostname is gone; such a
 * response still counts as success here — confirm that is intended.
 */
async function probeTunnel(url: string): Promise<boolean> {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), 8000);
  try {
    const res = await fetch(url, { method: 'GET', signal: ctrl.signal });
    // Only the status matters; release the body so the connection is not held.
    try { await res.body?.cancel(); } catch {}
    return res.status > 0;
  } catch {
    return false;
  } finally {
    // Always disarm the timeout, including when fetch rejects.
    clearTimeout(timer);
  }
}
575
+
484
576
  async function cmdConnectDetached(apiUrl: string) {
485
577
  if (existsSync(PID_PATH)) {
486
578
  const pid = Number(readFileSync(PID_PATH, 'utf8').trim());
@@ -1172,6 +1264,55 @@ async function resolveAcpAdapter(agentType: string, detectedPath?: string): Prom
1172
1264
  return null;
1173
1265
  }
1174
1266
 
1267
/**
 * Best-effort acknowledgement of a dispatch: POSTs to
 * `/api/issues/dispatches/<id>/ack` with the dispatch secret as a Bearer
 * token, which is intended to let the worker move the dispatch from
 * "dispatched" to "acked". The secret (rather than a user session) is what
 * authorizes the daemon on this endpoint.
 *
 * Never throws. Network failures and non-2xx responses are only logged, so
 * callers can fire and forget.
 *
 * @param apiUrl Base URL of the API.
 * @param dispatchId Identifier of the dispatch being acknowledged.
 * @param secret Dispatch secret sent in the `authorization` header.
 */
async function ackDispatch(apiUrl: string, dispatchId: string, secret: string): Promise<void> {
  try {
    // Encode the id so an unexpected value cannot change the request path
    // that the secret is sent to.
    const res = await fetch(`${apiUrl}/api/issues/dispatches/${encodeURIComponent(dispatchId)}/ack`, {
      method: 'POST',
      headers: { 'authorization': `Bearer ${secret}` },
    });
    if (!res.ok) {
      // fetch resolves on HTTP errors; report a rejected ack instead of ignoring it.
      log(`ack dispatch ${dispatchId} rejected: ${res.status}`);
    }
    try { await res.body?.cancel(); } catch {}
  } catch (e) {
    log(`ack dispatch ${dispatchId} failed: ${String(e)}`);
  }
}
1279
+
1280
/**
 * On reconnect, pulls the dispatches the worker could not deliver and enqueues
 * them locally.
 *
 * Lists the device's pending dispatches, claims each one via
 * `/api/issues/dispatches/<id>/claim` and inserts the claimed task into the
 * local `tasks` table with status 'queued'. The claim step is meant to move
 * the row from offline to dispatched so parallel daemons do not run the same
 * task twice (server-side guarantee — not verifiable from this file).
 *
 * For every enqueued task a "queued" stream event is posted (when the task has
 * an issue id) and the dispatch is acked, both fire-and-forget. Per-row
 * failures are logged and do not stop the loop.
 *
 * @param apiUrl Base URL of the API.
 * @param deviceId Device whose pending dispatches are listed.
 * @param secret Dispatch secret used as Bearer token for claim/ack.
 * @param db Local database handle used for the INSERT.
 * @param onEnqueued Invoked once after the loop if at least one task was enqueued.
 */
async function drainOfflineDispatches(apiUrl: string, deviceId: string, secret: string, db: Database, onEnqueued: () => void) {
  let list: any;
  try {
    list = await apiClient.get<any>(`${apiUrl}/api/devices/${deviceId}/dispatches/pending`);
  } catch (e) {
    log(`drain list failed: ${String(e)}`);
    return;
  }
  // The response envelope is not pinned down here; accept the shapes the
  // original code accepted and bail out on anything that is not an array.
  const rows = (list?.data?.results || list?.data || list?.results || list || []) as any[];
  if (!Array.isArray(rows) || rows.length === 0) return;
  log(`🪣 draining ${rows.length} offline dispatch(es)`);

  let enqueued = 0;
  // Strictly increasing timestamp so two dispatches for the same issue handled
  // within one millisecond still get distinct task ids.
  let stamp = 0;
  for (const r of rows) {
    const id = r?.id;
    if (id == null) {
      // Guard here: touching `r.id` inside the catch handler below would throw
      // again and reject the whole (un-awaited) drain.
      log('drain skipped a row without id');
      continue;
    }
    try {
      const res = await fetch(`${apiUrl}/api/issues/dispatches/${encodeURIComponent(id)}/claim`, {
        method: 'POST',
        headers: { 'authorization': `Bearer ${secret}` },
      });
      if (!res.ok) {
        log(`claim ${id} skipped: ${res.status}`);
        try { await res.body?.cancel(); } catch {}
        continue;
      }
      const body = await res.json() as { task?: any } | null;
      const task = body?.task;
      if (!task || typeof task !== 'object') {
        log(`claim ${id} returned no task payload`);
        continue;
      }
      stamp = Math.max(Date.now(), stamp + 1);
      const taskId = task.issue_id ? `${task.issue_id}-${stamp}` : crypto.randomUUID();
      db.run(
        "INSERT INTO tasks (id, status, payload, agent_id, issue_id) VALUES (?, 'queued', ?, ?, ?)",
        [taskId, JSON.stringify(task), task.agent_id ?? null, task.issue_id ?? null],
      );
      enqueued++;
      if (task.issue_id) void postStream(apiUrl, task.issue_id, 'queued', { drained: true });
      void ackDispatch(apiUrl, id, secret);
    } catch (e) {
      log(`drain ${id} failed: ${String(e)}`);
    }
  }
  // Only notify when there is actually new work in the queue.
  if (enqueued > 0) onEnqueued();
}
1315
+
1175
1316
  async function postStream(apiUrl: string, issueId: string, event_type: string, payload: any) {
1176
1317
  // Local ndjson sink for tail -f debugging.
1177
1318
  try {