pilotswarm-sdk 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/facts-tools.d.ts +1 -0
  2. package/dist/facts-tools.d.ts.map +1 -1
  3. package/dist/facts-tools.js +22 -18
  4. package/dist/facts-tools.js.map +1 -1
  5. package/dist/index.d.ts +1 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/management-client.d.ts +13 -0
  9. package/dist/management-client.d.ts.map +1 -1
  10. package/dist/management-client.js +30 -0
  11. package/dist/management-client.js.map +1 -1
  12. package/dist/orchestration-registry.d.ts +1 -1
  13. package/dist/orchestration-registry.d.ts.map +1 -1
  14. package/dist/orchestration-registry.js +8 -2
  15. package/dist/orchestration-registry.js.map +1 -1
  16. package/dist/orchestration.d.ts +3 -3
  17. package/dist/orchestration.d.ts.map +1 -1
  18. package/dist/orchestration.js +355 -55
  19. package/dist/orchestration.js.map +1 -1
  20. package/dist/orchestration_1_0_33.d.ts +12 -0
  21. package/dist/orchestration_1_0_33.d.ts.map +1 -0
  22. package/dist/orchestration_1_0_33.js +2051 -0
  23. package/dist/orchestration_1_0_33.js.map +1 -0
  24. package/dist/orchestration_1_0_34.d.ts +12 -0
  25. package/dist/orchestration_1_0_34.d.ts.map +1 -0
  26. package/dist/orchestration_1_0_34.js +2177 -0
  27. package/dist/orchestration_1_0_34.js.map +1 -0
  28. package/dist/orchestration_1_0_35.d.ts +12 -0
  29. package/dist/orchestration_1_0_35.d.ts.map +1 -0
  30. package/dist/orchestration_1_0_35.js +2178 -0
  31. package/dist/orchestration_1_0_35.js.map +1 -0
  32. package/dist/session-manager.d.ts +5 -3
  33. package/dist/session-manager.d.ts.map +1 -1
  34. package/dist/session-manager.js +9 -5
  35. package/dist/session-manager.js.map +1 -1
  36. package/dist/session-proxy.d.ts +1 -1
  37. package/dist/session-proxy.d.ts.map +1 -1
  38. package/dist/session-proxy.js +14 -3
  39. package/dist/session-proxy.js.map +1 -1
  40. package/dist/types.d.ts +11 -0
  41. package/dist/types.d.ts.map +1 -1
  42. package/dist/types.js.map +1 -1
  43. package/dist/worker.d.ts.map +1 -1
  44. package/dist/worker.js +22 -1
  45. package/dist/worker.js.map +1 -1
  46. package/package.json +2 -2
@@ -28,6 +28,23 @@ function optionalBoolean(value) {
28
28
  function isSubAgentTerminalStatus(status) {
29
29
  return status === "completed" || status === "failed" || status === "cancelled";
30
30
  }
31
+ const COPILOT_CONNECTION_CLOSED_MAX_RETRIES = 3;
32
+ const COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS = 15;
33
+ function isCopilotConnectionClosedError(message) {
34
+ return /\bConnection is closed\b/i.test(String(message || ""));
35
+ }
36
+ function buildConnectionClosedRetryDetail(retryAttempt) {
37
+ return `Live Copilot connection lost; retry ${retryAttempt}/${COPILOT_CONNECTION_CLOSED_MAX_RETRIES} in ${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s.`;
38
+ }
39
+ function buildLossyHandoffSummary(errorMessage) {
40
+ return `Live Copilot connection stayed closed after ${COPILOT_CONNECTION_CLOSED_MAX_RETRIES} retries; ` +
41
+ `dehydrating for handoff to a new worker. Last error: ${errorMessage}`;
42
+ }
43
+ function buildLossyHandoffRehydrationMessage(errorMessage) {
44
+ return `The previous worker lost the live Copilot connection and handed this session off after ` +
45
+ `${COPILOT_CONNECTION_CLOSED_MAX_RETRIES} retries. The LLM conversation history is preserved. ` +
46
+ `Review the latest durable context and continue carefully. Last transport error: ${errorMessage}`;
47
+ }
31
48
  function updateContextUsageFromEvents(previous, events, observedAt) {
32
49
  let next = cloneContextUsage(previous);
33
50
  if (!Array.isArray(events) || events.length === 0)
@@ -165,18 +182,18 @@ function updateContextUsageFromEvents(previous, events, observedAt) {
165
182
  return next;
166
183
  }
167
184
  /**
168
- * Flat event loop durable session orchestration (v1.0.33).
185
+ * Flat event loop durable session orchestration (v1.0.36).
169
186
  *
170
187
  * Replaces the nested while loops of v1.0.31 with a single
171
188
  * drain → decide → process loop backed by a KV FIFO work buffer.
172
189
  *
173
190
  * @internal
174
191
  */
175
- export const CURRENT_ORCHESTRATION_VERSION = "1.0.33";
176
- export function* durableSessionOrchestration_1_0_33(ctx, input) {
192
+ export const CURRENT_ORCHESTRATION_VERSION = "1.0.36";
193
+ export function* durableSessionOrchestration_1_0_36(ctx, input) {
177
194
  const rawTraceInfo = typeof ctx.traceInfo === "function" ? ctx.traceInfo.bind(ctx) : null;
178
195
  if (rawTraceInfo) {
179
- ctx.traceInfo = (message) => rawTraceInfo(`[v1.0.33] ${message}`);
196
+ ctx.traceInfo = (message) => rawTraceInfo(`[v1.0.36] ${message}`);
180
197
  }
181
198
  const dehydrateThreshold = input.dehydrateThreshold ?? 30;
182
199
  const idleTimeout = input.idleTimeout ?? 30;
@@ -198,6 +215,7 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
198
215
  const MAX_RETRIES = 3;
199
216
  const MAX_SUB_AGENTS = 20;
200
217
  const MAX_NESTING_LEVEL = 2;
218
+ const CHILD_UPDATE_BATCH_MS = 30_000;
201
219
  // ─── Sub-agent tracking ──────────────────────────────────
202
220
  let subAgents = input.subAgents ? [...input.subAgents] : [];
203
221
  let pendingToolActions = input.pendingToolActions ? [...input.pendingToolActions] : [];
@@ -324,6 +342,18 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
324
342
  }
325
343
  return { prompt: rawPrompt };
326
344
  }
345
+ function appendSystemContext(rawPrompt, extraSystemPrompt) {
346
+ if (!extraSystemPrompt)
347
+ return rawPrompt;
348
+ const extracted = extractPromptSystemContext(rawPrompt);
349
+ const mergedSystemPrompt = mergePrompt(extracted.systemPrompt, extraSystemPrompt);
350
+ if (!mergedSystemPrompt)
351
+ return extracted.prompt ?? rawPrompt;
352
+ if (extracted.prompt) {
353
+ return `${extracted.prompt}\n\n[SYSTEM: ${mergedSystemPrompt}]`;
354
+ }
355
+ return `[SYSTEM: ${mergedSystemPrompt}]`;
356
+ }
327
357
  function ensureTaskContext(sourcePrompt) {
328
358
  if (taskContext || !sourcePrompt)
329
359
  return;
@@ -394,6 +424,7 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
394
424
  ...(pendingInputQuestion ? { pendingInputQuestion } : {}),
395
425
  ...(waitingForAgentIds ? { waitingForAgentIds } : {}),
396
426
  ...(interruptedWaitTimer ? { interruptedWaitTimer } : {}),
427
+ ...(pendingChildDigest ? { pendingChildDigest } : {}),
397
428
  ...restOverrides,
398
429
  };
399
430
  }
@@ -469,6 +500,99 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
469
500
  content: promptText.split("\n").slice(1).join("\n").trim(),
470
501
  };
471
502
  }
503
+ function bufferChildUpdate(update, observedAtMs) {
504
+ if (!pendingChildDigest) {
505
+ pendingChildDigest = {
506
+ startedAtMs: observedAtMs,
507
+ updates: [],
508
+ };
509
+ }
510
+ const nextEntry = {
511
+ sessionId: update.sessionId,
512
+ updateType: update.updateType,
513
+ ...(update.content ? { content: update.content.slice(0, 2000) } : {}),
514
+ observedAtMs,
515
+ };
516
+ const existingIndex = pendingChildDigest.updates.findIndex((entry) => entry.sessionId === update.sessionId);
517
+ if (existingIndex >= 0) {
518
+ pendingChildDigest.updates[existingIndex] = nextEntry;
519
+ }
520
+ else {
521
+ pendingChildDigest.updates.push(nextEntry);
522
+ }
523
+ }
524
+ function clearPendingChildDigest() {
525
+ pendingChildDigest = null;
526
+ }
527
+ function buildPendingChildDigestSystemPrompt() {
528
+ if (!pendingChildDigest || pendingChildDigest.updates.length === 0)
529
+ return undefined;
530
+ const lines = pendingChildDigest.updates.map((update) => {
531
+ const agent = subAgents.find((entry) => entry.sessionId === update.sessionId);
532
+ const label = agent?.orchId ?? update.sessionId;
533
+ const task = agent?.task ? `Task: "${agent.task.slice(0, 120)}"\n` : "";
534
+ const status = agent?.status ?? update.updateType;
535
+ const resultText = String(update.content || agent?.result || "").trim();
536
+ const result = resultText ? resultText.slice(0, 240) : "(no summary)";
537
+ return ` - Agent ${label}\n` +
538
+ ` ${task}` +
539
+ ` Update: ${update.updateType}\n` +
540
+ ` Status: ${status}\n` +
541
+ ` Result: ${result}`;
542
+ });
543
+ return `Buffered child updates arrived during the last 30 seconds:\n${lines.join("\n")}\nReview the updates and continue your task.`;
544
+ }
545
+ function flushPendingChildDigestIntoPrompt(rawPrompt) {
546
+ const childDigestPrompt = buildPendingChildDigestSystemPrompt();
547
+ if (!childDigestPrompt)
548
+ return rawPrompt;
549
+ clearPendingChildDigest();
550
+ return appendSystemContext(rawPrompt, childDigestPrompt);
551
+ }
552
+ function* processPendingChildDigest() {
553
+ const digestPrompt = buildPendingChildDigestSystemPrompt();
554
+ if (!digestPrompt) {
555
+ clearPendingChildDigest();
556
+ return;
557
+ }
558
+ if (activeTimer?.type === "wait") {
559
+ const now = yield ctx.utcNow();
560
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
561
+ const remainingSec = Math.round(remainingMs / 1000);
562
+ const elapsedMs = activeTimer.originalDurationMs - remainingMs;
563
+ const elapsedSec = Math.round(elapsedMs / 1000);
564
+ const totalSec = Math.round(activeTimer.originalDurationMs / 1000);
565
+ interruptedWaitTimer = {
566
+ remainingSec,
567
+ reason: activeTimer.reason,
568
+ shouldRehydrate: activeTimer.shouldRehydrate ?? false,
569
+ waitPlan: activeTimer.waitPlan,
570
+ };
571
+ activeTimer = null;
572
+ clearPendingChildDigest();
573
+ yield* processPrompt(`[SYSTEM: Buffered child updates interrupted your ${totalSec}s timer (reason: "${interruptedWaitTimer.reason}"). ` +
574
+ `${elapsedSec}s elapsed, ${remainingSec}s remain. ` +
575
+ `Review the updates and continue your task now. The remaining wait will be resumed automatically after this turn completes.\n\n${digestPrompt}]`, true);
576
+ return;
577
+ }
578
+ if (activeTimer?.type === "cron") {
579
+ const activeCron = cronSchedule;
580
+ activeTimer = null;
581
+ clearPendingChildDigest();
582
+ yield* processPrompt(`[SYSTEM: Buffered child updates arrived while your recurring schedule was waiting for the next wake-up${activeCron ? ` ("${activeCron.reason}")` : ""}. ` +
583
+ `Review the updates and continue your task now. The recurring cron schedule remains active and will be re-armed automatically after this turn completes.\n\n${digestPrompt}]`, true);
584
+ return;
585
+ }
586
+ if (activeTimer?.type === "idle") {
587
+ activeTimer = null;
588
+ }
589
+ else if (activeTimer?.type === "agent-poll") {
590
+ waitingForAgentIds = null;
591
+ activeTimer = null;
592
+ }
593
+ clearPendingChildDigest();
594
+ yield* processPrompt(`[SYSTEM: ${digestPrompt}]`, true);
595
+ }
472
596
  function* applyChildUpdate(update) {
473
597
  ctx.traceInfo(`[orch] child update from=${update.sessionId} type=${update.updateType}`);
474
598
  const agent = subAgents.find(a => a.sessionId === update.sessionId);
@@ -547,9 +671,9 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
547
671
  return `[SYSTEM: Sub-agents completed:\n${summaries.join("\n")}]`;
548
672
  }
549
673
  // ─── Helper: dehydrate and optionally release affinity ───
550
- function* dehydrateForNextTurn(reason, resetAffinity = true) {
674
+ function* dehydrateForNextTurn(reason, resetAffinity = true, eventData) {
551
675
  ctx.traceInfo(`[orch] dehydrating session (reason=${reason}, resetAffinity=${resetAffinity})`);
552
- yield session.dehydrate(reason);
676
+ yield session.dehydrate(reason, eventData);
553
677
  needsHydration = true;
554
678
  preserveAffinityOnHydrate = !resetAffinity;
555
679
  if (resetAffinity) {
@@ -604,6 +728,13 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
604
728
  * After the LLM's response turn completes, the orchestration
605
729
  * automatically re-arms the remaining wait — no LLM action needed. */
606
730
  let interruptedWaitTimer = input.interruptedWaitTimer ?? null;
731
+ let pendingChildDigest = input.pendingChildDigest
732
+ ? {
733
+ startedAtMs: input.pendingChildDigest.startedAtMs,
734
+ ...(input.pendingChildDigest.ready ? { ready: true } : {}),
735
+ updates: [...(input.pendingChildDigest.updates || [])],
736
+ }
737
+ : null;
607
738
  // Reconstruct active timer from CAN input
608
739
  if (input.activeTimerState) {
609
740
  const initNow = yield ctx.utcNow();
@@ -636,6 +767,26 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
636
767
  const MAX_DRAIN_PER_TURN = 50;
637
768
  const MAX_ITERATIONS_PER_EXECUTION = 100;
638
769
  const NON_BLOCKING_TIMER_MS = 10;
770
+ function nextTimerCandidate(now) {
771
+ const candidates = [];
772
+ if (activeTimer) {
773
+ candidates.push({
774
+ kind: "active",
775
+ remainingMs: Math.max(0, activeTimer.deadlineMs - now),
776
+ timer: activeTimer,
777
+ });
778
+ }
779
+ if (pendingChildDigest && !pendingChildDigest.ready && pendingChildDigest.updates.length > 0) {
780
+ candidates.push({
781
+ kind: "child-digest",
782
+ remainingMs: Math.max(0, pendingChildDigest.startedAtMs + CHILD_UPDATE_BATCH_MS - now),
783
+ });
784
+ }
785
+ if (candidates.length === 0)
786
+ return null;
787
+ candidates.sort((left, right) => left.remainingMs - right.remainingMs);
788
+ return candidates[0];
789
+ }
639
790
  function fifoBucketKey(i) { return `fifo.${i}`; }
640
791
  function readFifoBucket(i) {
641
792
  const raw = ctx.getValue(fifoBucketKey(i));
@@ -885,26 +1036,40 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
885
1036
  const seenChildUpdates = new Set();
886
1037
  for (let i = 0; i < MAX_DRAIN_PER_TURN; i++) {
887
1038
  let msg = null;
888
- // ─── Mode 1: Active Timer — race dequeue vs timer ───
889
- if (activeTimer) {
1039
+ // ─── Mode 1: Active Timer / Child Digest — race dequeue vs timer ───
1040
+ if (activeTimer || (pendingChildDigest && !pendingChildDigest.ready)) {
890
1041
  const now = yield ctx.utcNow();
891
- const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
892
- if (remainingMs === 0) {
893
- stash.push({ kind: "timer", timer: { ...activeTimer }, firedAtMs: now });
894
- activeTimer = null;
1042
+ const candidate = nextTimerCandidate(now);
1043
+ if (!candidate)
1044
+ continue;
1045
+ if (candidate.remainingMs === 0) {
1046
+ if (candidate.kind === "active" && candidate.timer) {
1047
+ stash.push({ kind: "timer", timer: { ...candidate.timer }, firedAtMs: now });
1048
+ activeTimer = null;
1049
+ }
1050
+ else if (pendingChildDigest && pendingChildDigest.updates.length > 0) {
1051
+ pendingChildDigest.ready = true;
1052
+ break;
1053
+ }
895
1054
  continue;
896
1055
  }
897
1056
  const msgTask = ctx.dequeueEvent("messages");
898
- const timerTask = ctx.scheduleTimer(remainingMs);
1057
+ const timerTask = ctx.scheduleTimer(candidate.remainingMs);
899
1058
  const race = yield ctx.race(msgTask, timerTask);
900
1059
  if (race.index === 1) {
901
- const firedAt = yield ctx.utcNow();
902
- stash.push({ kind: "timer", timer: { ...activeTimer }, firedAtMs: firedAt });
903
- activeTimer = null;
1060
+ if (candidate.kind === "active" && candidate.timer) {
1061
+ const firedAt = yield ctx.utcNow();
1062
+ stash.push({ kind: "timer", timer: { ...candidate.timer }, firedAtMs: firedAt });
1063
+ activeTimer = null;
1064
+ }
1065
+ else if (pendingChildDigest && pendingChildDigest.updates.length > 0) {
1066
+ pendingChildDigest.ready = true;
1067
+ break;
1068
+ }
904
1069
  continue; // keep draining — pick up queued msgs in mode 3
905
1070
  }
906
1071
  msg = typeof race.value === "string" ? JSON.parse(race.value) : race.value;
907
- // activeTimer stays set — deadline unchanged
1072
+ // activeTimer / pending child digest stay set — deadlines unchanged
908
1073
  // ─── Mode 2: Blocking Dequeue — nothing to process ──
909
1074
  }
910
1075
  else if (needsBlockingDequeue()) {
@@ -940,43 +1105,12 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
940
1105
  // ─── Route: Child updates → apply immediately ───────
941
1106
  const childUpdate = parseChildUpdate(msg.prompt);
942
1107
  if (childUpdate) {
943
- const key = `${childUpdate.sessionId}|${childUpdate.updateType}`;
1108
+ const key = `${childUpdate.sessionId}|${childUpdate.updateType}|${childUpdate.content ?? ""}`;
944
1109
  if (!seenChildUpdates.has(key)) {
945
1110
  seenChildUpdates.add(key);
946
1111
  yield* applyChildUpdate(childUpdate);
947
- // Break active wait timer if >60s remaining
948
- if (activeTimer?.type === "wait") {
949
- const now = yield ctx.utcNow();
950
- const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
951
- const remainingSec = Math.round(remainingMs / 1000);
952
- if (remainingSec > 60) {
953
- const elapsedMs = activeTimer.originalDurationMs - remainingMs;
954
- const elapsedSec = Math.round(elapsedMs / 1000);
955
- ctx.traceInfo(`[drain] child update during wait, ${remainingSec}s remain (>60s) — breaking timer`);
956
- stash.push({
957
- kind: "prompt",
958
- prompt: `The wait was partially completed (${elapsedSec}s elapsed, ${remainingSec}s remain). Resume the wait for the remaining ${remainingSec} seconds.`,
959
- bootstrap: true,
960
- });
961
- activeTimer = null;
962
- }
963
- }
964
- // Break active cron timer if >60s remaining
965
- if (activeTimer?.type === "cron") {
966
- const now = yield ctx.utcNow();
967
- const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
968
- if (Math.round(remainingMs / 1000) > 60) {
969
- ctx.traceInfo(`[drain] child update during cron wait, >60s remain — breaking timer`);
970
- const activeCron = cronSchedule;
971
- stash.push({
972
- kind: "prompt",
973
- prompt: `[SYSTEM: A child update arrived while your recurring schedule was waiting for the next wake-up${activeCron ? ` ("${activeCron.reason}")` : ""}. ` +
974
- `Review the update and continue your task now. The recurring cron schedule remains active and will be re-armed automatically after this turn completes.]`,
975
- bootstrap: true,
976
- });
977
- activeTimer = null;
978
- }
979
- }
1112
+ const childObservedAt = yield ctx.utcNow();
1113
+ bufferChildUpdate(childUpdate, childObservedAt);
980
1114
  // Check if all waited-for agents are now done
981
1115
  if (waitingForAgentIds) {
982
1116
  const allDone = waitingForAgentIds.every(id => {
@@ -989,6 +1123,7 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
989
1123
  // and produces a single LLM turn, matching v1.0.31 behavior.
990
1124
  queueFollowup(buildWaitForAgentsFollowup(waitingForAgentIds));
991
1125
  waitingForAgentIds = null;
1126
+ clearPendingChildDigest();
992
1127
  activeTimer = null;
993
1128
  }
994
1129
  }
@@ -1060,6 +1195,9 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1060
1195
  waitingForAgentIds = null;
1061
1196
  activeTimer = null;
1062
1197
  }
1198
+ if (pendingChildDigest?.updates.length) {
1199
+ userPrompt = flushPendingChildDigestIntoPrompt(userPrompt);
1200
+ }
1063
1201
  stash.push({
1064
1202
  kind: "prompt",
1065
1203
  prompt: userPrompt,
@@ -1176,6 +1314,79 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1176
1314
  }
1177
1315
  retryCount++;
1178
1316
  ctx.traceInfo(`[orch] runTurn FAILED (attempt ${retryCount}/${MAX_RETRIES}): ${errorMsg}`);
1317
+ if (isCopilotConnectionClosedError(errorMsg)) {
1318
+ if (retryCount <= COPILOT_CONNECTION_CLOSED_MAX_RETRIES) {
1319
+ const retryDetail = buildConnectionClosedRetryDetail(retryCount);
1320
+ publishStatus("error", {
1321
+ error: `${errorMsg} (${retryDetail})`,
1322
+ recoverableTransportLoss: true,
1323
+ });
1324
+ ctx.traceInfo(`[orch] live Copilot connection lost; retrying in ${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s`);
1325
+ if (blobEnabled) {
1326
+ yield* dehydrateForNextTurn("error", true, {
1327
+ detail: retryDetail,
1328
+ error: errorMsg,
1329
+ phase: "runTurn.throw",
1330
+ retryAttempt: retryCount,
1331
+ maxRetries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
1332
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
1333
+ });
1334
+ }
1335
+ yield ctx.scheduleTimer(COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS * 1000);
1336
+ yield* versionedContinueAsNew(continueInput({
1337
+ ...(systemOnlyTurn ? {} : { prompt }),
1338
+ ...(requiredTool ? { requiredTool } : {}),
1339
+ ...(turnSystemPrompt ? { systemPrompt: turnSystemPrompt } : {}),
1340
+ retryCount,
1341
+ needsHydration: blobEnabled ? true : needsHydration,
1342
+ }));
1343
+ return;
1344
+ }
1345
+ const handoffMessage = buildLossyHandoffSummary(errorMsg);
1346
+ ctx.traceInfo(`[orch] ${handoffMessage}`);
1347
+ publishStatus("error", {
1348
+ error: handoffMessage,
1349
+ retriesExhausted: true,
1350
+ lossyHandoff: true,
1351
+ });
1352
+ yield manager.recordSessionEvent(input.sessionId, [{
1353
+ eventType: "session.lossy_handoff",
1354
+ data: {
1355
+ message: handoffMessage,
1356
+ error: errorMsg,
1357
+ phase: "runTurn.throw",
1358
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
1359
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
1360
+ nextStep: "dehydrate_and_resume_on_new_worker",
1361
+ },
1362
+ }]);
1363
+ if (blobEnabled) {
1364
+ yield* dehydrateForNextTurn("lossy_handoff", true, {
1365
+ detail: handoffMessage,
1366
+ error: errorMsg,
1367
+ phase: "runTurn.throw",
1368
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
1369
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
1370
+ nextStep: "dehydrate_and_resume_on_new_worker",
1371
+ });
1372
+ yield* versionedContinueAsNew(continueInput({
1373
+ ...(systemOnlyTurn ? {} : { prompt }),
1374
+ ...(requiredTool ? { requiredTool } : {}),
1375
+ ...(turnSystemPrompt ? { systemPrompt: turnSystemPrompt } : {}),
1376
+ retryCount: 0,
1377
+ needsHydration: true,
1378
+ rehydrationMessage: buildLossyHandoffRehydrationMessage(errorMsg),
1379
+ }));
1380
+ return;
1381
+ }
1382
+ publishStatus("error", {
1383
+ error: `${handoffMessage} Durable handoff is unavailable because blob persistence is disabled.`,
1384
+ retriesExhausted: true,
1385
+ lossyHandoff: false,
1386
+ });
1387
+ retryCount = 0;
1388
+ return;
1389
+ }
1179
1390
  if (retryCount >= MAX_RETRIES) {
1180
1391
  ctx.traceInfo(`[orch] max retries exhausted, waiting for user input`);
1181
1392
  publishStatus("error", {
@@ -1191,7 +1402,14 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1191
1402
  const retryDelay = 15 * Math.pow(2, retryCount - 1);
1192
1403
  ctx.traceInfo(`[orch] retrying in ${retryDelay}s`);
1193
1404
  if (blobEnabled) {
1194
- yield* dehydrateForNextTurn("error");
1405
+ yield* dehydrateForNextTurn("error", true, {
1406
+ detail: errorMsg,
1407
+ error: errorMsg,
1408
+ phase: "runTurn.throw",
1409
+ retryAttempt: retryCount,
1410
+ maxRetries: MAX_RETRIES,
1411
+ retryDelaySeconds: retryDelay,
1412
+ });
1195
1413
  }
1196
1414
  yield ctx.scheduleTimer(retryDelay * 1000);
1197
1415
  yield* versionedContinueAsNew(continueInput({
@@ -1584,6 +1802,7 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1584
1802
  `- You are autonomous and goal-driven. If the task implies ongoing monitoring or follow-through until done, keep yourself alive with durable timers until the goal is complete or you can no longer make progress.\n` +
1585
1803
  `- If it is ambiguous whether the task should become a long-running recurring workflow, report that ambiguity back to the parent instead of guessing or asking the user directly.\n` +
1586
1804
  `- When your task is complete, provide a clear summary of your findings/results.\n` +
1805
+ `- Prefer using \`store_fact\` for larger structured context handoffs across your session lineage. Put the durable details in facts, then pass fact keys or \`read_facts\` pointers in messages/prompts instead of pasting large context blobs.\n` +
1587
1806
  `- If you write any files with write_artifact, you MUST also call export_artifact and include the artifact:// link in your response.\n` +
1588
1807
  `- If you override a sub-agent model, you MUST first call list_available_models in this session and use only an exact provider:model value returned there. ` +
1589
1808
  `NEVER invent, guess, shorten, or reuse a stale model name.\n` +
@@ -1828,6 +2047,75 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1828
2047
  }
1829
2048
  retryCount++;
1830
2049
  ctx.traceInfo(`[orch] turn returned error (attempt ${retryCount}/${MAX_RETRIES}): ${result.message}`);
2050
+ if (isCopilotConnectionClosedError(result.message)) {
2051
+ if (retryCount <= COPILOT_CONNECTION_CLOSED_MAX_RETRIES) {
2052
+ const retryDetail = buildConnectionClosedRetryDetail(retryCount);
2053
+ publishStatus("error", {
2054
+ error: `${result.message} (${retryDetail})`,
2055
+ recoverableTransportLoss: true,
2056
+ });
2057
+ ctx.traceInfo(`[orch] live Copilot connection loss returned as turn error; retrying in ${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s`);
2058
+ if (blobEnabled) {
2059
+ yield* dehydrateForNextTurn("error", true, {
2060
+ detail: retryDetail,
2061
+ error: result.message,
2062
+ phase: "turn.result.error",
2063
+ retryAttempt: retryCount,
2064
+ maxRetries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2065
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2066
+ });
2067
+ }
2068
+ yield ctx.scheduleTimer(COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS * 1000);
2069
+ yield* versionedContinueAsNew(continueInput({
2070
+ prompt: sourcePrompt,
2071
+ retryCount,
2072
+ needsHydration: blobEnabled ? true : needsHydration,
2073
+ }));
2074
+ return;
2075
+ }
2076
+ const handoffMessage = buildLossyHandoffSummary(result.message);
2077
+ ctx.traceInfo(`[orch] ${handoffMessage}`);
2078
+ publishStatus("error", {
2079
+ error: handoffMessage,
2080
+ retriesExhausted: true,
2081
+ lossyHandoff: true,
2082
+ });
2083
+ yield manager.recordSessionEvent(input.sessionId, [{
2084
+ eventType: "session.lossy_handoff",
2085
+ data: {
2086
+ message: handoffMessage,
2087
+ error: result.message,
2088
+ phase: "turn.result.error",
2089
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2090
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2091
+ nextStep: "dehydrate_and_resume_on_new_worker",
2092
+ },
2093
+ }]);
2094
+ if (blobEnabled) {
2095
+ yield* dehydrateForNextTurn("lossy_handoff", true, {
2096
+ detail: handoffMessage,
2097
+ error: result.message,
2098
+ phase: "turn.result.error",
2099
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2100
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2101
+ nextStep: "dehydrate_and_resume_on_new_worker",
2102
+ });
2103
+ yield* versionedContinueAsNew(continueInput({
2104
+ prompt: sourcePrompt,
2105
+ retryCount: 0,
2106
+ needsHydration: true,
2107
+ rehydrationMessage: buildLossyHandoffRehydrationMessage(result.message),
2108
+ }));
2109
+ return;
2110
+ }
2111
+ publishStatus("error", {
2112
+ error: `${handoffMessage} Durable handoff is unavailable because blob persistence is disabled.`,
2113
+ retriesExhausted: true,
2114
+ lossyHandoff: false,
2115
+ });
2116
+ retryCount = 0;
2117
+ return;
2118
+ }
1831
2119
  if (retryCount >= MAX_RETRIES) {
1832
2120
  ctx.traceInfo(`[orch] max retries exhausted for turn error, waiting for user input`);
1833
2121
  publishStatus("error", {
@@ -1837,13 +2125,20 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
1837
2125
  retryCount = 0;
1838
2126
  return;
1839
2127
  }
2128
+ const errorRetryDelay = 15 * Math.pow(2, retryCount - 1);
1840
2129
  publishStatus("error", {
1841
- error: `${result.message} (retry ${retryCount}/${MAX_RETRIES})`,
2130
+ error: `${result.message} (retry ${retryCount}/${MAX_RETRIES} in ${errorRetryDelay}s)`,
1842
2131
  });
1843
- const errorRetryDelay = 15 * Math.pow(2, retryCount - 1);
1844
2132
  ctx.traceInfo(`[orch] retrying in ${errorRetryDelay}s after turn error`);
1845
2133
  if (blobEnabled) {
1846
- yield* dehydrateForNextTurn("error");
2134
+ yield* dehydrateForNextTurn("error", true, {
2135
+ detail: result.message,
2136
+ error: result.message,
2137
+ phase: "turn.result.error",
2138
+ retryAttempt: retryCount,
2139
+ maxRetries: MAX_RETRIES,
2140
+ retryDelaySeconds: errorRetryDelay,
2141
+ });
1847
2142
  }
1848
2143
  yield ctx.scheduleTimer(errorRetryDelay * 1000);
1849
2144
  yield* versionedContinueAsNew(continueInput({
@@ -2013,6 +2308,11 @@ export function* durableSessionOrchestration_1_0_33(ctx, input) {
2013
2308
  }
2014
2309
  return true;
2015
2310
  }
2311
+ // Priority 4: buffered child digest — only after user/FIFO work is drained
2312
+ if (pendingChildDigest?.ready && pendingChildDigest.updates.length > 0 && !waitingForAgentIds) {
2313
+ yield* processPendingChildDigest();
2314
+ return true;
2315
+ }
2016
2316
  return false;
2017
2317
  }
2018
2318
  // ═══════════════════════════════════════════════════════════