pilotswarm-sdk 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +6 -0
  2. package/dist/artifact-tools.d.ts.map +1 -1
  3. package/dist/artifact-tools.js +20 -5
  4. package/dist/artifact-tools.js.map +1 -1
  5. package/dist/blob-store.d.ts +6 -4
  6. package/dist/blob-store.d.ts.map +1 -1
  7. package/dist/blob-store.js +55 -12
  8. package/dist/blob-store.js.map +1 -1
  9. package/dist/client.d.ts +4 -1
  10. package/dist/client.d.ts.map +1 -1
  11. package/dist/client.js +4 -0
  12. package/dist/client.js.map +1 -1
  13. package/dist/cms-migrations.d.ts.map +1 -1
  14. package/dist/cms-migrations.js +628 -0
  15. package/dist/cms-migrations.js.map +1 -1
  16. package/dist/cms.d.ts +145 -0
  17. package/dist/cms.d.ts.map +1 -1
  18. package/dist/cms.js +288 -17
  19. package/dist/cms.js.map +1 -1
  20. package/dist/facts-migrations.d.ts.map +1 -1
  21. package/dist/facts-migrations.js +227 -0
  22. package/dist/facts-migrations.js.map +1 -1
  23. package/dist/facts-store.d.ts +21 -0
  24. package/dist/facts-store.d.ts.map +1 -1
  25. package/dist/facts-store.js +34 -1
  26. package/dist/facts-store.js.map +1 -1
  27. package/dist/facts-tools.d.ts +7 -0
  28. package/dist/facts-tools.d.ts.map +1 -1
  29. package/dist/facts-tools.js +29 -2
  30. package/dist/facts-tools.js.map +1 -1
  31. package/dist/index.d.ts +6 -5
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +3 -1
  34. package/dist/index.js.map +1 -1
  35. package/dist/inspect-tools.d.ts +42 -0
  36. package/dist/inspect-tools.d.ts.map +1 -0
  37. package/dist/inspect-tools.js +800 -0
  38. package/dist/inspect-tools.js.map +1 -0
  39. package/dist/managed-session.d.ts.map +1 -1
  40. package/dist/managed-session.js +76 -35
  41. package/dist/managed-session.js.map +1 -1
  42. package/dist/management-client.d.ts +64 -2
  43. package/dist/management-client.d.ts.map +1 -1
  44. package/dist/management-client.js +109 -0
  45. package/dist/management-client.js.map +1 -1
  46. package/dist/orchestration-registry.d.ts.map +1 -1
  47. package/dist/orchestration-registry.js +6 -2
  48. package/dist/orchestration-registry.js.map +1 -1
  49. package/dist/orchestration-version.d.ts +1 -1
  50. package/dist/orchestration-version.js +1 -1
  51. package/dist/orchestration.d.ts +3 -3
  52. package/dist/orchestration.d.ts.map +1 -1
  53. package/dist/orchestration.js +27 -4
  54. package/dist/orchestration.js.map +1 -1
  55. package/dist/orchestration_1_0_43.d.ts +12 -0
  56. package/dist/orchestration_1_0_43.d.ts.map +1 -0
  57. package/dist/orchestration_1_0_43.js +2710 -0
  58. package/dist/orchestration_1_0_43.js.map +1 -0
  59. package/dist/orchestration_1_0_44.d.ts +12 -0
  60. package/dist/orchestration_1_0_44.d.ts.map +1 -0
  61. package/dist/orchestration_1_0_44.js +2710 -0
  62. package/dist/orchestration_1_0_44.js.map +1 -0
  63. package/dist/session-manager.d.ts +9 -0
  64. package/dist/session-manager.d.ts.map +1 -1
  65. package/dist/session-manager.js +40 -3
  66. package/dist/session-manager.js.map +1 -1
  67. package/dist/session-owner-utils.d.ts +25 -0
  68. package/dist/session-owner-utils.d.ts.map +1 -0
  69. package/dist/session-owner-utils.js +82 -0
  70. package/dist/session-owner-utils.js.map +1 -0
  71. package/dist/session-proxy.d.ts +5 -1
  72. package/dist/session-proxy.d.ts.map +1 -1
  73. package/dist/session-proxy.js +70 -8
  74. package/dist/session-proxy.js.map +1 -1
  75. package/dist/session-store.d.ts +38 -6
  76. package/dist/session-store.d.ts.map +1 -1
  77. package/dist/session-store.js +187 -9
  78. package/dist/session-store.js.map +1 -1
  79. package/dist/types.d.ts +19 -1
  80. package/dist/types.d.ts.map +1 -1
  81. package/dist/types.js.map +1 -1
  82. package/dist/worker.d.ts.map +1 -1
  83. package/dist/worker.js +11 -2
  84. package/dist/worker.js.map +1 -1
  85. package/package.json +10 -4
  86. package/plugins/mgmt/agents/agent-tuner.agent.md +222 -0
  87. package/plugins/mgmt/agents/facts-manager.agent.md +8 -1
  88. package/plugins/mgmt/agents/pilotswarm.agent.md +13 -10
  89. package/plugins/mgmt/agents/resourcemgr.agent.md +11 -4
  90. package/plugins/mgmt/agents/sweeper.agent.md +5 -4
  91. package/plugins/mgmt/skills/cost-latency-analysis/SKILL.md +117 -0
  92. package/plugins/mgmt/skills/orchestration-session-lifecycle/SKILL.md +117 -0
  93. package/plugins/mgmt/skills/resourcemgr/SKILL.md +1 -1
  94. package/plugins/mgmt/skills/sweeper/SKILL.md +4 -4
  95. package/plugins/system/agents/default.agent.md +22 -0
@@ -0,0 +1,2710 @@
1
+ import { RESPONSE_VERSION_KEY, COMMAND_VERSION_KEY, RESPONSE_LATEST_KEY, commandResponseKey, } from "./types.js";
2
+ import { SESSION_STATE_MISSING_PREFIX, } from "./types.js";
3
+ import { createSessionProxy, createSessionManagerProxy } from "./session-proxy.js";
4
+ import { DURABLE_SESSION_LATEST_VERSION } from "./orchestration-version.js";
5
+ import { planWaitHandling } from "./wait-affinity.js";
6
/**
 * Publish the session state as the orchestration's custom status.
 *
 * The status label and any extra fields are merged into one object (extra
 * fields win on key collisions) and stored as a JSON string. Clients read
 * this via waitForStatusChange() or getStatus().
 *
 * @param ctx    Orchestration context; only `setCustomStatus` is used here.
 * @param status Status label stored under the `status` key.
 * @param extra  Optional additional fields merged into the payload.
 * @internal
 */
function setStatus(ctx, status, extra) {
    const payload = Object.assign({ status }, extra ?? {});
    const serialized = JSON.stringify(payload);
    ctx.setCustomStatus(serialized);
}
15
/**
 * Copy a context-usage snapshot so the caller can mutate the copy freely.
 *
 * Top-level fields are copied shallowly; the nested `compaction` object, when
 * present, is copied one level deep as well so it is not shared with the input.
 *
 * @param contextUsage Snapshot to copy; any falsy value yields `undefined`.
 * @returns A new object, or `undefined` when there was nothing to copy.
 */
function cloneContextUsage(contextUsage) {
    if (!contextUsage) {
        return undefined;
    }
    const copy = { ...contextUsage };
    if (contextUsage.compaction) {
        copy.compaction = { ...contextUsage.compaction };
    }
    return copy;
}
23
/**
 * Pass a value through only when it is a finite number.
 *
 * `Number.isFinite` does not coerce, so non-number inputs (including numeric
 * strings), `NaN` and the infinities all yield `undefined`.
 *
 * @param value Any value.
 * @returns The same number, or `undefined`.
 */
function finiteNumber(value) {
    if (Number.isFinite(value)) {
        return value;
    }
    return undefined;
}
26
/**
 * Pass a value through only when it is a real boolean.
 *
 * @param value Any value.
 * @returns `true` or `false` unchanged; `undefined` for everything else
 *          (truthy/falsy non-booleans are not coerced).
 */
function optionalBoolean(value) {
    if (value === true || value === false) {
        return value;
    }
    return undefined;
}
29
/**
 * Tell whether a sub-agent status is one of the terminal values.
 *
 * @param status Status value to test.
 * @returns `true` for exactly "completed", "failed" or "cancelled";
 *          `false` for anything else.
 */
function isSubAgentTerminalStatus(status) {
    switch (status) {
        case "completed":
        case "failed":
        case "cancelled":
            return true;
        default:
            return false;
    }
}
32
// Retry count quoted in the connection-closed retry/handoff messages below.
const COPILOT_CONNECTION_CLOSED_MAX_RETRIES = 3;
// Delay, in seconds, quoted in the connection-closed retry message below.
const COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS = 15;
/**
 * Detect the "Connection is closed" transport error by its message text.
 *
 * The input is stringified first (falsy values become the empty string), then
 * matched case-insensitively for the phrase on word boundaries.
 *
 * @param message Error message, or any value that stringifies to one.
 * @returns `true` when the phrase is present.
 */
function isCopilotConnectionClosedError(message) {
    const text = String(message || "");
    const connectionClosedPattern = /\bConnection is closed\b/i;
    return connectionClosedPattern.test(text);
}
37
/**
 * Build the status detail shown while retrying after a lost connection.
 *
 * @param retryAttempt Number of the retry being announced.
 * @returns A sentence of the form
 *          "Live Copilot connection lost; retry <attempt>/<max> in <delay>s."
 */
function buildConnectionClosedRetryDetail(retryAttempt) {
    const progress = `${retryAttempt}/${COPILOT_CONNECTION_CLOSED_MAX_RETRIES}`;
    const delay = `${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s`;
    return `Live Copilot connection lost; retry ${progress} in ${delay}.`;
}
40
/**
 * Build the summary text used when retries are exhausted and the session is
 * dehydrated for handoff to a new worker.
 *
 * @param errorMessage Last error text, appended verbatim.
 * @returns The summary as a single line.
 */
function buildLossyHandoffSummary(errorMessage) {
    const sentences = [
        `Live Copilot connection stayed closed after ${COPILOT_CONNECTION_CLOSED_MAX_RETRIES} retries;`,
        `dehydrating for handoff to a new worker.`,
        `Last error: ${errorMessage}`,
    ];
    return sentences.join(" ");
}
44
/**
 * Build the rehydration message for a session that was handed off after the
 * previous worker exhausted its connection retries.
 *
 * @param errorMessage Last transport error text, appended verbatim.
 * @returns The message as a single line.
 */
function buildLossyHandoffRehydrationMessage(errorMessage) {
    const sentences = [
        `The previous worker lost the live Copilot connection and handed this session off after ${COPILOT_CONNECTION_CLOSED_MAX_RETRIES} retries.`,
        `The LLM conversation history is preserved.`,
        `Review the latest durable context and continue carefully.`,
        `Last transport error: ${errorMessage}`,
    ];
    return sentences.join(" ");
}
49
/**
 * Fold a batch of session events into a context-usage snapshot.
 *
 * Works on a copy of `previous` (via cloneContextUsage) and returns it; the
 * input snapshot is not modified. Events that are not objects, or that lack an
 * `eventType` or an object `data`, are ignored. Recognised event types:
 *
 *  - "session.usage_info": requires finite `tokenLimit`, `currentTokens` and
 *    `messagesLength`; (re)establishes the snapshot and its utilization.
 *  - "assistant.usage": records the last input/output/cache token counts.
 *  - "session.compaction_start": marks compaction as running.
 *  - "session.compaction_complete": records the compaction outcome and
 *    applies the post-compaction numbers to the snapshot.
 *
 * Until a snapshot exists, everything except "session.usage_info" is skipped.
 *
 * @param previous   Earlier snapshot, or a falsy value when there is none.
 * @param events     Array of `{ eventType, data }` records; a non-array or an
 *                   empty array returns the copied snapshot unchanged.
 * @param observedAt Value stamped into `updatedAt` / `startedAt` / `completedAt`.
 * @returns The updated snapshot, or `undefined` if none could be established.
 */
function updateContextUsageFromEvents(previous, events, observedAt) {
    // Copies each finite numeric `source[from]` onto `target[to]`, in list
    // order, and reports what was copied keyed by the source field name.
    const copyFiniteFields = (target, source, fieldPairs) => {
        const copied = {};
        for (const [from, to] of fieldPairs) {
            const value = finiteNumber(source[from]);
            if (value != null) {
                target[to] = value;
                copied[from] = value;
            }
        }
        return copied;
    };
    // Token breakdown fields shared by usage_info and compaction_complete.
    const breakdownFields = [
        ["systemTokens", "systemTokens"],
        ["conversationTokens", "conversationTokens"],
        ["toolDefinitionsTokens", "toolDefinitionsTokens"],
    ];

    let usage = cloneContextUsage(previous);
    if (!Array.isArray(events) || events.length === 0) {
        return usage;
    }

    for (const event of events) {
        if (!event || typeof event !== "object") {
            continue;
        }
        const { eventType, data } = event;
        if (!eventType || !data || typeof data !== "object") {
            continue;
        }
        // Only usage_info can create a snapshot; the rest need one to update.
        if (eventType !== "session.usage_info" && !usage) {
            continue;
        }

        switch (eventType) {
            case "session.usage_info": {
                const tokenLimit = finiteNumber(data.tokenLimit);
                const currentTokens = finiteNumber(data.currentTokens);
                const messagesLength = finiteNumber(data.messagesLength);
                if (tokenLimit == null || currentTokens == null || messagesLength == null) {
                    break;
                }
                usage = {
                    ...(usage ?? {}),
                    tokenLimit,
                    currentTokens,
                    utilization: tokenLimit > 0 ? currentTokens / tokenLimit : 0,
                    messagesLength,
                    updatedAt: observedAt,
                };
                copyFiniteFields(usage, data, breakdownFields);
                const isInitial = optionalBoolean(data.isInitial);
                if (isInitial != null) {
                    usage.isInitial = isInitial;
                }
                break;
            }
            case "assistant.usage": {
                copyFiniteFields(usage, data, [
                    ["inputTokens", "lastInputTokens"],
                    ["outputTokens", "lastOutputTokens"],
                    ["cacheReadTokens", "lastCacheReadTokens"],
                    ["cacheWriteTokens", "lastCacheWriteTokens"],
                ]);
                usage.updatedAt = observedAt;
                break;
            }
            case "session.compaction_start": {
                usage.compaction = {
                    ...(usage.compaction ?? { state: "idle" }),
                    state: "running",
                    startedAt: observedAt,
                    completedAt: undefined,
                    error: undefined,
                };
                usage.updatedAt = observedAt;
                break;
            }
            case "session.compaction_complete": {
                const compaction = {
                    ...(usage.compaction ?? { state: "idle" }),
                    state: data.success === false ? "failed" : "succeeded",
                    completedAt: observedAt,
                };
                if (typeof data.error === "string" && data.error) {
                    compaction.error = data.error;
                }
                else {
                    delete compaction.error;
                }
                // `reported` holds only what THIS event carried; values left on
                // `compaction` from an earlier run must not drive the updates below.
                const reported = copyFiniteFields(compaction, data, [
                    ["preCompactionTokens", "preCompactionTokens"],
                    ["postCompactionTokens", "postCompactionTokens"],
                    ["preCompactionMessagesLength", "preCompactionMessagesLength"],
                    ["messagesRemoved", "messagesRemoved"],
                    ["tokensRemoved", "tokensRemoved"],
                    ...breakdownFields,
                ]);
                const tokensUsed = data.compactionTokensUsed;
                if (tokensUsed && typeof tokensUsed === "object") {
                    copyFiniteFields(compaction, tokensUsed, [
                        ["input", "inputTokens"],
                        ["output", "outputTokens"],
                        ["cachedInput", "cachedInputTokens"],
                    ]);
                }
                if (reported.postCompactionTokens != null) {
                    usage.currentTokens = reported.postCompactionTokens;
                    usage.utilization = usage.tokenLimit > 0
                        ? reported.postCompactionTokens / usage.tokenLimit
                        : 0;
                }
                if (reported.preCompactionMessagesLength != null && reported.messagesRemoved != null) {
                    usage.messagesLength = Math.max(0, reported.preCompactionMessagesLength - reported.messagesRemoved);
                }
                copyFiniteFields(usage, reported, breakdownFields);
                usage.compaction = compaction;
                usage.updatedAt = observedAt;
                break;
            }
            default:
                break;
        }
    }
    return usage;
}
185
+ /**
186
+ * Flat event loop durable session orchestration (v1.0.44).
187
+ *
188
+ * Replaces the nested while loops of v1.0.31 with a single
189
+ * drain → decide → process loop backed by a KV FIFO work buffer.
190
+ *
191
+ * @internal
192
+ */
193
+ export const CURRENT_ORCHESTRATION_VERSION = "1.0.44";
194
+ export function* durableSessionOrchestration_1_0_44(ctx, input) {
195
+ const sourceOrchestrationVersion = typeof input.sourceOrchestrationVersion === "string" && input.sourceOrchestrationVersion
196
+ ? input.sourceOrchestrationVersion
197
+ : CURRENT_ORCHESTRATION_VERSION;
198
+ const rawTraceInfo = typeof ctx.traceInfo === "function" ? ctx.traceInfo.bind(ctx) : null;
199
+ if (rawTraceInfo) {
200
+ const versionPrefix = sourceOrchestrationVersion === CURRENT_ORCHESTRATION_VERSION
201
+ ? `[v${CURRENT_ORCHESTRATION_VERSION}]`
202
+ : `[v${CURRENT_ORCHESTRATION_VERSION} from=${sourceOrchestrationVersion}]`;
203
+ ctx.traceInfo = (message) => rawTraceInfo(`${versionPrefix} ${message}`);
204
+ }
205
+ const dehydrateThreshold = input.dehydrateThreshold ?? 29;
206
+ const idleTimeout = input.idleTimeout ?? 60;
207
+ const inputGracePeriod = input.inputGracePeriod ?? 30;
208
+ const checkpointInterval = input.checkpointInterval ?? -1;
209
+ let pendingRehydrationMessage = input.rehydrationMessage;
210
+ const blobEnabled = input.blobEnabled ?? false;
211
+ let needsHydration = input.needsHydration ?? false;
212
+ let affinityKey = input.affinityKey ?? input.sessionId;
213
+ let preserveAffinityOnHydrate = input.preserveAffinityOnHydrate ?? false;
214
+ let iteration = input.iteration ?? 0;
215
+ let config = { ...input.config };
216
+ let retryCount = input.retryCount ?? 0;
217
+ let taskContext = input.taskContext;
218
+ const baseSystemMessage = input.baseSystemMessage ?? config.systemMessage;
219
+ const isSystem = input.isSystem ?? false;
220
+ let cronSchedule = input.cronSchedule ? { ...input.cronSchedule } : undefined;
221
+ let contextUsage = cloneContextUsage(input.contextUsage);
222
+ const MAX_RETRIES = 3;
223
+ const MAX_SUB_AGENTS = 20;
224
+ const MAX_NESTING_LEVEL = 2;
225
+ const CHILD_UPDATE_BATCH_MS = 30_000;
226
+ const SHUTDOWN_TIMEOUT_MS = 60_000;
227
+ const SHUTDOWN_POLL_INTERVAL_MS = 5_000;
228
+ // ─── Sub-agent tracking ──────────────────────────────────
229
+ let subAgents = input.subAgents ? [...input.subAgents] : [];
230
+ let pendingToolActions = input.pendingToolActions ? [...input.pendingToolActions] : [];
231
+ const parentSessionId = input.parentSessionId
232
+ ?? (input.parentOrchId ? input.parentOrchId.replace(/^session-/, '') : undefined);
233
+ const nestingLevel = input.nestingLevel ?? 0;
234
+ if (taskContext) {
235
+ const base = typeof baseSystemMessage === 'string'
236
+ ? baseSystemMessage ?? ''
237
+ : baseSystemMessage?.content ?? '';
238
+ config.systemMessage = base + (base ? '\n\n' : '') +
239
+ '[RECURRING TASK]\n' +
240
+ 'Original user request (always remember, even if conversation history is truncated):\n"' +
241
+ taskContext + '"';
242
+ }
243
+ // ─── Title summarization timer ───────────────────────────
244
+ let nextSummarizeAt = input.nextSummarizeAt ?? 0;
245
+ // ─── Create proxies ──────────────────────────────────────
246
+ const manager = createSessionManagerProxy(ctx);
247
+ let session = createSessionProxy(ctx, input.sessionId, affinityKey, config);
248
+ function writeJsonValue(key, value) {
249
+ ctx.setValue(key, JSON.stringify(value));
250
+ }
251
+ function readCounter(key) {
252
+ const raw = ctx.getValue(key);
253
+ if (raw == null)
254
+ return 0;
255
+ const parsed = Number(raw);
256
+ return Number.isFinite(parsed) ? parsed : 0;
257
+ }
258
+ function bumpCounter(key) {
259
+ const next = readCounter(key) + 1;
260
+ ctx.setValue(key, String(next));
261
+ return next;
262
+ }
263
+ let lastResponseVersion = readCounter(RESPONSE_VERSION_KEY);
264
+ let lastCommandVersion = readCounter(COMMAND_VERSION_KEY);
265
+ let lastCommandId;
266
+ function publishStatus(status, extra = {}) {
267
+ const signal = {
268
+ iteration,
269
+ ...(lastResponseVersion > 0 ? { responseVersion: lastResponseVersion } : {}),
270
+ ...(lastCommandVersion > 0 ? { commandVersion: lastCommandVersion } : {}),
271
+ ...(lastCommandId ? { commandId: lastCommandId } : {}),
272
+ ...(cronSchedule
273
+ ? {
274
+ cronActive: true,
275
+ cronInterval: cronSchedule.intervalSeconds,
276
+ cronReason: cronSchedule.reason,
277
+ }
278
+ : { cronActive: false }),
279
+ ...(contextUsage ? { contextUsage } : {}),
280
+ ...extra,
281
+ };
282
+ setStatus(ctx, status, signal);
283
+ }
284
+ function* writeLatestResponse(payload) {
285
+ const version = bumpCounter(RESPONSE_VERSION_KEY);
286
+ const emittedAt = yield ctx.utcNow();
287
+ const responsePayload = {
288
+ schemaVersion: 1,
289
+ version,
290
+ emittedAt,
291
+ ...payload,
292
+ };
293
+ writeJsonValue(RESPONSE_LATEST_KEY, responsePayload);
294
+ lastResponseVersion = version;
295
+ return responsePayload;
296
+ }
297
+ function* writeCommandResponse(response) {
298
+ const version = bumpCounter(COMMAND_VERSION_KEY);
299
+ const emittedAt = yield ctx.utcNow();
300
+ const payload = {
301
+ ...response,
302
+ schemaVersion: 1,
303
+ version,
304
+ emittedAt,
305
+ };
306
+ writeJsonValue(commandResponseKey(response.id), payload);
307
+ lastCommandVersion = version;
308
+ lastCommandId = response.id;
309
+ yield manager.recordSessionEvent(input.sessionId, [{
310
+ eventType: "session.command_completed",
311
+ data: { cmd: response.cmd, id: response.id },
312
+ }]);
313
+ return payload;
314
+ }
315
+ // ─── Helper: wrap prompt with resume context after dehydration ──
316
+ function wrapWithResumeContext(userPrompt, extra) {
317
+ const base = pendingRehydrationMessage ??
318
+ `The session was dehydrated and has been rehydrated on a new worker. ` +
319
+ `The LLM conversation history is preserved.`;
320
+ pendingRehydrationMessage = undefined;
321
+ const parts = [userPrompt, ``, `[SYSTEM: ${base}`];
322
+ if (extra)
323
+ parts.push(extra);
324
+ parts.push(`]`);
325
+ return parts.join('\n');
326
+ }
327
+ function mergePrompt(existingPrompt, nextPrompt) {
328
+ if (!existingPrompt)
329
+ return nextPrompt;
330
+ if (!nextPrompt)
331
+ return existingPrompt;
332
+ return `${existingPrompt}\n\n${nextPrompt}`;
333
+ }
334
+ const INTERNAL_SYSTEM_TURN_PROMPT = "Internal orchestration wake-up. The user did not send a new message. Continue with the latest system instructions.";
335
+ function extractPromptSystemContext(rawPrompt) {
336
+ if (!rawPrompt)
337
+ return {};
338
+ const trimmed = rawPrompt.trim();
339
+ if (trimmed.startsWith("[SYSTEM:") && trimmed.endsWith("]")) {
340
+ return {
341
+ systemPrompt: trimmed.slice("[SYSTEM:".length, -1).trim(),
342
+ };
343
+ }
344
+ const marker = rawPrompt.lastIndexOf("\n\n[SYSTEM:");
345
+ if (marker >= 0 && rawPrompt.trimEnd().endsWith("]")) {
346
+ const prompt = rawPrompt.slice(0, marker).trim();
347
+ const systemPrompt = rawPrompt.slice(marker + 2).trim();
348
+ return {
349
+ ...(prompt ? { prompt } : {}),
350
+ systemPrompt: systemPrompt.slice("[SYSTEM:".length, -1).trim(),
351
+ };
352
+ }
353
+ return { prompt: rawPrompt };
354
+ }
355
+ function appendSystemContext(rawPrompt, extraSystemPrompt) {
356
+ if (!extraSystemPrompt)
357
+ return rawPrompt;
358
+ const extracted = extractPromptSystemContext(rawPrompt);
359
+ const mergedSystemPrompt = mergePrompt(extracted.systemPrompt, extraSystemPrompt);
360
+ if (!mergedSystemPrompt)
361
+ return extracted.prompt ?? rawPrompt;
362
+ if (extracted.prompt) {
363
+ return `${extracted.prompt}\n\n[SYSTEM: ${mergedSystemPrompt}]`;
364
+ }
365
+ return `[SYSTEM: ${mergedSystemPrompt}]`;
366
+ }
367
+ function ensureTaskContext(sourcePrompt) {
368
+ if (taskContext || !sourcePrompt)
369
+ return;
370
+ taskContext = sourcePrompt.slice(0, 2000);
371
+ const base = typeof baseSystemMessage === "string"
372
+ ? baseSystemMessage ?? ""
373
+ : baseSystemMessage?.content ?? "";
374
+ config.systemMessage = base + (base ? "\n\n" : "") +
375
+ "[RECURRING TASK]\n" +
376
+ "Original user request (always remember, even if conversation history is truncated):\n\"" +
377
+ taskContext + "\"";
378
+ }
379
+ function applyCronAction(action, sourcePrompt) {
380
+ interruptedCronTimer = null;
381
+ if (action.action === "cancel") {
382
+ ctx.traceInfo("[orch] cron cancelled");
383
+ cronSchedule = undefined;
384
+ return;
385
+ }
386
+ ensureTaskContext(sourcePrompt);
387
+ cronSchedule = {
388
+ intervalSeconds: action.intervalSeconds,
389
+ reason: action.reason,
390
+ };
391
+ ctx.traceInfo(`[orch] cron scheduled: every ${action.intervalSeconds}s (${action.reason})`);
392
+ }
393
+ function drainLeadingQueuedCronActions(sourcePrompt) {
394
+ while (pendingToolActions[0]?.type === "cron") {
395
+ applyCronAction(pendingToolActions.shift(), sourcePrompt);
396
+ }
397
+ }
398
+ // ─── Shared continueAsNew input builder ──────────────────
399
+ function continueInput(overrides = {}) {
400
+ const { prompt: overridePrompt, requiredTool: overrideRequiredTool, systemPrompt: overrideSystemPrompt, bootstrapPrompt: overrideBootstrapPrompt, rehydrationMessage: overrideRehydrationMessage, ...restOverrides } = overrides;
401
+ const carriedPrompt = overridePrompt ?? pendingPrompt;
402
+ const carriedRequiredTool = overrideRequiredTool ?? pendingRequiredTool;
403
+ const carriedSystemPrompt = overrideSystemPrompt ?? pendingSystemPrompt;
404
+ const carriedRehydrationMessage = overrideRehydrationMessage ?? pendingRehydrationMessage;
405
+ const promptForInput = carriedPrompt ?? (carriedSystemPrompt ? INTERNAL_SYSTEM_TURN_PROMPT : undefined);
406
+ const bootstrapForInput = overrideBootstrapPrompt
407
+ ?? (carriedPrompt ? bootstrapPrompt : carriedSystemPrompt ? true : undefined);
408
+ return {
409
+ sessionId: input.sessionId,
410
+ config,
411
+ iteration,
412
+ affinityKey,
413
+ preserveAffinityOnHydrate,
414
+ needsHydration,
415
+ blobEnabled,
416
+ dehydrateThreshold,
417
+ idleTimeout,
418
+ inputGracePeriod,
419
+ checkpointInterval,
420
+ ...(carriedRehydrationMessage ? { rehydrationMessage: carriedRehydrationMessage } : {}),
421
+ nextSummarizeAt,
422
+ taskContext,
423
+ baseSystemMessage,
424
+ ...(cronSchedule ? { cronSchedule } : {}),
425
+ ...(contextUsage ? { contextUsage } : {}),
426
+ ...(carriedSystemPrompt ? { systemPrompt: carriedSystemPrompt } : {}),
427
+ ...(promptForInput ? { prompt: promptForInput } : {}),
428
+ ...(carriedRequiredTool ? { requiredTool: carriedRequiredTool } : {}),
429
+ ...(promptForInput && bootstrapForInput !== undefined ? { bootstrapPrompt: bootstrapForInput } : {}),
430
+ subAgents,
431
+ ...(pendingToolActions.length > 0 ? { pendingToolActions } : {}),
432
+ parentSessionId,
433
+ nestingLevel,
434
+ ...(isSystem ? { isSystem: true } : {}),
435
+ retryCount: 0,
436
+ ...(pendingInputQuestion ? { pendingInputQuestion } : {}),
437
+ ...(waitingForAgentIds ? { waitingForAgentIds } : {}),
438
+ ...(interruptedWaitTimer ? { interruptedWaitTimer } : {}),
439
+ ...(interruptedCronTimer ? { interruptedCronTimer } : {}),
440
+ ...(pendingChildDigest ? { pendingChildDigest } : {}),
441
+ ...(pendingShutdown ? { pendingShutdown } : {}),
442
+ ...restOverrides,
443
+ };
444
+ }
445
+ function continueInputWithPrompt(nextPrompt, overrides = {}) {
446
+ const extracted = extractPromptSystemContext(nextPrompt);
447
+ const mergedPrompt = mergePrompt(pendingPrompt, extracted.prompt);
448
+ const mergedSystemPrompt = mergePrompt(pendingSystemPrompt, extracted.systemPrompt);
449
+ return continueInput({
450
+ ...(mergedPrompt ? { prompt: mergedPrompt } : {}),
451
+ ...(mergedSystemPrompt ? { systemPrompt: mergedSystemPrompt } : {}),
452
+ ...overrides,
453
+ });
454
+ }
455
+ /** Queue a followup prompt for the LLM. In the flat loop, never CANs.
456
+ * Unlike CAN carry-forward, followups go directly into pendingPrompt
457
+ * as user-visible text. The [SYSTEM: ...] wrapper is stripped so
458
+ * processPrompt doesn't extract it into turnSystemPrompt (which would
459
+ * create a "Continue with system instructions" prompt that loops). */
460
+ function queueFollowup(nextPrompt) {
461
+ // Strip [SYSTEM: ...] wrapper — tool results should be visible prompt text
462
+ let text = nextPrompt;
463
+ const trimmed = text.trim();
464
+ if (trimmed.startsWith("[SYSTEM:") && trimmed.endsWith("]")) {
465
+ text = trimmed.slice("[SYSTEM:".length, -1).trim();
466
+ }
467
+ pendingPrompt = mergePrompt(pendingPrompt, text);
468
+ }
469
+ function* ensureWarmResumeCheckpoint() {
470
+ if (!blobEnabled)
471
+ return;
472
+ try {
473
+ ctx.traceInfo(`[orch] checkpoint before warm continueAsNew (iteration=${iteration})`);
474
+ yield session.checkpoint();
475
+ }
476
+ catch (err) {
477
+ ctx.traceInfo(`[orch] warm continueAsNew checkpoint failed: ${err.message ?? err}`);
478
+ }
479
+ }
480
+ /** Yield this to continueAsNew into the current (latest) orchestration version. */
481
+ function* versionedContinueAsNew(canInput) {
482
+ // Carry active timer state across CAN
483
+ if (activeTimer) {
484
+ const now = yield ctx.utcNow();
485
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
486
+ canInput.activeTimerState = {
487
+ remainingMs,
488
+ reason: activeTimer.reason,
489
+ type: activeTimer.type,
490
+ originalDurationMs: activeTimer.originalDurationMs,
491
+ ...(activeTimer.shouldRehydrate ? { shouldRehydrate: true } : {}),
492
+ ...(activeTimer.waitPlan ? { waitPlan: activeTimer.waitPlan } : {}),
493
+ ...(activeTimer.content ? { content: activeTimer.content } : {}),
494
+ ...(activeTimer.question ? { question: activeTimer.question } : {}),
495
+ ...(activeTimer.choices ? { choices: activeTimer.choices } : {}),
496
+ ...(activeTimer.allowFreeform !== undefined ? { allowFreeform: activeTimer.allowFreeform } : {}),
497
+ ...(activeTimer.agentIds ? { agentIds: activeTimer.agentIds } : {}),
498
+ };
499
+ }
500
+ if (!canInput.needsHydration) {
501
+ yield* ensureWarmResumeCheckpoint();
502
+ }
503
+ canInput.sourceOrchestrationVersion = CURRENT_ORCHESTRATION_VERSION;
504
+ yield ctx.continueAsNewVersioned(canInput, DURABLE_SESSION_LATEST_VERSION);
505
+ }
506
+ function parseChildUpdate(promptText) {
507
+ if (typeof promptText !== "string")
508
+ return null;
509
+ const match = promptText.match(/^\[CHILD_UPDATE from=(\S+) type=(\S+)/);
510
+ if (!match)
511
+ return null;
512
+ return {
513
+ sessionId: match[1],
514
+ updateType: match[2].replace(/\]$/, ""),
515
+ content: promptText.split("\n").slice(1).join("\n").trim(),
516
+ };
517
+ }
518
+ function bufferChildUpdate(update, observedAtMs) {
519
+ if (!pendingChildDigest) {
520
+ pendingChildDigest = {
521
+ startedAtMs: observedAtMs,
522
+ updates: [],
523
+ };
524
+ }
525
+ const nextEntry = {
526
+ sessionId: update.sessionId,
527
+ updateType: update.updateType,
528
+ ...(update.content ? { content: update.content.slice(0, 2000) } : {}),
529
+ observedAtMs,
530
+ };
531
+ const existingIndex = pendingChildDigest.updates.findIndex((entry) => entry.sessionId === update.sessionId);
532
+ if (existingIndex >= 0) {
533
+ pendingChildDigest.updates[existingIndex] = nextEntry;
534
+ }
535
+ else {
536
+ pendingChildDigest.updates.push(nextEntry);
537
+ }
538
+ }
539
+ function clearPendingChildDigest() {
540
+ pendingChildDigest = null;
541
+ }
542
+ function buildPendingChildDigestSystemPrompt() {
543
+ if (!pendingChildDigest || pendingChildDigest.updates.length === 0)
544
+ return undefined;
545
+ const lines = pendingChildDigest.updates.map((update) => {
546
+ const agent = subAgents.find((entry) => entry.sessionId === update.sessionId);
547
+ const label = agent?.orchId ?? update.sessionId;
548
+ const task = agent?.task ? `Task: "${agent.task.slice(0, 120)}"\n` : "";
549
+ const status = agent?.status ?? update.updateType;
550
+ const resultText = String(update.content || agent?.result || "").trim();
551
+ const result = resultText ? resultText.slice(0, 240) : "(no summary)";
552
+ return ` - Agent ${label}\n` +
553
+ ` ${task}` +
554
+ ` Update: ${update.updateType}\n` +
555
+ ` Status: ${status}\n` +
556
+ ` Result: ${result}`;
557
+ });
558
+ return `Buffered child updates arrived during the last 30 seconds:\n${lines.join("\n")}\nReview the updates and continue your task.`;
559
+ }
560
+ function flushPendingChildDigestIntoPrompt(rawPrompt) {
561
+ const childDigestPrompt = buildPendingChildDigestSystemPrompt();
562
+ if (!childDigestPrompt)
563
+ return rawPrompt;
564
+ clearPendingChildDigest();
565
+ return appendSystemContext(rawPrompt, childDigestPrompt);
566
+ }
567
+ function* processPendingChildDigest() {
568
+ const digestPrompt = buildPendingChildDigestSystemPrompt();
569
+ if (!digestPrompt) {
570
+ clearPendingChildDigest();
571
+ return;
572
+ }
573
+ if (activeTimer?.type === "wait") {
574
+ const now = yield ctx.utcNow();
575
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
576
+ const remainingSec = Math.round(remainingMs / 1000);
577
+ const elapsedMs = activeTimer.originalDurationMs - remainingMs;
578
+ const elapsedSec = Math.round(elapsedMs / 1000);
579
+ const totalSec = Math.round(activeTimer.originalDurationMs / 1000);
580
+ interruptedWaitTimer = {
581
+ remainingSec,
582
+ reason: activeTimer.reason,
583
+ shouldRehydrate: activeTimer.shouldRehydrate ?? false,
584
+ waitPlan: activeTimer.waitPlan,
585
+ interruptKind: "child",
586
+ };
587
+ activeTimer = null;
588
+ clearPendingChildDigest();
589
+ yield* processPrompt(`[SYSTEM: Buffered child updates interrupted your ${totalSec}s timer (reason: "${interruptedWaitTimer.reason}"). ` +
590
+ `${elapsedSec}s elapsed, ${remainingSec}s remain. ` +
591
+ `Review the updates and continue your task now. The remaining wait will be resumed automatically after this turn completes.\n\n${digestPrompt}]`, true);
592
+ return;
593
+ }
594
+ if (activeTimer?.type === "cron") {
595
+ const activeCron = cronSchedule;
596
+ const now = yield ctx.utcNow();
597
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
598
+ interruptedCronTimer = {
599
+ remainingMs,
600
+ reason: activeTimer.reason,
601
+ originalDurationMs: activeTimer.originalDurationMs,
602
+ ...(activeTimer.shouldRehydrate ? { shouldRehydrate: true } : {}),
603
+ };
604
+ activeTimer = null;
605
+ clearPendingChildDigest();
606
+ yield* processPrompt(`[SYSTEM: This is an internal orchestration wake-up caused by child session updates; the user did not send a new message. ` +
607
+ `Buffered child updates arrived while your recurring schedule was waiting for the next wake-up${activeCron ? ` ("${activeCron.reason}")` : ""}. ` +
608
+ `Review the updates and continue your task now. The recurring cron schedule remains active and will be re-armed automatically after this turn completes.\n\n${digestPrompt}]`, true);
609
+ return;
610
+ }
611
+ if (activeTimer?.type === "idle") {
612
+ activeTimer = null;
613
+ }
614
+ else if (activeTimer?.type === "agent-poll") {
615
+ waitingForAgentIds = null;
616
+ activeTimer = null;
617
+ }
618
+ clearPendingChildDigest();
619
+ yield* processPrompt(`[SYSTEM: ${digestPrompt}]`, true);
620
+ }
621
/**
 * Apply one child-session update to the matching tracked sub-agent.
 *
 * The update's own content/type is applied first; the child's status is then
 * re-read through `manager.getSessionStatus`, and a recognised status from
 * that lookup overrides the one derived from the update type.
 *
 * @param {{sessionId: string, updateType: string, content?: string}} update
 * @returns {Generator} yields the status-lookup activity; returns nothing.
 */
function* applyChildUpdate(update) {
    ctx.traceInfo(`[orch] child update from=${update.sessionId} type=${update.updateType}`);
    const agent = subAgents.find((a) => a.sessionId === update.sessionId);
    if (!agent)
        return;
    if (update.content) {
        // Stored results are capped at 2000 characters.
        agent.result = update.content.slice(0, 2000);
    }
    switch (update.updateType) {
        case "completed":
            agent.status = "completed";
            break;
        case "cancelled":
        case "deleted":
            agent.status = "cancelled";
            break;
        case "failed":
            agent.status = "failed";
            break;
        default:
            break;
    }
    try {
        const rawStatus = yield manager.getSessionStatus(agent.sessionId);
        // Accept either a JSON string or an already-parsed object.
        const parsed = typeof rawStatus === "string" ? JSON.parse(rawStatus) : rawStatus;
        switch (parsed.status) {
            case "failed":
            case "completed":
            case "cancelled":
            case "waiting":
                agent.status = parsed.status;
                break;
            default:
                break;
        }
        if (typeof parsed.result === "string" && parsed.result && parsed.result !== "done") {
            agent.result = parsed.result.slice(0, 2000);
        }
    }
    catch (err) {
        // Best-effort refresh: keep what the update itself told us, but leave a trace
        // instead of dropping the failure silently.
        ctx.traceInfo(`[orch] child status lookup failed for ${agent.sessionId} (non-fatal): ${err?.message ?? err}`);
    }
}
659
/**
 * Rebuild `subAgents` from the manager's list of direct child sessions.
 *
 * System children are skipped. Each remaining child is merged with any entry
 * already tracked under the same sessionId or orchId. Failures are traced and
 * leave the current list untouched.
 */
function* refreshTrackedSubAgents() {
    const recognisedStatuses = ["failed", "cancelled", "waiting", "completed"];
    try {
        const rawChildren = yield manager.listChildSessions(input.sessionId);
        const children = JSON.parse(rawChildren);
        const rebuilt = [];
        for (const child of children) {
            if (child.isSystem)
                continue;
            const prior = subAgents.find((tracked) => tracked.sessionId === child.sessionId || tracked.orchId === child.orchId);
            const reported = child.status ?? prior?.status ?? "running";
            rebuilt.push({
                orchId: child.orchId,
                sessionId: child.sessionId,
                task: prior?.task ?? child.title ?? "(spawned sub-agent)",
                // Anything outside the recognised set is treated as still running.
                status: recognisedStatuses.includes(reported) ? reported : "running",
                result: child.result ?? prior?.result,
                agentId: child.agentId ?? prior?.agentId,
            });
        }
        subAgents = rebuilt;
    }
    catch (err) {
        ctx.traceInfo(`[orch] refreshTrackedSubAgents failed (non-fatal): ${err.message ?? err}`);
    }
}
688
/**
 * Build the [SYSTEM: …] follow-up prompt summarising the given sub-agents.
 *
 * @param {string[]} targetIds orchestration ids of the agents that were waited on;
 *   ids with no tracked entry are ignored.
 * @returns {string} a prompt with zero, one, or many agent summaries.
 */
function buildWaitForAgentsFollowup(targetIds) {
    const blocks = [];
    for (const targetId of targetIds) {
        const tracked = subAgents.find((candidate) => candidate.orchId === targetId);
        if (!tracked)
            continue;
        blocks.push(` - Agent ${tracked.orchId}\n` +
            ` Task: "${tracked.task.slice(0, 120)}"\n` +
            ` Status: ${tracked.status}\n` +
            ` Result: ${tracked.result ?? "(no result)"}`);
    }
    switch (blocks.length) {
        case 0:
            return `[SYSTEM: No tracked sub-agents produced a completion summary.]`;
        case 1:
            return `[SYSTEM: Sub-agent completed. If the user asked you to relay the child's final output, return the single sub-agent Result text verbatim.\n${blocks[0]}]`;
        default:
            return `[SYSTEM: Sub-agents completed:\n${blocks.join("\n")}]`;
    }
}
704
+ // ─── Helper: dehydrate and optionally release affinity ───
705
/**
 * Dehydrate the session and update the hydration/affinity bookkeeping.
 *
 * When the dehydrate result carries a `lossyHandoff` object, the handoff is
 * recorded as a session event and no hydration is requested afterwards.
 * Otherwise the next turn is flagged as needing hydration.
 *
 * @param {string} reason why the session is being dehydrated.
 * @param {boolean} [resetAffinity=true] when true, a fresh affinity key is
 *   generated and the session proxy is recreated with it.
 * @param {*} [eventData] passed through to `session.dehydrate`.
 */
function* dehydrateForNextTurn(reason, resetAffinity = true, eventData) {
    ctx.traceInfo(`[orch] dehydrating session (reason=${reason}, resetAffinity=${resetAffinity})`);
    const outcome = yield session.dehydrate(reason, eventData);
    const handoff = outcome?.lossyHandoff;
    const isLossy = Boolean(handoff) && typeof handoff === "object";
    if (isLossy) {
        const handoffText = String(handoff.message || "dehydrate lost the live Copilot session state");
        ctx.traceInfo(`[orch] ${handoffText}`);
        yield manager.recordSessionEvent(input.sessionId, [{
                eventType: "session.lossy_handoff",
                data: handoff,
            }]);
    }
    needsHydration = !isLossy;
    preserveAffinityOnHydrate = isLossy ? false : !resetAffinity;
    if (!resetAffinity)
        return;
    affinityKey = yield ctx.newGuid();
    session = createSessionProxy(ctx, input.sessionId, affinityKey, config);
}
731
+ // ─── Helper: checkpoint without releasing pin ────────────
732
/**
 * Checkpoint the session when blob storage is enabled and the checkpoint
 * interval is not negative. A failed checkpoint is traced, never rethrown.
 */
function* maybeCheckpoint() {
    const checkpointingActive = blobEnabled && !(checkpointInterval < 0);
    if (checkpointingActive) {
        try {
            ctx.traceInfo(`[orch] checkpoint (iteration=${iteration})`);
            yield session.checkpoint();
        }
        catch (err) {
            ctx.traceInfo(`[orch] checkpoint failed: ${err.message ?? err}`);
        }
    }
}
743
+ // ─── Helper: summarize session title if due ──────────────
744
// Delay (ms) between the first maybeSummarize() call and the first summarize attempt.
const FIRST_SUMMARIZE_DELAY = 60_000;
// Delay (ms) between one summarize attempt and the next.
const REPEAT_SUMMARIZE_DELAY = 300_000;
/**
 * Summarize the session title when the next summarize time has been reached.
 *
 * System sessions are never summarized. The first call only schedules the
 * first attempt; later calls run `manager.summarizeSession` once due and then
 * schedule the next attempt, whether or not the attempt succeeded.
 */
function* maybeSummarize() {
    if (isSystem)
        return;
    const now = yield ctx.utcNow();
    if (nextSummarizeAt === 0) {
        nextSummarizeAt = now + FIRST_SUMMARIZE_DELAY;
        return;
    }
    if (now < nextSummarizeAt)
        return;
    try {
        ctx.traceInfo(`[orch] summarizing session title`);
        yield manager.summarizeSession(input.sessionId);
    }
    catch (err) {
        // Tolerate non-Error rejections (strings, null) so the handler itself cannot throw.
        ctx.traceInfo(`[orch] summarize failed: ${err?.message ?? err}`);
    }
    nextSummarizeAt = now + REPEAT_SUMMARIZE_DELAY;
}
765
+ // ─── Prompt carried from continueAsNew ───────────────────
766
+ let pendingPrompt = input.prompt;
767
+ let pendingRequiredTool = input.requiredTool;
768
+ let pendingSystemPrompt = input.systemPrompt;
769
+ let bootstrapPrompt = input.bootstrapPrompt ?? false;
770
+ let activeTimer = null;
771
+ let waitingForAgentIds = input.waitingForAgentIds ?? null;
772
+ let pendingInputQuestion = input.pendingInputQuestion ?? null;
773
+ let orchestrationResult = null;
774
+ /** Saved when a user message interrupts an active wait timer.
775
+ * After the LLM's response turn completes, the orchestration
776
+ * automatically re-arms the remaining wait — no LLM action needed. */
777
+ let interruptedWaitTimer = input.interruptedWaitTimer ?? null;
778
+ let interruptedCronTimer = input.interruptedCronTimer ?? null;
779
+ let pendingChildDigest = input.pendingChildDigest
780
+ ? {
781
+ startedAtMs: input.pendingChildDigest.startedAtMs,
782
+ ...(input.pendingChildDigest.ready ? { ready: true } : {}),
783
+ updates: [...(input.pendingChildDigest.updates || [])],
784
+ }
785
+ : null;
786
+ let pendingShutdown = input.pendingShutdown
787
+ ? {
788
+ ...input.pendingShutdown,
789
+ targetAgentIds: [...(input.pendingShutdown.targetAgentIds || [])],
790
+ }
791
+ : null;
792
+ // Reconstruct active timer from CAN input
793
+ if (input.activeTimerState) {
794
+ const initNow = yield ctx.utcNow();
795
+ activeTimer = {
796
+ deadlineMs: initNow + (input.activeTimerState.remainingMs ?? 0),
797
+ originalDurationMs: input.activeTimerState.originalDurationMs ?? input.activeTimerState.remainingMs ?? 0,
798
+ reason: input.activeTimerState.reason,
799
+ type: input.activeTimerState.type,
800
+ ...(input.activeTimerState.shouldRehydrate ? { shouldRehydrate: true } : {}),
801
+ ...(input.activeTimerState.waitPlan ? { waitPlan: input.activeTimerState.waitPlan } : {}),
802
+ ...(input.activeTimerState.content ? { content: input.activeTimerState.content } : {}),
803
+ ...(input.activeTimerState.question ? { question: input.activeTimerState.question } : {}),
804
+ ...(input.activeTimerState.choices ? { choices: input.activeTimerState.choices } : {}),
805
+ ...(input.activeTimerState.allowFreeform !== undefined ? { allowFreeform: input.activeTimerState.allowFreeform } : {}),
806
+ ...(input.activeTimerState.agentIds ? { agentIds: input.activeTimerState.agentIds } : {}),
807
+ };
808
+ }
809
/**
 * Default human-readable reason for a shutdown command that supplied none.
 *
 * @param {string} mode "done", "cancel" or "delete".
 * @returns {string} the reason text; unknown modes get a generic reason rather
 *   than `undefined`, which callers would otherwise stringify to "undefined".
 */
function defaultShutdownReason(mode) {
    switch (mode) {
        case "done":
            return "Completed by user";
        case "cancel":
            return "Cancelled by user";
        case "delete":
            return "Deleted by user";
        default:
            return "Shut down by user";
    }
}
819
/**
 * Describe what a pending graceful shutdown is waiting for.
 *
 * @param {{mode: string, targetAgentIds?: string[]}} shutdown pending shutdown record.
 * @returns {string} wait reason mentioning the number of targeted child sessions;
 *   unknown modes get a generic sentence instead of `undefined`.
 */
function buildShutdownWaitReason(shutdown) {
    const childCount = shutdown.targetAgentIds?.length ?? 0;
    const waitingFor = `Waiting for ${childCount} child session(s)`;
    switch (shutdown.mode) {
        case "done":
            return `${waitingFor} to complete before closing.`;
        case "cancel":
            return `${waitingFor} to cancel before closing.`;
        case "delete":
            return `${waitingFor} to cancel before deletion.`;
        default:
            return `${waitingFor} to finish before closing.`;
    }
}
829
/**
 * Look up a tracked sub-agent by orchestration id.
 *
 * @param {string} orchId
 * @returns {object|undefined} the first tracked agent with that id, if any.
 */
function findTrackedAgentByOrchId(orchId) {
    for (const tracked of subAgents) {
        if (tracked.orchId === orchId) {
            return tracked;
        }
    }
    return undefined;
}
832
/**
 * True when every listed agent is tracked and in a terminal status.
 * An untracked id counts as not terminal; an empty list yields true.
 *
 * @param {string[]} agentIds orchestration ids to check.
 * @returns {boolean}
 */
function areTrackedAgentsTerminal(agentIds) {
    const hasUnfinished = agentIds.some((agentId) => {
        const tracked = findTrackedAgentByOrchId(agentId);
        return !tracked || !isSubAgentTerminalStatus(tracked.status);
    });
    return !hasUnfinished;
}
838
/**
 * Subset of the given ids whose tracked agent is not yet in a terminal status.
 * Ids with no tracked agent are left out.
 *
 * @param {string[]} agentIds orchestration ids to check.
 * @returns {string[]}
 */
function getStillRunningAgentIds(agentIds) {
    const stillRunning = [];
    for (const agentId of agentIds) {
        const tracked = findTrackedAgentByOrchId(agentId);
        if (tracked && !isSubAgentTerminalStatus(tracked.status)) {
            stillRunning.push(agentId);
        }
    }
    return stillRunning;
}
844
/**
 * Send a CHILD_UPDATE message to the parent session, when there is one.
 * Delivery failures are traced and ignored.
 *
 * @param {string} updateType update type embedded in the message header.
 * @param {string} reason text placed on the line after the header.
 */
function* notifyParentOfTerminalState(updateType, reason) {
    if (parentSessionId) {
        const header = `[CHILD_UPDATE from=${input.sessionId} type=${updateType} iter=${iteration}]`;
        try {
            yield manager.sendToSession(parentSessionId, `${header}\n${reason}`);
        }
        catch (err) {
            ctx.traceInfo(`[orch] sendToSession(parent) on ${updateType} failed: ${err.message} (non-fatal)`);
        }
    }
}
854
/**
 * Finish a "done" shutdown: clear wait state, mark the session completed,
 * notify the parent, destroy the live session, answer the command (when a
 * command id is given) and set the orchestration result to "done".
 *
 * @param {string} reason reason forwarded to the parent notification.
 * @param {string} [commandId] id of the command to acknowledge, if any.
 */
function* completeShutdownSession(reason, commandId) {
    pendingShutdown = null;
    waitingForAgentIds = null;
    clearPendingChildDigest();
    activeTimer = null;
    yield manager.updateCmsState(input.sessionId, "completed", null, null);
    publishStatus("completed");
    yield* notifyParentOfTerminalState("completed", reason);
    try {
        yield session.destroy();
    }
    catch (err) {
        // Destroy stays best-effort, but a failure is now visible in the trace.
        ctx.traceInfo(`[orch] session.destroy() failed during completion: ${err?.message ?? err} (non-fatal)`);
    }
    if (commandId) {
        const resp = {
            id: commandId,
            cmd: "done",
            result: { ok: true, message: "Session completed" },
        };
        yield* writeCommandResponse(resp);
    }
    orchestrationResult = "done";
}
876
/**
 * Finish a "cancel" or "delete" shutdown.
 *
 * Clears wait state, notifies the parent with a "cancelled" update, destroys
 * the live session and answers the command. For a plain cancel the session is
 * marked cancelled; for a delete the descendants and then the session itself
 * are deleted through the manager (each failure is traced and skipped).
 *
 * @param {string} reason reason forwarded to the parent and used for deletion.
 * @param {string} [commandId] id of the command to acknowledge, if any.
 * @param {boolean} [deleteAfterCancel=false] true for the delete flow.
 */
function* cancelShutdownSession(reason, commandId, deleteAfterCancel = false) {
    pendingShutdown = null;
    waitingForAgentIds = null;
    clearPendingChildDigest();
    activeTimer = null;
    const commandName = deleteAfterCancel ? "delete" : "cancel";
    if (!deleteAfterCancel) {
        yield manager.updateCmsState(input.sessionId, "cancelled", null, null);
        publishStatus("cancelled");
    }
    yield* notifyParentOfTerminalState("cancelled", reason);
    try {
        yield session.destroy();
    }
    catch (err) {
        // Destroy stays best-effort, but a failure is now visible in the trace.
        ctx.traceInfo(`[orch] session.destroy() failed during ${commandName}: ${err?.message ?? err} (non-fatal)`);
    }
    if (commandId) {
        const resp = {
            id: commandId,
            cmd: commandName,
            result: {
                ok: true,
                message: deleteAfterCancel ? "Session deleted" : "Session cancelled",
            },
        };
        yield* writeCommandResponse(resp);
    }
    if (!deleteAfterCancel) {
        orchestrationResult = "cancelled";
        return;
    }
    const deleteReason = reason || "Deleted by user";
    let descendants = [];
    try {
        descendants = yield manager.getDescendantSessionIds(input.sessionId);
    }
    catch (err) {
        ctx.traceInfo(`[orch] delete: failed to enumerate descendants: ${err?.message ?? err}`);
    }
    for (const descendantId of descendants) {
        try {
            yield manager.deleteSession(descendantId, `Ancestor ${input.sessionId} deleted: ${deleteReason}`);
        }
        catch (err) {
            ctx.traceInfo(`[orch] delete: failed to delete descendant ${descendantId}: ${err?.message ?? err} (non-fatal)`);
        }
    }
    try {
        yield manager.deleteSession(input.sessionId, deleteReason);
    }
    catch (err) {
        ctx.traceInfo(`[orch] delete: failed to delete ${input.sessionId}: ${err?.message ?? err}`);
    }
    orchestrationResult = "deleted";
}
930
/**
 * Abort the pending shutdown as a failure: clear wait state, destroy the live
 * session, answer the originating command with the error (when it had a
 * command id), mark the session failed and set the result to "failed".
 *
 * @param {string} errorMessage error reported to the command and stored in state.
 */
function* failPendingShutdown(errorMessage) {
    const shutdown = pendingShutdown;
    pendingShutdown = null;
    waitingForAgentIds = null;
    clearPendingChildDigest();
    activeTimer = null;
    try {
        yield session.destroy();
    }
    catch (err) {
        // Destroy stays best-effort, but a failure is now visible in the trace.
        ctx.traceInfo(`[orch] session.destroy() failed during failed shutdown: ${err?.message ?? err} (non-fatal)`);
    }
    if (shutdown?.commandId) {
        const resp = {
            id: shutdown.commandId,
            cmd: shutdown.mode,
            error: errorMessage,
        };
        yield* writeCommandResponse(resp);
    }
    publishStatus("failed", { error: errorMessage });
    yield manager.updateCmsState(input.sessionId, "failed", errorMessage, null);
    orchestrationResult = "failed";
}
952
/**
 * Run the terminal step of the pending shutdown, if there is one:
 * "done" completes the session, any other mode goes through the cancel path
 * (with deletion when the mode is "delete").
 */
function* finalizePendingShutdown() {
    const shutdown = pendingShutdown;
    if (!shutdown) {
        return;
    }
    const { mode, reason, commandId } = shutdown;
    if (mode !== "done") {
        yield* cancelShutdownSession(reason, commandId, mode === "delete");
    }
    else {
        yield* completeShutdownSession(reason, commandId);
    }
}
962
/**
 * Resolve an agent wait once every waited-for agent is terminal.
 *
 * With a shutdown pending, the shutdown is finalized; otherwise a follow-up
 * summarising the agents is queued and the wait state is cleared.
 *
 * @returns {boolean} true when the wait was resolved, false when there is no
 *   wait or some agent is still running.
 */
function* maybeResolveAgentWaitCompletion() {
    const waitIsOver = Boolean(waitingForAgentIds) && areTrackedAgentsTerminal(waitingForAgentIds);
    if (!waitIsOver) {
        return false;
    }
    if (!pendingShutdown) {
        queueFollowup(buildWaitForAgentsFollowup(waitingForAgentIds));
        waitingForAgentIds = null;
        clearPendingChildDigest();
        activeTimer = null;
        return true;
    }
    yield* finalizePendingShutdown();
    return true;
}
976
/**
 * Start a graceful shutdown for a done/cancel/delete command.
 *
 * - If a shutdown is already pending, acknowledge the command and republish
 *   the waiting status with the time left until the existing deadline.
 * - If no tracked child is still running, finish immediately.
 * - Otherwise cascade a "done" or "cancel" command to every running child,
 *   record the pending shutdown and arm an agent-poll timer.
 *
 * @param {string} mode "done", "cancel" or "delete".
 * @param {{id: string, cmd: string, args?: {reason?: string}}} cmdMsg the command message.
 */
function* beginGracefulShutdown(mode, cmdMsg) {
    if (pendingShutdown) {
        const now = yield ctx.utcNow();
        yield* writeCommandResponse({
            id: cmdMsg.id,
            cmd: cmdMsg.cmd,
            result: {
                ok: true,
                message: `Shutdown already in progress (${pendingShutdown.mode}).`,
            },
        });
        const msUntilDeadline = pendingShutdown.deadlineAtMs - now;
        publishStatus("waiting", {
            waitReason: buildShutdownWaitReason(pendingShutdown),
            waitStartedAt: pendingShutdown.startedAtMs,
            waitSeconds: Math.max(0, Math.ceil(msUntilDeadline / 1000)),
        });
        return;
    }
    yield* refreshTrackedSubAgents();
    const shutdownReason = String(cmdMsg.args?.reason || defaultShutdownReason(mode));
    const liveChildren = subAgents.filter((tracked) => !isSubAgentTerminalStatus(tracked.status));
    const isDone = mode === "done";
    if (liveChildren.length === 0) {
        // Nothing to wait for: go straight to the terminal step.
        if (isDone) {
            yield* completeShutdownSession(shutdownReason, cmdMsg.id);
        }
        else {
            yield* cancelShutdownSession(shutdownReason, cmdMsg.id, mode === "delete");
        }
        return;
    }
    // Children are told to finish for "done" and to cancel for cancel/delete.
    const childCmd = isDone ? "done" : "cancel";
    const childReason = isDone ? "Parent session completing" : shutdownReason;
    ctx.traceInfo(`[orch] ${cmdMsg.cmd}: cascading ${childCmd} to ${liveChildren.length} child session(s)`);
    for (const child of liveChildren) {
        try {
            const childCmdId = `${cmdMsg.cmd}-cascade-${iteration}-${child.sessionId.slice(0, 8)}`;
            const cascaded = { type: "cmd", cmd: childCmd, id: childCmdId, args: { reason: childReason } };
            yield manager.sendCommandToSession(child.sessionId, cascaded);
        }
        catch (err) {
            ctx.traceInfo(`[orch] ${cmdMsg.cmd}: failed to signal child ${child.sessionId}: ${err.message} (non-fatal)`);
        }
    }
    const startedAtMs = yield ctx.utcNow();
    pendingShutdown = {
        mode,
        reason: shutdownReason,
        startedAtMs,
        deadlineAtMs: startedAtMs + SHUTDOWN_TIMEOUT_MS,
        targetAgentIds: liveChildren.map((tracked) => tracked.orchId),
        commandId: cmdMsg.id,
    };
    waitingForAgentIds = [...pendingShutdown.targetAgentIds];
    clearPendingChildDigest();
    const waitReason = buildShutdownWaitReason(pendingShutdown);
    activeTimer = {
        deadlineMs: startedAtMs + SHUTDOWN_POLL_INTERVAL_MS,
        originalDurationMs: SHUTDOWN_POLL_INTERVAL_MS,
        reason: waitReason,
        type: "agent-poll",
        agentIds: waitingForAgentIds,
    };
    publishStatus("waiting", {
        waitReason: buildShutdownWaitReason(pendingShutdown),
        waitStartedAt: startedAtMs,
        waitSeconds: Math.ceil(SHUTDOWN_TIMEOUT_MS / 1000),
    });
}
1044
// A `pendingMessage` on the input is a raw queue message carried by an older
// handler version. A prompt-bearing message becomes the pending prompt when no
// prompt is already pending; anything else is kept so the drain loop can route it.
let legacyPendingMessage = undefined;
if (input.pendingMessage) {
    const carriedMessage = input.pendingMessage;
    const adoptAsPrompt = Boolean(carriedMessage.prompt) && !pendingPrompt;
    if (!adoptAsPrompt) {
        legacyPendingMessage = carriedMessage;
    }
    else {
        pendingPrompt = carriedMessage.prompt;
        bootstrapPrompt = Boolean(carriedMessage.bootstrap);
        pendingRequiredTool = carriedMessage.requiredTool;
    }
}
1060
+ // ─── KV FIFO Work Buffer ────────────────────────────────
1061
// Number of KV buckets (keys "fifo.0" … "fifo.<count-1>") scanned by the FIFO helpers.
+ const FIFO_BUCKET_COUNT = 20;
1062
// Upper bound on one bucket's serialized JSON size before appendToFifo moves to the next bucket.
+ const MAX_BUCKET_BYTES = 14 * 1024;
1063
// Maximum number of loop iterations drain() performs in one call.
+ const MAX_DRAIN_PER_TURN = 50;
1064
// NOTE(review): not referenced in this part of the file; presumably caps loop iterations per execution. Confirm at the use site.
+ const MAX_ITERATIONS_PER_EXECUTION = 10;
1065
// NOTE(review): not referenced in this part of the file; by its name, a history-size threshold (800 KiB) for continue-as-new. Confirm at the use site.
+ const MAX_HISTORY_SIZE_BEFORE_CONTINUE_AS_NEW_BYTES = 800 * 1024;
1066
// NOTE(review): not referenced in this part of the file; by its name, how often (in iterations) history size is checked. Confirm at the use site.
+ const HISTORY_SIZE_CHECK_INTERVAL_ITERATIONS = 3;
1067
// Timer length (ms) raced against a dequeue in drain() to detect an empty queue without blocking.
+ const NON_BLOCKING_TIMER_MS = 10;
1068
/**
 * Pick whichever is due first: the active timer or the child-digest batch
 * window. The digest only counts while it is not ready and has buffered
 * updates. On a tie the active timer wins.
 *
 * @param {number} now current time in ms.
 * @returns {{kind: string, remainingMs: number, timer?: object}|null}
 *   the nearest candidate, or null when neither applies.
 */
function nextTimerCandidate(now) {
    let activeCandidate = null;
    if (activeTimer) {
        activeCandidate = {
            kind: "active",
            remainingMs: Math.max(0, activeTimer.deadlineMs - now),
            timer: activeTimer,
        };
    }
    let digestCandidate = null;
    if (pendingChildDigest && !pendingChildDigest.ready && pendingChildDigest.updates.length > 0) {
        digestCandidate = {
            kind: "child-digest",
            remainingMs: Math.max(0, pendingChildDigest.startedAtMs + CHILD_UPDATE_BATCH_MS - now),
        };
    }
    if (!activeCandidate) {
        return digestCandidate;
    }
    if (!digestCandidate) {
        return activeCandidate;
    }
    // Strictly-less keeps the active timer first when both are equally close.
    return digestCandidate.remainingMs < activeCandidate.remainingMs ? digestCandidate : activeCandidate;
}
1088
/**
 * KV key of the i-th FIFO bucket.
 *
 * @param {number} i bucket index.
 * @returns {string} "fifo.<i>".
 */
function fifoBucketKey(i) {
    return "fifo.".concat(i);
}
1089
/**
 * Read and parse the i-th FIFO bucket.
 *
 * @param {number} i bucket index.
 * @returns {Array} the stored items; an empty array when the bucket is unset,
 *   is not valid JSON, or does not hold a JSON array.
 */
function readFifoBucket(i) {
    const raw = ctx.getValue(fifoBucketKey(i));
    if (!raw)
        return [];
    try {
        const parsed = JSON.parse(raw);
        // Callers rely on array methods, so anything else is treated as empty.
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        return [];
    }
}
1100
/**
 * Store the i-th FIFO bucket. An empty item list clears the key instead of
 * writing an empty array.
 *
 * @param {number} i bucket index.
 * @param {Array} items items to serialize as JSON.
 */
function writeFifoBucket(i, items) {
    const key = fifoBucketKey(i);
    if (items.length !== 0) {
        ctx.setValue(key, JSON.stringify(items));
        return;
    }
    ctx.clearValue(key);
}
1108
/**
 * Append items to the FIFO, starting at the last non-empty bucket and moving
 * to the next bucket whenever the current one would exceed MAX_BUCKET_BYTES.
 *
 * Differences from the previous implementation:
 * - bucket size is measured in UTF-8 bytes, not UTF-16 code units, so
 *   non-ASCII content can no longer overshoot the byte limit;
 * - the working bucket is kept in memory and written once per bucket instead
 *   of being re-read and re-written for every item;
 * - a single item that is too large on its own is stored in the current
 *   empty bucket rather than skipping it;
 * - the overflow trace reports how many items were actually not stored.
 *
 * @param {Array} newItems items to append, in order. When every bucket is
 *   full, the remaining items are not stored and an overflow trace is emitted.
 */
function appendToFifo(newItems) {
    let writeBucketIdx = 0;
    for (let i = FIFO_BUCKET_COUNT - 1; i >= 0; i--) {
        if (readFifoBucket(i).length > 0) {
            writeBucketIdx = i;
            break;
        }
    }
    let bucket = readFifoBucket(writeBucketIdx);
    let dirty = false;
    for (let n = 0; n < newItems.length; n++) {
        const item = newItems[n];
        bucket.push(item);
        const fits = Buffer.byteLength(JSON.stringify(bucket), "utf8") <= MAX_BUCKET_BYTES;
        // A lone oversized item cannot be split, so it is kept where it is.
        if (fits || bucket.length === 1) {
            dirty = true;
            continue;
        }
        bucket.pop();
        if (dirty) {
            writeFifoBucket(writeBucketIdx, bucket);
        }
        writeBucketIdx++;
        if (writeBucketIdx >= FIFO_BUCKET_COUNT) {
            ctx.traceInfo(`[fifo] overflow — ${newItems.length - n} item(s) may rely on carry-forward`);
            return;
        }
        bucket = [item];
        dirty = true;
    }
    if (dirty) {
        writeFifoBucket(writeBucketIdx, bucket);
    }
}
1135
/**
 * Remove and return the oldest FIFO item: the first item of the
 * lowest-indexed non-empty bucket.
 *
 * @returns {*} the removed item, or null when every bucket is empty.
 */
function popFifoItem() {
    let bucketIdx = 0;
    while (bucketIdx < FIFO_BUCKET_COUNT) {
        const contents = readFifoBucket(bucketIdx);
        if (contents.length > 0) {
            writeFifoBucket(bucketIdx, contents.slice(1));
            return contents[0];
        }
        bucketIdx += 1;
    }
    return null;
}
1146
/**
 * Whether any FIFO bucket currently holds at least one item.
 * Scanning stops at the first non-empty bucket.
 *
 * @returns {boolean}
 */
function hasFifoItems() {
    let bucketIdx = 0;
    while (bucketIdx < FIFO_BUCKET_COUNT) {
        if (readFifoBucket(bucketIdx).length > 0) {
            return true;
        }
        bucketIdx += 1;
    }
    return false;
}
1153
+ ctx.traceInfo(`[orch] start: iter=${iteration} pending=${pendingPrompt ? `"${pendingPrompt.slice(0, 40)}"` : 'NONE'} queued=${pendingToolActions.length} hydrate=${needsHydration} blob=${blobEnabled} timer=${activeTimer?.type ?? 'none'}`);
1154
+ // ─── Policy enforcement (orchestration-side) ─────────────
1155
+ if (iteration === 0 && !parentSessionId && !isSystem) {
1156
+ const workerPolicy = yield manager.getWorkerSessionPolicy();
1157
+ const policy = workerPolicy.policy;
1158
+ if (policy && policy.creation?.mode === "allowlist") {
1159
+ const agentId = input.agentId;
1160
+ const allowedNames = workerPolicy.allowedAgentNames;
1161
+ if (!agentId && !policy.creation.allowGeneric) {
1162
+ ctx.traceInfo(`[orch] policy rejection: generic session not allowed`);
1163
+ publishStatus("failed", { policyRejected: true });
1164
+ yield manager.updateCmsState(input.sessionId, "rejected");
1165
+ return "[POLICY] Session rejected: generic sessions are not allowed by session creation policy.";
1166
+ }
1167
+ if (agentId && allowedNames.length > 0 && !allowedNames.includes(agentId)) {
1168
+ ctx.traceInfo(`[orch] policy rejection: agent "${agentId}" not in allowed list`);
1169
+ publishStatus("failed", { policyRejected: true });
1170
+ yield manager.updateCmsState(input.sessionId, "rejected");
1171
+ return `[POLICY] Session rejected: agent "${agentId}" is not in the allowed agent list.`;
1172
+ }
1173
+ }
1174
+ }
1175
+ // ─── Resolve agent config for top-level named-agent sessions ───
1176
+ if (iteration === 0 && !parentSessionId && input.agentId && !isSystem) {
1177
+ const agentDef = yield manager.resolveAgentConfig(input.agentId);
1178
+ if (agentDef?.system && agentDef?.creatable === false) {
1179
+ const message = `Agent "${input.agentId}" is a worker-managed system agent and cannot be started manually. ` +
1180
+ `If it is missing, the workers likely need to be restarted.`;
1181
+ ctx.traceInfo(`[orch] top-level named session denied: ${message}`);
1182
+ publishStatus("failed", { workerManagedAgent: true });
1183
+ yield manager.updateCmsState(input.sessionId, "failed", message);
1184
+ return `[SYSTEM: ${message}]`;
1185
+ }
1186
+ if (agentDef) {
1187
+ const mergedToolNames = Array.from(new Set([
1188
+ ...(agentDef.tools ?? []),
1189
+ ...(config.toolNames ?? []),
1190
+ ]));
1191
+ if (mergedToolNames.length > 0) {
1192
+ config.toolNames = mergedToolNames;
1193
+ ctx.traceInfo(`[orch] merged top-level agent tools for ${input.agentId}: ${mergedToolNames.join(", ")}`);
1194
+ }
1195
+ session = createSessionProxy(ctx, input.sessionId, affinityKey, config);
1196
+ }
1197
+ }
1198
+ if (input.agentId) {
1199
+ config.agentIdentity = input.agentId;
1200
+ }
1201
+ // ═══════════════════════════════════════════════════════════
1202
+ // ═══ HANDLE COMMAND (extracted from main loop) ════════════
1203
+ // ═══════════════════════════════════════════════════════════
1204
/**
 * Handle one command message taken from the queue.
 *
 * Every command is traced and recorded as a `session.command_received` event
 * first. Supported commands: set_model, list_models, get_info, done, cancel,
 * delete. Anything else is answered with an "Unknown command" error.
 *
 * @param {{cmd: string, id: string, args?: object}} cmdMsg the command message.
 */
function* handleCommand(cmdMsg) {
    ctx.traceInfo(`[orch-cmd] ${cmdMsg.cmd} id=${cmdMsg.id}`);
    yield manager.recordSessionEvent(input.sessionId, [{
            eventType: "session.command_received",
            data: { cmd: cmdMsg.cmd, id: cmdMsg.id },
        }]);
    // Writes a response carrying this command's id and name plus the given fields.
    function* reply(fields) {
        yield* writeCommandResponse({ id: cmdMsg.id, cmd: cmdMsg.cmd, ...fields });
    }
    const command = cmdMsg.cmd;
    if (command === "set_model") {
        const requestedModel = String(cmdMsg.args?.model || "");
        const previousModel = config.model || "(default)";
        config = { ...config, model: requestedModel };
        yield* reply({ result: { ok: true, oldModel: previousModel, newModel: requestedModel } });
        publishStatus("idle");
        yield* versionedContinueAsNew(continueInput());
        return; // unreachable after CAN
    }
    if (command === "list_models") {
        publishStatus("idle", { cmdProcessing: cmdMsg.id });
        let models;
        try {
            const raw = yield manager.listModels();
            models = typeof raw === "string" ? JSON.parse(raw) : raw;
        }
        catch (err) {
            yield* reply({ error: err.message || String(err) });
            publishStatus("idle");
            return;
        }
        yield* reply({ result: { models, currentModel: config.model } });
        publishStatus("idle");
        return;
    }
    if (command === "get_info") {
        yield* reply({
            result: {
                model: config.model || "(default)",
                iteration,
                sessionId: input.sessionId,
                affinityKey: affinityKey,
                affinityKeyShort: affinityKey?.slice(0, 8),
                preserveAffinityOnHydrate,
                needsHydration,
                blobEnabled,
                contextUsage,
            },
        });
        publishStatus("idle");
        return;
    }
    if (command === "done" || command === "cancel" || command === "delete") {
        // The three shutdown commands differ only in their trace line and mode.
        let shutdownTrace;
        if (command === "done") {
            shutdownTrace = `[orch] /done command received — beginning graceful shutdown`;
        }
        else if (command === "cancel") {
            shutdownTrace = `[orch] cancel command received — beginning graceful cancellation`;
        }
        else {
            shutdownTrace = `[orch] delete command received — beginning graceful delete`;
        }
        ctx.traceInfo(shutdownTrace);
        yield* beginGracefulShutdown(command, cmdMsg);
        return;
    }
    yield* reply({ error: `Unknown command: ${cmdMsg.cmd}` });
    publishStatus("idle");
}
1298
+ // ═══════════════════════════════════════════════════════════
1299
+ // ═══ DRAIN — greedily move queue + timer into KV FIFO ════
1300
+ // ═══════════════════════════════════════════════════════════
1301
/**
 * True when there is nothing at all to process — no legacy message, no active
 * timer, no queued tool actions, no pending prompt and an empty FIFO.
 * The FIFO is only inspected once the cheaper checks have passed.
 *
 * @returns {boolean}
 */
function needsBlockingDequeue() {
    if (legacyPendingMessage !== undefined) {
        return false;
    }
    if (activeTimer) {
        return false;
    }
    if (pendingToolActions.length !== 0) {
        return false;
    }
    if (pendingPrompt) {
        return false;
    }
    return !hasFifoItems();
}
1308
+ function* drain() {
1309
+ const stash = [];
1310
+ const seenChildUpdates = new Set();
1311
+ for (let i = 0; i < MAX_DRAIN_PER_TURN; i++) {
1312
+ let msg = null;
1313
+ // ─── Mode 0: Legacy carry-forward message from older versions ───
1314
+ if (legacyPendingMessage !== undefined) {
1315
+ msg = legacyPendingMessage;
1316
+ legacyPendingMessage = undefined;
1317
+ // ─── Mode 1: Active Timer / Child Digest — race dequeue vs timer ───
1318
+ }
1319
+ else if (activeTimer || (pendingChildDigest && !pendingChildDigest.ready)) {
1320
+ const now = yield ctx.utcNow();
1321
+ const candidate = nextTimerCandidate(now);
1322
+ if (!candidate)
1323
+ continue;
1324
+ if (candidate.remainingMs === 0) {
1325
+ if (candidate.kind === "active" && candidate.timer) {
1326
+ stash.push({ kind: "timer", timer: { ...candidate.timer }, firedAtMs: now });
1327
+ activeTimer = null;
1328
+ }
1329
+ else if (pendingChildDigest && pendingChildDigest.updates.length > 0) {
1330
+ pendingChildDigest.ready = true;
1331
+ break;
1332
+ }
1333
+ continue;
1334
+ }
1335
+ const msgTask = ctx.dequeueEvent("messages");
1336
+ const timerTask = ctx.scheduleTimer(candidate.remainingMs);
1337
+ const race = yield ctx.race(msgTask, timerTask);
1338
+ if (race.index === 1) {
1339
+ if (candidate.kind === "active" && candidate.timer) {
1340
+ const firedAt = yield ctx.utcNow();
1341
+ stash.push({ kind: "timer", timer: { ...candidate.timer }, firedAtMs: firedAt });
1342
+ activeTimer = null;
1343
+ }
1344
+ else if (pendingChildDigest && pendingChildDigest.updates.length > 0) {
1345
+ pendingChildDigest.ready = true;
1346
+ break;
1347
+ }
1348
+ continue; // keep draining — pick up queued msgs in mode 3
1349
+ }
1350
+ msg = typeof race.value === "string" ? JSON.parse(race.value) : race.value;
1351
+ // activeTimer / pending child digest stay set — deadlines unchanged
1352
+ // ─── Mode 2: Blocking Dequeue — nothing to process ──
1353
+ }
1354
+ else if (needsBlockingDequeue()) {
1355
+ if (i > 0)
1356
+ break; // only block on first iteration
1357
+ publishStatus(pendingInputQuestion ? "input_required" : "idle");
1358
+ const rawMsg = yield ctx.dequeueEvent("messages");
1359
+ msg = typeof rawMsg === "string" ? JSON.parse(rawMsg) : rawMsg;
1360
+ // ─── Mode 3: Non-blocking Dequeue — opportunistic ───
1361
+ }
1362
+ else {
1363
+ const msgTask = ctx.dequeueEvent("messages");
1364
+ const timerTask = ctx.scheduleTimer(NON_BLOCKING_TIMER_MS);
1365
+ const race = yield ctx.race(msgTask, timerTask);
1366
+ if (race.index === 1)
1367
+ break; // queue empty
1368
+ msg = typeof race.value === "string" ? JSON.parse(race.value) : race.value;
1369
+ }
1370
+ if (!msg)
1371
+ continue;
1372
+ // ─── Route: Commands → handle immediately ───────────
1373
+ if (msg.type === "cmd") {
1374
+ // Flush anything already stashed before handling the command
1375
+ if (stash.length > 0) {
1376
+ appendToFifo(stash);
1377
+ stash.length = 0;
1378
+ }
1379
+ yield* handleCommand(msg);
1380
+ if (orchestrationResult !== null)
1381
+ return;
1382
+ continue;
1383
+ }
1384
+ // ─── Route: Child updates → apply immediately ───────
1385
+ const childUpdate = parseChildUpdate(msg.prompt);
1386
+ if (childUpdate) {
1387
+ const key = `${childUpdate.sessionId}|${childUpdate.updateType}|${childUpdate.content ?? ""}`;
1388
+ if (!seenChildUpdates.has(key)) {
1389
+ seenChildUpdates.add(key);
1390
+ yield* applyChildUpdate(childUpdate);
1391
+ if (!pendingShutdown) {
1392
+ const childObservedAt = yield ctx.utcNow();
1393
+ bufferChildUpdate(childUpdate, childObservedAt);
1394
+ }
1395
+ // Check if all waited-for agents are now done
1396
+ if (waitingForAgentIds) {
1397
+ yield* maybeResolveAgentWaitCompletion();
1398
+ }
1399
+ }
1400
+ continue;
1401
+ }
1402
+ // ─── Route: Answers → stash ─────────────────────────
1403
+ if (msg.answer !== undefined) {
1404
+ stash.push({ kind: "answer", answer: msg.answer, wasFreeform: msg.wasFreeform });
1405
+ continue;
1406
+ }
1407
+ // ─── Route: User prompts → stash ────────────────────
1408
+ if (msg.prompt) {
1409
+ let userPrompt = msg.prompt;
1410
+ // If a wait/cron timer is active, cancel it and augment the prompt
1411
+ // with timer-interrupt context (matches v1.0.31 wait-loop behavior).
1412
+ if (activeTimer?.type === "wait") {
1413
+ const now = yield ctx.utcNow();
1414
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
1415
+ const remainingSec = Math.round(remainingMs / 1000);
1416
+ const elapsedMs = activeTimer.originalDurationMs - remainingMs;
1417
+ const elapsedSec = Math.round(elapsedMs / 1000);
1418
+ const totalSec = Math.round(activeTimer.originalDurationMs / 1000);
1419
+ ctx.traceInfo(`[drain] user prompt interrupted wait timer, ${remainingSec}s remain — orchestration will auto-resume`);
1420
+ // Save the interrupted timer. The orchestration will automatically
1421
+ // re-arm it after the LLM's response turn completes. This avoids
1422
+ // conflicting "call wait(N) to resume" instructions that clash
1423
+ // with agent-specific prompts.
1424
+ interruptedWaitTimer = {
1425
+ remainingSec,
1426
+ reason: activeTimer.reason,
1427
+ shouldRehydrate: activeTimer.shouldRehydrate ?? false,
1428
+ waitPlan: activeTimer.waitPlan,
1429
+ interruptKind: "user",
1430
+ };
1431
+ // Just tell the LLM about the context, not what to do next
1432
+ if (activeTimer.shouldRehydrate && userPrompt) {
1433
+ userPrompt = wrapWithResumeContext(userPrompt, `Your ${totalSec}s timer (reason: "${activeTimer.reason}") was interrupted by the above message. ` +
1434
+ `${elapsedSec}s elapsed, ${remainingSec}s remain. ` +
1435
+ `Reply to the message. The timer will be automatically resumed after your reply.`);
1436
+ }
1437
+ else if (userPrompt) {
1438
+ userPrompt = `${userPrompt}\n\n` +
1439
+ `[SYSTEM: The above is a message that interrupted your ${totalSec}s timer (reason: "${activeTimer.reason}"). ` +
1440
+ `${elapsedSec}s elapsed, ${remainingSec}s remain. ` +
1441
+ `Reply to the message. The timer will be automatically resumed after your reply.]`;
1442
+ }
1443
+ activeTimer = null;
1444
+ }
1445
+ else if (activeTimer?.type === "cron") {
1446
+ const activeCron = cronSchedule;
1447
+ const now = yield ctx.utcNow();
1448
+ const remainingMs = Math.max(0, activeTimer.deadlineMs - now);
1449
+ interruptedCronTimer = {
1450
+ remainingMs,
1451
+ reason: activeTimer.reason,
1452
+ originalDurationMs: activeTimer.originalDurationMs,
1453
+ ...(activeTimer.shouldRehydrate ? { shouldRehydrate: true } : {}),
1454
+ };
1455
+ const cronResumeNote = `This is an internal recurring schedule, not a new user prompt. ` +
1456
+ `There is an active recurring schedule every ${activeCron?.intervalSeconds ?? "?"} seconds for "${activeCron?.reason ?? activeTimer.reason}". ` +
1457
+ `The next cron wake-up will keep the original schedule and resume after the remaining ${Math.round(remainingMs / 1000)} seconds unless you explicitly reset cron. ` +
1458
+ `Do NOT call wait() just to keep the recurring loop alive. ` +
1459
+ `Call cron(action="cancel") only if you need to stop it.`;
1460
+ if (activeTimer.shouldRehydrate && userPrompt) {
1461
+ userPrompt = wrapWithResumeContext(userPrompt, cronResumeNote);
1462
+ }
1463
+ else if (userPrompt) {
1464
+ userPrompt = `${userPrompt}\n\n[SYSTEM: ${cronResumeNote}]`;
1465
+ }
1466
+ ctx.traceInfo(`[drain] user prompt interrupted cron timer`);
1467
+ activeTimer = null;
1468
+ }
1469
+ else if (activeTimer?.type === "idle") {
1470
+ ctx.traceInfo(`[drain] user prompt within idle window, cancelling idle timer`);
1471
+ activeTimer = null;
1472
+ }
1473
+ else if (activeTimer?.type === "agent-poll") {
1474
+ ctx.traceInfo(`[drain] user prompt interrupted agent wait`);
1475
+ waitingForAgentIds = null;
1476
+ activeTimer = null;
1477
+ }
1478
+ if (pendingChildDigest?.updates.length) {
1479
+ userPrompt = flushPendingChildDigestIntoPrompt(userPrompt);
1480
+ }
1481
+ stash.push({
1482
+ kind: "prompt",
1483
+ prompt: userPrompt,
1484
+ bootstrap: Boolean(msg.bootstrap),
1485
+ ...(msg.requiredTool ? { requiredTool: msg.requiredTool } : {}),
1486
+ });
1487
+ continue;
1488
+ }
1489
+ ctx.traceInfo(`[drain] skipping unknown: ${JSON.stringify(msg).slice(0, 120)}`);
1490
+ }
1491
+ if (stash.length > 0)
1492
+ appendToFifo(stash);
1493
+ }
1494
+ // ═══════════════════════════════════════════════════════════
1495
+ // ═══ PROCESS PROMPT — hydrate + runTurn + handleResult ═══
1496
+ // ═══════════════════════════════════════════════════════════
1497
/**
 * Run a single prompt through one full turn of the session.
 *
 * Steps, in order:
 *  1. Probe whether the session needs hydration (only when blob persistence
 *     is enabled and hydration is not already pending).
 *  2. Merge any pending system prompt with system context extracted from the
 *     prompt; a turn that ends up with only system context is sent with
 *     INTERNAL_SYSTEM_TURN_PROMPT and flagged as bootstrap.
 *  3. Hydrate the session (with retries) when required.
 *  4. Load the knowledge index (skipped for the "facts-manager" identity).
 *  5. Call `session.runTurn`; on failure, retry via continue-as-new or give up.
 *  6. On success, update bookkeeping and delegate to `handleTurnResult`.
 *
 * This is a generator: every `yield` / `yield*` hands an operation to the
 * orchestration runtime, so the relative order of yields is preserved exactly.
 *
 * @param {string} promptText    Prompt text for this turn (may be empty).
 * @param {boolean} isBootstrap  Bootstrap flag forwarded to `session.runTurn`.
 * @param {string} [requiredTool] Optional tool name forwarded to `session.runTurn`
 *                               and carried across retries.
 * @throws {Error} When the turn fails with a SESSION_STATE_MISSING_PREFIX error;
 *                 the session is marked "failed" before the error is rethrown.
 */
function* processPrompt(promptText, isBootstrap, requiredTool) {
    let prompt = promptText;
    let promptIsBootstrap = isBootstrap;
    if (blobEnabled && !needsHydration) {
        try {
            needsHydration = yield session.needsHydration();
        }
        catch (err) {
            // A failed probe is not fatal: keep the current needsHydration value.
            ctx.traceInfo(`[orch] needsHydration probe failed: ${err.message ?? err}`);
        }
    }
    if (needsHydration && blobEnabled && prompt) {
        prompt = wrapWithResumeContext(prompt);
    }
    // Consume the pending system prompt exactly once.
    let turnSystemPrompt = pendingSystemPrompt;
    pendingSystemPrompt = undefined;
    const extractedPrompt = extractPromptSystemContext(prompt);
    prompt = extractedPrompt.prompt ?? "";
    turnSystemPrompt = mergePrompt(turnSystemPrompt, extractedPrompt.systemPrompt);
    // No user-visible prompt but system context present: run an internal turn.
    const systemOnlyTurn = !prompt && !!turnSystemPrompt;
    if (systemOnlyTurn) {
        prompt = INTERNAL_SYSTEM_TURN_PROMPT;
        promptIsBootstrap = true;
    }
    config.turnSystemPrompt = turnSystemPrompt;
    ctx.traceInfo(`[turn ${iteration}] session=${input.sessionId} prompt="${prompt.slice(0, 80)}"`);
    // Hydrate if needed (with retry)
    if (needsHydration && blobEnabled) {
        let hydrateAttempts = 0;
        while (true) {
            try {
                if (!preserveAffinityOnHydrate) {
                    affinityKey = yield ctx.newGuid();
                }
                session = createSessionProxy(ctx, input.sessionId, affinityKey, config);
                yield session.hydrate();
                needsHydration = false;
                preserveAffinityOnHydrate = false;
                break;
            }
            catch (hydrateErr) {
                const hMsg = hydrateErr.message || String(hydrateErr);
                // A missing archive is treated as "nothing to restore", not as a failure.
                if (hMsg.includes("blob does not exist")
                    || hMsg.includes("BlobNotFound")
                    || hMsg.includes("Session archive not found")
                    || hMsg.includes("404")) {
                    ctx.traceInfo(`[orch] hydrate skipped — blob not found, starting fresh session`);
                    needsHydration = false;
                    preserveAffinityOnHydrate = false;
                    break;
                }
                hydrateAttempts++;
                ctx.traceInfo(`[orch] hydrate FAILED (attempt ${hydrateAttempts}/${MAX_RETRIES}): ${hMsg}`);
                if (hydrateAttempts >= MAX_RETRIES) {
                    publishStatus("error", {
                        error: `Hydrate failed after ${MAX_RETRIES} attempts: ${hMsg}`,
                        retriesExhausted: true,
                    });
                    break;
                }
                // Exponential backoff: 10s, 20s, 40s, ...
                const hydrateDelay = 10 * Math.pow(2, hydrateAttempts - 1);
                publishStatus("error", {
                    error: `Hydrate failed: ${hMsg} (retry ${hydrateAttempts}/${MAX_RETRIES} in ${hydrateDelay}s)`,
                });
                yield ctx.scheduleTimer(hydrateDelay * 1000);
            }
        }
        // Still unhydrated means retries were exhausted: abandon this turn.
        if (needsHydration)
            return;
    }
    // Load knowledge index
    if (config.agentIdentity !== "facts-manager") {
        try {
            yield manager.loadKnowledgeIndex();
        }
        catch (knErr) {
            ctx.traceInfo(`[orch] loadKnowledgeIndex failed (non-fatal): ${knErr.message || knErr}`);
        }
    }
    // Run turn
    publishStatus("running", { iteration: iteration + 1 });
    let turnResult;
    try {
        turnResult = yield session.runTurn(prompt, promptIsBootstrap, iteration, {
            ...(parentSessionId ? { parentSessionId } : {}),
            nestingLevel,
            ...(requiredTool ? { requiredTool } : {}),
            retryCount,
        });
    }
    catch (err) {
        config.turnSystemPrompt = undefined;
        const errorMsg = err.message || String(err);
        // Missing session state is unrecoverable: mark the session failed and rethrow.
        const missingStateIndex = errorMsg.indexOf(SESSION_STATE_MISSING_PREFIX);
        if (missingStateIndex >= 0) {
            const fatalError = errorMsg.slice(missingStateIndex + SESSION_STATE_MISSING_PREFIX.length).trim();
            ctx.traceInfo(`[orch] fatal missing session state: ${fatalError}`);
            publishStatus("failed", { error: fatalError, fatal: true });
            yield manager.updateCmsState(input.sessionId, "failed", fatalError);
            throw new Error(fatalError);
        }
        retryCount++;
        ctx.traceInfo(`[orch] runTurn FAILED (attempt ${retryCount}/${MAX_RETRIES}): ${errorMsg}`);
        if (isCopilotConnectionClosedError(errorMsg)) {
            // Connection-closed errors use a fixed retry delay and their own retry budget.
            if (retryCount <= COPILOT_CONNECTION_CLOSED_MAX_RETRIES) {
                const retryDetail = buildConnectionClosedRetryDetail(retryCount);
                publishStatus("error", {
                    error: `${errorMsg} (${retryDetail})`,
                    recoverableTransportLoss: true,
                });
                ctx.traceInfo(`[orch] live Copilot connection lost; retrying in ${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s`);
                if (blobEnabled) {
                    yield* dehydrateForNextTurn("error", true, {
                        detail: retryDetail,
                        error: errorMsg,
                        phase: "runTurn.throw",
                        retryAttempt: retryCount,
                        maxRetries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
                        retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
                    });
                }
                yield ctx.scheduleTimer(COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS * 1000);
                yield* versionedContinueAsNew(continueInput({
                    ...(systemOnlyTurn ? {} : { prompt }),
                    ...(requiredTool ? { requiredTool } : {}),
                    ...(turnSystemPrompt ? { systemPrompt: turnSystemPrompt } : {}),
                    retryCount,
                    needsHydration: blobEnabled ? true : needsHydration,
                }));
                return;
            }
            // Connection retries exhausted: record a lossy handoff.
            const handoffMessage = buildLossyHandoffSummary(errorMsg);
            ctx.traceInfo(`[orch] ${handoffMessage}`);
            publishStatus("error", {
                error: handoffMessage,
                retriesExhausted: true,
                lossyHandoff: true,
            });
            yield manager.recordSessionEvent(input.sessionId, [{
                    eventType: "session.lossy_handoff",
                    data: {
                        message: handoffMessage,
                        error: errorMsg,
                        phase: "runTurn.throw",
                        retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
                        retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
                        nextStep: "dehydrate_and_resume_on_new_worker",
                    },
                }]);
            if (blobEnabled) {
                yield* dehydrateForNextTurn("lossy_handoff", true, {
                    detail: handoffMessage,
                    error: errorMsg,
                    phase: "runTurn.throw",
                    retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
                    retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
                    nextStep: "dehydrate_and_resume_on_new_worker",
                });
                yield* versionedContinueAsNew(continueInput({
                    ...(systemOnlyTurn ? {} : { prompt }),
                    ...(requiredTool ? { requiredTool } : {}),
                    ...(turnSystemPrompt ? { systemPrompt: turnSystemPrompt } : {}),
                    retryCount: 0,
                    needsHydration: true,
                    rehydrationMessage: buildLossyHandoffRehydrationMessage(errorMsg),
                }));
                return;
            }
            // Without blob persistence there is nothing to hand off to.
            publishStatus("error", {
                error: `${handoffMessage} Durable handoff is unavailable because blob persistence is disabled.`,
                retriesExhausted: true,
                lossyHandoff: false,
            });
            retryCount = 0;
            return;
        }
        if (retryCount >= MAX_RETRIES) {
            ctx.traceInfo(`[orch] max retries exhausted, waiting for user input`);
            publishStatus("error", {
                error: `Failed after ${MAX_RETRIES} attempts: ${errorMsg}`,
                retriesExhausted: true,
            });
            retryCount = 0;
            return;
        }
        // Exponential backoff: 15s, 30s, 60s, ... Computed before publishing so the
        // status message reports the delay that is actually scheduled below.
        const retryDelay = 15 * Math.pow(2, retryCount - 1);
        publishStatus("error", {
            error: `${errorMsg} (retry ${retryCount}/${MAX_RETRIES} in ${retryDelay}s)`,
        });
        ctx.traceInfo(`[orch] retrying in ${retryDelay}s`);
        if (blobEnabled) {
            yield* dehydrateForNextTurn("error", true, {
                detail: errorMsg,
                error: errorMsg,
                phase: "runTurn.throw",
                retryAttempt: retryCount,
                maxRetries: MAX_RETRIES,
                retryDelaySeconds: retryDelay,
            });
        }
        yield ctx.scheduleTimer(retryDelay * 1000);
        yield* versionedContinueAsNew(continueInput({
            ...(systemOnlyTurn ? {} : { prompt }),
            ...(requiredTool ? { requiredTool } : {}),
            ...(turnSystemPrompt ? { systemPrompt: turnSystemPrompt } : {}),
            retryCount,
            needsHydration: blobEnabled ? true : needsHydration,
        }));
        return;
    }
    // Turn succeeded: clear per-turn state and reset the retry budget.
    config.turnSystemPrompt = undefined;
    retryCount = 0;
    // runTurn may hand back either a JSON string or an already-parsed object.
    const result = typeof turnResult === "string" ? JSON.parse(turnResult) : turnResult;
    const observedAt = yield ctx.utcNow();
    contextUsage = updateContextUsageFromEvents(contextUsage, result?.events, observedAt);
    iteration++;
    yield* maybeSummarize();
    yield* refreshTrackedSubAgents();
    if ("queuedActions" in result && Array.isArray(result.queuedActions) && result.queuedActions.length > 0) {
        pendingToolActions.push(...result.queuedActions);
        ctx.traceInfo(`[orch] queued ${result.queuedActions.length} extra action(s) from turn`);
    }
    drainLeadingQueuedCronActions(prompt);
    yield* handleTurnResult(result, prompt);
}
1722
+ // ═══════════════════════════════════════════════════════════
1723
+ // ═══ HANDLE TURN RESULT — sets timer instead of loops ════
1724
+ // ═══════════════════════════════════════════════════════════
1725
+ function* handleTurnResult(result, sourcePrompt) {
1726
+ if (result.type === "completed"
1727
+ && parentSessionId
1728
+ && typeof result.content === "string"
1729
+ && /^QUESTION FOR PARENT:/i.test(result.content.trim())) {
1730
+ ctx.traceInfo("[orch] coercing child QUESTION FOR PARENT result into durable wait");
1731
+ result = {
1732
+ type: "wait",
1733
+ seconds: 60,
1734
+ reason: "waiting for parent answer",
1735
+ content: result.content.trim(),
1736
+ model: result.model,
1737
+ };
1738
+ }
1739
+ if (interruptedWaitTimer?.interruptKind === "user"
1740
+ && (result.type === "completed" || result.type === "wait")
1741
+ && !(typeof result.content === "string" && result.content.trim())) {
1742
+ const content = "I'm here. Resuming the timer.";
1743
+ result = { ...result, content };
1744
+ yield manager.recordSessionEvent(input.sessionId, [{
1745
+ eventType: "assistant.message",
1746
+ data: {
1747
+ content,
1748
+ synthetic: true,
1749
+ reason: "wait_interrupt_empty_reply",
1750
+ },
1751
+ }]);
1752
+ ctx.traceInfo("[orch] synthesized visible assistant reply for wait interrupt");
1753
+ }
1754
+ switch (result.type) {
1755
+ case "completed": {
1756
+ ctx.traceInfo(`[response] ${result.content}`);
1757
+ yield* writeLatestResponse({
1758
+ iteration,
1759
+ type: "completed",
1760
+ content: result.content,
1761
+ model: result.model,
1762
+ });
1763
+ // Notify parent if sub-agent
1764
+ if (parentSessionId) {
1765
+ try {
1766
+ yield manager.sendToSession(parentSessionId, `[CHILD_UPDATE from=${input.sessionId} type=completed iter=${iteration}]\n${result.content.slice(0, 2000)}`);
1767
+ }
1768
+ catch (err) {
1769
+ ctx.traceInfo(`[orch] sendToSession(parent) failed: ${err.message} (non-fatal)`);
1770
+ }
1771
+ if (!cronSchedule) {
1772
+ if (input.isSystem) {
1773
+ ctx.traceInfo(`[orch] system sub-agent completed turn, continuing loop`);
1774
+ yield* maybeCheckpoint();
1775
+ return;
1776
+ }
1777
+ ctx.traceInfo(`[orch] sub-agent completed task, auto-terminating`);
1778
+ try {
1779
+ yield session.destroy();
1780
+ }
1781
+ catch { }
1782
+ publishStatus("completed");
1783
+ orchestrationResult = "done";
1784
+ return;
1785
+ }
1786
+ }
1787
+ // Forgotten-timer safety net
1788
+ {
1789
+ const runningAgents = subAgents.filter(a => a.status === "running");
1790
+ if (runningAgents.length > 0 && !input.forgottenTimerNudged && !cronSchedule) {
1791
+ const names = runningAgents.map(a => a.task?.slice(0, 40) || a.orchId).join(", ");
1792
+ ctx.traceInfo(`[orch] forgotten-timer safety: ${runningAgents.length} agents still running, nudging LLM`);
1793
+ yield* versionedContinueAsNew(continueInputWithPrompt(`[SYSTEM: You ended your turn without calling wait(), but you have ${runningAgents.length} sub-agent(s) still running: ${names}. ` +
1794
+ `Without a wait() call, your monitoring/polling loop is DEAD — the orchestration will NOT wake you up automatically. ` +
1795
+ `You MUST call wait() now to schedule your next check-in. Call wait() with an appropriate interval to continue your loop.]`, { forgottenTimerNudged: true }));
1796
+ return;
1797
+ }
1798
+ }
1799
+ // Auto-resume interrupted wait timer. If the LLM's turn completed
1800
+ // without re-issuing wait() itself, the orchestration re-arms the
1801
+ // remaining time automatically. This avoids conflicting "call wait(N)"
1802
+ // instructions that clash with agent-specific prompts.
1803
+ if (interruptedWaitTimer && interruptedWaitTimer.remainingSec > 0) {
1804
+ const saved = interruptedWaitTimer;
1805
+ interruptedWaitTimer = null;
1806
+ ctx.traceInfo(`[orch] auto-resuming interrupted wait: ${saved.remainingSec}s (${saved.reason})`);
1807
+ if (saved.shouldRehydrate) {
1808
+ yield* dehydrateForNextTurn("timer", saved.waitPlan?.resetAffinityOnDehydrate ?? true);
1809
+ }
1810
+ const resumeNow = yield ctx.utcNow();
1811
+ publishStatus("waiting", {
1812
+ waitSeconds: saved.remainingSec,
1813
+ waitReason: saved.reason,
1814
+ waitStartedAt: resumeNow,
1815
+ });
1816
+ if (!saved.shouldRehydrate)
1817
+ yield* maybeCheckpoint();
1818
+ activeTimer = {
1819
+ deadlineMs: resumeNow + saved.remainingSec * 1000,
1820
+ originalDurationMs: saved.remainingSec * 1000,
1821
+ reason: saved.reason,
1822
+ type: "wait",
1823
+ shouldRehydrate: saved.shouldRehydrate,
1824
+ waitPlan: saved.waitPlan,
1825
+ };
1826
+ return;
1827
+ }
1828
+ if (interruptedCronTimer && interruptedCronTimer.remainingMs > 0) {
1829
+ const saved = interruptedCronTimer;
1830
+ interruptedCronTimer = null;
1831
+ const remainingMs = Math.max(0, saved.remainingMs);
1832
+ const remainingSec = Math.max(1, Math.round(remainingMs / 1000));
1833
+ ctx.traceInfo(`[orch] auto-resuming interrupted cron: ${remainingSec}s remain (${saved.reason})`);
1834
+ const cronResumePlan = planWaitHandling({
1835
+ blobEnabled,
1836
+ seconds: remainingSec,
1837
+ dehydrateThreshold,
1838
+ });
1839
+ if (cronResumePlan.shouldDehydrate) {
1840
+ yield* dehydrateForNextTurn("cron", cronResumePlan.resetAffinityOnDehydrate);
1841
+ }
1842
+ const resumeNow = yield ctx.utcNow();
1843
+ publishStatus("waiting", {
1844
+ waitSeconds: remainingSec,
1845
+ waitReason: saved.reason,
1846
+ waitStartedAt: resumeNow,
1847
+ });
1848
+ if (!cronResumePlan.shouldDehydrate)
1849
+ yield* maybeCheckpoint();
1850
+ activeTimer = {
1851
+ deadlineMs: resumeNow + remainingMs,
1852
+ originalDurationMs: remainingMs,
1853
+ reason: saved.reason,
1854
+ type: "cron",
1855
+ shouldRehydrate: cronResumePlan.shouldDehydrate,
1856
+ };
1857
+ return;
1858
+ }
1859
+ if (cronSchedule) {
1860
+ const activeCron = { ...cronSchedule };
1861
+ const cronPlan = planWaitHandling({
1862
+ blobEnabled,
1863
+ seconds: activeCron.intervalSeconds,
1864
+ dehydrateThreshold,
1865
+ });
1866
+ if (cronPlan.shouldDehydrate) {
1867
+ yield* dehydrateForNextTurn("cron", cronPlan.resetAffinityOnDehydrate);
1868
+ }
1869
+ yield manager.recordSessionEvent(input.sessionId, [{
1870
+ eventType: "session.cron_started",
1871
+ data: { intervalSeconds: activeCron.intervalSeconds, reason: activeCron.reason },
1872
+ }]);
1873
+ const cronStartedAt = yield ctx.utcNow();
1874
+ ctx.traceInfo(`[orch] cron timer: ${activeCron.intervalSeconds}s (${activeCron.reason})`);
1875
+ publishStatus("waiting", {
1876
+ waitSeconds: activeCron.intervalSeconds,
1877
+ waitReason: activeCron.reason,
1878
+ waitStartedAt: cronStartedAt,
1879
+ });
1880
+ if (!cronPlan.shouldDehydrate)
1881
+ yield* maybeCheckpoint();
1882
+ activeTimer = {
1883
+ deadlineMs: cronStartedAt + activeCron.intervalSeconds * 1000,
1884
+ originalDurationMs: activeCron.intervalSeconds * 1000,
1885
+ reason: activeCron.reason,
1886
+ type: "cron",
1887
+ shouldRehydrate: cronPlan.shouldDehydrate,
1888
+ };
1889
+ return;
1890
+ }
1891
+ if (!blobEnabled || idleTimeout < 0) {
1892
+ yield* maybeCheckpoint();
1893
+ return; // no timer — main loop will CAN
1894
+ }
1895
+ // Set idle timer
1896
+ publishStatus("idle");
1897
+ yield* maybeCheckpoint();
1898
+ const idleNow = yield ctx.utcNow();
1899
+ activeTimer = {
1900
+ deadlineMs: idleNow + idleTimeout * 1000,
1901
+ originalDurationMs: idleTimeout * 1000,
1902
+ reason: "idle timeout",
1903
+ type: "idle",
1904
+ };
1905
+ return;
1906
+ }
1907
+ case "cron":
1908
+ applyCronAction(result, sourcePrompt);
1909
+ return;
1910
+ case "wait": {
1911
+ // LLM re-issued wait itself — clear any saved interrupted timer
1912
+ interruptedWaitTimer = null;
1913
+ ensureTaskContext(sourcePrompt);
1914
+ if (parentSessionId) {
1915
+ try {
1916
+ const notifyContent = result.content
1917
+ ? result.content.slice(0, 2000)
1918
+ : `[wait: ${result.reason} (${result.seconds}s)]`;
1919
+ yield manager.sendToSession(parentSessionId, `[CHILD_UPDATE from=${input.sessionId} type=wait iter=${iteration}]\n${notifyContent}`);
1920
+ }
1921
+ catch (err) {
1922
+ ctx.traceInfo(`[orch] sendToSession(parent) wait failed: ${err.message} (non-fatal)`);
1923
+ }
1924
+ }
1925
+ ctx.traceInfo(`[orch] durable timer: ${result.seconds}s (${result.reason})`);
1926
+ const waitPlan = planWaitHandling({
1927
+ blobEnabled,
1928
+ seconds: result.seconds,
1929
+ dehydrateThreshold,
1930
+ preserveWorkerAffinity: result.preserveWorkerAffinity,
1931
+ });
1932
+ if (waitPlan.shouldDehydrate) {
1933
+ yield* dehydrateForNextTurn("timer", waitPlan.resetAffinityOnDehydrate);
1934
+ }
1935
+ const waitStartedAt = yield ctx.utcNow();
1936
+ if (result.content) {
1937
+ yield* writeLatestResponse({
1938
+ iteration,
1939
+ type: "wait",
1940
+ content: result.content,
1941
+ waitReason: result.reason,
1942
+ waitSeconds: result.seconds,
1943
+ waitStartedAt,
1944
+ model: result.model,
1945
+ });
1946
+ ctx.traceInfo(`[orch] intermediate: ${result.content.slice(0, 80)}`);
1947
+ }
1948
+ publishStatus("waiting", {
1949
+ waitSeconds: result.seconds,
1950
+ waitReason: result.reason,
1951
+ waitStartedAt,
1952
+ preserveWorkerAffinity: waitPlan.preserveAffinityOnHydrate,
1953
+ });
1954
+ if (!waitPlan.shouldDehydrate)
1955
+ yield* maybeCheckpoint();
1956
+ yield manager.recordSessionEvent(input.sessionId, [{
1957
+ eventType: "session.wait_started",
1958
+ data: { seconds: result.seconds, reason: result.reason, preserveAffinity: waitPlan.preserveAffinityOnHydrate },
1959
+ }]);
1960
+ activeTimer = {
1961
+ deadlineMs: waitStartedAt + result.seconds * 1000,
1962
+ originalDurationMs: result.seconds * 1000,
1963
+ reason: result.reason,
1964
+ type: "wait",
1965
+ shouldRehydrate: waitPlan.shouldDehydrate,
1966
+ waitPlan,
1967
+ content: result.content,
1968
+ };
1969
+ return;
1970
+ }
1971
+ case "input_required": {
1972
+ ctx.traceInfo(`[orch] waiting for user input: ${result.question}`);
1973
+ yield* writeLatestResponse({
1974
+ iteration,
1975
+ type: "input_required",
1976
+ question: result.question,
1977
+ choices: result.choices,
1978
+ allowFreeform: result.allowFreeform,
1979
+ model: result.model,
1980
+ });
1981
+ pendingInputQuestion = {
1982
+ question: result.question,
1983
+ choices: result.choices,
1984
+ allowFreeform: result.allowFreeform,
1985
+ };
1986
+ publishStatus("input_required");
1987
+ if (!blobEnabled || inputGracePeriod < 0) {
1988
+ yield* maybeCheckpoint();
1989
+ // No timer — drain will block on dequeue (mode 2) for the answer
1990
+ return;
1991
+ }
1992
+ if (inputGracePeriod === 0) {
1993
+ yield* dehydrateForNextTurn("input_required");
1994
+ // No timer — drain will block on dequeue for the answer
1995
+ return;
1996
+ }
1997
+ // Set grace period timer
1998
+ const graceNow = yield ctx.utcNow();
1999
+ activeTimer = {
2000
+ deadlineMs: graceNow + inputGracePeriod * 1000,
2001
+ originalDurationMs: inputGracePeriod * 1000,
2002
+ reason: "input grace period",
2003
+ type: "input-grace",
2004
+ question: result.question,
2005
+ choices: result.choices,
2006
+ allowFreeform: result.allowFreeform,
2007
+ };
2008
+ return;
2009
+ }
2010
+ case "cancelled":
2011
+ ctx.traceInfo("[session] turn cancelled");
2012
+ return;
2013
+ // ─── Sub-Agent Result Handlers ───────────────────
2014
+ case "spawn_agent": {
2015
+ const childNestingLevel = nestingLevel + 1;
2016
+ if (childNestingLevel > MAX_NESTING_LEVEL) {
2017
+ ctx.traceInfo(`[orch] spawn_agent denied: nesting level ${nestingLevel} is at max (${MAX_NESTING_LEVEL})`);
2018
+ queueFollowup(`[SYSTEM: spawn_agent failed — you are already at nesting level ${nestingLevel} (max ${MAX_NESTING_LEVEL}). ` +
2019
+ `Sub-agents at this depth cannot spawn further sub-agents. Handle the task directly instead.]`);
2020
+ return;
2021
+ }
2022
+ const activeCount = subAgents.filter(a => a.status === "running").length;
2023
+ if (activeCount >= MAX_SUB_AGENTS) {
2024
+ ctx.traceInfo(`[orch] spawn_agent denied: ${activeCount}/${MAX_SUB_AGENTS} agents running`);
2025
+ queueFollowup(`[SYSTEM: spawn_agent failed — you already have ${activeCount} running sub-agents (max ${MAX_SUB_AGENTS}). ` +
2026
+ `Wait for some to complete before spawning more.]`);
2027
+ return;
2028
+ }
2029
+ let agentTask = result.task;
2030
+ let agentSystemMessage = result.systemMessage;
2031
+ let agentToolNames = result.toolNames;
2032
+ let agentModel = result.model;
2033
+ let agentIsSystem = false;
2034
+ const explicitAgentTitle = typeof result.title === "string" && result.title.trim() ? result.title.trim() : undefined;
2035
+ let agentTitle = explicitAgentTitle;
2036
+ let agentTitleIsExplicit = Boolean(explicitAgentTitle);
2037
+ let agentId;
2038
+ let agentSplash;
2039
+ let boundAgentName;
2040
+ let promptLayeringKind;
2041
+ let resolvedAgentName = result.agentName;
2042
+ const applyAgentDef = (agentDef, useDefinitionDefaults = false) => {
2043
+ agentTask = useDefinitionDefaults
2044
+ ? (agentDef.initialPrompt || `You are the ${agentDef.name} agent. Begin your work.`)
2045
+ : (result.task || agentDef.initialPrompt || `You are the ${agentDef.name} agent. Begin your work.`);
2046
+ agentSystemMessage = useDefinitionDefaults ? undefined : result.systemMessage;
2047
+ agentToolNames = useDefinitionDefaults
2048
+ ? (agentDef.tools ?? undefined)
2049
+ : (result.toolNames ?? agentDef.tools ?? undefined);
2050
+ agentIsSystem = agentDef.system ?? false;
2051
+ if (!agentTitleIsExplicit)
2052
+ agentTitle = agentDef.title;
2053
+ agentId = agentDef.id ?? resolvedAgentName;
2054
+ agentSplash = agentDef.splash;
2055
+ boundAgentName = agentDef.name;
2056
+ promptLayeringKind = agentDef.promptLayerKind
2057
+ ?? (agentDef.system
2058
+ ? ((agentDef.namespace || "pilotswarm") === "pilotswarm"
2059
+ ? "pilotswarm-system-agent"
2060
+ : "app-system-agent")
2061
+ : "app-agent");
2062
+ };
2063
+ if (resolvedAgentName) {
2064
+ ctx.traceInfo(`[orch] resolving agent config for: ${resolvedAgentName}`);
2065
+ const agentDef = yield manager.resolveAgentConfig(resolvedAgentName);
2066
+ if (!agentDef) {
2067
+ queueFollowup(`[SYSTEM: spawn_agent failed — agent "${resolvedAgentName}" not found. Use ps_list_agents to see available agents.]`);
2068
+ return;
2069
+ }
2070
+ if (agentDef.system && agentDef.creatable === false) {
2071
+ queueFollowup(`[SYSTEM: spawn_agent failed — agent "${resolvedAgentName}" is a worker-managed system agent and cannot be spawned from a session. ` +
2072
+ `If it is missing, the workers likely need to be restarted.]`);
2073
+ return;
2074
+ }
2075
+ applyAgentDef(agentDef, resolvedAgentName !== result.agentName);
2076
+ }
2077
+ if (agentModel && !agentModel.includes(":")) {
2078
+ ctx.traceInfo(`[orch] spawn_agent denied: unqualified model override "${agentModel}"`);
2079
+ queueFollowup(`[SYSTEM: spawn_agent failed — model "${agentModel}" is not allowed. ` +
2080
+ `When overriding a sub-agent model, first call list_available_models and then use the exact provider:model value from that list. ` +
2081
+ `If you are unsure, omit model so the sub-agent inherits your current model.]`);
2082
+ return;
2083
+ }
2084
+ // Dedup guard: prevent re-spawning a named agent that already exists
2085
+ // as a child of this session. This catches post-rehydration re-spawns
2086
+ // when the LLM loses context that children are already running.
2087
+ if (agentId) {
2088
+ const existingChild = subAgents.find(a => a.agentId === agentId && a.status === "running");
2089
+ if (existingChild) {
2090
+ ctx.traceInfo(`[orch] spawn_agent deduplicated: agent "${agentId}" already running as ${existingChild.orchId}`);
2091
+ queueFollowup(`[SYSTEM: Agent "${resolvedAgentName || agentId}" is already running as sub-agent ${existingChild.orchId.slice(0, 16)}. ` +
2092
+ `Use check_agents to see its status, or message_agent to communicate with it.]`);
2093
+ return;
2094
+ }
2095
+ }
2096
+ if (!agentTitle && agentIsSystem) {
2097
+ const text = agentTask || "";
2098
+ const titleMatch = text.match(/You are the \*{0,2}([^*\n]+?)\*{0,2}\s*[—–-]/i)
2099
+ || text.match(/You are the \*{0,2}([^*\n]+?Agent)\*{0,2}/i);
2100
+ if (titleMatch) {
2101
+ agentTitle = titleMatch[1].trim();
2102
+ }
2103
+ }
2104
+ ctx.traceInfo(`[orch] spawning sub-agent via SDK: task="${agentTask.slice(0, 80)}" model=${agentModel || "inherit"} agent=${resolvedAgentName || "custom"} nestingLevel=${childNestingLevel}`);
2105
+ const { boundAgentName: _parentBoundAgentName, promptLayering: _parentPromptLayering, ...parentConfig } = config;
2106
+ const childConfig = {
2107
+ ...parentConfig,
2108
+ ...(agentModel ? { model: agentModel } : {}),
2109
+ ...(agentSystemMessage ? { systemMessage: agentSystemMessage } : {}),
2110
+ ...(boundAgentName ? { boundAgentName } : {}),
2111
+ ...(promptLayeringKind ? { promptLayering: { kind: promptLayeringKind } } : {}),
2112
+ ...(agentToolNames ? { toolNames: agentToolNames } : {}),
2113
+ };
2114
+ const parentSystemMsg = typeof childConfig.systemMessage === "string"
2115
+ ? childConfig.systemMessage
2116
+ : childConfig.systemMessage?.content ?? "";
2117
+ const canSpawnMore = childNestingLevel < MAX_NESTING_LEVEL;
2118
+ const timingInstruction = agentIsSystem
2119
+ ? `- For recurring or periodic work, use the \`cron\` tool instead of ending every cycle with \`wait\`. ` +
2120
+ `Call \`cron(seconds=<N>, reason="...")\` to start or update the durable recurring schedule, ` +
2121
+ `then finish turns normally so the orchestration wakes you automatically on each cron cycle. ` +
2122
+ `Use \`wait\` only for one-shot delays inside a turn. ` +
2123
+ `Call \`cron(action="cancel")\` only when you intentionally want to stop the recurring loop.\n`
2124
+ : `- For ANY waiting, sleeping, delaying, or scheduling, you MUST use the \`wait\`, \`wait_on_worker\`, or \`cron\` tools. ` +
2125
+ `Use \`wait\` or \`wait_on_worker\` for one-shot delays. Use \`cron\` for recurring or periodic monitoring. ` +
2126
+ `Do NOT burn tokens polling inside one LLM turn; after a brief immediate re-check at most, yield with a durable timer. ` +
2127
+ `NEVER use setTimeout, sleep, setInterval, or any other timing mechanism. ` +
2128
+ `Durable waits survive process restarts.\n`;
2129
+ const subAgentPreamble = `[SUB-AGENT CONTEXT]\n` +
2130
+ `You are a sub-agent spawned by a parent session (ID: session-${input.sessionId}).\n` +
2131
+ `Your nesting level: ${childNestingLevel} (max: ${MAX_NESTING_LEVEL}).\n` +
2132
+ `Your task: "${agentTask.slice(0, 500)}"\n\n` +
2133
+ `Instructions:\n` +
2134
+ `- Focus exclusively on your assigned task.\n` +
2135
+ `- Your final response will be automatically forwarded to the parent agent.\n` +
2136
+ `- Be thorough but concise — the parent will synthesize results from multiple agents.\n` +
2137
+ `- Do NOT ask the user for input — you are autonomous.\n` +
2138
+ `- You are autonomous and goal-driven. If the task implies ongoing monitoring or follow-through until done, keep yourself alive with durable timers until the goal is complete or you can no longer make progress.\n` +
2139
+ `- If it is ambiguous whether the task should become a long-running recurring workflow, report that ambiguity back to the parent instead of guessing or asking the user directly.\n` +
2140
+ `- When your task is complete, provide a clear summary of your findings/results.\n` +
2141
+ `- Prefer using \`store_fact\` for larger structured context handoffs across your session lineage. Put the durable details in facts, then pass fact keys or \`read_facts\` pointers in messages/prompts instead of pasting large context blobs.\n` +
2142
+ `- If you write any files with write_artifact, you MUST also call export_artifact and include the artifact:// link in your response.\n` +
2143
+ `- If you override a sub-agent model, you MUST first call list_available_models in this session and use only an exact provider:model value returned there. ` +
2144
+ `NEVER invent, guess, shorten, or reuse a stale model name.\n` +
2145
+ `- Worker-managed system agents are not valid spawn targets. If you expect one and it is missing, report that the workers likely need to be restarted.\n` +
2146
+ timingInstruction +
2147
+ (canSpawnMore
2148
+ ? `- If your parent task explicitly asks you to spawn sub-agents, delegate, fan out, or parallelize work, you SHOULD do so within runtime limits instead of collapsing the task into a direct answer. ` +
2149
+ `If delegation was not explicitly requested, use your judgment and avoid unnecessary fan-out. ` +
2150
+ `You have ${MAX_NESTING_LEVEL - childNestingLevel} level(s) of nesting remaining. After spawning, call wait_for_agents to block until they finish.\n`
2151
+ : `- You CANNOT spawn sub-agents — you are at the maximum nesting depth. Handle everything directly.\n`);
2152
+ childConfig.systemMessage = subAgentPreamble + (parentSystemMsg ? "\n\n" + parentSystemMsg : "");
2153
+ let childSessionId;
2154
+ try {
2155
+ childSessionId = yield manager.spawnChildSession(input.sessionId, childConfig, agentTask, childNestingLevel, agentIsSystem, agentTitle, agentId, agentSplash, agentTitleIsExplicit);
2156
+ }
2157
+ catch (err) {
2158
+ ctx.traceInfo(`[orch] spawnChildSession failed: ${err.message}`);
2159
+ queueFollowup(`[SYSTEM: spawn_agent failed: ${err.message}]`);
2160
+ return;
2161
+ }
2162
+ const childOrchId = `session-${childSessionId}`;
2163
+ yield manager.recordSessionEvent(input.sessionId, [{
2164
+ eventType: "session.agent_spawned",
2165
+ data: { childSessionId, agentId: agentId || undefined, task: agentTask.slice(0, 500) },
2166
+ }]);
2167
+ subAgents.push({
2168
+ orchId: childOrchId,
2169
+ sessionId: childSessionId,
2170
+ task: agentTask.slice(0, 500),
2171
+ status: "running",
2172
+ agentId: agentId || undefined,
2173
+ });
2174
+ queueFollowup(`[SYSTEM: Sub-agent spawned successfully.\n` +
2175
+ ` Agent ID: ${childOrchId}\n` +
2176
+ ` ${resolvedAgentName ? `Agent: ${resolvedAgentName}\n ` : ``}Task: "${agentTask.slice(0, 200)}"\n` +
2177
+ ` The agent is now running autonomously. Continue your work in this SAME turn and keep following the user's remaining steps. ` +
2178
+ `Do NOT stop just because the child started. If you need to pause, call wait or wait_for_agents explicitly. ` +
2179
+ `You can also use check_agents to poll status, ` +
2180
+ `or message_agent to send instructions.]`);
2181
+ return;
2182
+ }
2183
+ case "message_agent": {
2184
+ const targetOrchId = result.agentId;
2185
+ const agentEntry = subAgents.find(a => a.orchId === targetOrchId);
2186
+ if (!agentEntry) {
2187
+ ctx.traceInfo(`[orch] message_agent: unknown agent ${targetOrchId}`);
2188
+ queueFollowup(`[SYSTEM: message_agent failed — agent "${targetOrchId}" not found. ` +
2189
+ `Known agents: ${subAgents.map(a => a.orchId).join(", ") || "none"}]`);
2190
+ return;
2191
+ }
2192
+ ctx.traceInfo(`[orch] message_agent via SDK: ${agentEntry.sessionId} msg="${result.message.slice(0, 60)}"`);
2193
+ try {
2194
+ yield manager.sendToSession(agentEntry.sessionId, result.message);
2195
+ }
2196
+ catch (err) {
2197
+ ctx.traceInfo(`[orch] message_agent failed: ${err.message}`);
2198
+ queueFollowup(`[SYSTEM: message_agent failed: ${err.message}]`);
2199
+ return;
2200
+ }
2201
+ queueFollowup(`[SYSTEM: Message sent to sub-agent ${targetOrchId}: "${result.message.slice(0, 200)}". ` +
2202
+ `Continue your work in this SAME turn. If you are waiting on the child, call wait_for_agents explicitly rather than stopping here.]`);
2203
+ return;
2204
+ }
2205
+ case "check_agents": {
2206
+ ctx.traceInfo(`[orch] check_agents: ${subAgents.length} agents tracked`);
2207
+ if (subAgents.length === 0) {
2208
+ queueFollowup(`[SYSTEM: No sub-agents have been spawned yet.]`);
2209
+ return;
2210
+ }
2211
+ const statusLines = [];
2212
+ for (const agent of subAgents) {
2213
+ try {
2214
+ const rawStatus = yield manager.getSessionStatus(agent.sessionId);
2215
+ const parsed = JSON.parse(rawStatus);
2216
+ if (parsed.status === "completed" || parsed.status === "failed" || parsed.status === "idle") {
2217
+ agent.status = parsed.status === "failed" ? "failed" : "completed";
2218
+ if (parsed.result)
2219
+ agent.result = parsed.result.slice(0, 1000);
2220
+ }
2221
+ statusLines.push(` - Agent ${agent.orchId}\n` +
2222
+ ` Task: "${agent.task.slice(0, 120)}"\n` +
2223
+ ` Status: ${parsed.status}\n` +
2224
+ ` Iterations: ${parsed.iterations ?? 0}\n` +
2225
+ ` Output: ${parsed.result ?? "(no output yet)"}`);
2226
+ }
2227
+ catch (err) {
2228
+ statusLines.push(` - Agent ${agent.orchId}\n` +
2229
+ ` Task: "${agent.task.slice(0, 120)}"\n` +
2230
+ ` Status: unknown (error: ${err.message})`);
2231
+ }
2232
+ }
2233
+ queueFollowup(`[SYSTEM: Sub-agent status report (${subAgents.length} agents):\n${statusLines.join("\n")}]`);
2234
+ return;
2235
+ }
2236
+ case "list_sessions": {
2237
+ ctx.traceInfo(`[orch] list_sessions`);
2238
+ const rawSessions = yield manager.listSessions();
2239
+ const sessions = JSON.parse(rawSessions);
2240
+ const lines = sessions.map((s) => ` - ${s.sessionId}${s.sessionId === input.sessionId ? " (this session)" : ""}\n` +
2241
+ ` Title: ${s.title ?? "(untitled)"}\n` +
2242
+ ` Status: ${s.status}, Iterations: ${s.iterations ?? 0}\n` +
2243
+ ` Parent: ${s.parentSessionId ?? "none"}`);
2244
+ queueFollowup(`[SYSTEM: Active sessions (${sessions.length}):\n${lines.join("\n")}]`);
2245
+ return;
2246
+ }
2247
+ case "wait_for_agents": {
2248
+ let targetIds = result.agentIds;
2249
+ if (!targetIds || targetIds.length === 0) {
2250
+ const runningAgentIds = subAgents.filter(a => a.status === "running").map(a => a.orchId);
2251
+ targetIds = runningAgentIds.length > 0
2252
+ ? runningAgentIds
2253
+ : subAgents.map(a => a.orchId);
2254
+ }
2255
+ if (targetIds.length === 0) {
2256
+ ctx.traceInfo(`[orch] wait_for_agents: no running agents to wait for`);
2257
+ queueFollowup(`[SYSTEM: No running sub-agents to wait for. All agents have already completed.]`);
2258
+ return;
2259
+ }
2260
+ // Check if all are already done
2261
+ const stillRunning = targetIds.filter(id => {
2262
+ const agent = subAgents.find(a => a.orchId === id);
2263
+ return agent && !isSubAgentTerminalStatus(agent.status);
2264
+ });
2265
+ if (stillRunning.length === 0) {
2266
+ queueFollowup(buildWaitForAgentsFollowup(targetIds));
2267
+ return;
2268
+ }
2269
+ ctx.traceInfo(`[orch] wait_for_agents: waiting for ${targetIds.length} agents`);
2270
+ publishStatus("running");
2271
+ waitingForAgentIds = targetIds;
2272
+ const agentPollNow = yield ctx.utcNow();
2273
+ activeTimer = {
2274
+ deadlineMs: agentPollNow + 30_000,
2275
+ originalDurationMs: 30_000,
2276
+ reason: `waiting for ${targetIds.length} agent(s)`,
2277
+ type: "agent-poll",
2278
+ agentIds: targetIds,
2279
+ };
2280
+ return;
2281
+ }
2282
+ case "complete_agent": {
2283
+ const targetOrchId = result.agentId;
2284
+ const agentEntry = subAgents.find(a => a.orchId === targetOrchId);
2285
+ if (!agentEntry) {
2286
+ ctx.traceInfo(`[orch] complete_agent: unknown agent ${targetOrchId}`);
2287
+ queueFollowup(`[SYSTEM: complete_agent failed — agent "${targetOrchId}" not found. ` +
2288
+ `Known agents: ${subAgents.map(a => a.orchId).join(", ") || "none"}]`);
2289
+ return;
2290
+ }
2291
+ ctx.traceInfo(`[orch] complete_agent: sending /done to ${agentEntry.sessionId}`);
2292
+ try {
2293
+ const cmdId = `done-${iteration}`;
2294
+ yield manager.sendCommandToSession(agentEntry.sessionId, { type: "cmd", cmd: "done", id: cmdId, args: { reason: "Completed by parent" } });
2295
+ }
2296
+ catch (err) {
2297
+ ctx.traceInfo(`[orch] complete_agent failed: ${err.message}`);
2298
+ queueFollowup(`[SYSTEM: complete_agent failed: ${err.message}]`);
2299
+ return;
2300
+ }
2301
+ queueFollowup(`[SYSTEM: Graceful completion requested for sub-agent ${targetOrchId}. ` +
2302
+ `Use check_agents or wait_for_agents to observe final completion.]`);
2303
+ return;
2304
+ }
2305
+ case "cancel_agent": {
2306
+ const targetOrchId = result.agentId;
2307
+ const agentEntry = subAgents.find(a => a.orchId === targetOrchId);
2308
+ if (!agentEntry) {
2309
+ ctx.traceInfo(`[orch] cancel_agent: unknown agent ${targetOrchId}`);
2310
+ queueFollowup(`[SYSTEM: cancel_agent failed — agent "${targetOrchId}" not found. ` +
2311
+ `Known agents: ${subAgents.map(a => a.orchId).join(", ") || "none"}]`);
2312
+ return;
2313
+ }
2314
+ const cancelReason = result.reason ?? "Cancelled by parent";
2315
+ ctx.traceInfo(`[orch] cancel_agent: sending cancel to ${agentEntry.sessionId} reason="${cancelReason}"`);
2316
+ try {
2317
+ const cmdId = `cancel-${iteration}-${agentEntry.sessionId.slice(0, 8)}`;
2318
+ yield manager.sendCommandToSession(agentEntry.sessionId, { type: "cmd", cmd: "cancel", id: cmdId, args: { reason: cancelReason } });
2319
+ }
2320
+ catch (err) {
2321
+ ctx.traceInfo(`[orch] cancel_agent failed: ${err.message}`);
2322
+ queueFollowup(`[SYSTEM: cancel_agent failed: ${err.message}]`);
2323
+ return;
2324
+ }
2325
+ queueFollowup(`[SYSTEM: Graceful cancellation requested for sub-agent ${targetOrchId}. ` +
2326
+ `Use check_agents or wait_for_agents to observe final termination.${result.reason ? ` Reason: ${result.reason}` : ""}]`);
2327
+ return;
2328
+ }
2329
+ case "delete_agent": {
2330
+ const targetOrchId = result.agentId;
2331
+ const agentEntry = subAgents.find(a => a.orchId === targetOrchId);
2332
+ if (!agentEntry) {
2333
+ ctx.traceInfo(`[orch] delete_agent: unknown agent ${targetOrchId}`);
2334
+ queueFollowup(`[SYSTEM: delete_agent failed — agent "${targetOrchId}" not found. ` +
2335
+ `Known agents: ${subAgents.map(a => a.orchId).join(", ") || "none"}]`);
2336
+ return;
2337
+ }
2338
+ const deleteReason = result.reason ?? "Deleted by parent";
2339
+ ctx.traceInfo(`[orch] delete_agent: deleting ${agentEntry.sessionId} reason="${deleteReason}"`);
2340
+ try {
2341
+ if (isSubAgentTerminalStatus(agentEntry.status)) {
2342
+ yield manager.deleteSession(agentEntry.sessionId, deleteReason);
2343
+ subAgents = subAgents.filter((agent) => agent.orchId !== targetOrchId);
2344
+ queueFollowup(`[SYSTEM: Sub-agent ${targetOrchId} has been deleted.${result.reason ? ` Reason: ${result.reason}` : ""}]`);
2345
+ return;
2346
+ }
2347
+ const cmdId = `delete-${iteration}-${agentEntry.sessionId.slice(0, 8)}`;
2348
+ yield manager.sendCommandToSession(agentEntry.sessionId, { type: "cmd", cmd: "delete", id: cmdId, args: { reason: deleteReason } });
2349
+ }
2350
+ catch (err) {
2351
+ ctx.traceInfo(`[orch] delete_agent failed: ${err.message}`);
2352
+ queueFollowup(`[SYSTEM: delete_agent failed: ${err.message}]`);
2353
+ return;
2354
+ }
2355
+ queueFollowup(`[SYSTEM: Graceful deletion requested for sub-agent ${targetOrchId}. ` +
2356
+ `It will cancel its descendants first and then delete itself.${result.reason ? ` Reason: ${result.reason}` : ""}]`);
2357
+ return;
2358
+ }
2359
+ case "error": {
2360
+ const missingStateIndex = result.message.indexOf(SESSION_STATE_MISSING_PREFIX);
2361
+ if (missingStateIndex >= 0) {
2362
+ const fatalError = result.message.slice(missingStateIndex + SESSION_STATE_MISSING_PREFIX.length).trim();
2363
+ ctx.traceInfo(`[orch] fatal missing session state: ${fatalError}`);
2364
+ publishStatus("failed", { error: fatalError, fatal: true });
2365
+ yield manager.updateCmsState(input.sessionId, "failed", fatalError);
2366
+ throw new Error(fatalError);
2367
+ }
2368
+ retryCount++;
2369
+ ctx.traceInfo(`[orch] turn returned error (attempt ${retryCount}/${MAX_RETRIES}): ${result.message}`);
2370
+ if (isCopilotConnectionClosedError(result.message)) {
2371
+ if (retryCount <= COPILOT_CONNECTION_CLOSED_MAX_RETRIES) {
2372
+ const retryDetail = buildConnectionClosedRetryDetail(retryCount);
2373
+ publishStatus("error", {
2374
+ error: `${result.message} (${retryDetail})`,
2375
+ recoverableTransportLoss: true,
2376
+ });
2377
+ ctx.traceInfo(`[orch] live Copilot connection loss returned as turn error; retrying in ${COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS}s`);
2378
+ if (blobEnabled) {
2379
+ yield* dehydrateForNextTurn("error", true, {
2380
+ detail: retryDetail,
2381
+ error: result.message,
2382
+ phase: "turn.result.error",
2383
+ retryAttempt: retryCount,
2384
+ maxRetries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2385
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2386
+ });
2387
+ }
2388
+ yield ctx.scheduleTimer(COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS * 1000);
2389
+ yield* versionedContinueAsNew(continueInput({
2390
+ prompt: sourcePrompt,
2391
+ retryCount,
2392
+ needsHydration: blobEnabled ? true : needsHydration,
2393
+ }));
2394
+ return;
2395
+ }
2396
+ const handoffMessage = buildLossyHandoffSummary(result.message);
2397
+ ctx.traceInfo(`[orch] ${handoffMessage}`);
2398
+ publishStatus("error", {
2399
+ error: handoffMessage,
2400
+ retriesExhausted: true,
2401
+ lossyHandoff: true,
2402
+ });
2403
+ yield manager.recordSessionEvent(input.sessionId, [{
2404
+ eventType: "session.lossy_handoff",
2405
+ data: {
2406
+ message: handoffMessage,
2407
+ error: result.message,
2408
+ phase: "turn.result.error",
2409
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2410
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2411
+ nextStep: "dehydrate_and_resume_on_new_worker",
2412
+ },
2413
+ }]);
2414
+ if (blobEnabled) {
2415
+ yield* dehydrateForNextTurn("lossy_handoff", true, {
2416
+ detail: handoffMessage,
2417
+ error: result.message,
2418
+ phase: "turn.result.error",
2419
+ retries: COPILOT_CONNECTION_CLOSED_MAX_RETRIES,
2420
+ retryDelaySeconds: COPILOT_CONNECTION_CLOSED_RETRY_DELAY_SECONDS,
2421
+ nextStep: "dehydrate_and_resume_on_new_worker",
2422
+ });
2423
+ yield* versionedContinueAsNew(continueInput({
2424
+ prompt: sourcePrompt,
2425
+ retryCount: 0,
2426
+ needsHydration: true,
2427
+ rehydrationMessage: buildLossyHandoffRehydrationMessage(result.message),
2428
+ }));
2429
+ return;
2430
+ }
2431
+ publishStatus("error", {
2432
+ error: `${handoffMessage} Durable handoff is unavailable because blob persistence is disabled.`,
2433
+ retriesExhausted: true,
2434
+ lossyHandoff: false,
2435
+ });
2436
+ retryCount = 0;
2437
+ return;
2438
+ }
2439
+ if (retryCount >= MAX_RETRIES) {
2440
+ ctx.traceInfo(`[orch] max retries exhausted for turn error, waiting for user input`);
2441
+ publishStatus("error", {
2442
+ error: `Failed after ${MAX_RETRIES} attempts: ${result.message}`,
2443
+ retriesExhausted: true,
2444
+ });
2445
+ retryCount = 0;
2446
+ return;
2447
+ }
2448
+ const errorRetryDelay = 15 * Math.pow(2, retryCount - 1);
2449
+ publishStatus("error", {
2450
+ error: `${result.message} (retry ${retryCount}/${MAX_RETRIES} in ${errorRetryDelay}s)`,
2451
+ });
2452
+ ctx.traceInfo(`[orch] retrying in ${errorRetryDelay}s after turn error`);
2453
+ if (blobEnabled) {
2454
+ yield* dehydrateForNextTurn("error", true, {
2455
+ detail: result.message,
2456
+ error: result.message,
2457
+ phase: "turn.result.error",
2458
+ retryAttempt: retryCount,
2459
+ maxRetries: MAX_RETRIES,
2460
+ retryDelaySeconds: errorRetryDelay,
2461
+ });
2462
+ }
2463
+ yield ctx.scheduleTimer(errorRetryDelay * 1000);
2464
+ yield* versionedContinueAsNew(continueInput({
2465
+ prompt: sourcePrompt,
2466
+ retryCount,
2467
+ needsHydration: blobEnabled ? true : needsHydration,
2468
+ }));
2469
+ return;
2470
+ }
2471
+ }
2472
+ }
2473
+ // ═══════════════════════════════════════════════════════════
2474
+ // ═══ PROCESS TIMER — handles fired timers by type ════════
2475
+ // ═══════════════════════════════════════════════════════════
2476
/**
 * Handles a timer that has fired, dispatching on `timer.type`.
 *
 * Types handled: "wait", "cron", "idle", "agent-poll", "input-grace".
 * Any other type falls through the switch and does nothing.
 *
 * @param {object} timerItem - FIFO item; only `timerItem.timer` is read
 *   (`type`, `originalDurationMs`, `reason`, `shouldRehydrate`).
 * @returns {Generator} yields orchestration work; resolves to undefined.
 */
function* processTimer(timerItem) {
    const timer = timerItem.timer;
    switch (timer.type) {
        case "wait": {
            const seconds = Math.round(timer.originalDurationMs / 1000);
            yield manager.recordSessionEvent(input.sessionId, [{
                eventType: "session.wait_completed",
                data: { seconds },
            }]);
            const timerPrompt = `The ${seconds} second wait is now complete. Continue with your task.`;
            // Optional pieces (reason, original request) are dropped when absent.
            const resumeSystemPrompt = [
                timer.reason ? `Wait reason: "${timer.reason}".` : undefined,
                taskContext ? `Original user request: "${taskContext}".` : undefined,
                "Resume the interrupted task now.",
                "Do not treat this as a new unrelated user request.",
                "Do not call wait() again for the delay that already finished.",
            ].filter(Boolean).join(" ");
            // Fall back to the bare prompt if appendSystemContext yields null/undefined.
            yield* processPrompt(appendSystemContext(timerPrompt, resumeSystemPrompt) ?? timerPrompt, false);
            return;
        }
        case "cron": {
            yield manager.recordSessionEvent(input.sessionId, [{
                eventType: "session.cron_fired",
                data: {},
            }]);
            // NOTE(review): assumes cronSchedule is still set when a cron timer fires — confirm.
            const activeCron = cronSchedule;
            const cronPrompt = `[SYSTEM: Scheduled cron wake-up for: "${activeCron.reason}". Resume your recurring task.]`;
            const shouldRehydrate = timer.shouldRehydrate;
            if (shouldRehydrate) {
                yield* processPrompt(wrapWithResumeContext("Resume your recurring task.", `Scheduled cron wake-up for: "${activeCron.reason}".`), true);
            }
            else {
                yield* processPrompt(cronPrompt, true);
            }
            return;
        }
        case "idle": {
            ctx.traceInfo("[session] idle timeout, dehydrating");
            yield* dehydrateForNextTurn("idle");
            // No LLM turn — main loop will CAN
            return;
        }
        case "agent-poll": {
            // Fallback poll — check agent statuses via SDK
            if (waitingForAgentIds) {
                const stillRunning = waitingForAgentIds.filter(id => {
                    const agent = subAgents.find(a => a.orchId === id);
                    return agent && !isSubAgentTerminalStatus(agent.status);
                });
                ctx.traceInfo(`[orch] wait_for_agents: fallback poll, checking ${stillRunning.length} agents`);
                // Only these reported statuses are copied onto the tracked entry;
                // any other value leaves the entry's status unchanged.
                const mirroredStatuses = ["failed", "completed", "cancelled", "waiting"];
                for (const targetId of stillRunning) {
                    const agent = subAgents.find(a => a.orchId === targetId);
                    if (!agent || isSubAgentTerminalStatus(agent.status))
                        continue;
                    try {
                        const rawStatus = yield manager.getSessionStatus(agent.sessionId);
                        const parsed = JSON.parse(rawStatus);
                        if (mirroredStatuses.includes(parsed.status)) {
                            agent.status = parsed.status;
                        }
                        if (parsed.result) {
                            agent.result = parsed.result.slice(0, 2000);
                        }
                    }
                    catch (err) {
                        // Best-effort poll: keep going, but leave a trace instead of
                        // dropping the failure silently.
                        ctx.traceInfo(`[orch] wait_for_agents: status poll failed for ${agent.orchId}: ${err?.message ?? err}`);
                    }
                }
                if (yield* maybeResolveAgentWaitCompletion()) {
                    return;
                }
                const nowRunning = getStillRunningAgentIds(waitingForAgentIds);
                if (pendingShutdown) {
                    const now = yield ctx.utcNow();
                    if (now >= pendingShutdown.deadlineAtMs) {
                        const timeoutMessage = `Graceful ${pendingShutdown.mode} timed out after ${Math.round(SHUTDOWN_TIMEOUT_MS / 1000)}s ` +
                            `waiting for ${nowRunning.length} child session(s): ${nowRunning.join(", ") || "unknown"}`;
                        yield* failPendingShutdown(timeoutMessage);
                        return;
                    }
                    // Next poll never overshoots the shutdown deadline.
                    const remainingMs = Math.max(0, pendingShutdown.deadlineAtMs - now);
                    const nextPollMs = Math.min(SHUTDOWN_POLL_INTERVAL_MS, remainingMs);
                    activeTimer = {
                        deadlineMs: now + nextPollMs,
                        originalDurationMs: nextPollMs,
                        reason: buildShutdownWaitReason(pendingShutdown),
                        type: "agent-poll",
                        agentIds: waitingForAgentIds,
                    };
                    publishStatus("waiting", {
                        waitReason: buildShutdownWaitReason(pendingShutdown),
                        waitStartedAt: pendingShutdown.startedAtMs,
                        waitSeconds: Math.ceil(remainingMs / 1000),
                    });
                }
                else {
                    // Re-arm poll timer
                    const now = yield ctx.utcNow();
                    activeTimer = {
                        deadlineMs: now + 30_000,
                        originalDurationMs: 30_000,
                        reason: `waiting for ${nowRunning.length} agent(s)`,
                        type: "agent-poll",
                        agentIds: waitingForAgentIds,
                    };
                }
            }
            return;
        }
        case "input-grace": {
            // Grace period expired — dehydrate and wait for answer
            yield* dehydrateForNextTurn("input_required");
            // No timer — drain will block on dequeue for the answer
            return;
        }
    }
}
2600
+ // ═══════════════════════════════════════════════════════════
2601
+ // ═══ PROCESS ANSWER — format answer and run turn ═════════
2602
+ // ═══════════════════════════════════════════════════════════
2603
/**
 * Turns a user's answer into a prompt and runs a turn with it.
 *
 * Reads the question text from `pendingInputQuestion` (or uses the
 * placeholder "a question"), clears `pendingInputQuestion`, then delegates
 * to `processPrompt` with the bootstrap flag set to false.
 *
 * @param {object} answerItem - FIFO item; only `answerItem.answer` is read.
 */
function* processAnswer(answerItem) {
    const askedQuestion = pendingInputQuestion?.question ?? "a question";
    pendingInputQuestion = null;
    const promptLines = [
        `The user was asked: "${askedQuestion}"`,
        `The user responded: "${answerItem.answer}"`,
    ];
    yield* processPrompt(promptLines.join("\n"), false);
}
2609
+ // ═══════════════════════════════════════════════════════════
2610
+ // ═══ DECIDE — pop + process one item from FIFO ══════════
2611
+ // ═══════════════════════════════════════════════════════════
2612
/**
 * Runs at most one unit of buffered work, chosen in priority order:
 *   1. a queued in-memory tool action,
 *   2. the pending prompt (held back while waiting for agents),
 *   3. the next FIFO item in arrival order,
 *   4. a ready child digest (also held back while waiting for agents).
 *
 * @returns {boolean} true if something was processed, false if nothing was pending.
 */
function* decide() {
    drainLeadingQueuedCronActions();

    // 1. Tool actions queued in memory are replayed first.
    if (pendingToolActions.length !== 0) {
        const queuedAction = pendingToolActions.shift();
        ctx.traceInfo(`[orch] replaying queued action: ${queuedAction.type} remaining=${pendingToolActions.length}`);
        yield* handleTurnResult(queuedAction, "");
        return true;
    }

    // 2. Pending prompt from followups or carry-forward. While agents are being
    //    awaited it stays parked so it can be merged with the agents-done summary.
    if (pendingPrompt && !waitingForAgentIds) {
        // Snapshot the carried state, then reset it before running the turn.
        const carriedArgs = [pendingPrompt, bootstrapPrompt, pendingRequiredTool];
        pendingPrompt = undefined;
        bootstrapPrompt = false;
        pendingRequiredTool = undefined;
        yield* processPrompt(...carriedArgs);
        return true;
    }

    // 3. Next FIFO item, dispatched by kind.
    const nextItem = popFifoItem();
    if (nextItem) {
        const kind = nextItem.kind;
        if (kind === "prompt") {
            yield* processPrompt(nextItem.prompt, nextItem.bootstrap ?? false, nextItem.requiredTool);
        }
        else if (kind === "answer") {
            yield* processAnswer(nextItem);
        }
        else if (kind === "timer") {
            yield* processTimer(nextItem);
        }
        else if (kind === "agents-done") {
            queueFollowup(nextItem.summary);
        }
        else {
            ctx.traceInfo(`[decide] unknown FIFO item kind: ${nextItem.kind}`);
        }
        return true;
    }

    // 4. Buffered child digest — considered only once the FIFO is empty.
    const digestIsDue = pendingChildDigest?.ready && pendingChildDigest.updates.length > 0 && !waitingForAgentIds;
    if (!digestIsDue) {
        return false;
    }
    yield* processPendingChildDigest();
    return true;
}
2662
+ // ═══════════════════════════════════════════════════════════
2663
+ // ═══ FLAT MAIN LOOP ══════════════════════════════════════
2664
+ // ═══════════════════════════════════════════════════════════
2665
+ let loopIteration = 0;
2666
+ while (true) {
2667
+ loopIteration++;
2668
+ // Safety valve: CAN if too many iterations in this execution
2669
+ if (loopIteration > MAX_ITERATIONS_PER_EXECUTION) {
2670
+ ctx.traceInfo(`[orch] iteration cap (${MAX_ITERATIONS_PER_EXECUTION}) — continuing as new`);
2671
+ yield* versionedContinueAsNew(continueInput());
2672
+ return "";
2673
+ }
2674
+ if (loopIteration % HISTORY_SIZE_CHECK_INTERVAL_ITERATIONS === 0) {
2675
+ try {
2676
+ const stats = yield manager.getOrchestrationStats(input.sessionId);
2677
+ const historySizeBytes = Number(stats?.historySizeBytes) || 0;
2678
+ if (historySizeBytes >= MAX_HISTORY_SIZE_BEFORE_CONTINUE_AS_NEW_BYTES) {
2679
+ ctx.traceInfo(`[orch] history size cap (${historySizeBytes} >= ${MAX_HISTORY_SIZE_BEFORE_CONTINUE_AS_NEW_BYTES}) ` +
2680
+ `at loop ${loopIteration} — continuing as new`);
2681
+ yield* versionedContinueAsNew(continueInput());
2682
+ return "";
2683
+ }
2684
+ }
2685
+ catch (err) {
2686
+ ctx.traceInfo(`[orch] history size check failed at loop ${loopIteration}: ${err?.message ?? err}`);
2687
+ }
2688
+ }
2689
+ // DRAIN: greedily move queue events + timer fires into KV FIFO
2690
+ yield* drain();
2691
+ if (orchestrationResult !== null)
2692
+ return orchestrationResult;
2693
+ // DECIDE: pop + process one item from FIFO in arrival order
2694
+ const didWork = yield* decide();
2695
+ if (orchestrationResult !== null)
2696
+ return orchestrationResult;
2697
+ if (didWork)
2698
+ continue;
2699
+ // No buffered work — check if we should wait or CAN
2700
+ if (activeTimer)
2701
+ continue; // drain will race the timer next iteration
2702
+ if (pendingInputQuestion)
2703
+ continue; // drain will block on dequeue for answer
2704
+ // Truly nothing to do — CAN (safe checkpoint)
2705
+ ctx.traceInfo(`[orch] no buffered work, continuing as new`);
2706
+ yield* versionedContinueAsNew(continueInput());
2707
+ return "";
2708
+ }
2709
+ }
2710
+ //# sourceMappingURL=orchestration_1_0_44.js.map