@flowdesk/opencode-plugin 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80):
  1. package/README.md +1 -1
  2. package/dist/agent-task-output.d.ts +29 -0
  3. package/dist/agent-task-output.d.ts.map +1 -0
  4. package/dist/agent-task-output.js +225 -0
  5. package/dist/agent-task-output.js.map +1 -0
  6. package/dist/agent-task-runner.d.ts +34 -0
  7. package/dist/agent-task-runner.d.ts.map +1 -1
  8. package/dist/agent-task-runner.js +634 -84
  9. package/dist/agent-task-runner.js.map +1 -1
  10. package/dist/auto-continue-preview-tool.d.ts +36 -0
  11. package/dist/auto-continue-preview-tool.d.ts.map +1 -0
  12. package/dist/auto-continue-preview-tool.js +119 -0
  13. package/dist/auto-continue-preview-tool.js.map +1 -0
  14. package/dist/completion-ui-cache.d.ts +6 -0
  15. package/dist/completion-ui-cache.d.ts.map +1 -0
  16. package/dist/completion-ui-cache.js +390 -0
  17. package/dist/completion-ui-cache.js.map +1 -0
  18. package/dist/event-hook-observer.d.ts +14 -0
  19. package/dist/event-hook-observer.d.ts.map +1 -0
  20. package/dist/event-hook-observer.js +257 -0
  21. package/dist/event-hook-observer.js.map +1 -0
  22. package/dist/managed-dispatch-adapter.d.ts +62 -0
  23. package/dist/managed-dispatch-adapter.d.ts.map +1 -1
  24. package/dist/managed-dispatch-adapter.js +472 -4
  25. package/dist/managed-dispatch-adapter.js.map +1 -1
  26. package/dist/model-selection-engine.d.ts +60 -0
  27. package/dist/model-selection-engine.d.ts.map +1 -0
  28. package/dist/model-selection-engine.js +242 -0
  29. package/dist/model-selection-engine.js.map +1 -0
  30. package/dist/provider-usage-live-tool.d.ts +10 -0
  31. package/dist/provider-usage-live-tool.d.ts.map +1 -1
  32. package/dist/provider-usage-live-tool.js +262 -33
  33. package/dist/provider-usage-live-tool.js.map +1 -1
  34. package/dist/server.d.ts +36 -1
  35. package/dist/server.d.ts.map +1 -1
  36. package/dist/server.js +497 -20
  37. package/dist/server.js.map +1 -1
  38. package/dist/stall-recovery.d.ts +34 -0
  39. package/dist/stall-recovery.d.ts.map +1 -1
  40. package/dist/stall-recovery.js +680 -3
  41. package/dist/stall-recovery.js.map +1 -1
  42. package/dist/status-live-tool.d.ts +54 -0
  43. package/dist/status-live-tool.d.ts.map +1 -1
  44. package/dist/status-live-tool.js +449 -44
  45. package/dist/status-live-tool.js.map +1 -1
  46. package/dist/tui-subtask-activity.d.ts +73 -0
  47. package/dist/tui-subtask-activity.d.ts.map +1 -0
  48. package/dist/tui-subtask-activity.js +271 -0
  49. package/dist/tui-subtask-activity.js.map +1 -0
  50. package/dist/tui-usage-snapshot.d.ts +14 -0
  51. package/dist/tui-usage-snapshot.d.ts.map +1 -1
  52. package/dist/tui-usage-snapshot.js +275 -8
  53. package/dist/tui-usage-snapshot.js.map +1 -1
  54. package/dist/tui.d.ts.map +1 -1
  55. package/dist/tui.js +102 -44
  56. package/dist/tui.js.map +1 -1
  57. package/dist/workflow-assign-tool.d.ts +23 -0
  58. package/dist/workflow-assign-tool.d.ts.map +1 -0
  59. package/dist/workflow-assign-tool.js +135 -0
  60. package/dist/workflow-assign-tool.js.map +1 -0
  61. package/dist/workflow-author-tool.d.ts +29 -0
  62. package/dist/workflow-author-tool.d.ts.map +1 -0
  63. package/dist/workflow-author-tool.js +227 -0
  64. package/dist/workflow-author-tool.js.map +1 -0
  65. package/dist/workflow-dispatch-tool.d.ts +12 -0
  66. package/dist/workflow-dispatch-tool.d.ts.map +1 -1
  67. package/dist/workflow-dispatch-tool.js +31 -3
  68. package/dist/workflow-dispatch-tool.js.map +1 -1
  69. package/dist/workflow-orchestrator.d.ts +31 -0
  70. package/dist/workflow-orchestrator.d.ts.map +1 -0
  71. package/dist/workflow-orchestrator.js +160 -0
  72. package/dist/workflow-orchestrator.js.map +1 -0
  73. package/dist/workflow-scheduler.d.ts.map +1 -1
  74. package/dist/workflow-scheduler.js +3 -1
  75. package/dist/workflow-scheduler.js.map +1 -1
  76. package/dist/workflow-synthesis-tool.d.ts +31 -0
  77. package/dist/workflow-synthesis-tool.d.ts.map +1 -0
  78. package/dist/workflow-synthesis-tool.js +194 -0
  79. package/dist/workflow-synthesis-tool.js.map +1 -0
  80. package/package.json +2 -2
@@ -1,9 +1,25 @@
1
1
  import { createHash } from "node:crypto";
2
- import { applyFlowDeskSessionEvidenceWriteIntentsV1, prepareFlowDeskSessionEvidenceWriteIntentV1, } from "@flowdesk/core";
2
+ import { applyFlowDeskSessionEvidenceWriteIntentsV1, prepareFlowDeskSessionEvidenceWriteIntentV1, reloadFlowDeskSessionEvidenceV1, validateTopTierReviewVerdictV1, } from "@flowdesk/core";
3
3
  import { launchFlowDeskInjectedSdkRuntimeLaneFromPlanV1, materializeFlowDeskRuntimeLaneLaunchLifecycleEvidenceV1, } from "./managed-dispatch-adapter.js";
4
+ import { observeFlowDeskAgentTaskOutputV1 } from "./agent-task-output.js";
5
+ import { refreshFlowDeskCompletionUiCachesV1 } from "./completion-ui-cache.js";
4
6
  import { recordFlowDeskLaneHeartbeatV1 } from "./lane-heartbeat-writer.js";
5
7
  const TASK_RESULT_MAX_TEXT = 32_768;
6
8
  const AGENT_TASK_CONTEXT_MAX_PROMPT_TEXT = 32_768;
9
+ const INVALID_PARENT_SESSION_REF = "ses-invalid-parent-session-binding";
10
+ /** Schema version for async child session tracking evidence */
11
+ export const AGENT_TASK_CHILD_SESSION_SCHEMA_VERSION = "flowdesk.agent_task_child_session.v1";
12
+ /** Stable-idle finalization thresholds for non-terminal captured text. */
13
+ const STABLE_IDLE_MIN_CYCLES = 3;
14
+ const STABLE_IDLE_MIN_MS = 12_000;
15
+ const STABLE_IDLE_MIN_LEN = 16;
16
/**
 * Bounds task result text to a maximum number of UTF-16 code units.
 *
 * No content rewriting is performed, so `changed` is always `false`;
 * truncation is reported separately through `truncated`.
 *
 * @param {string} text - Raw assistant result text.
 * @param {number} [maxLength=TASK_RESULT_MAX_TEXT] - Upper bound on the
 *   returned text length, in UTF-16 code units.
 * @returns {{ text: string, changed: boolean, truncated: boolean }}
 */
export function sanitizeFlowDeskTaskResultTextV1(text, maxLength = TASK_RESULT_MAX_TEXT) {
    const limit = Math.max(0, maxLength);
    if (text.length <= limit) {
        return { text, changed: false, truncated: false };
    }
    // Never end on a lone high surrogate: cutting between the two halves of an
    // astral character would persist an invalid code unit in the evidence.
    let end = limit;
    if (end > 0) {
        const lastUnit = text.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            end -= 1;
        }
    }
    return { text: text.slice(0, end), changed: false, truncated: true };
}
7
23
  function agentTaskLaunchPlan(input) {
8
24
  return {
9
25
  schema_version: "flowdesk.runtime_lane_launch_plan.v1",
@@ -32,66 +48,194 @@ function agentTaskLaunchPlan(input) {
32
48
  runtimeExecution: false,
33
49
  };
34
50
  }
35
- function extractAssistantTextFromResponse(client, childSessionId) {
36
- // We extract response text via messages API
51
/**
 * Validates the raw parent session id supplied for an agent task and derives
 * the wrapped `ses-…` session ref used in evidence records.
 *
 * @param {unknown} parentSessionId - Raw parent session id from the caller.
 * @returns {{ ok: true, parentSessionRef: string } |
 *           { ok: false, redactedReason: string, parentSessionRef: string }}
 *   On failure `parentSessionRef` is the fixed invalid-binding placeholder so
 *   the rejected value is never echoed.
 */
function validateAgentTaskParentSessionId(parentSessionId) {
    const reject = (redactedReason) => ({
        ok: false,
        redactedReason,
        parentSessionRef: INVALID_PARENT_SESSION_REF,
    });
    // A missing or non-string binding must yield a structured failure rather
    // than throwing from `.trim()`.
    if (parentSessionId === undefined || parentSessionId === null)
        return reject("missing_parent_session_binding");
    if (typeof parentSessionId !== "string")
        return reject("invalid_parent_session_binding");
    const value = parentSessionId.trim();
    if (value.length === 0)
        return reject("missing_parent_session_binding");
    if (value.length > 128)
        return reject("invalid_parent_session_binding");
    // `ses-...` is FlowDesk's opaque session-ref wrapper, not the raw OpenCode
    // session id expected by SDK `session.create({ parentID })`. Accepting it here
    // causes evidence such as `ses-ses-flowdesk-coordinator` and can make the SDK
    // wait on a non-existent synthetic parent session until launch timeout.
    if (value.startsWith("ses-"))
        return reject("invalid_parent_session_binding");
    // The allow-list also rejects embedded whitespace, so no separate check is needed.
    if (!/^[A-Za-z0-9_.:-]+$/.test(value))
        return reject("invalid_parent_session_binding");
    return { ok: true, parentSessionRef: `ses-${value}` };
}
69
+ /** Bounded nudge text — versioned constant, never echoes user input */
70
+ const AGENT_TASK_NUDGE_TEXT = "Please provide your final answer now. If you have completed your analysis, output your complete response.";
71
+ /**
72
+ * Polls `session.messages` with a per-call 3-second cap so it works whether the SDK
73
+ * uses snapshot (returns immediately) or long-poll (blocks until output) semantics.
74
+ *
75
+ * Heartbeat: fires every `quietPeriodMs` of silence — only when inactive.
76
+ * Nudge: after `quietPeriodMs` of silence, sends a bounded prompt to the child
77
+ * session asking for the final answer. Max `maxNudges` nudges total.
78
+ * After exhausting nudges with no response, returns undefined.
79
+ */
80
+ async function extractAssistantTextFromResponse(client, childSessionId, opts) {
37
81
  const messages = client.session.messages;
38
82
  if (messages === undefined)
39
83
  return undefined;
40
- return (async () => {
41
- try {
42
- const method = messages;
84
+ const quietPeriodMs = opts?.quietPeriodMs ?? 10_000;
85
+ const maxNudges = opts?.maxNudges ?? 2;
86
+ const MESSAGES_TIMEOUT_MS = opts?.messagesTimeoutMs ?? 3_000; // per-call cap — handles both snapshot and long-poll
87
+ const method = messages;
88
+ /**
89
+ * Call session.messages with a ceiling timeout so we can check inactivity periodically.
90
+ * This handles both snapshot APIs (return immediately) and long-poll APIs
91
+ * (block until LLM produces output). With the timeout, a long-poll call that
92
+ * hasn't returned after MESSAGES_TIMEOUT_MS resolves as null so we can
93
+ * check the inactivity clock and possibly send a nudge.
94
+ */
95
+ const callMessages = () => {
96
+ const messagePromise = (async () => {
43
97
  const current = await method.call(client.session, { sessionID: childSessionId });
44
- const response = isSdkErrorResponse(current)
45
- ? await method.call(client.session, { path: { id: childSessionId } })
46
- : current;
47
- const data = asResponseData(response);
48
- const record = asRecord(data);
49
- const items = Array.isArray(data)
50
- ? data
51
- : Array.isArray(record?.items)
52
- ? record.items
53
- : Array.isArray(record?.messages)
54
- ? record.messages
55
- : [];
56
- for (let index = items.length - 1; index >= 0; index -= 1) {
57
- const message = items[index];
58
- const record = asRecord(message);
59
- const info = asRecord(record?.info) ?? record;
60
- if (info?.role !== "assistant")
61
- continue;
62
- const parts = Array.isArray(record?.parts)
63
- ? record.parts
64
- : Array.isArray(info?.parts)
65
- ? info.parts
66
- : [];
67
- for (const part of parts) {
68
- const partRecord = asRecord(part);
69
- const text = typeof partRecord?.text === "string"
70
- ? partRecord.text
71
- : typeof partRecord?.content === "string"
72
- ? partRecord.content
73
- : undefined;
74
- if (typeof text === "string" && text.trim().length > 0)
75
- return text;
76
- }
77
- }
78
- return undefined;
98
+ if (isSdkErrorResponse(current))
99
+ return method.call(client.session, { path: { id: childSessionId } });
100
+ return current;
101
+ })();
102
+ // Only race against timeout when the API might block (MESSAGES_TIMEOUT_MS > 0)
103
+ if (MESSAGES_TIMEOUT_MS <= 0)
104
+ return messagePromise;
105
+ return Promise.race([
106
+ messagePromise,
107
+ new Promise(resolve => setTimeout(() => resolve(null), MESSAGES_TIMEOUT_MS)),
108
+ ]);
109
+ };
110
+ /** Send a nudge to the child session with a hard timeout to prevent blocking.
111
+ * Uses noReply: true so the child does not generate a spurious second assistant turn.
112
+ */
113
+ const sendNudge = async () => {
114
+ const promptFn = client.session.prompt ?? client.session.promptAsync;
115
+ if (promptFn === undefined)
116
+ return "skipped";
117
+ const NUDGE_TIMEOUT_MS = 5_000;
118
+ try {
119
+ await Promise.race([
120
+ promptFn.call(client.session, {
121
+ sessionID: childSessionId,
122
+ noReply: true,
123
+ ...(opts?.runtimeModel !== undefined ? { model: opts.runtimeModel } : {}),
124
+ ...(opts?.agentName !== undefined ? { agent: opts.agentName } : {}),
125
+ parts: [{ type: "text", text: AGENT_TASK_NUDGE_TEXT }],
126
+ }),
127
+ new Promise((_, reject) => setTimeout(() => reject(new Error("nudge timeout")), NUDGE_TIMEOUT_MS)),
128
+ ]);
129
+ return "sent";
79
130
  }
80
131
  catch {
81
- return undefined;
132
+ return "timeout";
82
133
  }
83
- })();
84
- }
85
- function isProcessOnlyAssistantOutput(text) {
86
- const normalized = text.trim().toLowerCase();
87
- return normalized.length === 0 || [
88
- "working",
89
- "thinking",
90
- "i'll take a look",
91
- "i will take a look",
92
- "let me inspect",
93
- "i'm going to inspect",
94
- ].some((fragment) => normalized.includes(fragment));
134
+ };
135
+ const observe = (response) => {
136
+ if (response === null)
137
+ return undefined; // timed-out poll cycle
138
+ return observeFlowDeskAgentTaskOutputV1(response);
139
+ };
140
+ const startMs = Date.now();
141
+ let lastActivityMs = startMs;
142
+ let lastSignature = "";
143
+ let lastHeartbeatMs = startMs;
144
+ let nudgeCount = 0;
145
+ let latestCandidate;
146
+ // Stable-idle tracking: capture non-terminal text once it has settled, so a
147
+ // good answer is not lost just because the SDK shape never surfaced an
148
+ // explicit terminal/finish marker.
149
+ let stableText;
150
+ let stableCount = 0;
151
+ let firstStableMs = 0;
152
+ try {
153
+ while (true) {
154
+ const response = await callMessages();
155
+ const nowMs = Date.now();
156
+ // Build signature (null response = timeout, no change)
157
+ const sig = response === null ? lastSignature : (() => {
158
+ const data = asResponseData(response);
159
+ const record = asRecord(data);
160
+ const items = Array.isArray(data) ? data
161
+ : Array.isArray(record?.items) ? record.items
162
+ : Array.isArray(record?.messages) ? record.messages : [];
163
+ const observed = observe(response);
164
+ return `${items.length}:${observed?.latestText?.length ?? 0}:${observed?.terminalObserved === true ? "terminal" : "open"}`;
165
+ })();
166
+ if (sig !== lastSignature) {
167
+ // New activity — reset all inactivity clocks
168
+ lastSignature = sig;
169
+ lastActivityMs = nowMs;
170
+ lastHeartbeatMs = nowMs;
171
+ }
172
+ const observed = observe(response);
173
+ if (observed?.latestText !== undefined && observed.latestText.trim().length > 0) {
174
+ latestCandidate = observed;
175
+ // Track text stability for idle finalization. Active tool runs reset
176
+ // stability so we never finalize mid tool-call.
177
+ if (observed.hasRunningTool) {
178
+ stableText = undefined;
179
+ stableCount = 0;
180
+ }
181
+ else if (observed.latestText === stableText) {
182
+ stableCount++;
183
+ }
184
+ else {
185
+ stableText = observed.latestText;
186
+ stableCount = 1;
187
+ firstStableMs = nowMs;
188
+ }
189
+ }
190
+ if (observed?.terminalObserved === true && observed.latestText !== undefined && observed.latestText.trim().length > 0) {
191
+ return { text: observed.latestText, completionStatus: "final", outputKind: observed.outputKind, usableForSynthesis: observed.usableForSynthesis, finalizationReason: "terminal_marker", looksLikeRefusalOrError: observed.looksLikeRefusalOrError };
192
+ }
193
+ // Stable-idle: non-terminal text that has been unchanged across several
194
+ // poll cycles and a minimum interval is treated as captured (not a
195
+ // semantic success claim — completion_status stays "final" but the
196
+ // finalization_reason records that this was idle-based capture).
197
+ if (latestCandidate?.latestText !== undefined &&
198
+ stableText !== undefined &&
199
+ stableText.trim().length >= STABLE_IDLE_MIN_LEN &&
200
+ stableCount >= STABLE_IDLE_MIN_CYCLES &&
201
+ nowMs - firstStableMs >= STABLE_IDLE_MIN_MS) {
202
+ return { text: latestCandidate.latestText, completionStatus: "final", outputKind: latestCandidate.outputKind, usableForSynthesis: latestCandidate.usableForSynthesis, finalizationReason: "stable_idle", looksLikeRefusalOrError: latestCandidate.looksLikeRefusalOrError };
203
+ }
204
+ const silenceMs = nowMs - lastActivityMs;
205
+ if (silenceMs >= quietPeriodMs) {
206
+ // Emit heartbeat on first quiet-period expiry of each silence window
207
+ if (nowMs - lastHeartbeatMs >= quietPeriodMs) {
208
+ lastHeartbeatMs = nowMs;
209
+ opts?.heartbeatFn?.(nowMs - startMs);
210
+ }
211
+ // Send nudge after quiet period
212
+ if (nudgeCount < maxNudges) {
213
+ nudgeCount++;
214
+ await sendNudge();
215
+ // Reset activity clock after nudge — give a fresh quiet window
216
+ lastActivityMs = Date.now();
217
+ lastHeartbeatMs = lastActivityMs;
218
+ }
219
+ else {
220
+ // Exhausted all nudges. Preserve usable candidate text as partial output.
221
+ if (latestCandidate?.latestText !== undefined && latestCandidate.latestText.trim().length > 0) {
222
+ return { text: latestCandidate.latestText, completionStatus: "partial", outputKind: latestCandidate.outputKind, usableForSynthesis: latestCandidate.usableForSynthesis, finalizationReason: "nudge_exhausted_partial", looksLikeRefusalOrError: latestCandidate.looksLikeRefusalOrError };
223
+ }
224
+ return undefined;
225
+ }
226
+ }
227
+ else {
228
+ // No activity and not yet at quiet period — yield to event loop before next poll.
229
+ // Sleep for up to 1s or quietPeriodMs/10, whichever is smaller, to avoid tight loops
230
+ // while still being responsive when messages arrive quickly (snapshot mode).
231
+ const yieldMs = Math.max(10, Math.min(1_000, Math.floor(quietPeriodMs / 10)));
232
+ await new Promise(resolve => setTimeout(resolve, yieldMs));
233
+ }
234
+ }
235
+ }
236
+ catch {
237
+ return undefined;
238
+ }
95
239
  }
96
240
  function asRecord(value) {
97
241
  return typeof value === "object" && value !== null && !Array.isArray(value)
@@ -117,11 +261,42 @@ function writeSessionEvidence(input) {
117
261
  record: input.record,
118
262
  });
119
263
  if (prepared.ok && prepared.writeIntent !== undefined) {
120
- applyFlowDeskSessionEvidenceWriteIntentsV1(input.rootDir, [prepared.writeIntent]);
264
+ const applied = applyFlowDeskSessionEvidenceWriteIntentsV1(input.rootDir, [prepared.writeIntent]);
265
+ return applied.ok && applied.writtenPaths.length > 0;
121
266
  }
267
+ return false;
268
+ }
269
/**
 * Normalizes a progress label for evidence records: collapses every run of
 * whitespace to a single space, trims, and caps the result at 120 UTF-16
 * code units (119 units of text plus an ellipsis when shortened).
 *
 * @param {unknown} value - Label text; non-strings are stringified and
 *   `null`/`undefined` are treated as empty.
 * @returns {string} The compacted, bounded label.
 */
function progressLabel(value) {
    const source = typeof value === "string" ? value : String(value ?? "");
    const compact = source.replace(/\s+/g, " ").trim();
    if (compact.length <= 120) {
        return compact;
    }
    let end = 119;
    // Do not leave a lone high surrogate dangling in front of the ellipsis.
    const lastUnit = compact.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return `${compact.slice(0, end)}…`;
}
273
/**
 * Builds a `flowdesk.agent_task_progress.v1` record and hands it to
 * `writeSessionEvidence` under the evidence id
 * `agent-task-progress-<laneId>-<progressSeq>`.
 *
 * @param {object} input
 * @param {string} input.rootDir
 * @param {string} input.workflowId
 * @param {string} input.laneId
 * @param {string} input.taskId
 * @param {string} input.agentRef
 * @param {string} input.providerQualifiedModelId
 * @param {string} input.phase
 * @param {number} input.progressSeq
 * @param {string} input.progressLabel - Normalized via `progressLabel()`.
 * @param {string} [input.observedAt] - Defaults to the current time (ISO string).
 * @returns {boolean} Whatever `writeSessionEvidence` reports for the write, so
 *   callers can detect a dropped progress record. Existing callers that ignore
 *   the return value are unaffected.
 */
function writeAgentTaskProgress(input) {
    const observedAt = input.observedAt ?? new Date().toISOString();
    const laneSeq = `${input.laneId}-${input.progressSeq}`;
    const record = {
        schema_version: "flowdesk.agent_task_progress.v1",
        workflow_id: input.workflowId,
        lane_id: input.laneId,
        task_id: input.taskId,
        agent_ref: input.agentRef,
        provider_qualified_model_id: input.providerQualifiedModelId,
        progress_seq: input.progressSeq,
        observed_at: observedAt,
        phase: input.phase,
        progress_label: progressLabel(input.progressLabel),
        progress_ref: `progress-${laneSeq}`,
        redaction_version: "v1",
        dispatch_authority_enabled: false,
    };
    return writeSessionEvidence({
        rootDir: input.rootDir,
        workflowId: input.workflowId,
        evidenceId: `agent-task-progress-${laneSeq}`,
        record,
    });
}
123
297
  function writeAgentTaskTerminalLifecycle(input) {
124
298
  const childSessionRef = input.childSessionRef === input.parentSessionRef ? undefined : input.childSessionRef;
299
+ const messageRef = input.messageRef ?? (input.state === "complete" ? `msg-${input.laneId}` : undefined);
125
300
  const record = {
126
301
  schema_version: "flowdesk.lane_lifecycle_record.v1",
127
302
  lane_id: input.laneId,
@@ -129,11 +304,13 @@ function writeAgentTaskTerminalLifecycle(input) {
129
304
  attempt_id: input.attemptId,
130
305
  parent_session_ref: input.parentSessionRef,
131
306
  ...(childSessionRef === undefined ? {} : { child_session_ref: childSessionRef }),
132
- ...(input.messageRef === undefined ? {} : { message_ref: input.messageRef }),
307
+ ...(messageRef === undefined ? {} : { message_ref: messageRef }),
133
308
  agent_ref: input.agentRef,
134
309
  provider_qualified_model_id: input.providerQualifiedModelId,
135
310
  state: input.state,
311
+ ...(input.verdictRef === undefined ? {} : { verdict_ref: input.verdictRef }),
136
312
  ...(input.outputRef === undefined ? {} : { output_ref: input.outputRef }),
313
+ ...(input.state === "complete" ? { runtime_echo_ref: `runtime-echo-${input.laneId}`, telemetry_ref: `telemetry-${input.laneId}` } : {}),
137
314
  timeout_ms: input.timeoutMs ?? 0,
138
315
  orphan_max_age_ms: 0,
139
316
  retry_count: 0,
@@ -151,9 +328,113 @@ function writeAgentTaskTerminalLifecycle(input) {
151
328
  record: record,
152
329
  });
153
330
  }
331
/** Returns true when `candidate` is syntactically valid JSON. */
function parsesAsJson(candidate) {
    try {
        JSON.parse(candidate);
        return true;
    }
    catch {
        return false;
    }
}
/**
 * Scans `text` for top-level balanced `{…}` spans and returns the last one.
 * Unmatched closing braces are ignored so they cannot poison the depth
 * counter. When `respectStrings` is true, braces inside double-quoted strings
 * (within an object) are not counted.
 */
function lastBalancedJsonObject(text, respectStrings) {
    let depth = 0;
    let start = -1;
    let lastBlock;
    let inString = false;
    let escaped = false;
    for (let i = 0; i < text.length; i += 1) {
        const ch = text[i];
        if (inString) {
            if (escaped)
                escaped = false;
            else if (ch === "\\")
                escaped = true;
            else if (ch === "\"")
                inString = false;
            continue;
        }
        // Quotes only delimit strings inside an object; quotes in the
        // surrounding prose are ordinary characters.
        if (respectStrings && depth > 0 && ch === "\"") {
            inString = true;
            continue;
        }
        if (ch === "{") {
            if (depth === 0)
                start = i;
            depth += 1;
        }
        else if (ch === "}" && depth > 0) {
            depth -= 1;
            if (depth === 0) {
                lastBlock = text.slice(start, i + 1).trim();
                start = -1;
            }
        }
    }
    return lastBlock;
}
/**
 * Extracts candidate JSON object strings from free-form assistant text.
 *
 * Strategy, in order:
 *  1. If the whole trimmed text is a single parsable JSON object, return it.
 *  2. Otherwise return every object found inside ``` / ```json fences.
 *  3. Otherwise return the last balanced top-level `{…}` span, if any.
 *
 * Candidates from steps 2 and 3 are not guaranteed to parse; callers must
 * still `JSON.parse` them defensively.
 *
 * @param {string} raw - Assistant output text.
 * @returns {string[]} Zero or more candidate JSON object strings.
 */
function extractJsonBlocksFromText(raw) {
    const trimmed = raw.trim();
    // Only take the whole-text shortcut when it really is one JSON document;
    // text like `{"a":1} and {"b":2}` must fall through to the scanners.
    if (trimmed.startsWith("{") && trimmed.endsWith("}") && parsesAsJson(trimmed))
        return [trimmed];
    const results = [];
    const fencePattern = /```(?:json)?\s*\n?(\{[\s\S]*?\})\s*\n?```/g;
    for (const match of trimmed.matchAll(fencePattern)) {
        if (match[1])
            results.push(match[1].trim());
    }
    if (results.length > 0)
        return results;
    // Prefer the string-aware scan; fall back to the plain brace scan for
    // malformed text where an unterminated quote would hide a later object.
    const lastBlock = lastBalancedJsonObject(trimmed, true) ?? lastBalancedJsonObject(trimmed, false);
    return lastBlock === undefined ? [] : [lastBlock];
}
363
/**
 * Looks through the JSON candidates extracted from `input.text` and returns
 * the first one that passes `validateTopTierReviewVerdictV1` and whose
 * `workflow_id` equals `input.workflowId`.
 *
 * @param {{ text: string, workflowId: string }} input
 * @returns {object | undefined} The parsed verdict, or `undefined` when no
 *   candidate qualifies.
 */
function observedTopTierReviewerVerdictFromText(input) {
    // Parse/validate one candidate; any failure just means "not this one".
    const matchingVerdict = (jsonBlock) => {
        try {
            const parsed = JSON.parse(jsonBlock);
            const accepted = validateTopTierReviewVerdictV1(parsed).ok && parsed.workflow_id === input.workflowId;
            return accepted ? parsed : undefined;
        }
        catch {
            return undefined;
        }
    };
    for (const jsonBlock of extractJsonBlocksFromText(input.text)) {
        const verdict = matchingVerdict(jsonBlock);
        if (verdict !== undefined)
            return verdict;
    }
    return undefined;
}
380
/**
 * Writes an observed reviewer verdict as session evidence (keyed by its
 * `verdict_id`) and then reloads the workflow's evidence to confirm it.
 *
 * @param {{ rootDir: string, workflowId: string, verdict: object }} input
 * @returns {boolean} `false` when the write is not reported as successful;
 *   otherwise the outcome of the reload check: reload ok, nothing blocked,
 *   and a `reviewer_verdict` entry with the same evidence id and verdict id
 *   present.
 */
function persistObservedReviewerVerdict(input) {
    const evidenceId = input.verdict.verdict_id;
    const written = writeSessionEvidence({
        rootDir: input.rootDir,
        workflowId: input.workflowId,
        evidenceId,
        record: input.verdict,
    });
    if (!written)
        return false;
    const reloaded = reloadFlowDeskSessionEvidenceV1({
        rootDir: input.rootDir,
        workflowId: input.workflowId,
    });
    // Read-back predicate: the stored entry must be the verdict just written.
    const isPersistedVerdict = (entry) => entry.evidenceClass === "reviewer_verdict" &&
        entry.evidenceId === evidenceId &&
        entry.record.verdict_id === input.verdict.verdict_id;
    return reloaded.ok && reloaded.blocked.length === 0 && reloaded.entries.some(isPersistedVerdict);
}
154
397
  export async function executeFlowDeskAgentTaskV1(input) {
155
398
  const observedAt = new Date().toISOString();
156
399
  const token = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
400
+ const parentBinding = validateAgentTaskParentSessionId(input.parentSessionId);
401
+ const parentSessionRef = parentBinding.parentSessionRef;
402
+ const attemptId = `attempt-task-${token}`;
403
+ if (!parentBinding.ok) {
404
+ const taskFailedEvidenceId = `task-failed-${input.taskId}-${token}-invalid-parent`;
405
+ const redactedReason = parentBinding.redactedReason;
406
+ writeSessionEvidence({
407
+ rootDir: input.rootDir,
408
+ workflowId: input.workflowId,
409
+ evidenceId: taskFailedEvidenceId,
410
+ record: {
411
+ schema_version: "flowdesk.task_failed.v1",
412
+ workflow_id: input.workflowId,
413
+ lane_id: input.laneId,
414
+ task_id: input.taskId,
415
+ agent_ref: input.agentRef,
416
+ provider_qualified_model_id: input.providerQualifiedModelId,
417
+ failure_category: "sdk_create_failed",
418
+ redacted_reason: redactedReason,
419
+ created_at: observedAt,
420
+ dispatch_authority_enabled: false,
421
+ },
422
+ });
423
+ writeAgentTaskTerminalLifecycle({
424
+ rootDir: input.rootDir,
425
+ workflowId: input.workflowId,
426
+ laneId: input.laneId,
427
+ attemptId,
428
+ parentSessionRef,
429
+ agentRef: input.agentRef,
430
+ providerQualifiedModelId: input.providerQualifiedModelId,
431
+ state: "invocation_failed",
432
+ evidenceId: `lifecycle-task-terminal-${input.laneId}-${token}-invalid-parent`,
433
+ createdAt: observedAt,
434
+ updatedAt: observedAt,
435
+ });
436
+ return { status: "task_failed", failureCategory: "sdk_create_failed", redactedReason, laneId: input.laneId };
437
+ }
157
438
  const launchPlan = agentTaskLaunchPlan({
158
439
  workflowId: input.workflowId,
159
440
  laneId: input.laneId,
@@ -163,8 +444,6 @@ export async function executeFlowDeskAgentTaskV1(input) {
163
444
  token,
164
445
  });
165
446
  const runningLifecycleEvidenceId = `lifecycle-task-running-${input.laneId}-${token}`;
166
- const attemptId = launchPlan.attempt_id ?? `attempt-task-${token}`;
167
- const parentSessionRef = `ses-${input.parentSessionId}`;
168
447
  const promptTextTruncated = input.promptText.length > AGENT_TASK_CONTEXT_MAX_PROMPT_TEXT;
169
448
  const agentTaskContextRecord = {
170
449
  schema_version: "flowdesk.agent_task_context.v1",
@@ -189,17 +468,72 @@ export async function executeFlowDeskAgentTaskV1(input) {
189
468
  evidenceId: `agent-task-context-${input.taskId}-${token}`,
190
469
  record: agentTaskContextRecord,
191
470
  });
192
- // Launch the lane
193
- const launchResult = await launchFlowDeskInjectedSdkRuntimeLaneFromPlanV1({
194
- client: input.client,
195
- launchPlan,
196
- request: {
197
- allowActualLaneLaunch: true,
198
- parentSessionId: input.parentSessionId,
199
- promptText: input.promptText,
200
- dispatchMethod: "prompt",
201
- },
471
+ writeAgentTaskProgress({
472
+ rootDir: input.rootDir,
473
+ workflowId: input.workflowId,
474
+ laneId: input.laneId,
475
+ taskId: input.taskId,
476
+ agentRef: input.agentRef,
477
+ providerQualifiedModelId: input.providerQualifiedModelId,
478
+ phase: "started",
479
+ progressSeq: 1,
480
+ progressLabel: "agent task lane launch started",
481
+ observedAt,
202
482
  });
483
+ // Launch the lane — wrap in absolute timeout so session.prompt blocking doesn't hang forever.
484
+ // 30s default — if session.prompt blocks for more than 30s with no activity, give up.
485
+ const LAUNCH_TIMEOUT_MS = input._launchTimeoutMs ?? 30_000;
486
+ const launchTimeoutHandle = setTimeout(() => { }, LAUNCH_TIMEOUT_MS);
487
+ const dispatchMethod = input.client.session.promptAsync !== undefined ? "promptAsync" : "prompt";
488
+ const launchResult = await Promise.race([
489
+ launchFlowDeskInjectedSdkRuntimeLaneFromPlanV1({
490
+ client: input.client,
491
+ launchPlan,
492
+ request: {
493
+ allowActualLaneLaunch: true,
494
+ parentSessionId: input.parentSessionId,
495
+ promptText: input.promptText,
496
+ dispatchMethod,
497
+ },
498
+ }),
499
+ new Promise(resolve => setTimeout(() => resolve({ status: "launch_timeout" }), LAUNCH_TIMEOUT_MS)),
500
+ ]);
501
+ clearTimeout(launchTimeoutHandle);
502
+ if ("status" in launchResult && launchResult.status === "launch_timeout") {
503
+ // session.prompt blocked for too long — treat as invocation failure
504
+ const failedEvidenceId = `task-failed-${input.taskId}-${token}-launch-timeout`;
505
+ writeSessionEvidence({
506
+ rootDir: input.rootDir,
507
+ workflowId: input.workflowId,
508
+ evidenceId: failedEvidenceId,
509
+ record: {
510
+ schema_version: "flowdesk.task_failed.v1",
511
+ workflow_id: input.workflowId,
512
+ lane_id: input.laneId,
513
+ task_id: input.taskId,
514
+ agent_ref: input.agentRef,
515
+ provider_qualified_model_id: input.providerQualifiedModelId,
516
+ failure_category: "sdk_create_failed",
517
+ redacted_reason: "lane launch timed out: session.prompt did not respond",
518
+ created_at: observedAt,
519
+ dispatch_authority_enabled: false,
520
+ },
521
+ });
522
+ writeAgentTaskTerminalLifecycle({
523
+ rootDir: input.rootDir,
524
+ workflowId: input.workflowId,
525
+ laneId: input.laneId,
526
+ attemptId,
527
+ parentSessionRef,
528
+ agentRef: input.agentRef,
529
+ providerQualifiedModelId: input.providerQualifiedModelId,
530
+ state: "invocation_failed",
531
+ evidenceId: `lifecycle-task-terminal-${input.laneId}-${token}-launch-timeout`,
532
+ createdAt: observedAt,
533
+ updatedAt: new Date().toISOString(),
534
+ });
535
+ return { status: "task_failed", failureCategory: "sdk_create_failed", redactedReason: "launch timeout: session.prompt did not respond within the allowed window", laneId: input.laneId };
536
+ }
203
537
  // Write running lifecycle evidence
204
538
  materializeFlowDeskRuntimeLaneLaunchLifecycleEvidenceV1({
205
539
  rootDir: input.rootDir,
@@ -258,6 +592,11 @@ export async function executeFlowDeskAgentTaskV1(input) {
258
592
  updatedAt: new Date().toISOString(),
259
593
  timeoutMs: input.timeoutMs,
260
594
  });
595
+ refreshFlowDeskCompletionUiCachesV1({
596
+ rootDir: input.rootDir,
597
+ workflowId: input.workflowId,
598
+ observedAt,
599
+ });
261
600
  return {
262
601
  status: "task_failed",
263
602
  failureCategory,
@@ -278,22 +617,91 @@ export async function executeFlowDeskAgentTaskV1(input) {
278
617
  observedAt,
279
618
  progressSummaryLabel: `agent task lane launch heartbeat`,
280
619
  });
281
- // Extract child session ID and get response text
620
+ refreshFlowDeskCompletionUiCachesV1({
621
+ rootDir: input.rootDir,
622
+ workflowId: input.workflowId,
623
+ observedAt,
624
+ });
625
+ // Extract child session ID
282
626
  const childSessionId = launchResult.childSessionRef?.startsWith("ses-")
283
627
  ? launchResult.childSessionRef.slice("ses-".length)
284
628
  : undefined;
285
- let resultText;
629
+ // ── Async mode: return immediately, watchdog handles polling/nudging/abort ──
630
+ if (input.asyncMode === true) {
631
+ const resolvedChildId = childSessionId ?? "";
632
+ // Write child session evidence so watchdog can find it
633
+ writeSessionEvidence({
634
+ rootDir: input.rootDir,
635
+ workflowId: input.workflowId,
636
+ evidenceId: `agent-task-child-session-${input.laneId}-${token}`,
637
+ record: {
638
+ schema_version: AGENT_TASK_CHILD_SESSION_SCHEMA_VERSION,
639
+ workflow_id: input.workflowId,
640
+ lane_id: input.laneId,
641
+ task_id: input.taskId,
642
+ child_session_id: resolvedChildId,
643
+ parent_session_ref: parentSessionRef,
644
+ provider_qualified_model_id: input.providerQualifiedModelId,
645
+ agent_ref: input.agentRef,
646
+ nudge_count: 0,
647
+ last_nudge_at: null,
648
+ created_at: observedAt,
649
+ dispatch_authority_enabled: false,
650
+ },
651
+ });
652
+ writeAgentTaskProgress({
653
+ rootDir: input.rootDir,
654
+ workflowId: input.workflowId,
655
+ laneId: input.laneId,
656
+ taskId: input.taskId,
657
+ agentRef: input.agentRef,
658
+ providerQualifiedModelId: input.providerQualifiedModelId,
659
+ phase: "waiting",
660
+ progressSeq: 2,
661
+ progressLabel: "agent task waiting for async child result",
662
+ });
663
+ refreshFlowDeskCompletionUiCachesV1({
664
+ rootDir: input.rootDir,
665
+ workflowId: input.workflowId,
666
+ observedAt: new Date().toISOString(),
667
+ });
668
+ return { status: "task_launched", laneId: input.laneId, childSessionId: resolvedChildId };
669
+ }
670
+ let resultObservation;
286
671
  if (childSessionId !== undefined) {
287
- resultText = await extractAssistantTextFromResponse(input.client, childSessionId);
672
+ const runtimeModel = launchResult.status === "lane_launch_started" && typeof launchResult.model === "string"
673
+ ? launchResult.model : undefined;
674
+ const agentName = launchResult.status === "lane_launch_started" && typeof launchResult.agent === "string"
675
+ ? launchResult.agent : undefined;
676
+ resultObservation = await extractAssistantTextFromResponse(input.client, childSessionId, {
677
+ quietPeriodMs: input._nudgeQuietPeriodMs ?? 10_000, // default 10s per policy
678
+ maxNudges: 2,
679
+ runtimeModel,
680
+ agentName,
681
+ messagesTimeoutMs: input._messagesTimeoutMs,
682
+ heartbeatFn: (elapsedMs) => {
683
+ recordFlowDeskLaneHeartbeatV1({
684
+ rootDir: input.rootDir,
685
+ workflowId: input.workflowId,
686
+ attemptId,
687
+ laneId: input.laneId,
688
+ parentSessionRef,
689
+ agentRef: input.agentRef,
690
+ providerQualifiedModelId: input.providerQualifiedModelId,
691
+ state: "running",
692
+ observedAt: new Date().toISOString(),
693
+ progressSummaryLabel: `agent task waiting for response elapsed=${Math.floor(elapsedMs / 1000)}s`,
694
+ });
695
+ },
696
+ });
288
697
  }
289
- if (resultText === undefined || (input.outputContract === "final_assistant_text" && isProcessOnlyAssistantOutput(resultText))) {
698
+ const resultText = resultObservation?.text;
699
+ if (resultText === undefined) {
290
700
  // No response text - write task_failed
291
701
  const taskFailedEvidenceId = `task-failed-${input.taskId}-${token}`;
292
- const failureCategory = resultText === undefined ? "no_response" : "contract_not_satisfied";
293
- const evidenceFailureCategory = resultText === undefined ? "no_response" : "unknown";
294
- const redactedReason = resultText === undefined
295
- ? "lane launched but no assistant response text found"
296
- : "lane launched but final assistant response did not satisfy requested output contract";
702
+ const failureCategory = "no_response";
703
+ const evidenceFailureCategory = "no_response";
704
+ const redactedReason = "lane launched but no assistant response text found";
297
705
  const taskFailedRecord = {
298
706
  schema_version: "flowdesk.task_failed.v1",
299
707
  workflow_id: input.workflowId,
@@ -312,6 +720,17 @@ export async function executeFlowDeskAgentTaskV1(input) {
312
720
  evidenceId: taskFailedEvidenceId,
313
721
  record: taskFailedRecord,
314
722
  });
723
+ writeAgentTaskProgress({
724
+ rootDir: input.rootDir,
725
+ workflowId: input.workflowId,
726
+ laneId: input.laneId,
727
+ taskId: input.taskId,
728
+ agentRef: input.agentRef,
729
+ providerQualifiedModelId: input.providerQualifiedModelId,
730
+ phase: "failed",
731
+ progressSeq: 3,
732
+ progressLabel: failureCategory === "no_response" ? "agent task finished without response" : "agent task output contract not satisfied",
733
+ });
315
734
  writeAgentTaskTerminalLifecycle({
316
735
  rootDir: input.rootDir,
317
736
  workflowId: input.workflowId,
@@ -322,12 +741,43 @@ export async function executeFlowDeskAgentTaskV1(input) {
322
741
  messageRef: launchResult.messageRef?.startsWith("msg-") ? launchResult.messageRef : undefined,
323
742
  agentRef: input.agentRef,
324
743
  providerQualifiedModelId: input.providerQualifiedModelId,
325
- state: resultText === undefined ? "no_output" : "incomplete",
744
+ state: "no_output",
326
745
  evidenceId: `lifecycle-task-terminal-${input.laneId}-${token}`,
327
746
  createdAt: observedAt,
328
747
  updatedAt: new Date().toISOString(),
329
748
  timeoutMs: input.timeoutMs,
330
749
  });
750
+ refreshFlowDeskCompletionUiCachesV1({
751
+ rootDir: input.rootDir,
752
+ workflowId: input.workflowId,
753
+ observedAt: new Date().toISOString(),
754
+ });
755
+ // Auto-retry with fallback binding if configured and this is not already a retry
756
+ if (input.fallbackBinding !== undefined && !input._isFallbackRetry) {
757
+ const retryToken = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
758
+ const retryTaskId = `${input.taskId}-retry-${retryToken.slice(0, 6)}`;
759
+ const retryLaneId = `${input.laneId}-retry`;
760
+ writeAgentTaskProgress({
761
+ rootDir: input.rootDir,
762
+ workflowId: input.workflowId,
763
+ laneId: retryLaneId,
764
+ taskId: retryTaskId,
765
+ agentRef: input.fallbackBinding.agentRef,
766
+ providerQualifiedModelId: input.fallbackBinding.providerQualifiedModelId,
767
+ phase: "retrying",
768
+ progressSeq: 0,
769
+ progressLabel: `auto-retry with ${input.fallbackBinding.providerQualifiedModelId} after ${failureCategory}`,
770
+ });
771
+ return executeFlowDeskAgentTaskV1({
772
+ ...input,
773
+ taskId: retryTaskId,
774
+ laneId: retryLaneId,
775
+ agentRef: input.fallbackBinding.agentRef,
776
+ providerQualifiedModelId: input.fallbackBinding.providerQualifiedModelId,
777
+ fallbackBinding: undefined,
778
+ _isFallbackRetry: true,
779
+ });
780
+ }
331
781
  return {
332
782
  status: "task_failed",
333
783
  failureCategory,
@@ -335,10 +785,9 @@ export async function executeFlowDeskAgentTaskV1(input) {
335
785
  laneId: input.laneId,
336
786
  };
337
787
  }
338
- // Truncate if needed
339
788
  const fullResultText = resultText;
340
- const truncated = fullResultText.length > TASK_RESULT_MAX_TEXT;
341
- const storedResultText = truncated ? fullResultText.slice(0, TASK_RESULT_MAX_TEXT) : fullResultText;
789
+ const sanitizedResult = sanitizeFlowDeskTaskResultTextV1(fullResultText);
790
+ const storedResultText = sanitizedResult.text;
342
791
  const promptSha256 = sha256Hex(input.promptText);
343
792
  const resultSha256 = sha256Hex(fullResultText);
344
793
  // Write task_result evidence
@@ -352,17 +801,112 @@ export async function executeFlowDeskAgentTaskV1(input) {
352
801
  provider_qualified_model_id: input.providerQualifiedModelId,
353
802
  task_prompt_sha256: promptSha256,
354
803
  result_text: storedResultText,
355
- result_text_truncated: truncated,
804
+ result_text_truncated: sanitizedResult.truncated,
356
805
  result_text_sha256: resultSha256,
806
+ completion_status: resultObservation?.completionStatus ?? "final",
807
+ output_kind: resultObservation?.outputKind ?? "final_answer",
808
+ usable_for_synthesis: resultObservation?.usableForSynthesis ?? true,
809
+ // Capture/judgement separation: text was captured, so this is NOT a
810
+ // contract failure. output_kind/completion_status/looks_like_refusal_or_error
811
+ // are advisory inputs for the coordinator's substance judgement, never a
812
+ // capture-side drop. missing_contract is only ever true when an explicit
813
+ // contract was requested AND no text was captured (that path returns
814
+ // task_failed above, so here it is always false).
815
+ missing_contract: false,
816
+ ...(resultObservation?.finalizationReason === undefined
817
+ ? {}
818
+ : { finalization_reason: resultObservation.finalizationReason }),
819
+ looks_like_refusal_or_error: resultObservation?.looksLikeRefusalOrError ?? false,
357
820
  created_at: observedAt,
358
821
  dispatch_authority_enabled: false,
359
822
  };
360
- writeSessionEvidence({
823
+ const taskResultWritten = writeSessionEvidence({
361
824
  rootDir: input.rootDir,
362
825
  workflowId: input.workflowId,
363
826
  evidenceId: taskResultEvidenceId,
364
827
  record: taskResultRecord,
365
828
  });
829
+ if (!taskResultWritten) {
830
+ const taskFailedEvidenceId = `task-failed-${input.taskId}-${token}-result-write`;
831
+ const redactedReason = "task_result evidence persistence failed";
832
+ writeSessionEvidence({
833
+ rootDir: input.rootDir,
834
+ workflowId: input.workflowId,
835
+ evidenceId: taskFailedEvidenceId,
836
+ record: {
837
+ schema_version: "flowdesk.task_failed.v1",
838
+ workflow_id: input.workflowId,
839
+ lane_id: input.laneId,
840
+ task_id: input.taskId,
841
+ agent_ref: input.agentRef,
842
+ provider_qualified_model_id: input.providerQualifiedModelId,
843
+ failure_category: "unknown",
844
+ redacted_reason: redactedReason,
845
+ created_at: observedAt,
846
+ dispatch_authority_enabled: false,
847
+ },
848
+ });
849
+ writeAgentTaskProgress({
850
+ rootDir: input.rootDir,
851
+ workflowId: input.workflowId,
852
+ laneId: input.laneId,
853
+ taskId: input.taskId,
854
+ agentRef: input.agentRef,
855
+ providerQualifiedModelId: input.providerQualifiedModelId,
856
+ phase: "failed",
857
+ progressSeq: 4,
858
+ progressLabel: "agent task result persistence failed",
859
+ });
860
+ writeAgentTaskTerminalLifecycle({
861
+ rootDir: input.rootDir,
862
+ workflowId: input.workflowId,
863
+ laneId: input.laneId,
864
+ attemptId,
865
+ parentSessionRef,
866
+ agentRef: input.agentRef,
867
+ providerQualifiedModelId: input.providerQualifiedModelId,
868
+ state: "invocation_failed",
869
+ evidenceId: `lifecycle-task-terminal-${input.laneId}-${token}-result-write`,
870
+ createdAt: observedAt,
871
+ updatedAt: new Date().toISOString(),
872
+ timeoutMs: input.timeoutMs,
873
+ });
874
+ refreshFlowDeskCompletionUiCachesV1({
875
+ rootDir: input.rootDir,
876
+ workflowId: input.workflowId,
877
+ observedAt,
878
+ });
879
+ return {
880
+ status: "task_failed",
881
+ failureCategory: "unknown",
882
+ redactedReason,
883
+ laneId: input.laneId,
884
+ };
885
+ }
886
+ const observedReviewerVerdict = observedTopTierReviewerVerdictFromText({
887
+ text: fullResultText,
888
+ workflowId: input.workflowId,
889
+ });
890
+ const reviewerVerdictPersisted = observedReviewerVerdict === undefined
891
+ ? false
892
+ : persistObservedReviewerVerdict({
893
+ rootDir: input.rootDir,
894
+ workflowId: input.workflowId,
895
+ verdict: observedReviewerVerdict,
896
+ });
897
+ writeAgentTaskProgress({
898
+ rootDir: input.rootDir,
899
+ workflowId: input.workflowId,
900
+ laneId: input.laneId,
901
+ taskId: input.taskId,
902
+ agentRef: input.agentRef,
903
+ providerQualifiedModelId: input.providerQualifiedModelId,
904
+ phase: "finalizing",
905
+ progressSeq: 3,
906
+ progressLabel: reviewerVerdictPersisted
907
+ ? "agent task result captured with reviewer verdict evidence"
908
+ : "agent task result captured",
909
+ });
366
910
  writeAgentTaskTerminalLifecycle({
367
911
  rootDir: input.rootDir,
368
912
  workflowId: input.workflowId,
@@ -373,13 +917,19 @@ export async function executeFlowDeskAgentTaskV1(input) {
373
917
  messageRef: launchResult.messageRef?.startsWith("msg-") ? launchResult.messageRef : undefined,
374
918
  agentRef: input.agentRef,
375
919
  providerQualifiedModelId: input.providerQualifiedModelId,
376
- state: "incomplete",
920
+ state: reviewerVerdictPersisted ? "complete" : "incomplete",
921
+ verdictRef: reviewerVerdictPersisted ? observedReviewerVerdict?.verdict_id : undefined,
377
922
  outputRef: `output-${taskResultEvidenceId}`,
378
923
  evidenceId: `lifecycle-task-terminal-${input.laneId}-${token}`,
379
924
  createdAt: observedAt,
380
925
  updatedAt: new Date().toISOString(),
381
926
  timeoutMs: input.timeoutMs,
382
927
  });
928
+ refreshFlowDeskCompletionUiCachesV1({
929
+ rootDir: input.rootDir,
930
+ workflowId: input.workflowId,
931
+ observedAt,
932
+ });
383
933
  return {
384
934
  status: "task_completed",
385
935
  resultText: fullResultText,