@steipete/oracle 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +55 -10
  2. package/dist/bin/oracle-cli.js +104 -16
  3. package/dist/src/browser/actions/archiveConversation.js +224 -0
  4. package/dist/src/browser/actions/assistantResponse.js +26 -0
  5. package/dist/src/browser/actions/deepResearch.js +662 -0
  6. package/dist/src/browser/actions/modelSelection.js +78 -13
  7. package/dist/src/browser/actions/navigation.js +22 -0
  8. package/dist/src/browser/actions/projectSources.js +491 -0
  9. package/dist/src/browser/actions/promptComposer.js +52 -27
  10. package/dist/src/browser/actions/thinkingStatus.js +391 -0
  11. package/dist/src/browser/artifacts.js +150 -0
  12. package/dist/src/browser/attachRunning.js +31 -0
  13. package/dist/src/browser/chatgptImages.js +315 -0
  14. package/dist/src/browser/chromeLifecycle.js +214 -3
  15. package/dist/src/browser/config.js +26 -2
  16. package/dist/src/browser/constants.js +8 -0
  17. package/dist/src/browser/controlPlan.js +81 -0
  18. package/dist/src/browser/detect.js +206 -33
  19. package/dist/src/browser/domDebug.js +49 -0
  20. package/dist/src/browser/index.js +1257 -485
  21. package/dist/src/browser/liveTabs.js +434 -0
  22. package/dist/src/browser/profileState.js +83 -3
  23. package/dist/src/browser/projectSourcesRunner.js +366 -0
  24. package/dist/src/browser/reattach.js +117 -45
  25. package/dist/src/browser/reattachHelpers.js +1 -1
  26. package/dist/src/browser/sessionRunner.js +53 -1
  27. package/dist/src/browser/tabLeaseRegistry.js +182 -0
  28. package/dist/src/cli/bridge/claudeConfig.js +12 -8
  29. package/dist/src/cli/bridge/codexConfig.js +2 -2
  30. package/dist/src/cli/browserConfig.js +40 -0
  31. package/dist/src/cli/browserDefaults.js +31 -7
  32. package/dist/src/cli/browserTabs.js +228 -0
  33. package/dist/src/cli/dryRun.js +33 -1
  34. package/dist/src/cli/duplicatePromptGuard.js +10 -2
  35. package/dist/src/cli/help.js +1 -1
  36. package/dist/src/cli/options.js +4 -0
  37. package/dist/src/cli/projectSources.js +116 -0
  38. package/dist/src/cli/sessionCommand.js +51 -0
  39. package/dist/src/cli/sessionDisplay.js +121 -9
  40. package/dist/src/cli/sessionRunner.js +51 -7
  41. package/dist/src/mcp/consultPresets.js +19 -0
  42. package/dist/src/mcp/server.js +2 -0
  43. package/dist/src/mcp/tools/consult.js +201 -26
  44. package/dist/src/mcp/tools/projectSources.js +123 -0
  45. package/dist/src/mcp/types.js +7 -0
  46. package/dist/src/mcp/utils.js +6 -1
  47. package/dist/src/oracle/run.js +4 -1
  48. package/dist/src/projectSources/plan.js +27 -0
  49. package/dist/src/projectSources/types.js +1 -0
  50. package/dist/src/projectSources/url.js +23 -0
  51. package/dist/src/sessionManager.js +1 -0
  52. package/package.json +2 -1
@@ -3,22 +3,33 @@ import path from "node:path";
3
3
  import os from "node:os";
4
4
  import net from "node:net";
5
5
  import { resolveBrowserConfig } from "./config.js";
6
- import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToRemoteChrome, closeRemoteChromeTarget, connectWithNewTab, closeTab, } from "./chromeLifecycle.js";
6
+ import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToRemoteChrome, connectWithNewTab, closeTab, closeRemoteChromeTarget, closeBlankChromeTabs, } from "./chromeLifecycle.js";
7
7
  import { syncCookies } from "./cookies.js";
8
8
  import { navigateToChatGPT, navigateToPromptReadyWithFallback, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, installJavaScriptDialogAutoDismissal, ensureModelSelection, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from "./pageActions.js";
9
9
  import { INPUT_SELECTORS } from "./constants.js";
10
10
  import { uploadAttachmentViaDataTransfer } from "./actions/remoteFileTransfer.js";
11
11
  import { ensureThinkingTime } from "./actions/thinkingTime.js";
12
+ import { startThinkingStatusMonitor } from "./actions/thinkingStatus.js";
13
+ import { activateDeepResearch, waitForDeepResearchCompletion, waitForResearchPlanAutoConfirm, } from "./actions/deepResearch.js";
12
14
  import { estimateTokenCount, withRetries, delay } from "./utils.js";
13
15
  import { formatElapsed } from "../oracle/format.js";
14
16
  import { CHATGPT_URL, CONVERSATION_TURN_SELECTOR, DEFAULT_MODEL_STRATEGY } from "./constants.js";
15
17
  import { BrowserAutomationError } from "../oracle/errors.js";
16
18
  import { alignPromptEchoPair, buildPromptEchoMatcher } from "./reattachHelpers.js";
17
- import { cleanupStaleProfileState, acquireProfileRunLock, readChromePid, readDevToolsPort, shouldCleanupManualLoginProfileState, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from "./profileState.js";
19
+ import { cleanupStaleProfileState, acquireProfileRunLock, findRunningChromeDebugTargetForProfile, readChromePid, readDevToolsPort, shouldCleanupManualLoginProfileState, terminateRecordedChromeForProfile, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from "./profileState.js";
20
+ import { acquireBrowserTabLease, hasOtherActiveBrowserTabLeases, } from "./tabLeaseRegistry.js";
21
+ import { appendArtifacts, saveBrowserTranscriptArtifact, saveDeepResearchReportArtifact, } from "./artifacts.js";
22
+ import { collectGeneratedImageArtifacts } from "./chatgptImages.js";
18
23
  import { runProviderSubmissionFlow } from "./providerDomFlow.js";
19
24
  import { chatgptDomProvider } from "./providers/index.js";
25
+ import { resolveAttachRunningConnection } from "./attachRunning.js";
26
+ import { connectToExistingChatGptTab } from "./liveTabs.js";
27
+ import { captureBrowserDiagnostics } from "./domDebug.js";
28
+ import { archiveChatGptConversation, resolveBrowserArchiveDecision, } from "./actions/archiveConversation.js";
29
+ import { describeBrowserControlPlan, formatBrowserControlPlan } from "./controlPlan.js";
20
30
  export { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from "./constants.js";
21
31
  export { parseDuration, delay, normalizeChatgptUrl, isTemporaryChatUrl } from "./utils.js";
32
+ export { formatThinkingLog, formatThinkingWaitingLog, buildThinkingStatusExpressionForTest, readThinkingStatusForTest, sanitizeThinkingText, startThinkingStatusMonitorForTest, } from "./actions/thinkingStatus.js";
22
33
  function redactBrowserConfigForDebugLog(config) {
23
34
  const redacted = { ...config };
24
35
  if (Array.isArray(config.inlineCookies)) {
@@ -35,16 +46,198 @@ function isCloudflareChallengeError(error) {
35
46
  return false;
36
47
  return error.details?.stage === "cloudflare-challenge";
37
48
  }
49
/**
 * Tells whether an error came from the assistant-capture phase, based on the
 * stage recorded in its details.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @returns {boolean} True only for a BrowserAutomationError whose
 *   `details.stage` is "assistant-timeout" or "assistant-recheck".
 */
function isReattachableCaptureError(error) {
  if (!(error instanceof BrowserAutomationError)) {
    return false;
  }
  const stage = error.details?.stage;
  return stage === "assistant-timeout" || stage === "assistant-recheck";
}
55
/**
 * Classifies an error into the reason the browser should be left open, if any.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @param {boolean} headless - When truthy, nothing is ever preserved.
 * @returns {"cloudflare-challenge" | "reattachable-capture" | null} The
 *   preservation reason, or null when the browser need not be preserved.
 */
function classifyPreservedBrowserError(error, headless) {
  if (headless) {
    return null;
  }
  // The Cloudflare check runs first, so it wins if both predicates match.
  if (isCloudflareChallengeError(error)) {
    return "cloudflare-challenge";
  }
  if (isReattachableCaptureError(error)) {
    return "reattachable-capture";
  }
  return null;
}
38
64
/**
 * Reports whether the browser should be kept open after a failed run.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @param {boolean} headless - Whether the run was headless.
 * @returns {boolean} True when classifyPreservedBrowserError yields a reason.
 */
function shouldPreserveBrowserOnError(error, headless) {
  return classifyPreservedBrowserError(error, headless) !== null;
}
41
67
/**
 * Test-only export that forwards to shouldPreserveBrowserOnError.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @param {boolean} headless - Whether the run was headless.
 * @returns {boolean} Whatever shouldPreserveBrowserOnError returns.
 */
export function shouldPreserveBrowserOnErrorForTest(error, headless) {
  return shouldPreserveBrowserOnError(error, headless);
}
70
/**
 * Test-only export that forwards to classifyPreservedBrowserError.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @param {boolean} headless - Whether the run was headless.
 * @returns {string | null} Whatever classifyPreservedBrowserError returns.
 */
export function classifyPreservedBrowserErrorForTest(error, headless) {
  return classifyPreservedBrowserError(error, headless);
}
73
/**
 * Decides whether the separate thinking-time step can be skipped because the
 * desired model is GPT-5.5 Pro and "extended" thinking was requested.
 *
 * @param {string | null | undefined} desiredModel - Requested model label.
 * @param {string | null | undefined} thinkingTime - Requested thinking time.
 * @returns {boolean} True only when thinkingTime is "extended" and the
 *   lowercased model is exactly "gpt-5.5-pro" or contains one of the spaced
 *   spellings "gpt-5.5 pro", "gpt 5.5 pro" or "gpt 5 5 pro".
 */
function shouldSkipThinkingTimeSelection(desiredModel, thinkingTime) {
  if (thinkingTime !== "extended" || !desiredModel) {
    return false;
  }
  const normalized = desiredModel.toLowerCase();
  if (normalized === "gpt-5.5-pro") {
    return true;
  }
  const spacedSpellings = ["gpt-5.5 pro", "gpt 5.5 pro", "gpt 5 5 pro"];
  return spacedSpellings.some((spelling) => normalized.includes(spelling));
}
83
/**
 * Test-only export that forwards to shouldSkipThinkingTimeSelection.
 *
 * @param {string | null | undefined} desiredModel - Requested model label.
 * @param {string | null | undefined} thinkingTime - Requested thinking time.
 * @returns {boolean} Whatever shouldSkipThinkingTimeSelection returns.
 */
export function shouldSkipThinkingTimeSelectionForTest(desiredModel, thinkingTime) {
  return shouldSkipThinkingTimeSelection(desiredModel, thinkingTime);
}
86
/**
 * Lists the local-only Chrome CLI flags that are set in the config, so the
 * caller can tell the user which ones remote-Chrome mode ignores.
 *
 * @param {object} config - Resolved browser config; reads `headless`,
 *   `hideWindow`, `keepBrowser`, `attachRunning` and `chromePath`.
 * @returns {string[]} Flag names in a fixed order; empty when none apply.
 */
function listIgnoredRemoteChromeFlags(config) {
  const candidates = [
    [config.headless, "--browser-headless"],
    [config.hideWindow, "--browser-hide-window"],
    [config.keepBrowser, "--browser-keep-browser"],
    // chromePath is not reported when attaching to a running browser.
    [!config.attachRunning && config.chromePath, "--browser-chrome-path"],
  ];
  return candidates.filter(([enabled]) => Boolean(enabled)).map(([, flag]) => flag);
}
44
94
/**
 * Checks whether an error is a BrowserAutomationError carrying a given code.
 *
 * @param {unknown} error - Value to inspect.
 * @param {string} code - Expected value of `error.details.code`.
 * @returns {boolean} True when the type and the code both match.
 */
function hasBrowserErrorCode(error, code) {
  if (!(error instanceof BrowserAutomationError)) {
    return false;
  }
  return error.details?.code === code;
}
98
/**
 * Runs a best-effort artifact save: a failure is logged and turned into null
 * rather than propagated to the caller.
 *
 * @param {() => Promise<unknown>} operation - Performs the save.
 * @param {(message: string) => void} logger - Receives the failure message.
 * @returns {Promise<unknown | null>} The operation's result, or null if it threw.
 */
async function saveOptionalArtifact(operation, logger) {
  try {
    return await operation();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger(`[browser] Failed to save session artifact: ${message}`);
    return null;
  }
}
108
/**
 * Waits for the assistant's answer. Without an image request it delegates to
 * the caller's text waiter; with one it polls for generated-image or text output.
 *
 * @param {object} params
 * @param {boolean} params.imageOutputRequested - Selects the polling path.
 * @param {() => Promise<object>} params.waitForText - Text-only waiter.
 * @param {(message: string) => void} params.logger - Progress logger.
 * @param {object} params.Runtime - Passed through to the poller.
 * @param {number} params.timeoutMs - Polling budget in milliseconds.
 * @param {number} [params.minTurnIndex] - Passed through to the poller.
 * @param {string} [params.expectedConversationId] - Passed through to the poller.
 * @returns {Promise<object>} The captured response.
 * @throws {Error} When the poller returns nothing.
 */
async function waitForAssistantOrGeneratedImageResponse(params) {
  if (!params.imageOutputRequested) {
    return params.waitForText();
  }
  const generatedImageMarker = "/backend-api/estuary/content?id=file_";
  params.logger("[browser] Waiting for ChatGPT generated image response.");
  const response = await pollGeneratedImageOrTextAssistantResponse(
    params.Runtime,
    params.timeoutMs,
    params.minTurnIndex,
    params.expectedConversationId,
  );
  if (!response) {
    throw new Error("assistant response timeout while waiting for generated image or text");
  }
  if (response.html?.includes(generatedImageMarker)) {
    params.logger("[browser] Captured generated image response before text appeared.");
  }
  return response;
}
122
/**
 * Runs a recheck operation. A BrowserAutomationError is re-thrown to the
 * caller; any other failure is turned into null.
 *
 * @param {() => Promise<unknown>} operation - The recheck to attempt.
 * @returns {Promise<unknown | null>} The operation's result, or null if it
 *   failed with something other than a BrowserAutomationError.
 * @throws {BrowserAutomationError} Re-thrown unchanged from the operation.
 */
async function attemptAssistantRecheckOrRethrow(operation) {
  try {
    return await operation();
  } catch (error) {
    if (error instanceof BrowserAutomationError) {
      throw error;
    }
    return null;
  }
}
133
/**
 * Polls the assistant snapshot until usable text appears or the time budget
 * runs out.
 *
 * A snapshot counts as usable when its trimmed text is non-empty and either
 * the HTML contains the generated-image URL marker or the text is more than
 * leftover UI labels (see isImageOnlyUiChromeText).
 *
 * @param {object} Runtime - Passed to readAssistantSnapshot.
 * @param {number} timeoutMs - Polling budget in milliseconds.
 * @param {number} [minTurnIndex] - Minimum turn index for the first read.
 * @param {string} [expectedConversationId] - Passed to readAssistantSnapshot.
 * @returns {Promise<{text: string, html: string, meta: {turnId?: string, messageId?: string}} | null>}
 *   The captured response, or null when the deadline passes.
 */
async function pollGeneratedImageOrTextAssistantResponse(Runtime, timeoutMs, minTurnIndex, expectedConversationId) {
  const generatedImageMarker = "/backend-api/estuary/content?id=file_";
  // Read failures are treated the same as "no snapshot yet".
  const readSnapshot = (turnIndex) =>
    readAssistantSnapshot(Runtime, turnIndex, expectedConversationId).catch(() => null);
  const hasTurnFloor = typeof minTurnIndex === "number" && Number.isFinite(minTurnIndex);
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    let snapshot = await readSnapshot(minTurnIndex);
    if (!snapshot && hasTurnFloor) {
      // Retry without the turn floor, accepting the result only if it holds
      // a generated image.
      const relaxedSnapshot = await readSnapshot(undefined);
      const relaxedHtml = typeof relaxedSnapshot?.html === "string" ? relaxedSnapshot.html : "";
      if (relaxedHtml.includes(generatedImageMarker)) {
        snapshot = relaxedSnapshot;
      }
    }
    const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
    const html = typeof snapshot?.html === "string" ? snapshot.html : "";
    const hasGeneratedImage = html.includes(generatedImageMarker);
    if (text && (hasGeneratedImage || !isImageOnlyUiChromeText(text))) {
      return {
        text,
        html,
        meta: {
          turnId: snapshot?.turnId ?? undefined,
          messageId: snapshot?.messageId ?? undefined,
        },
      };
    }
    await delay(750);
  }
  return null;
}
161
/**
 * Tells whether captured assistant text is only UI labels rather than an
 * actual answer.
 *
 * @param {string} text - Captured assistant text.
 * @returns {boolean} True when the text, lowercased and with whitespace
 *   collapsed, is empty or one of "edit", "stopped thinking",
 *   "stopped thinking edit".
 */
function isImageOnlyUiChromeText(text) {
  const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
  const uiOnlyLabels = ["", "edit", "stopped thinking", "stopped thinking edit"];
  return uiOnlyLabels.includes(normalized);
}
168
/**
 * Cleans a list of follow-up prompts: trims each one and drops blanks.
 *
 * Entries that are not strings are skipped instead of raising a TypeError on
 * `.trim()`.
 *
 * @param {Array<unknown> | null | undefined} values - Raw follow-up prompts.
 * @returns {string[]} Trimmed, non-empty prompts in their original order.
 */
function normalizeBrowserFollowUpPrompts(values) {
  return (values ?? [])
    .filter((entry) => typeof entry === "string")
    .map((entry) => entry.trim())
    .filter(Boolean);
}
171
/**
 * Collapses one or more captured conversation turns into a single answer.
 *
 * With zero or one turn, that turn's answer is returned unchanged (empty
 * strings when nothing was captured). With several turns, a Markdown
 * transcript is built with a `## <label>` heading per turn, an optional
 * `### Prompt` section and an `### Answer` section.
 *
 * Missing `label`, `answerMarkdown` or `answerText` fields fall back to
 * "Turn N" and "_No text captured._" in the multi-turn path, matching the
 * tolerance the single-turn path already had.
 *
 * @param {Array<{label?: string, prompt?: string, answerText?: string, answerMarkdown?: string}>} turns
 *   Captured turns in conversation order.
 * @returns {{answerText: string, answerMarkdown: string}} The combined answer;
 *   for several turns both fields hold the same Markdown transcript.
 */
export function formatBrowserTurnTranscript(turns) {
  const entries = Array.isArray(turns) ? turns : [];
  if (entries.length <= 1) {
    const turn = entries[0];
    return {
      answerText: turn?.answerText ?? "",
      answerMarkdown: turn?.answerMarkdown ?? turn?.answerText ?? "",
    };
  }
  const sections = entries.map((turn, index) => {
    const label = (turn?.label ?? "").trim() || `Turn ${index + 1}`;
    const prompt = turn?.prompt?.trim();
    const promptBlock = prompt ? `\n\n### Prompt\n\n${prompt}` : "";
    const answer = (turn?.answerMarkdown || turn?.answerText || "").trim() || "_No text captured._";
    return `## ${label}${promptBlock}\n\n### Answer\n\n${answer}`;
  });
  const answerMarkdown = sections.join("\n\n").trim();
  return {
    answerText: answerMarkdown,
    answerMarkdown,
  };
}
194
/**
 * Archives the finished ChatGPT conversation when the archive policy allows
 * it and the required artifacts were saved.
 *
 * The archive step never rejects: every outcome, including failure, is
 * reported in the returned record. The call is awaited inside `try` so that a
 * synchronous throw is reported the same way as a rejected promise.
 *
 * @param {object} args
 * @param {object} args.Runtime - Passed to archiveChatGptConversation.
 * @param {(message: string) => void} args.logger - Receives skip/failure messages.
 * @param {object} args.config - Reads `archiveConversations`, `chatgptUrl`,
 *   `url` and `researchMode`.
 * @param {string | null | undefined} args.conversationUrl - Conversation to archive.
 * @param {number} args.followUpCount - Forwarded to the archive decision.
 * @param {boolean} args.requiredArtifactsSaved - When falsy, archiving is skipped.
 * @returns {Promise<object>} The result of archiveChatGptConversation, or a
 *   `{mode, attempted, archived, reason, conversationUrl, error?}` record when
 *   archiving was skipped or failed.
 */
async function maybeArchiveCompletedConversation({ Runtime, logger, config, conversationUrl, followUpCount, requiredArtifactsSaved, }) {
  const decision = resolveBrowserArchiveDecision({
    mode: config.archiveConversations,
    chatgptUrl: config.chatgptUrl ?? config.url,
    conversationUrl,
    researchMode: config.researchMode,
    followUpCount,
  });
  const skip = (reason) => {
    logger(`[browser] ChatGPT archive skipped (${reason}).`);
    return {
      mode: decision.mode,
      attempted: false,
      archived: false,
      reason,
      conversationUrl: conversationUrl ?? undefined,
    };
  };
  if (!decision.shouldArchive) {
    return skip(decision.reason);
  }
  if (!requiredArtifactsSaved) {
    // Do not archive a conversation whose output was not saved.
    return skip("artifact-save-failed");
  }
  try {
    return await archiveChatGptConversation(Runtime, logger, {
      mode: decision.mode,
      conversationUrl,
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger(`[browser] ChatGPT archive failed (${message}).`);
    return {
      mode: decision.mode,
      attempted: true,
      archived: false,
      reason: "archive-failed",
      conversationUrl: conversationUrl ?? undefined,
      error: message,
    };
  }
}
238
/**
 * Test-only export that forwards to maybeArchiveCompletedConversation.
 *
 * @param {object} args - Same argument object as maybeArchiveCompletedConversation.
 * @returns {Promise<object>} Whatever maybeArchiveCompletedConversation returns.
 */
export function maybeArchiveCompletedConversationForTest(args) {
  return maybeArchiveCompletedConversation(args);
}
48
241
  async function runSubmissionWithRecovery({ prompt, attachments, fallbackSubmission, submit, reloadPromptComposer, prepareFallbackSubmission, logger, }) {
49
242
  let currentPrompt = prompt;
50
243
  let currentAttachments = attachments;
@@ -77,6 +270,33 @@ async function runSubmissionWithRecovery({ prompt, attachments, fallbackSubmissi
77
270
/**
 * Test-only export that forwards to runSubmissionWithRecovery.
 *
 * @param {object} args - Same argument object as runSubmissionWithRecovery.
 * @returns {Promise<unknown>} Whatever runSubmissionWithRecovery resolves to.
 */
export async function runSubmissionWithRecoveryForTest(args) {
  return runSubmissionWithRecovery(args);
}
273
/**
 * Returns the absolute manual-login profile directory for a remote-Chrome
 * run, or null when the config does not describe one.
 *
 * @param {object} config - Reads `remoteChrome`, `manualLogin` and
 *   `manualLoginProfileDir`.
 * @returns {string | null} The resolved absolute path, or null unless all
 *   three settings are truthy.
 */
function resolveRemoteTabLeaseProfileDir(config) {
  const { remoteChrome, manualLogin, manualLoginProfileDir } = config;
  if (remoteChrome && manualLogin && manualLoginProfileDir) {
    return path.resolve(manualLoginProfileDir);
  }
  return null;
}
279
/**
 * Test-only export that forwards to resolveRemoteTabLeaseProfileDir.
 *
 * @param {object} config - Same config object as resolveRemoteTabLeaseProfileDir.
 * @returns {string | null} Whatever resolveRemoteTabLeaseProfileDir returns.
 */
export function resolveRemoteTabLeaseProfileDirForTest(config) {
  return resolveRemoteTabLeaseProfileDir(config);
}
282
/**
 * Closes the remote-Chrome handle that fits how the run ended.
 *
 * - The connection already dropped unexpectedly: nothing is closed.
 * - A connection wrapper exists and the run completed: the wrapper is closed.
 * - Otherwise: only the client, if any, is closed.
 *
 * @param {object} options
 * @param {boolean} [options.connectionClosedUnexpectedly] - Skip all cleanup.
 * @param {{close: () => Promise<void>} | null} [options.connection] - Connection wrapper.
 * @param {{close: () => Promise<void>} | null} [options.client] - Client handle.
 * @param {string} [options.runStatus] - Compared against "complete".
 * @returns {Promise<void>}
 */
async function closeRemoteConnectionAfterRun(options) {
  if (options.connectionClosedUnexpectedly) {
    return;
  }
  const { connection } = options;
  if (connection && options.runStatus === "complete") {
    await connection.close();
    return;
  }
  await options.client?.close();
}
297
/**
 * Decides whether the run's tab should be closed once the run is over.
 *
 * The result is always a boolean, including when `ownsTarget` is missing.
 *
 * @param {object} options
 * @param {string} options.runStatus - Must be "complete" for the tab to be closed.
 * @param {boolean} [options.ownsTarget] - Must be truthy for the tab to be closed.
 * @param {boolean} [options.keepBrowser] - When truthy, the tab is kept open.
 * @returns {boolean} True only when all three conditions allow closing.
 */
function shouldCloseOwnedRunTargetAfterRun(options) {
  return Boolean(options.runStatus === "complete" && options.ownsTarget && !options.keepBrowser);
}
80
300
  export async function runBrowserMode(options) {
81
301
  const promptText = options.prompt?.trim();
82
302
  if (!promptText) {
@@ -85,6 +305,13 @@ export async function runBrowserMode(options) {
85
305
  const attachments = options.attachments ?? [];
86
306
  const fallbackSubmission = options.fallbackSubmission;
87
307
  let config = resolveBrowserConfig(options.config);
308
+ const followUpPrompts = normalizeBrowserFollowUpPrompts(options.followUpPrompts);
309
+ if (config.researchMode === "deep" && followUpPrompts.length > 0) {
310
+ throw new BrowserAutomationError("Browser follow-ups are not supported with Deep Research mode. Put the full research plan into the initial prompt or run a normal browser consult for multi-turn review.", {
311
+ stage: "browser-follow-ups",
312
+ details: { researchMode: "deep", followUps: followUpPrompts.length },
313
+ });
314
+ }
88
315
  const logger = options.log ?? ((_message) => { });
89
316
  if (logger.verbose === undefined) {
90
317
  logger.verbose = Boolean(config.debug);
@@ -95,8 +322,9 @@ export async function runBrowserMode(options) {
95
322
  const runtimeHintCb = options.runtimeHintCb;
96
323
  let lastTargetId;
97
324
  let lastUrl;
325
+ let tabLease = null;
98
326
  const emitRuntimeHint = async () => {
99
- if (!runtimeHintCb || !chrome?.port) {
327
+ if (!chrome?.port) {
100
328
  return;
101
329
  }
102
330
  const conversationId = lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -111,7 +339,13 @@ export async function runBrowserMode(options) {
111
339
  controllerPid: process.pid,
112
340
  };
113
341
  try {
114
- await runtimeHintCb(hint);
342
+ await runtimeHintCb?.(hint);
343
+ await tabLease?.update({
344
+ chromeHost,
345
+ chromePort: chrome.port,
346
+ chromeTargetId: lastTargetId,
347
+ tabUrl: lastUrl,
348
+ });
115
349
  }
116
350
  catch (error) {
117
351
  const message = error instanceof Error ? error.message : String(error);
@@ -124,6 +358,18 @@ export async function runBrowserMode(options) {
124
358
  promptLength: promptText.length,
125
359
  })}`);
126
360
  }
361
+ for (const line of formatBrowserControlPlan(describeBrowserControlPlan(config), "browser")) {
362
+ logger(line);
363
+ }
364
+ if (config.attachRunning) {
365
+ const attached = await resolveAttachRunningConnection(config, logger);
366
+ config = {
367
+ ...config,
368
+ remoteChrome: { host: attached.host, port: attached.port },
369
+ remoteChromeBrowserWSEndpoint: attached.browserWSEndpoint,
370
+ remoteChromeProfileRoot: attached.profileRoot,
371
+ };
372
+ }
127
373
  if (!config.remoteChrome && !config.manualLogin) {
128
374
  const preferredPort = config.debugPort ?? DEFAULT_DEBUG_PORT;
129
375
  const availablePort = await pickAvailableDebugPort(preferredPort, logger);
@@ -135,9 +381,9 @@ export async function runBrowserMode(options) {
135
381
  // Remote Chrome mode - connect to existing browser
136
382
  if (config.remoteChrome) {
137
383
  // Warn about ignored local-only options
138
- if (config.headless || config.hideWindow || config.keepBrowser || config.chromePath) {
139
- logger("Note: --remote-chrome ignores local Chrome flags " +
140
- "(--browser-headless, --browser-hide-window, --browser-keep-browser, --browser-chrome-path).");
384
+ const ignoredFlags = listIgnoredRemoteChromeFlags(config);
385
+ if (ignoredFlags.length > 0) {
386
+ logger(`Note: --remote-chrome ignores local Chrome flags (${ignoredFlags.join(", ")}).`);
141
387
  }
142
388
  return runRemoteBrowserMode(promptText, attachments, config, logger, options);
143
389
  }
@@ -156,24 +402,42 @@ export async function runBrowserMode(options) {
156
402
  else {
157
403
  logger(`Created temporary Chrome profile at ${userDataDir}`);
158
404
  }
405
+ if (manualLogin) {
406
+ tabLease = await acquireBrowserTabLease(userDataDir, {
407
+ maxConcurrentTabs: config.maxConcurrentTabs,
408
+ timeoutMs: config.timeoutMs,
409
+ logger,
410
+ sessionId: options.sessionId,
411
+ });
412
+ }
159
413
  const effectiveKeepBrowser = Boolean(config.keepBrowser);
160
- const reusedChrome = manualLogin
161
- ? await maybeReuseRunningChrome(userDataDir, logger, {
162
- waitForPortMs: config.reuseChromeWaitMs,
163
- })
164
- : null;
165
- const chrome = reusedChrome ??
166
- (await launchChrome({
167
- ...config,
168
- remoteChrome: config.remoteChrome,
169
- }, userDataDir, logger));
170
- const chromeHost = chrome.host ?? "127.0.0.1";
171
- // Persist profile state so future manual-login runs can reuse this Chrome.
172
- if (manualLogin && chrome.port) {
173
- await writeDevToolsActivePort(userDataDir, chrome.port);
174
- if (!reusedChrome && chrome.pid) {
175
- await writeChromePid(userDataDir, chrome.pid);
414
+ let acquiredChrome;
415
+ try {
416
+ acquiredChrome = manualLogin
417
+ ? await acquireManualLoginChromeForRun(userDataDir, config, logger, options.sessionId)
418
+ : {
419
+ chrome: await launchChrome({
420
+ ...config,
421
+ remoteChrome: config.remoteChrome,
422
+ }, userDataDir, logger),
423
+ reusedChrome: null,
424
+ };
425
+ }
426
+ catch (error) {
427
+ if (tabLease) {
428
+ const handle = tabLease;
429
+ tabLease = null;
430
+ await handle.release().catch(() => undefined);
176
431
  }
432
+ throw error;
433
+ }
434
+ const { chrome, reusedChrome } = acquiredChrome;
435
+ const chromeHost = chrome.host ?? "127.0.0.1";
436
+ if (tabLease) {
437
+ await tabLease.update({
438
+ chromeHost,
439
+ chromePort: chrome.port,
440
+ });
177
441
  }
178
442
  let removeTerminationHooks = null;
179
443
  try {
@@ -188,6 +452,7 @@ export async function runBrowserMode(options) {
188
452
  }
189
453
  let client = null;
190
454
  let isolatedTargetId = null;
455
+ let ownsTarget = true;
191
456
  const startedAt = Date.now();
192
457
  let answerText = "";
193
458
  let answerMarkdown = "";
@@ -200,14 +465,37 @@ export async function runBrowserMode(options) {
200
465
  let preserveBrowserOnError = false;
201
466
  try {
202
467
  try {
203
- const strictTabIsolation = Boolean(manualLogin && reusedChrome);
204
- const connection = await connectWithNewTab(chrome.port, logger, config.url, chromeHost, {
205
- fallbackToDefault: !strictTabIsolation,
206
- retries: strictTabIsolation ? 3 : 0,
207
- retryDelayMs: 500,
208
- });
209
- client = connection.client;
210
- isolatedTargetId = connection.targetId ?? null;
468
+ if (config.browserTabRef) {
469
+ const attached = await connectToExistingChatGptTab({
470
+ host: chromeHost,
471
+ port: chrome.port,
472
+ ref: config.browserTabRef,
473
+ });
474
+ client = attached.client;
475
+ isolatedTargetId = attached.targetId ?? null;
476
+ lastTargetId = attached.targetId ?? undefined;
477
+ lastUrl = attached.tab.url || lastUrl;
478
+ ownsTarget = false;
479
+ logger(`Attached to existing ChatGPT tab ${attached.targetId}${attached.tab.url ? ` (${attached.tab.url})` : ""}`);
480
+ }
481
+ else {
482
+ const strictTabIsolation = Boolean(manualLogin && reusedChrome);
483
+ const connection = await connectWithNewTab(chrome.port, logger, config.url, chromeHost, {
484
+ fallbackToDefault: !strictTabIsolation,
485
+ retries: strictTabIsolation ? 3 : 0,
486
+ retryDelayMs: 500,
487
+ });
488
+ client = connection.client;
489
+ isolatedTargetId = connection.targetId ?? null;
490
+ ownsTarget = true;
491
+ }
492
+ if (tabLease && isolatedTargetId) {
493
+ await tabLease.update({
494
+ chromeHost,
495
+ chromePort: chrome.port,
496
+ chromeTargetId: isolatedTargetId,
497
+ });
498
+ }
211
499
  }
212
500
  catch (error) {
213
501
  const hint = describeDevtoolsFirewallHint(chromeHost, chrome.port);
@@ -288,30 +576,37 @@ export async function runBrowserMode(options) {
288
576
  },
289
577
  });
290
578
  }
291
- const baseUrl = CHATGPT_URL;
292
- // First load the base ChatGPT homepage to satisfy potential interstitials,
293
- // then hop to the requested URL if it differs.
294
- await raceWithDisconnect(navigateToChatGPT(Page, Runtime, baseUrl, logger));
295
- await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
296
- // Learned: login checks must happen on the base domain before jumping into project URLs.
297
- await raceWithDisconnect(waitForLogin({
298
- runtime: Runtime,
299
- logger,
300
- appliedCookies,
301
- manualLogin,
302
- timeoutMs: config.timeoutMs,
303
- }));
304
- if (config.url !== baseUrl) {
305
- await raceWithDisconnect(navigateToPromptReadyWithFallback(Page, Runtime, {
306
- url: config.url,
307
- fallbackUrl: baseUrl,
308
- timeoutMs: config.inputTimeoutMs,
309
- headless: config.headless,
310
- logger,
311
- }));
579
+ if (config.browserTabRef) {
580
+ await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
581
+ await raceWithDisconnect(ensureLoggedIn(Runtime, logger));
582
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
312
583
  }
313
584
  else {
314
- await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
585
+ const baseUrl = CHATGPT_URL;
586
+ // First load the base ChatGPT homepage to satisfy potential interstitials,
587
+ // then hop to the requested URL if it differs.
588
+ await raceWithDisconnect(navigateToChatGPT(Page, Runtime, baseUrl, logger));
589
+ await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
590
+ // Learned: login checks must happen on the base domain before jumping into project URLs.
591
+ await raceWithDisconnect(waitForLogin({
592
+ runtime: Runtime,
593
+ logger,
594
+ appliedCookies,
595
+ manualLogin,
596
+ timeoutMs: config.timeoutMs,
597
+ }));
598
+ if (config.url !== baseUrl) {
599
+ await raceWithDisconnect(navigateToPromptReadyWithFallback(Page, Runtime, {
600
+ url: config.url,
601
+ fallbackUrl: baseUrl,
602
+ timeoutMs: config.inputTimeoutMs,
603
+ headless: config.headless,
604
+ logger,
605
+ }));
606
+ }
607
+ else {
608
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
609
+ }
315
610
  }
316
611
  logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`);
317
612
  const captureRuntimeSnapshot = async () => {
@@ -413,18 +708,37 @@ export async function runBrowserMode(options) {
413
708
  else if (modelStrategy === "ignore") {
414
709
  logger("Model picker: skipped (strategy=ignore)");
415
710
  }
416
- // Handle thinking time selection if specified
711
+ const deepResearch = config.researchMode === "deep";
712
+ // Handle thinking time selection if specified. Deep Research owns its own effort flow.
417
713
  const thinkingTime = config.thinkingTime;
418
- if (thinkingTime) {
419
- await raceWithDisconnect(withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
714
+ if (thinkingTime && !deepResearch) {
715
+ if (shouldSkipThinkingTimeSelection(config.desiredModel, thinkingTime)) {
716
+ logger("Thinking time: Pro Extended (via model selection)");
717
+ }
718
+ else {
719
+ await raceWithDisconnect(withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
720
+ retries: 2,
721
+ delayMs: 300,
722
+ onRetry: (attempt, error) => {
723
+ if (options.verbose) {
724
+ logger(`[retry] Thinking time (${thinkingTime}) attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
725
+ }
726
+ },
727
+ }));
728
+ }
729
+ }
730
+ if (deepResearch) {
731
+ await raceWithDisconnect(withRetries(() => activateDeepResearch(Runtime, Input, logger), {
420
732
  retries: 2,
421
- delayMs: 300,
733
+ delayMs: 500,
422
734
  onRetry: (attempt, error) => {
423
735
  if (options.verbose) {
424
- logger(`[retry] Thinking time (${thinkingTime}) attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
736
+ logger(`[retry] Deep Research activation attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
425
737
  }
426
738
  },
427
739
  }));
740
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
741
+ logger(`Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`);
428
742
  }
429
743
  const profileLockTimeoutMs = manualLogin ? (config.profileLockTimeoutMs ?? 0) : 0;
430
744
  let profileLock = null;
@@ -448,6 +762,8 @@ export async function runBrowserMode(options) {
448
762
  const baselineAssistantText = typeof baselineSnapshot?.text === "string" ? baselineSnapshot.text.trim() : "";
449
763
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
450
764
  let inputOnlyAttachments = false;
765
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
766
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
451
767
  if (submissionAttachments.length > 0) {
452
768
  if (!DOM) {
453
769
  throw new Error("Chrome DOM domain unavailable while uploading attachments.");
@@ -540,7 +856,56 @@ export async function runBrowserMode(options) {
540
856
  finally {
541
857
  await releaseProfileLockIfHeld();
542
858
  }
543
- stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
859
+ const imageArtifactMinTurnIndex = baselineTurns;
860
+ if (deepResearch) {
861
+ await raceWithDisconnect(waitForResearchPlanAutoConfirm(Runtime, logger));
862
+ const researchResult = await raceWithDisconnect(waitForDeepResearchCompletion(Runtime, logger, config.timeoutMs, baselineTurns, Page, client));
863
+ await updateConversationHint("post-deep-research", 15_000).catch(() => false);
864
+ runStatus = "complete";
865
+ const durationMs = Date.now() - startedAt;
866
+ const tokens = estimateTokenCount(researchResult.text);
867
+ const reportArtifact = await saveOptionalArtifact(() => saveDeepResearchReportArtifact({
868
+ sessionId: options.sessionId,
869
+ reportMarkdown: researchResult.text,
870
+ conversationUrl: lastUrl,
871
+ logger,
872
+ }), logger);
873
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
874
+ sessionId: options.sessionId,
875
+ prompt: promptText,
876
+ answerMarkdown: researchResult.text,
877
+ conversationUrl: lastUrl,
878
+ artifacts: appendArtifacts(undefined, [reportArtifact]),
879
+ logger,
880
+ }), logger);
881
+ const savedArtifacts = appendArtifacts(undefined, [reportArtifact, transcriptArtifact]);
882
+ const archive = await maybeArchiveCompletedConversation({
883
+ Runtime,
884
+ logger,
885
+ config,
886
+ conversationUrl: lastUrl,
887
+ followUpCount: 0,
888
+ requiredArtifactsSaved: Boolean(reportArtifact && transcriptArtifact),
889
+ });
890
+ return {
891
+ answerText: researchResult.text,
892
+ answerMarkdown: researchResult.text,
893
+ answerHtml: researchResult.html,
894
+ artifacts: savedArtifacts,
895
+ archive,
896
+ tookMs: durationMs,
897
+ answerTokens: tokens,
898
+ answerChars: researchResult.text.length,
899
+ chromePid: chrome.pid,
900
+ chromePort: chrome.port,
901
+ chromeHost,
902
+ userDataDir,
903
+ chromeTargetId: lastTargetId,
904
+ tabUrl: lastUrl,
905
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
906
+ controllerPid: process.pid,
907
+ };
908
+ }
544
909
  // Helper to normalize text for echo detection (collapse whitespace, lowercase)
545
910
  const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, " ").trim();
546
911
  const expectedConversationId = () => lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -571,7 +936,19 @@ export async function runBrowserMode(options) {
571
936
  }
572
937
  return null;
573
938
  };
574
- let answer;
939
+ const waitWithThinkingMonitor = async (operation) => {
940
+ stopThinkingMonitor?.();
941
+ stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, {
942
+ intervalMs: options.heartbeatIntervalMs,
943
+ });
944
+ try {
945
+ return await operation();
946
+ }
947
+ finally {
948
+ stopThinkingMonitor?.();
949
+ stopThinkingMonitor = null;
950
+ }
951
+ };
575
952
  const recheckDelayMs = Math.max(0, config.assistantRecheckDelayMs ?? 0);
576
953
  const recheckTimeoutMs = Math.max(0, config.assistantRecheckTimeoutMs ?? 0);
577
954
  const attemptAssistantRecheck = async () => {
@@ -615,172 +992,280 @@ export async function runBrowserMode(options) {
615
992
  });
616
993
  }
617
994
  const timeoutMs = recheckTimeoutMs > 0 ? recheckTimeoutMs : config.timeoutMs;
618
- const rechecked = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()));
995
+ const rechecked = await waitWithThinkingMonitor(() => raceWithDisconnect(waitForAssistantOrGeneratedImageResponse({
996
+ Runtime,
997
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
998
+ timeoutMs,
999
+ logger,
1000
+ minTurnIndex: baselineTurns ?? undefined,
1001
+ expectedConversationId: expectedConversationId(),
1002
+ imageOutputRequested,
1003
+ })));
619
1004
  logger("Recovered assistant response after delayed recheck");
620
1005
  return rechecked;
621
1006
  };
622
- try {
623
- await updateConversationHint("assistant-wait", 15_000).catch(() => false);
624
- answer = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()));
625
- }
626
- catch (error) {
627
- if (isAssistantResponseTimeoutError(error)) {
628
- const rechecked = await attemptAssistantRecheck().catch(() => null);
629
- if (rechecked) {
630
- answer = rechecked;
1007
+ const imageOutputRequested = Boolean(options.generateImagePath ||
1008
+ options.outputPath ||
1009
+ options.generateImage);
1010
+ const captureAssistantTurn = async (turnPrompt, label) => {
1011
+ let turnAnswer;
1012
+ try {
1013
+ await updateConversationHint("assistant-wait", 15_000).catch(() => false);
1014
+ turnAnswer = await waitWithThinkingMonitor(() => raceWithDisconnect(waitForAssistantOrGeneratedImageResponse({
1015
+ Runtime,
1016
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
1017
+ timeoutMs: config.timeoutMs,
1018
+ logger,
1019
+ minTurnIndex: baselineTurns ?? undefined,
1020
+ expectedConversationId: expectedConversationId(),
1021
+ imageOutputRequested,
1022
+ })));
1023
+ }
1024
+ catch (error) {
1025
+ if (isAssistantResponseTimeoutError(error)) {
1026
+ const rechecked = await attemptAssistantRecheckOrRethrow(attemptAssistantRecheck);
1027
+ if (rechecked) {
1028
+ turnAnswer = rechecked;
1029
+ }
1030
+ else {
1031
+ await updateConversationHint("assistant-timeout", 15_000).catch(() => false);
1032
+ await captureRuntimeSnapshot().catch(() => undefined);
1033
+ const diagnostics = await captureBrowserDiagnostics(Runtime, logger, "assistant-timeout", {
1034
+ Page,
1035
+ sessionId: options.sessionId,
1036
+ }).catch(() => undefined);
1037
+ const runtime = {
1038
+ chromePid: chrome.pid,
1039
+ chromePort: chrome.port,
1040
+ chromeHost,
1041
+ userDataDir,
1042
+ chromeTargetId: lastTargetId,
1043
+ tabUrl: lastUrl,
1044
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1045
+ controllerPid: process.pid,
1046
+ };
1047
+ throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime, diagnostics }, error);
1048
+ }
631
1049
  }
632
1050
  else {
633
- await updateConversationHint("assistant-timeout", 15_000).catch(() => false);
634
- await captureRuntimeSnapshot().catch(() => undefined);
635
- const runtime = {
636
- chromePid: chrome.pid,
637
- chromePort: chrome.port,
638
- chromeHost,
639
- userDataDir,
640
- chromeTargetId: lastTargetId,
641
- tabUrl: lastUrl,
642
- conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
643
- controllerPid: process.pid,
644
- };
645
- throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime }, error);
1051
+ throw error;
646
1052
  }
647
1053
  }
648
- else {
649
- throw error;
650
- }
651
- }
652
- // Ensure we store the final conversation URL even if the UI updated late.
653
- await updateConversationHint("post-response", 15_000);
654
- const baselineNormalized = baselineAssistantText
655
- ? normalizeForComparison(baselineAssistantText)
656
- : "";
657
- if (baselineNormalized) {
658
- const normalizedAnswer = normalizeForComparison(answer.text ?? "");
659
- const baselinePrefix = baselineNormalized.length >= 80
660
- ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
1054
+ // Ensure we store the final conversation URL even if the UI updated late.
1055
+ await updateConversationHint("post-response", 15_000);
1056
+ const baselineNormalized = baselineAssistantText
1057
+ ? normalizeForComparison(baselineAssistantText)
661
1058
  : "";
662
- const isBaseline = normalizedAnswer === baselineNormalized ||
663
- (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
664
- if (isBaseline) {
665
- logger("Detected stale assistant response; waiting for new response...");
666
- const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
667
- if (refreshed) {
668
- answer = refreshed;
1059
+ if (baselineNormalized) {
1060
+ const normalizedAnswer = normalizeForComparison(turnAnswer.text ?? "");
1061
+ const baselinePrefix = baselineNormalized.length >= 80
1062
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
1063
+ : "";
1064
+ const isBaseline = normalizedAnswer === baselineNormalized ||
1065
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
1066
+ if (isBaseline) {
1067
+ logger("Detected stale assistant response; waiting for new response...");
1068
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
1069
+ if (refreshed) {
1070
+ turnAnswer = refreshed;
1071
+ }
669
1072
  }
670
1073
  }
671
- }
672
- answerText = answer.text;
673
- answerHtml = answer.html ?? "";
674
- const copiedMarkdown = await raceWithDisconnect(withRetries(async () => {
675
- const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger);
676
- if (!attempt) {
677
- throw new Error("copy-missing");
678
- }
679
- return attempt;
680
- }, {
681
- retries: 2,
682
- delayMs: 350,
683
- onRetry: (attempt, error) => {
684
- if (options.verbose) {
685
- logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1074
+ let turnAnswerText = turnAnswer.text;
1075
+ const turnAnswerHtml = turnAnswer.html ?? "";
1076
+ const copiedMarkdown = await raceWithDisconnect(withRetries(async () => {
1077
+ const attempt = await captureAssistantMarkdown(Runtime, turnAnswer.meta, logger);
1078
+ if (!attempt) {
1079
+ throw new Error("copy-missing");
1080
+ }
1081
+ return attempt;
1082
+ }, {
1083
+ retries: 2,
1084
+ delayMs: 350,
1085
+ onRetry: (attempt, error) => {
1086
+ if (options.verbose) {
1087
+ logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1088
+ }
1089
+ },
1090
+ })).catch(() => null);
1091
+ let turnAnswerMarkdown = copiedMarkdown ?? turnAnswerText;
1092
+ const promptEchoMatcher = buildPromptEchoMatcher(turnPrompt);
1093
+ ({ answerText: turnAnswerText, answerMarkdown: turnAnswerMarkdown } =
1094
+ await maybeRecoverLongAssistantResponse({
1095
+ runtime: Runtime,
1096
+ baselineTurns,
1097
+ answerText: turnAnswerText,
1098
+ answerMarkdown: turnAnswerMarkdown,
1099
+ logger,
1100
+ allowMarkdownUpdate: !copiedMarkdown,
1101
+ }));
1102
+ // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
1103
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1104
+ const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
1105
+ if (finalText && finalText !== turnPrompt.trim()) {
1106
+ const trimmedMarkdown = turnAnswerMarkdown.trim();
1107
+ const finalIsEcho = promptEchoMatcher ? promptEchoMatcher.isEcho(finalText) : false;
1108
+ const lengthDelta = finalText.length - trimmedMarkdown.length;
1109
+ const missingCopy = !copiedMarkdown && lengthDelta >= 0;
1110
+ const likelyTruncatedCopy = copiedMarkdown &&
1111
+ trimmedMarkdown.length > 0 &&
1112
+ lengthDelta >= Math.max(12, Math.floor(trimmedMarkdown.length * 0.75));
1113
+ if ((missingCopy || likelyTruncatedCopy) && !finalIsEcho && finalText !== trimmedMarkdown) {
1114
+ logger("Refreshed assistant response via final DOM snapshot");
1115
+ turnAnswerText = finalText;
1116
+ turnAnswerMarkdown = finalText;
686
1117
  }
687
- },
688
- })).catch(() => null);
689
- answerMarkdown = copiedMarkdown ?? answerText;
690
- const promptEchoMatcher = buildPromptEchoMatcher(promptText);
691
- ({ answerText, answerMarkdown } = await maybeRecoverLongAssistantResponse({
692
- runtime: Runtime,
693
- baselineTurns,
694
- answerText,
695
- answerMarkdown,
696
- logger,
697
- allowMarkdownUpdate: !copiedMarkdown,
698
- }));
699
- // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
700
- const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
701
- const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
702
- if (finalText && finalText !== promptText.trim()) {
703
- const trimmedMarkdown = answerMarkdown.trim();
704
- const finalIsEcho = promptEchoMatcher ? promptEchoMatcher.isEcho(finalText) : false;
705
- const lengthDelta = finalText.length - trimmedMarkdown.length;
706
- const missingCopy = !copiedMarkdown && lengthDelta >= 0;
707
- const likelyTruncatedCopy = copiedMarkdown &&
708
- trimmedMarkdown.length > 0 &&
709
- lengthDelta >= Math.max(12, Math.floor(trimmedMarkdown.length * 0.75));
710
- if ((missingCopy || likelyTruncatedCopy) && !finalIsEcho && finalText !== trimmedMarkdown) {
711
- logger("Refreshed assistant response via final DOM snapshot");
712
- answerText = finalText;
713
- answerMarkdown = finalText;
714
1118
  }
715
- }
716
- // Detect prompt echo using normalized comparison (whitespace-insensitive).
717
- const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
718
- text: "Aligned assistant response text to copied markdown after prompt echo",
719
- markdown: "Aligned assistant markdown to response text after prompt echo",
720
- });
721
- answerText = alignedEcho.answerText;
722
- answerMarkdown = alignedEcho.answerMarkdown;
723
- const isPromptEcho = alignedEcho.isEcho;
724
- if (isPromptEcho) {
725
- logger("Detected prompt echo in response; waiting for actual assistant response...");
726
- const deadline = Date.now() + 15_000;
727
- let bestText = null;
728
- let stableCount = 0;
729
- while (Date.now() < deadline) {
730
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
731
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
732
- const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
733
- if (!isStillEcho) {
734
- if (!bestText || text.length > bestText.length) {
1119
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
1120
+ const alignedEcho = alignPromptEchoPair(turnAnswerText, turnAnswerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
1121
+ text: "Aligned assistant response text to copied markdown after prompt echo",
1122
+ markdown: "Aligned assistant markdown to response text after prompt echo",
1123
+ });
1124
+ turnAnswerText = alignedEcho.answerText;
1125
+ turnAnswerMarkdown = alignedEcho.answerMarkdown;
1126
+ const isPromptEcho = alignedEcho.isEcho;
1127
+ if (isPromptEcho) {
1128
+ logger("Detected prompt echo in response; waiting for actual assistant response...");
1129
+ const deadline = Date.now() + 15_000;
1130
+ let bestText = null;
1131
+ let stableCount = 0;
1132
+ while (Date.now() < deadline) {
1133
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1134
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1135
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
1136
+ if (!isStillEcho) {
1137
+ if (!bestText || text.length > bestText.length) {
1138
+ bestText = text;
1139
+ stableCount = 0;
1140
+ }
1141
+ else if (text === bestText) {
1142
+ stableCount += 1;
1143
+ }
1144
+ if (stableCount >= 2) {
1145
+ break;
1146
+ }
1147
+ }
1148
+ await new Promise((resolve) => setTimeout(resolve, 300));
1149
+ }
1150
+ if (bestText) {
1151
+ logger("Recovered assistant response after detecting prompt echo");
1152
+ turnAnswerText = bestText;
1153
+ turnAnswerMarkdown = bestText;
1154
+ }
1155
+ }
1156
+ const minAnswerChars = 16;
1157
+ if (turnAnswerText.trim().length > 0 && turnAnswerText.trim().length < minAnswerChars) {
1158
+ const deadline = Date.now() + 12_000;
1159
+ let bestText = turnAnswerText.trim();
1160
+ let stableCycles = 0;
1161
+ while (Date.now() < deadline) {
1162
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1163
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1164
+ if (text && text.length > bestText.length) {
735
1165
  bestText = text;
736
- stableCount = 0;
1166
+ stableCycles = 0;
737
1167
  }
738
- else if (text === bestText) {
739
- stableCount += 1;
1168
+ else {
1169
+ stableCycles += 1;
740
1170
  }
741
- if (stableCount >= 2) {
1171
+ if (stableCycles >= 3 && bestText.length >= minAnswerChars) {
742
1172
  break;
743
1173
  }
1174
+ await delay(400);
744
1175
  }
745
- await new Promise((resolve) => setTimeout(resolve, 300));
746
- }
747
- if (bestText) {
748
- logger("Recovered assistant response after detecting prompt echo");
749
- answerText = bestText;
750
- answerMarkdown = bestText;
751
- }
752
- }
753
- const minAnswerChars = 16;
754
- if (answerText.trim().length > 0 && answerText.trim().length < minAnswerChars) {
755
- const deadline = Date.now() + 12_000;
756
- let bestText = answerText.trim();
757
- let stableCycles = 0;
758
- while (Date.now() < deadline) {
759
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
760
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
761
- if (text && text.length > bestText.length) {
762
- bestText = text;
763
- stableCycles = 0;
764
- }
765
- else {
766
- stableCycles += 1;
767
- }
768
- if (stableCycles >= 3 && bestText.length >= minAnswerChars) {
769
- break;
1176
+ if (bestText.length > turnAnswerText.trim().length) {
1177
+ logger("Refreshed short assistant response from latest DOM snapshot");
1178
+ turnAnswerText = bestText;
1179
+ turnAnswerMarkdown = bestText;
770
1180
  }
771
- await delay(400);
772
1181
  }
773
- if (bestText.length > answerText.trim().length) {
774
- logger("Refreshed short assistant response from latest DOM snapshot");
775
- answerText = bestText;
776
- answerMarkdown = bestText;
1182
+ return {
1183
+ label,
1184
+ answerText: turnAnswerText,
1185
+ answerMarkdown: turnAnswerMarkdown,
1186
+ answerHtml: turnAnswerHtml,
1187
+ };
1188
+ };
1189
+ const turns = [];
1190
+ const initialTurn = await captureAssistantTurn(promptText, "Initial response");
1191
+ turns.push(initialTurn);
1192
+ answerText = initialTurn.answerText;
1193
+ answerMarkdown = initialTurn.answerMarkdown;
1194
+ answerHtml = initialTurn.answerHtml;
1195
+ for (let index = 0; index < followUpPrompts.length; index += 1) {
1196
+ const followUpPrompt = followUpPrompts[index];
1197
+ logger(`[browser] Sending follow-up ${index + 1}/${followUpPrompts.length}`);
1198
+ await acquireProfileLockIfNeeded();
1199
+ try {
1200
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
1201
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
1202
+ const submission = await runSubmissionWithRecovery({
1203
+ prompt: followUpPrompt,
1204
+ attachments: [],
1205
+ submit: (submissionPrompt, submissionAttachments) => raceWithDisconnect(submitOnce(submissionPrompt, submissionAttachments)),
1206
+ reloadPromptComposer,
1207
+ prepareFallbackSubmission: async () => {
1208
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
1209
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
1210
+ },
1211
+ logger,
1212
+ });
1213
+ baselineTurns = submission.baselineTurns;
1214
+ baselineAssistantText = submission.baselineAssistantText;
1215
+ }
1216
+ finally {
1217
+ await releaseProfileLockIfHeld();
777
1218
  }
1219
+ const turn = await captureAssistantTurn(followUpPrompt, `Follow-up ${index + 1}`);
1220
+ turns.push({ ...turn, prompt: followUpPrompt });
1221
+ answerText = turn.answerText;
1222
+ answerMarkdown = turn.answerMarkdown;
1223
+ answerHtml = turn.answerHtml;
1224
+ }
1225
+ if (turns.length > 1) {
1226
+ const formatted = formatBrowserTurnTranscript(turns);
1227
+ answerText = formatted.answerText;
1228
+ answerMarkdown = formatted.answerMarkdown;
1229
+ answerHtml = "";
778
1230
  }
779
1231
  if (connectionClosedUnexpectedly) {
780
1232
  // Bail out on mid-run disconnects so the session stays reattachable.
781
1233
  throw new Error("Chrome disconnected before completion");
782
1234
  }
783
- stopThinkingMonitor?.();
1235
+ const imageArtifacts = await collectGeneratedImageArtifacts({
1236
+ Runtime,
1237
+ Network,
1238
+ logger,
1239
+ minTurnIndex: imageArtifactMinTurnIndex,
1240
+ sessionId: options.sessionId,
1241
+ generateImagePath: options.generateImagePath,
1242
+ outputPath: options.outputPath,
1243
+ answerText,
1244
+ waitTimeoutMs: options.config?.timeoutMs,
1245
+ });
1246
+ answerText = imageArtifacts.answerText || answerText;
1247
+ if (imageArtifacts.markdownSuffix) {
1248
+ answerMarkdown += imageArtifacts.markdownSuffix;
1249
+ }
1250
+ const savedImageArtifacts = appendArtifacts(undefined, imageArtifacts.savedImages);
1251
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
1252
+ sessionId: options.sessionId,
1253
+ prompt: promptText,
1254
+ answerMarkdown,
1255
+ conversationUrl: lastUrl,
1256
+ artifacts: savedImageArtifacts,
1257
+ logger,
1258
+ }), logger);
1259
+ const savedArtifacts = appendArtifacts(savedImageArtifacts, [transcriptArtifact]);
1260
+ const archive = await maybeArchiveCompletedConversation({
1261
+ Runtime,
1262
+ logger,
1263
+ config,
1264
+ conversationUrl: lastUrl,
1265
+ followUpCount: followUpPrompts.length,
1266
+ requiredArtifactsSaved: Boolean(transcriptArtifact) &&
1267
+ imageArtifacts.savedImages.length === imageArtifacts.imageCount,
1268
+ });
784
1269
  runStatus = "complete";
785
1270
  const durationMs = Date.now() - startedAt;
786
1271
  const answerChars = answerText.length;
@@ -789,6 +1274,10 @@ export async function runBrowserMode(options) {
789
1274
  answerText,
790
1275
  answerMarkdown,
791
1276
  answerHtml: answerHtml.length > 0 ? answerHtml : undefined,
1277
+ artifacts: savedArtifacts,
1278
+ generatedImages: imageArtifacts.generatedImages,
1279
+ savedImages: imageArtifacts.savedImages,
1280
+ archive,
792
1281
  tookMs: durationMs,
793
1282
  answerTokens,
794
1283
  answerChars,
@@ -798,15 +1287,16 @@ export async function runBrowserMode(options) {
798
1287
  userDataDir,
799
1288
  chromeTargetId: lastTargetId,
800
1289
  tabUrl: lastUrl,
1290
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
801
1291
  controllerPid: process.pid,
802
1292
  };
803
1293
  }
804
1294
  catch (error) {
805
1295
  const normalizedError = error instanceof Error ? error : new Error(String(error));
806
- stopThinkingMonitor?.();
807
1296
  const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError);
808
1297
  connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed;
809
- if (shouldPreserveBrowserOnError(normalizedError, config.headless)) {
1298
+ const preservedErrorKind = classifyPreservedBrowserError(normalizedError, config.headless);
1299
+ if (preservedErrorKind === "cloudflare-challenge") {
810
1300
  preserveBrowserOnError = true;
811
1301
  const runtime = {
812
1302
  chromePid: chrome.pid,
@@ -828,6 +1318,12 @@ export async function runBrowserMode(options) {
828
1318
  reuseProfileHint,
829
1319
  }, normalizedError);
830
1320
  }
1321
+ if (preservedErrorKind === "reattachable-capture") {
1322
+ preserveBrowserOnError = true;
1323
+ await emitRuntimeHint();
1324
+ logger("Assistant capture incomplete; leaving browser open for reattach.");
1325
+ throw normalizedError;
1326
+ }
831
1327
  if (!socketClosed) {
832
1328
  logger(`Failed to complete ChatGPT run: ${normalizedError.message}`);
833
1329
  if ((config.debug || process.env.CHATGPT_DEVTOOLS_TRACE === "1") && normalizedError.stack) {
@@ -865,16 +1361,70 @@ export async function runBrowserMode(options) {
865
1361
  // Close the isolated tab once the response has been fully captured to prevent
866
1362
  // tab accumulation across repeated runs. Keep the tab open on incomplete runs
867
1363
  // so reattach can recover the response.
868
- if (runStatus === "complete" && isolatedTargetId && chrome?.port) {
1364
+ if (shouldCloseOwnedRunTargetAfterRun({
1365
+ runStatus,
1366
+ ownsTarget,
1367
+ keepBrowser: effectiveKeepBrowser,
1368
+ }) &&
1369
+ isolatedTargetId &&
1370
+ chrome?.port) {
869
1371
  await closeTab(chrome.port, isolatedTargetId, logger, chromeHost).catch(() => undefined);
870
1372
  }
1373
+ let keepBrowserOpen = effectiveKeepBrowser || preserveBrowserOnError;
1374
+ let cleanupProfileLock = null;
1375
+ let terminatedRecordedChrome = false;
1376
+ let otherActiveBrowserTabLeases = null;
1377
+ const hasOtherActiveLeases = async () => {
1378
+ if (!manualLogin || !tabLease) {
1379
+ return false;
1380
+ }
1381
+ if (otherActiveBrowserTabLeases === null) {
1382
+ otherActiveBrowserTabLeases = await hasOtherActiveBrowserTabLeases(userDataDir, tabLease.id);
1383
+ }
1384
+ return otherActiveBrowserTabLeases;
1385
+ };
1386
+ if (runStatus === "complete" &&
1387
+ manualLogin &&
1388
+ !connectionClosedUnexpectedly &&
1389
+ chrome?.port &&
1390
+ ownsTarget) {
1391
+ const otherLeasesActive = await hasOtherActiveLeases().catch(() => true);
1392
+ if (!otherLeasesActive) {
1393
+ await closeBlankChromeTabs(chrome.port, logger, chromeHost, {
1394
+ excludeTargetIds: [isolatedTargetId, lastTargetId],
1395
+ }).catch(() => undefined);
1396
+ }
1397
+ }
1398
+ if (!keepBrowserOpen && manualLogin && tabLease) {
1399
+ const cleanupLockTimeoutMs = Math.max(0, config.profileLockTimeoutMs ?? 0);
1400
+ if (cleanupLockTimeoutMs > 0) {
1401
+ cleanupProfileLock = await acquireProfileRunLock(userDataDir, {
1402
+ timeoutMs: cleanupLockTimeoutMs,
1403
+ logger,
1404
+ sessionId: options.sessionId,
1405
+ }).catch(() => null);
1406
+ }
1407
+ keepBrowserOpen = await hasOtherActiveLeases().catch(() => false);
1408
+ if (keepBrowserOpen) {
1409
+ logger("[browser] Other ChatGPT tab leases still active; leaving shared Chrome running.");
1410
+ }
1411
+ else if (reusedChrome && !connectionClosedUnexpectedly) {
1412
+ terminatedRecordedChrome = await terminateRecordedChromeForProfile(userDataDir, logger).catch(() => false);
1413
+ }
1414
+ }
1415
+ if (tabLease) {
1416
+ const handle = tabLease;
1417
+ tabLease = null;
1418
+ await handle.release().catch(() => undefined);
1419
+ }
871
1420
  removeDialogHandler?.();
872
1421
  removeTerminationHooks?.();
873
- const keepBrowserOpen = effectiveKeepBrowser || preserveBrowserOnError;
874
1422
  if (!keepBrowserOpen) {
875
1423
  if (!connectionClosedUnexpectedly) {
876
1424
  try {
877
- await chrome.kill();
1425
+ if (!terminatedRecordedChrome) {
1426
+ await chrome.kill();
1427
+ }
878
1428
  }
879
1429
  catch {
880
1430
  // ignore kill failures
@@ -898,8 +1448,16 @@ export async function runBrowserMode(options) {
898
1448
  logger(`Cleanup ${runStatus} • ${totalSeconds.toFixed(1)}s total`);
899
1449
  }
900
1450
  }
901
- else if (!connectionClosedUnexpectedly) {
902
- logger(`Chrome left running on port ${chrome.port} with profile ${userDataDir}`);
1451
+ else {
1452
+ detachKeptChromeProcess(chrome);
1453
+ if (!connectionClosedUnexpectedly) {
1454
+ logger(`Chrome left running on port ${chrome.port} with profile ${userDataDir}`);
1455
+ }
1456
+ }
1457
+ if (cleanupProfileLock) {
1458
+ const handle = cleanupProfileLock;
1459
+ cleanupProfileLock = null;
1460
+ await handle.release().catch(() => undefined);
903
1461
  }
904
1462
  }
905
1463
  }
@@ -1025,6 +1583,51 @@ async function _assertNavigatedToHttp(runtime, _logger, timeoutMs = 10_000) {
1025
1583
  details: { url: lastUrl || "(empty)" },
1026
1584
  });
1027
1585
  }
1586
+ function detachKeptChromeProcess(chrome) {
1587
+ try {
1588
+ chrome.process?.unref();
1589
+ }
1590
+ catch {
1591
+ // Best-effort only; cleanup should not mask the original browser result.
1592
+ }
1593
+ }
1594
+ async function acquireManualLoginChromeForRun(userDataDir, config, logger, sessionId, deps = {}) {
1595
+ const maybeReuse = deps.maybeReuse ?? maybeReuseRunningChrome;
1596
+ const launch = deps.launch ?? launchChrome;
1597
+ const lockTimeoutMs = Math.max(0, config.profileLockTimeoutMs ?? 0);
1598
+ let launchLock = null;
1599
+ if (lockTimeoutMs > 0) {
1600
+ launchLock = await acquireProfileRunLock(userDataDir, {
1601
+ timeoutMs: lockTimeoutMs,
1602
+ logger,
1603
+ sessionId,
1604
+ });
1605
+ }
1606
+ try {
1607
+ const reusedChrome = await maybeReuse(userDataDir, logger, {
1608
+ waitForPortMs: config.reuseChromeWaitMs,
1609
+ });
1610
+ const chrome = reusedChrome ??
1611
+ (await launch({
1612
+ ...config,
1613
+ remoteChrome: config.remoteChrome,
1614
+ }, userDataDir, logger));
1615
+ // Persist while the launch lock is still held so parallel callers reuse
1616
+ // this Chrome instead of racing to start another one on the same profile.
1617
+ if (chrome.port) {
1618
+ await writeDevToolsActivePort(userDataDir, chrome.port);
1619
+ if (!reusedChrome && chrome.pid) {
1620
+ await writeChromePid(userDataDir, chrome.pid);
1621
+ }
1622
+ }
1623
+ return { chrome, reusedChrome };
1624
+ }
1625
+ finally {
1626
+ if (launchLock) {
1627
+ await launchLock.release().catch(() => undefined);
1628
+ }
1629
+ }
1630
+ }
1028
1631
  async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1029
1632
  const waitForPortMs = Math.max(0, options.waitForPortMs ?? 0);
1030
1633
  let port = await readDevToolsPort(userDataDir);
@@ -1036,8 +1639,30 @@ async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1036
1639
  port = await readDevToolsPort(userDataDir);
1037
1640
  }
1038
1641
  }
1039
- if (!port)
1040
- return null;
1642
+ let pid = await readChromePid(userDataDir);
1643
+ if (!port) {
1644
+ const discovered = await findRunningChromeDebugTargetForProfile(userDataDir);
1645
+ if (!discovered)
1646
+ return null;
1647
+ const discoveredProbe = await (options.probe ?? verifyDevToolsReachable)({
1648
+ port: discovered.port,
1649
+ });
1650
+ if (!discoveredProbe.ok) {
1651
+ logger(`Discovered Chrome for ${userDataDir} on port ${discovered.port} but it was unreachable (${discoveredProbe.error}); launching new Chrome.`);
1652
+ return null;
1653
+ }
1654
+ await writeDevToolsActivePort(userDataDir, discovered.port);
1655
+ await writeChromePid(userDataDir, discovered.pid);
1656
+ port = discovered.port;
1657
+ pid = discovered.pid;
1658
+ logger(`Discovered running Chrome for ${userDataDir}; reusing (DevTools port ${port}, pid ${pid})`);
1659
+ return {
1660
+ port,
1661
+ pid,
1662
+ kill: async () => { },
1663
+ process: undefined,
1664
+ };
1665
+ }
1041
1666
  const probe = await (options.probe ?? verifyDevToolsReachable)({ port });
1042
1667
  if (!probe.ok) {
1043
1668
  logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${probe.error}); launching new Chrome.`);
@@ -1045,7 +1670,6 @@ async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1045
1670
  await cleanupStaleProfileState(userDataDir, logger, { lockRemovalMode: "if_oracle_pid_dead" });
1046
1671
  return null;
1047
1672
  }
1048
- const pid = await readChromePid(userDataDir);
1049
1673
  logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ""})`);
1050
1674
  return {
1051
1675
  port,
@@ -1063,7 +1687,10 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1063
1687
  logger(`Connecting to remote Chrome at ${host}:${port}`);
1064
1688
  let client = null;
1065
1689
  let remoteTargetId = null;
1690
+ let tabLease = null;
1066
1691
  let lastUrl;
1692
+ let attachedExistingTab = false;
1693
+ let ownsTarget = true;
1067
1694
  const runtimeHintCb = options.runtimeHintCb;
1068
1695
  const emitRuntimeHint = async () => {
1069
1696
  if (!runtimeHintCb)
@@ -1072,10 +1699,19 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1072
1699
  await runtimeHintCb({
1073
1700
  chromePort: port,
1074
1701
  chromeHost: host,
1702
+ chromeBrowserWSEndpoint: browserWSEndpoint,
1703
+ chromeProfileRoot,
1075
1704
  chromeTargetId: remoteTargetId ?? undefined,
1076
1705
  tabUrl: lastUrl,
1706
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1077
1707
  controllerPid: process.pid,
1078
1708
  });
1709
+ await tabLease?.update({
1710
+ chromeHost: host,
1711
+ chromePort: port,
1712
+ chromeTargetId: remoteTargetId ?? undefined,
1713
+ tabUrl: lastUrl,
1714
+ });
1079
1715
  }
1080
1716
  catch (error) {
1081
1717
  const message = error instanceof Error ? error.message : String(error);
@@ -1087,12 +1723,55 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1087
1723
  let answerMarkdown = "";
1088
1724
  let answerHtml = "";
1089
1725
  let connectionClosedUnexpectedly = false;
1726
+ let runStatus = "attempted";
1090
1727
  let stopThinkingMonitor = null;
1091
1728
  let removeDialogHandler = null;
1729
+ let connection = null;
1730
+ const browserWSEndpoint = config.remoteChromeBrowserWSEndpoint ?? undefined;
1731
+ const chromeProfileRoot = config.remoteChromeProfileRoot ?? undefined;
1092
1732
  try {
1093
- const connection = await connectToRemoteChrome(host, port, logger, config.url);
1094
- client = connection.client;
1095
- remoteTargetId = connection.targetId ?? null;
1733
+ const remoteLeaseProfileDir = config.browserTabRef
1734
+ ? null
1735
+ : resolveRemoteTabLeaseProfileDir(config);
1736
+ if (remoteLeaseProfileDir) {
1737
+ await mkdir(remoteLeaseProfileDir, { recursive: true });
1738
+ tabLease = await acquireBrowserTabLease(remoteLeaseProfileDir, {
1739
+ maxConcurrentTabs: config.maxConcurrentTabs,
1740
+ timeoutMs: config.timeoutMs,
1741
+ logger,
1742
+ sessionId: options.sessionId,
1743
+ chromeHost: host,
1744
+ chromePort: port,
1745
+ });
1746
+ }
1747
+ if (config.browserTabRef) {
1748
+ const attached = await connectToExistingChatGptTab({
1749
+ host,
1750
+ port,
1751
+ ref: config.browserTabRef,
1752
+ });
1753
+ client = attached.client;
1754
+ remoteTargetId = attached.targetId ?? null;
1755
+ lastUrl = attached.tab.url || lastUrl;
1756
+ attachedExistingTab = true;
1757
+ ownsTarget = false;
1758
+ logger(`Attached to existing remote ChatGPT tab ${attached.targetId}${attached.tab.url ? ` (${attached.tab.url})` : ""}`);
1759
+ }
1760
+ else {
1761
+ connection = await connectToRemoteChrome(host, port, logger, config.url, browserWSEndpoint, {
1762
+ approvalWaitMs: config.attachRunning && browserWSEndpoint ? 20_000 : undefined,
1763
+ });
1764
+ client = connection.client;
1765
+ remoteTargetId = connection.targetId ?? null;
1766
+ ownsTarget = true;
1767
+ }
1768
+ if (tabLease && remoteTargetId) {
1769
+ await tabLease.update({
1770
+ chromeHost: host,
1771
+ chromePort: port,
1772
+ chromeTargetId: remoteTargetId,
1773
+ });
1774
+ }
1096
1775
  await emitRuntimeHint();
1097
1776
  const markConnectionLost = () => {
1098
1777
  connectionClosedUnexpectedly = true;
@@ -1107,10 +1786,17 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1107
1786
  removeDialogHandler = installJavaScriptDialogAutoDismissal(Page, logger);
1108
1787
  // Skip cookie sync for remote Chrome - it already has cookies
1109
1788
  logger("Skipping cookie sync for remote Chrome (using existing session)");
1110
- await navigateToChatGPT(Page, Runtime, config.url, logger);
1111
- await ensureNotBlocked(Runtime, config.headless, logger);
1112
- await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1113
- await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1789
+ if (!attachedExistingTab) {
1790
+ await navigateToChatGPT(Page, Runtime, config.url, logger);
1791
+ await ensureNotBlocked(Runtime, config.headless, logger);
1792
+ await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1793
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1794
+ }
1795
+ else {
1796
+ await ensureNotBlocked(Runtime, config.headless, logger);
1797
+ await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1798
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1799
+ }
1114
1800
  logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`);
1115
1801
  try {
1116
1802
  const { result } = await Runtime.evaluate({
@@ -1142,23 +1828,44 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1142
1828
  else if (modelStrategy === "ignore") {
1143
1829
  logger("Model picker: skipped (strategy=ignore)");
1144
1830
  }
1145
- // Handle thinking time selection if specified
1831
+ const deepResearch = config.researchMode === "deep";
1832
+ // Handle thinking time selection if specified. Deep Research owns its own effort flow.
1146
1833
  const thinkingTime = config.thinkingTime;
1147
- if (thinkingTime) {
1148
- await withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
1834
+ if (thinkingTime && !deepResearch) {
1835
+ if (shouldSkipThinkingTimeSelection(config.desiredModel, thinkingTime)) {
1836
+ logger("Thinking time: Pro Extended (via model selection)");
1837
+ }
1838
+ else {
1839
+ await withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
1840
+ retries: 2,
1841
+ delayMs: 300,
1842
+ onRetry: (attempt, error) => {
1843
+ if (options.verbose) {
1844
+ logger(`[retry] Thinking time (${thinkingTime}) attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1845
+ }
1846
+ },
1847
+ });
1848
+ }
1849
+ }
1850
+ if (deepResearch) {
1851
+ await withRetries(() => activateDeepResearch(Runtime, Input, logger), {
1149
1852
  retries: 2,
1150
- delayMs: 300,
1853
+ delayMs: 500,
1151
1854
  onRetry: (attempt, error) => {
1152
1855
  if (options.verbose) {
1153
- logger(`[retry] Thinking time (${thinkingTime}) attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1856
+ logger(`[retry] Deep Research activation attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1154
1857
  }
1155
1858
  },
1156
1859
  });
1860
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1861
+ logger(`Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`);
1157
1862
  }
1158
1863
  const submitOnce = async (prompt, submissionAttachments) => {
1159
1864
  const baselineSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
1160
1865
  const baselineAssistantText = typeof baselineSnapshot?.text === "string" ? baselineSnapshot.text.trim() : "";
1161
1866
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
1867
+ await clearPromptComposer(Runtime, logger);
1868
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1162
1869
  if (submissionAttachments.length > 0) {
1163
1870
  if (!DOM) {
1164
1871
  throw new Error("Chrome DOM domain unavailable while uploading attachments.");
@@ -1221,7 +1928,54 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1221
1928
  });
1222
1929
  baselineTurns = submission.baselineTurns;
1223
1930
  baselineAssistantText = submission.baselineAssistantText;
1224
- stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
1931
+ const imageArtifactMinTurnIndex = baselineTurns;
1932
+ if (deepResearch) {
1933
+ await waitForResearchPlanAutoConfirm(Runtime, logger);
1934
+ const researchResult = await waitForDeepResearchCompletion(Runtime, logger, config.timeoutMs, baselineTurns, Page, client);
1935
+ await emitRuntimeHint();
1936
+ const durationMs = Date.now() - startedAt;
1937
+ const tokens = estimateTokenCount(researchResult.text);
1938
+ const reportArtifact = await saveOptionalArtifact(() => saveDeepResearchReportArtifact({
1939
+ sessionId: options.sessionId,
1940
+ reportMarkdown: researchResult.text,
1941
+ conversationUrl: lastUrl,
1942
+ logger,
1943
+ }), logger);
1944
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
1945
+ sessionId: options.sessionId,
1946
+ prompt: promptText,
1947
+ answerMarkdown: researchResult.text,
1948
+ conversationUrl: lastUrl,
1949
+ artifacts: appendArtifacts(undefined, [reportArtifact]),
1950
+ logger,
1951
+ }), logger);
1952
+ const savedArtifacts = appendArtifacts(undefined, [reportArtifact, transcriptArtifact]);
1953
+ const archive = await maybeArchiveCompletedConversation({
1954
+ Runtime,
1955
+ logger,
1956
+ config,
1957
+ conversationUrl: lastUrl,
1958
+ followUpCount: 0,
1959
+ requiredArtifactsSaved: Boolean(reportArtifact && transcriptArtifact),
1960
+ });
1961
+ runStatus = "complete";
1962
+ return {
1963
+ answerText: researchResult.text,
1964
+ answerMarkdown: researchResult.text,
1965
+ answerHtml: researchResult.html,
1966
+ artifacts: savedArtifacts,
1967
+ archive,
1968
+ tookMs: durationMs,
1969
+ answerTokens: tokens,
1970
+ answerChars: researchResult.text.length,
1971
+ chromePort: port,
1972
+ chromeHost: host,
1973
+ chromeTargetId: remoteTargetId ?? undefined,
1974
+ tabUrl: lastUrl,
1975
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1976
+ controllerPid: process.pid,
1977
+ };
1978
+ }
1225
1979
  // Helper to normalize text for echo detection (collapse whitespace, lowercase)
1226
1980
  const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, " ").trim();
1227
1981
  const expectedConversationId = () => lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -1252,7 +2006,19 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1252
2006
  }
1253
2007
  return null;
1254
2008
  };
1255
- let answer;
2009
+ const waitWithThinkingMonitor = async (operation) => {
2010
+ stopThinkingMonitor?.();
2011
+ stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, {
2012
+ intervalMs: options.heartbeatIntervalMs,
2013
+ });
2014
+ try {
2015
+ return await operation();
2016
+ }
2017
+ finally {
2018
+ stopThinkingMonitor?.();
2019
+ stopThinkingMonitor = null;
2020
+ }
2021
+ };
1256
2022
  const recheckDelayMs = Math.max(0, config.assistantRecheckDelayMs ?? 0);
1257
2023
  const recheckTimeoutMs = Math.max(0, config.assistantRecheckTimeoutMs ?? 0);
1258
2024
  const attemptAssistantRecheck = async () => {
@@ -1285,6 +2051,8 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1285
2051
  runtime: {
1286
2052
  chromeHost: host,
1287
2053
  chromePort: port,
2054
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2055
+ chromeProfileRoot,
1288
2056
  chromeTargetId: remoteTargetId ?? undefined,
1289
2057
  tabUrl: lastUrl,
1290
2058
  conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
@@ -1294,147 +2062,253 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1294
2062
  }
1295
2063
  await emitRuntimeHint();
1296
2064
  const timeoutMs = recheckTimeoutMs > 0 ? recheckTimeoutMs : config.timeoutMs;
1297
- const rechecked = await waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId());
2065
+ const rechecked = await waitWithThinkingMonitor(() => waitForAssistantOrGeneratedImageResponse({
2066
+ Runtime,
2067
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
2068
+ timeoutMs,
2069
+ logger,
2070
+ minTurnIndex: baselineTurns ?? undefined,
2071
+ expectedConversationId: expectedConversationId(),
2072
+ imageOutputRequested,
2073
+ }));
1298
2074
  logger("Recovered assistant response after delayed recheck");
1299
2075
  return rechecked;
1300
2076
  };
1301
- try {
1302
- const conversationUrl = await readConversationUrl(Runtime).catch(() => null);
1303
- if (conversationUrl && isConversationUrl(conversationUrl)) {
1304
- lastUrl = conversationUrl;
1305
- await emitRuntimeHint();
1306
- }
1307
- answer = await waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId());
1308
- }
1309
- catch (error) {
1310
- if (isAssistantResponseTimeoutError(error)) {
1311
- const rechecked = await attemptAssistantRecheck().catch(() => null);
1312
- if (rechecked) {
1313
- answer = rechecked;
2077
+ const imageOutputRequested = Boolean(options.generateImagePath ||
2078
+ options.outputPath ||
2079
+ options.generateImage);
2080
+ const captureAssistantTurn = async (turnPrompt, label) => {
2081
+ let turnAnswer;
2082
+ try {
2083
+ const conversationUrl = await readConversationUrl(Runtime).catch(() => null);
2084
+ if (conversationUrl && isConversationUrl(conversationUrl)) {
2085
+ lastUrl = conversationUrl;
2086
+ await emitRuntimeHint();
1314
2087
  }
1315
- else {
1316
- try {
1317
- const conversationUrl = await readConversationUrl(Runtime);
1318
- if (conversationUrl) {
1319
- lastUrl = conversationUrl;
1320
- }
2088
+ turnAnswer = await waitWithThinkingMonitor(() => waitForAssistantOrGeneratedImageResponse({
2089
+ Runtime,
2090
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
2091
+ timeoutMs: config.timeoutMs,
2092
+ logger,
2093
+ minTurnIndex: baselineTurns ?? undefined,
2094
+ expectedConversationId: expectedConversationId(),
2095
+ imageOutputRequested,
2096
+ }));
2097
+ }
2098
+ catch (error) {
2099
+ if (isAssistantResponseTimeoutError(error)) {
2100
+ const rechecked = await attemptAssistantRecheckOrRethrow(attemptAssistantRecheck);
2101
+ if (rechecked) {
2102
+ turnAnswer = rechecked;
1321
2103
  }
1322
- catch {
1323
- // ignore
2104
+ else {
2105
+ try {
2106
+ const conversationUrl = await readConversationUrl(Runtime);
2107
+ if (conversationUrl) {
2108
+ lastUrl = conversationUrl;
2109
+ }
2110
+ }
2111
+ catch {
2112
+ // ignore
2113
+ }
2114
+ await emitRuntimeHint();
2115
+ const diagnostics = await captureBrowserDiagnostics(Runtime, logger, "assistant-timeout", {
2116
+ Page,
2117
+ sessionId: options.sessionId,
2118
+ }).catch(() => undefined);
2119
+ const runtime = {
2120
+ chromePort: port,
2121
+ chromeHost: host,
2122
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2123
+ chromeProfileRoot,
2124
+ chromeTargetId: remoteTargetId ?? undefined,
2125
+ tabUrl: lastUrl,
2126
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
2127
+ controllerPid: process.pid,
2128
+ };
2129
+ throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime, diagnostics }, error);
1324
2130
  }
1325
- await emitRuntimeHint();
1326
- const runtime = {
1327
- chromePort: port,
1328
- chromeHost: host,
1329
- chromeTargetId: remoteTargetId ?? undefined,
1330
- tabUrl: lastUrl,
1331
- conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1332
- controllerPid: process.pid,
1333
- };
1334
- throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime }, error);
2131
+ }
2132
+ else {
2133
+ throw error;
1335
2134
  }
1336
2135
  }
1337
- else {
1338
- throw error;
1339
- }
1340
- }
1341
- const baselineNormalized = baselineAssistantText
1342
- ? normalizeForComparison(baselineAssistantText)
1343
- : "";
1344
- if (baselineNormalized) {
1345
- const normalizedAnswer = normalizeForComparison(answer.text ?? "");
1346
- const baselinePrefix = baselineNormalized.length >= 80
1347
- ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
2136
+ const baselineNormalized = baselineAssistantText
2137
+ ? normalizeForComparison(baselineAssistantText)
1348
2138
  : "";
1349
- const isBaseline = normalizedAnswer === baselineNormalized ||
1350
- (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
1351
- if (isBaseline) {
1352
- logger("Detected stale assistant response; waiting for new response...");
1353
- const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
1354
- if (refreshed) {
1355
- answer = refreshed;
2139
+ if (baselineNormalized) {
2140
+ const normalizedAnswer = normalizeForComparison(turnAnswer.text ?? "");
2141
+ const baselinePrefix = baselineNormalized.length >= 80
2142
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
2143
+ : "";
2144
+ const isBaseline = normalizedAnswer === baselineNormalized ||
2145
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
2146
+ if (isBaseline) {
2147
+ logger("Detected stale assistant response; waiting for new response...");
2148
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
2149
+ if (refreshed) {
2150
+ turnAnswer = refreshed;
2151
+ }
1356
2152
  }
1357
2153
  }
1358
- }
1359
- answerText = answer.text;
1360
- answerHtml = answer.html ?? "";
1361
- const copiedMarkdown = await withRetries(async () => {
1362
- const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger);
1363
- if (!attempt) {
1364
- throw new Error("copy-missing");
1365
- }
1366
- return attempt;
1367
- }, {
1368
- retries: 2,
1369
- delayMs: 350,
1370
- onRetry: (attempt, error) => {
1371
- if (options.verbose) {
1372
- logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
2154
+ let turnAnswerText = turnAnswer.text;
2155
+ const turnAnswerHtml = turnAnswer.html ?? "";
2156
+ const copiedMarkdown = await withRetries(async () => {
2157
+ const attempt = await captureAssistantMarkdown(Runtime, turnAnswer.meta, logger);
2158
+ if (!attempt) {
2159
+ throw new Error("copy-missing");
1373
2160
  }
1374
- },
1375
- }).catch(() => null);
1376
- answerMarkdown = copiedMarkdown ?? answerText;
1377
- ({ answerText, answerMarkdown } = await maybeRecoverLongAssistantResponse({
1378
- runtime: Runtime,
1379
- baselineTurns,
1380
- answerText,
1381
- answerMarkdown,
1382
- logger,
1383
- allowMarkdownUpdate: !copiedMarkdown,
1384
- }));
1385
- // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
1386
- const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1387
- const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
1388
- if (finalText &&
1389
- finalText !== answerMarkdown.trim() &&
1390
- finalText !== promptText.trim() &&
1391
- finalText.length >= answerMarkdown.trim().length) {
1392
- logger("Refreshed assistant response via final DOM snapshot");
1393
- answerText = finalText;
1394
- answerMarkdown = finalText;
1395
- }
1396
- // Detect prompt echo using normalized comparison (whitespace-insensitive).
1397
- const promptEchoMatcher = buildPromptEchoMatcher(promptText);
1398
- const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
1399
- text: "Aligned assistant response text to copied markdown after prompt echo",
1400
- markdown: "Aligned assistant markdown to response text after prompt echo",
1401
- });
1402
- answerText = alignedEcho.answerText;
1403
- answerMarkdown = alignedEcho.answerMarkdown;
1404
- const isPromptEcho = alignedEcho.isEcho;
1405
- if (isPromptEcho) {
1406
- logger("Detected prompt echo in response; waiting for actual assistant response...");
1407
- const deadline = Date.now() + 15_000;
1408
- let bestText = null;
1409
- let stableCount = 0;
1410
- while (Date.now() < deadline) {
1411
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1412
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1413
- const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
1414
- if (!isStillEcho) {
1415
- if (!bestText || text.length > bestText.length) {
1416
- bestText = text;
1417
- stableCount = 0;
1418
- }
1419
- else if (text === bestText) {
1420
- stableCount += 1;
2161
+ return attempt;
2162
+ }, {
2163
+ retries: 2,
2164
+ delayMs: 350,
2165
+ onRetry: (attempt, error) => {
2166
+ if (options.verbose) {
2167
+ logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1421
2168
  }
1422
- if (stableCount >= 2) {
1423
- break;
2169
+ },
2170
+ }).catch(() => null);
2171
+ let turnAnswerMarkdown = copiedMarkdown ?? turnAnswerText;
2172
+ ({ answerText: turnAnswerText, answerMarkdown: turnAnswerMarkdown } =
2173
+ await maybeRecoverLongAssistantResponse({
2174
+ runtime: Runtime,
2175
+ baselineTurns,
2176
+ answerText: turnAnswerText,
2177
+ answerMarkdown: turnAnswerMarkdown,
2178
+ logger,
2179
+ allowMarkdownUpdate: !copiedMarkdown,
2180
+ }));
2181
+ // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
2182
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
2183
+ const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
2184
+ if (finalText &&
2185
+ finalText !== turnAnswerMarkdown.trim() &&
2186
+ finalText !== turnPrompt.trim() &&
2187
+ finalText.length >= turnAnswerMarkdown.trim().length) {
2188
+ logger("Refreshed assistant response via final DOM snapshot");
2189
+ turnAnswerText = finalText;
2190
+ turnAnswerMarkdown = finalText;
2191
+ }
2192
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
2193
+ const promptEchoMatcher = buildPromptEchoMatcher(turnPrompt);
2194
+ const alignedEcho = alignPromptEchoPair(turnAnswerText, turnAnswerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
2195
+ text: "Aligned assistant response text to copied markdown after prompt echo",
2196
+ markdown: "Aligned assistant markdown to response text after prompt echo",
2197
+ });
2198
+ turnAnswerText = alignedEcho.answerText;
2199
+ turnAnswerMarkdown = alignedEcho.answerMarkdown;
2200
+ const isPromptEcho = alignedEcho.isEcho;
2201
+ if (isPromptEcho) {
2202
+ logger("Detected prompt echo in response; waiting for actual assistant response...");
2203
+ const deadline = Date.now() + 15_000;
2204
+ let bestText = null;
2205
+ let stableCount = 0;
2206
+ while (Date.now() < deadline) {
2207
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
2208
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
2209
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
2210
+ if (!isStillEcho) {
2211
+ if (!bestText || text.length > bestText.length) {
2212
+ bestText = text;
2213
+ stableCount = 0;
2214
+ }
2215
+ else if (text === bestText) {
2216
+ stableCount += 1;
2217
+ }
2218
+ if (stableCount >= 2) {
2219
+ break;
2220
+ }
1424
2221
  }
2222
+ await new Promise((resolve) => setTimeout(resolve, 300));
2223
+ }
2224
+ if (bestText) {
2225
+ logger("Recovered assistant response after detecting prompt echo");
2226
+ turnAnswerText = bestText;
2227
+ turnAnswerMarkdown = bestText;
1425
2228
  }
1426
- await new Promise((resolve) => setTimeout(resolve, 300));
1427
- }
1428
- if (bestText) {
1429
- logger("Recovered assistant response after detecting prompt echo");
1430
- answerText = bestText;
1431
- answerMarkdown = bestText;
1432
2229
  }
2230
+ return {
2231
+ label,
2232
+ answerText: turnAnswerText,
2233
+ answerMarkdown: turnAnswerMarkdown,
2234
+ answerHtml: turnAnswerHtml,
2235
+ };
2236
+ };
2237
+ const followUpPrompts = normalizeBrowserFollowUpPrompts(options.followUpPrompts);
2238
+ const turns = [];
2239
+ const initialTurn = await captureAssistantTurn(promptText, "Initial response");
2240
+ turns.push(initialTurn);
2241
+ answerText = initialTurn.answerText;
2242
+ answerMarkdown = initialTurn.answerMarkdown;
2243
+ answerHtml = initialTurn.answerHtml;
2244
+ for (let index = 0; index < followUpPrompts.length; index += 1) {
2245
+ const followUpPrompt = followUpPrompts[index];
2246
+ logger(`[browser] Sending follow-up ${index + 1}/${followUpPrompts.length}`);
2247
+ await clearPromptComposer(Runtime, logger);
2248
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
2249
+ const submission = await runSubmissionWithRecovery({
2250
+ prompt: followUpPrompt,
2251
+ attachments: [],
2252
+ submit: submitOnce,
2253
+ reloadPromptComposer,
2254
+ prepareFallbackSubmission: async () => {
2255
+ await clearPromptComposer(Runtime, logger);
2256
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
2257
+ },
2258
+ logger,
2259
+ });
2260
+ baselineTurns = submission.baselineTurns;
2261
+ baselineAssistantText = submission.baselineAssistantText;
2262
+ const turn = await captureAssistantTurn(followUpPrompt, `Follow-up ${index + 1}`);
2263
+ turns.push({ ...turn, prompt: followUpPrompt });
2264
+ answerText = turn.answerText;
2265
+ answerMarkdown = turn.answerMarkdown;
2266
+ answerHtml = turn.answerHtml;
2267
+ }
2268
+ if (turns.length > 1) {
2269
+ const formatted = formatBrowserTurnTranscript(turns);
2270
+ answerText = formatted.answerText;
2271
+ answerMarkdown = formatted.answerMarkdown;
2272
+ answerHtml = "";
2273
+ }
2274
+ const imageArtifacts = await collectGeneratedImageArtifacts({
2275
+ Runtime,
2276
+ Network,
2277
+ logger,
2278
+ minTurnIndex: imageArtifactMinTurnIndex,
2279
+ sessionId: options.sessionId,
2280
+ generateImagePath: options.generateImagePath,
2281
+ outputPath: options.outputPath,
2282
+ answerText,
2283
+ waitTimeoutMs: options.config?.timeoutMs,
2284
+ });
2285
+ answerText = imageArtifacts.answerText || answerText;
2286
+ if (imageArtifacts.markdownSuffix) {
2287
+ answerMarkdown += imageArtifacts.markdownSuffix;
1433
2288
  }
1434
- stopThinkingMonitor?.();
2289
+ const savedImageArtifacts = appendArtifacts(undefined, imageArtifacts.savedImages);
2290
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
2291
+ sessionId: options.sessionId,
2292
+ prompt: promptText,
2293
+ answerMarkdown,
2294
+ conversationUrl: lastUrl,
2295
+ artifacts: savedImageArtifacts,
2296
+ logger,
2297
+ }), logger);
2298
+ const savedArtifacts = appendArtifacts(savedImageArtifacts, [transcriptArtifact]);
2299
+ const archive = await maybeArchiveCompletedConversation({
2300
+ Runtime,
2301
+ logger,
2302
+ config,
2303
+ conversationUrl: lastUrl,
2304
+ followUpCount: followUpPrompts.length,
2305
+ requiredArtifactsSaved: Boolean(transcriptArtifact) &&
2306
+ imageArtifacts.savedImages.length === imageArtifacts.imageCount,
2307
+ });
1435
2308
  const durationMs = Date.now() - startedAt;
1436
2309
  const answerChars = answerText.length;
1437
2310
  const answerTokens = estimateTokenCount(answerMarkdown);
2311
+ runStatus = "complete";
1438
2312
  return {
1439
2313
  answerText,
1440
2314
  answerMarkdown,
@@ -1442,18 +2316,23 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1442
2316
  tookMs: durationMs,
1443
2317
  answerTokens,
1444
2318
  answerChars,
2319
+ browserTransport: "cdp",
1445
2320
  chromePid: undefined,
1446
2321
  chromePort: port,
1447
2322
  chromeHost: host,
2323
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2324
+ chromeProfileRoot,
1448
2325
  userDataDir: undefined,
1449
2326
  chromeTargetId: remoteTargetId ?? undefined,
1450
2327
  tabUrl: lastUrl,
2328
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
2329
+ artifacts: savedArtifacts,
2330
+ archive,
1451
2331
  controllerPid: process.pid,
1452
2332
  };
1453
2333
  }
1454
2334
  catch (error) {
1455
2335
  const normalizedError = error instanceof Error ? error : new Error(String(error));
1456
- stopThinkingMonitor?.();
1457
2336
  const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError);
1458
2337
  connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed;
1459
2338
  if (!socketClosed) {
@@ -1468,6 +2347,8 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1468
2347
  runtime: {
1469
2348
  chromeHost: host,
1470
2349
  chromePort: port,
2350
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2351
+ chromeProfileRoot,
1471
2352
  chromeTargetId: remoteTargetId ?? undefined,
1472
2353
  tabUrl: lastUrl,
1473
2354
  controllerPid: process.pid,
@@ -1476,15 +2357,29 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1476
2357
  }
1477
2358
  finally {
1478
2359
  try {
1479
- if (!connectionClosedUnexpectedly && client) {
1480
- await client.close();
1481
- }
2360
+ await closeRemoteConnectionAfterRun({
2361
+ connectionClosedUnexpectedly,
2362
+ connection,
2363
+ client,
2364
+ runStatus,
2365
+ });
1482
2366
  }
1483
2367
  catch {
1484
2368
  // ignore
1485
2369
  }
1486
2370
  removeDialogHandler?.();
1487
- await closeRemoteChromeTarget(host, port, remoteTargetId ?? undefined, logger);
2371
+ if (tabLease) {
2372
+ const handle = tabLease;
2373
+ tabLease = null;
2374
+ await handle.release().catch(() => undefined);
2375
+ }
2376
+ if (shouldCloseOwnedRunTargetAfterRun({
2377
+ runStatus,
2378
+ ownsTarget,
2379
+ keepBrowser: Boolean(config.keepBrowser),
2380
+ })) {
2381
+ await closeRemoteChromeTarget(host, port, remoteTargetId ?? undefined, logger);
2382
+ }
1488
2383
  // Don't kill remote Chrome - it's not ours to manage
1489
2384
  const totalSeconds = (Date.now() - startedAt) / 1000;
1490
2385
  logger(`Remote session complete • ${totalSeconds.toFixed(1)}s total`);
@@ -1492,11 +2387,22 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1492
2387
  }
1493
2388
  export { estimateTokenCount } from "./utils.js";
1494
2389
  export { resolveBrowserConfig, DEFAULT_BROWSER_CONFIG } from "./config.js";
2390
+ // biome-ignore lint/style/useNamingConvention: test-only export used in vitest suite
2391
+ export const __test__ = {
2392
+ closeRemoteConnectionAfterRun,
2393
+ detachKeptChromeProcess,
2394
+ isImageOnlyUiChromeText,
2395
+ listIgnoredRemoteChromeFlags,
2396
+ shouldCloseOwnedRunTargetAfterRun,
2397
+ };
1495
2398
  export { syncCookies } from "./cookies.js";
1496
2399
  export { navigateToChatGPT, ensureNotBlocked, ensurePromptReady, ensureModelSelection, submitPrompt, waitForAssistantResponse, captureAssistantMarkdown, uploadAttachmentFile, waitForAttachmentCompletion, } from "./pageActions.js";
1497
2400
  export async function maybeReuseRunningChromeForTest(userDataDir, logger, options = {}) {
1498
2401
  return maybeReuseRunningChrome(userDataDir, logger, options);
1499
2402
  }
2403
+ export async function acquireManualLoginChromeForRunForTest(userDataDir, config, logger, sessionId, deps) {
2404
+ return acquireManualLoginChromeForRun(userDataDir, config, logger, sessionId, deps);
2405
+ }
1500
2406
  export function isWebSocketClosureError(error) {
1501
2407
  const message = error.message.toLowerCase();
1502
2408
  return (message.includes("websocket connection closed") ||
@@ -1505,16 +2411,6 @@ export function isWebSocketClosureError(error) {
1505
2411
  message.includes("inspected target navigated or closed") ||
1506
2412
  message.includes("target closed"));
1507
2413
  }
1508
- export function formatThinkingLog(startedAt, now, message, locatorSuffix) {
1509
- const elapsedMs = now - startedAt;
1510
- const elapsedText = formatElapsed(elapsedMs);
1511
- const progress = Math.min(1, elapsedMs / 600_000); // soft target: 10 minutes
1512
- const pct = Math.round(progress * 100)
1513
- .toString()
1514
- .padStart(3, " ");
1515
- const statusLabel = message ? ` — ${message}` : "";
1516
- return `${pct}% [${elapsedText} / ~10m]${statusLabel}${locatorSuffix}`;
1517
- }
1518
2414
  async function waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, minTurnIndex, expectedConversationId) {
1519
2415
  try {
1520
2416
  return await waitForAssistantResponse(Runtime, timeoutMs, logger, minTurnIndex, expectedConversationId);
@@ -1697,74 +2593,6 @@ async function readConversationTurnCount(Runtime, logger) {
1697
2593
  function isConversationUrl(url) {
1698
2594
  return /\/c\/[a-z0-9-]+/i.test(url);
1699
2595
  }
1700
- function startThinkingStatusMonitor(Runtime, logger, includeDiagnostics = false) {
1701
- let stopped = false;
1702
- let pending = false;
1703
- let lastMessage = null;
1704
- const startedAt = Date.now();
1705
- const interval = setInterval(async () => {
1706
- // stop flag flips asynchronously
1707
- if (stopped || pending) {
1708
- return;
1709
- }
1710
- pending = true;
1711
- try {
1712
- const nextMessage = await readThinkingStatus(Runtime);
1713
- if (nextMessage && nextMessage !== lastMessage) {
1714
- lastMessage = nextMessage;
1715
- let locatorSuffix = "";
1716
- if (includeDiagnostics) {
1717
- try {
1718
- const snapshot = await readAssistantSnapshot(Runtime);
1719
- locatorSuffix = ` | assistant-turn=${snapshot ? "present" : "missing"}`;
1720
- }
1721
- catch {
1722
- locatorSuffix = " | assistant-turn=error";
1723
- }
1724
- }
1725
- logger(formatThinkingLog(startedAt, Date.now(), nextMessage, locatorSuffix));
1726
- }
1727
- }
1728
- catch {
1729
- // ignore DOM polling errors
1730
- }
1731
- finally {
1732
- pending = false;
1733
- }
1734
- }, 1500);
1735
- interval.unref?.();
1736
- return () => {
1737
- // multiple callers may race to stop
1738
- if (stopped) {
1739
- return;
1740
- }
1741
- stopped = true;
1742
- clearInterval(interval);
1743
- };
1744
- }
1745
- async function readThinkingStatus(Runtime) {
1746
- const expression = buildThinkingStatusExpression();
1747
- try {
1748
- const { result } = await Runtime.evaluate({ expression, returnByValue: true });
1749
- const value = typeof result.value === "string" ? result.value.trim() : "";
1750
- const sanitized = sanitizeThinkingText(value);
1751
- return sanitized || null;
1752
- }
1753
- catch {
1754
- return null;
1755
- }
1756
- }
1757
- function sanitizeThinkingText(raw) {
1758
- if (!raw) {
1759
- return "";
1760
- }
1761
- const trimmed = raw.trim();
1762
- const prefixPattern = /^(pro thinking)\s*[•:\-–—]*\s*/i;
1763
- if (prefixPattern.test(trimmed)) {
1764
- return trimmed.replace(prefixPattern, "").trim();
1765
- }
1766
- return trimmed;
1767
- }
1768
2596
  function describeDevtoolsFirewallHint(host, port) {
1769
2597
  if (!isWsl())
1770
2598
  return null;
@@ -1832,59 +2660,3 @@ function shouldPreferSystemTmpDir(platform, tmpDir, homeDir) {
1832
2660
  export function shouldPreferSystemTmpDirForTest(platform, tmpDir, homeDir) {
1833
2661
  return shouldPreferSystemTmpDir(platform, tmpDir, homeDir);
1834
2662
  }
1835
- function buildThinkingStatusExpression() {
1836
- const selectors = [
1837
- "span.loading-shimmer",
1838
- "span.flex.items-center.gap-1.truncate.text-start.align-middle.text-token-text-tertiary",
1839
- '[data-testid*="thinking"]',
1840
- '[data-testid*="reasoning"]',
1841
- '[role="status"]',
1842
- '[aria-live="polite"]',
1843
- ];
1844
- const keywords = [
1845
- "pro thinking",
1846
- "thinking",
1847
- "reasoning",
1848
- "clarifying",
1849
- "planning",
1850
- "drafting",
1851
- "summarizing",
1852
- ];
1853
- const selectorLiteral = JSON.stringify(selectors);
1854
- const keywordsLiteral = JSON.stringify(keywords);
1855
- return `(() => {
1856
- const selectors = ${selectorLiteral};
1857
- const keywords = ${keywordsLiteral};
1858
- const nodes = new Set();
1859
- for (const selector of selectors) {
1860
- document.querySelectorAll(selector).forEach((node) => nodes.add(node));
1861
- }
1862
- document.querySelectorAll('[data-testid]').forEach((node) => nodes.add(node));
1863
- for (const node of nodes) {
1864
- if (!(node instanceof HTMLElement)) {
1865
- continue;
1866
- }
1867
- const text = node.textContent?.trim();
1868
- if (!text) {
1869
- continue;
1870
- }
1871
- const classLabel = (node.className || '').toLowerCase();
1872
- const dataLabel = ((node.getAttribute('data-testid') || '') + ' ' + (node.getAttribute('aria-label') || ''))
1873
- .toLowerCase();
1874
- const normalizedText = text.toLowerCase();
1875
- const matches = keywords.some((keyword) =>
1876
- normalizedText.includes(keyword) || classLabel.includes(keyword) || dataLabel.includes(keyword)
1877
- );
1878
- if (matches) {
1879
- const shimmerChild = node.querySelector(
1880
- 'span.flex.items-center.gap-1.truncate.text-start.align-middle.text-token-text-tertiary',
1881
- );
1882
- if (shimmerChild?.textContent?.trim()) {
1883
- return shimmerChild.textContent.trim();
1884
- }
1885
- return text.trim();
1886
- }
1887
- }
1888
- return null;
1889
- })()`;
1890
- }