@steipete/oracle 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +56 -11
  2. package/dist/bin/oracle-cli.js +104 -16
  3. package/dist/src/browser/actions/archiveConversation.js +236 -0
  4. package/dist/src/browser/actions/assistantResponse.js +26 -0
  5. package/dist/src/browser/actions/deepResearch.js +662 -0
  6. package/dist/src/browser/actions/modelSelection.js +86 -16
  7. package/dist/src/browser/actions/navigation.js +22 -0
  8. package/dist/src/browser/actions/projectSources.js +491 -0
  9. package/dist/src/browser/actions/promptComposer.js +52 -27
  10. package/dist/src/browser/actions/thinkingStatus.js +391 -0
  11. package/dist/src/browser/artifacts.js +150 -0
  12. package/dist/src/browser/attachRunning.js +31 -0
  13. package/dist/src/browser/chatgptImages.js +315 -0
  14. package/dist/src/browser/chromeLifecycle.js +214 -3
  15. package/dist/src/browser/config.js +27 -9
  16. package/dist/src/browser/constants.js +8 -0
  17. package/dist/src/browser/controlPlan.js +81 -0
  18. package/dist/src/browser/detect.js +206 -33
  19. package/dist/src/browser/domDebug.js +49 -0
  20. package/dist/src/browser/index.js +1234 -479
  21. package/dist/src/browser/liveTabs.js +434 -0
  22. package/dist/src/browser/profileState.js +83 -3
  23. package/dist/src/browser/projectSourcesRunner.js +366 -0
  24. package/dist/src/browser/reattach.js +117 -45
  25. package/dist/src/browser/reattachHelpers.js +1 -1
  26. package/dist/src/browser/sessionRunner.js +53 -1
  27. package/dist/src/browser/tabLeaseRegistry.js +182 -0
  28. package/dist/src/cli/bridge/claudeConfig.js +12 -8
  29. package/dist/src/cli/bridge/codexConfig.js +2 -2
  30. package/dist/src/cli/browserConfig.js +41 -8
  31. package/dist/src/cli/browserDefaults.js +31 -7
  32. package/dist/src/cli/browserTabs.js +228 -0
  33. package/dist/src/cli/dryRun.js +33 -1
  34. package/dist/src/cli/duplicatePromptGuard.js +10 -2
  35. package/dist/src/cli/help.js +1 -1
  36. package/dist/src/cli/options.js +4 -0
  37. package/dist/src/cli/projectSources.js +116 -0
  38. package/dist/src/cli/sessionCommand.js +51 -0
  39. package/dist/src/cli/sessionDisplay.js +121 -9
  40. package/dist/src/cli/sessionRunner.js +51 -7
  41. package/dist/src/mcp/consultPresets.js +19 -0
  42. package/dist/src/mcp/server.js +2 -0
  43. package/dist/src/mcp/tools/consult.js +201 -26
  44. package/dist/src/mcp/tools/projectSources.js +123 -0
  45. package/dist/src/mcp/types.js +11 -2
  46. package/dist/src/mcp/utils.js +6 -1
  47. package/dist/src/oracle/run.js +4 -1
  48. package/dist/src/projectSources/plan.js +27 -0
  49. package/dist/src/projectSources/types.js +1 -0
  50. package/dist/src/projectSources/url.js +23 -0
  51. package/dist/src/sessionManager.js +1 -0
  52. package/package.json +7 -6
@@ -3,22 +3,33 @@ import path from "node:path";
3
3
  import os from "node:os";
4
4
  import net from "node:net";
5
5
  import { resolveBrowserConfig } from "./config.js";
6
- import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToRemoteChrome, closeRemoteChromeTarget, connectWithNewTab, closeTab, } from "./chromeLifecycle.js";
6
+ import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToRemoteChrome, connectWithNewTab, closeTab, closeRemoteChromeTarget, closeBlankChromeTabs, } from "./chromeLifecycle.js";
7
7
  import { syncCookies } from "./cookies.js";
8
8
  import { navigateToChatGPT, navigateToPromptReadyWithFallback, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, installJavaScriptDialogAutoDismissal, ensureModelSelection, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from "./pageActions.js";
9
9
  import { INPUT_SELECTORS } from "./constants.js";
10
10
  import { uploadAttachmentViaDataTransfer } from "./actions/remoteFileTransfer.js";
11
11
  import { ensureThinkingTime } from "./actions/thinkingTime.js";
12
+ import { startThinkingStatusMonitor } from "./actions/thinkingStatus.js";
13
+ import { activateDeepResearch, waitForDeepResearchCompletion, waitForResearchPlanAutoConfirm, } from "./actions/deepResearch.js";
12
14
  import { estimateTokenCount, withRetries, delay } from "./utils.js";
13
15
  import { formatElapsed } from "../oracle/format.js";
14
16
  import { CHATGPT_URL, CONVERSATION_TURN_SELECTOR, DEFAULT_MODEL_STRATEGY } from "./constants.js";
15
17
  import { BrowserAutomationError } from "../oracle/errors.js";
16
18
  import { alignPromptEchoPair, buildPromptEchoMatcher } from "./reattachHelpers.js";
17
- import { cleanupStaleProfileState, acquireProfileRunLock, readChromePid, readDevToolsPort, shouldCleanupManualLoginProfileState, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from "./profileState.js";
19
+ import { cleanupStaleProfileState, acquireProfileRunLock, findRunningChromeDebugTargetForProfile, readChromePid, readDevToolsPort, shouldCleanupManualLoginProfileState, terminateRecordedChromeForProfile, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from "./profileState.js";
20
+ import { acquireBrowserTabLease, hasOtherActiveBrowserTabLeases, } from "./tabLeaseRegistry.js";
21
+ import { appendArtifacts, saveBrowserTranscriptArtifact, saveDeepResearchReportArtifact, } from "./artifacts.js";
22
+ import { collectGeneratedImageArtifacts } from "./chatgptImages.js";
18
23
  import { runProviderSubmissionFlow } from "./providerDomFlow.js";
19
24
  import { chatgptDomProvider } from "./providers/index.js";
25
+ import { resolveAttachRunningConnection } from "./attachRunning.js";
26
+ import { connectToExistingChatGptTab } from "./liveTabs.js";
27
+ import { captureBrowserDiagnostics } from "./domDebug.js";
28
+ import { archiveChatGptConversation, resolveBrowserArchiveDecision, } from "./actions/archiveConversation.js";
29
+ import { describeBrowserControlPlan, formatBrowserControlPlan } from "./controlPlan.js";
20
30
  export { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from "./constants.js";
21
31
  export { parseDuration, delay, normalizeChatgptUrl, isTemporaryChatUrl } from "./utils.js";
32
+ export { formatThinkingLog, formatThinkingWaitingLog, buildThinkingStatusExpressionForTest, readThinkingStatusForTest, sanitizeThinkingText, startThinkingStatusMonitorForTest, } from "./actions/thinkingStatus.js";
22
33
  function redactBrowserConfigForDebugLog(config) {
23
34
  const redacted = { ...config };
24
35
  if (Array.isArray(config.inlineCookies)) {
@@ -35,16 +46,191 @@ function isCloudflareChallengeError(error) {
35
46
  return false;
36
47
  return error.details?.stage === "cloudflare-challenge";
37
48
  }
49
+ function isReattachableCaptureError(error) {
50
+ if (!(error instanceof BrowserAutomationError))
51
+ return false;
52
+ const stage = error.details?.stage;
53
+ return stage === "assistant-timeout" || stage === "assistant-recheck";
54
+ }
55
+ function classifyPreservedBrowserError(error, headless) {
56
+ if (headless)
57
+ return null;
58
+ if (isCloudflareChallengeError(error))
59
+ return "cloudflare-challenge";
60
+ if (isReattachableCaptureError(error))
61
+ return "reattachable-capture";
62
+ return null;
63
+ }
38
64
  function shouldPreserveBrowserOnError(error, headless) {
39
- return !headless && isCloudflareChallengeError(error);
65
+ return classifyPreservedBrowserError(error, headless) !== null;
40
66
  }
41
67
  export function shouldPreserveBrowserOnErrorForTest(error, headless) {
42
68
  return shouldPreserveBrowserOnError(error, headless);
43
69
  }
70
+ export function classifyPreservedBrowserErrorForTest(error, headless) {
71
+ return classifyPreservedBrowserError(error, headless);
72
+ }
73
+ // NOTE: Previously, shouldSkipThinkingTimeSelection() would skip the thinking
74
+ // time UI step when desiredModel was gpt-5.5-pro and thinkingTime was "extended",
75
+ // assuming that selecting "Pro Extended" in the old UI already implied Extended
76
+ // effort. This is wrong for lower-tier plans ($100/mo Pro) where selecting "Pro"
77
+ // defaults to Standard effort. ensureThinkingTime() already handles the
78
+ // "already-selected" case as a no-op, so always attempting it is safe.
79
+ function listIgnoredRemoteChromeFlags(config) {
80
+ return [
81
+ config.headless ? "--browser-headless" : null,
82
+ config.hideWindow ? "--browser-hide-window" : null,
83
+ config.keepBrowser ? "--browser-keep-browser" : null,
84
+ !config.attachRunning && config.chromePath ? "--browser-chrome-path" : null,
85
+ ].filter((value) => Boolean(value));
86
+ }
44
87
  function hasBrowserErrorCode(error, code) {
45
88
  return (error instanceof BrowserAutomationError &&
46
89
  error.details?.code === code);
47
90
  }
91
+ async function saveOptionalArtifact(operation, logger) {
92
+ try {
93
+ return await operation();
94
+ }
95
+ catch (error) {
96
+ const message = error instanceof Error ? error.message : String(error);
97
+ logger(`[browser] Failed to save session artifact: ${message}`);
98
+ return null;
99
+ }
100
+ }
101
+ async function waitForAssistantOrGeneratedImageResponse(params) {
102
+ if (!params.imageOutputRequested) {
103
+ return params.waitForText();
104
+ }
105
+ params.logger("[browser] Waiting for ChatGPT generated image response.");
106
+ const response = await pollGeneratedImageOrTextAssistantResponse(params.Runtime, params.timeoutMs, params.minTurnIndex, params.expectedConversationId);
107
+ if (response) {
108
+ if (response.html?.includes("/backend-api/estuary/content?id=file_")) {
109
+ params.logger("[browser] Captured generated image response before text appeared.");
110
+ }
111
+ return response;
112
+ }
113
+ throw new Error("assistant response timeout while waiting for generated image or text");
114
+ }
115
+ async function attemptAssistantRecheckOrRethrow(operation) {
116
+ try {
117
+ return await operation();
118
+ }
119
+ catch (error) {
120
+ if (error instanceof BrowserAutomationError) {
121
+ throw error;
122
+ }
123
+ return null;
124
+ }
125
+ }
126
+ async function pollGeneratedImageOrTextAssistantResponse(Runtime, timeoutMs, minTurnIndex, expectedConversationId) {
127
+ const deadline = Date.now() + timeoutMs;
128
+ while (Date.now() < deadline) {
129
+ let snapshot = await readAssistantSnapshot(Runtime, minTurnIndex, expectedConversationId).catch(() => null);
130
+ if (!snapshot && typeof minTurnIndex === "number" && Number.isFinite(minTurnIndex)) {
131
+ const relaxedSnapshot = await readAssistantSnapshot(Runtime, undefined, expectedConversationId).catch(() => null);
132
+ const relaxedHtml = typeof relaxedSnapshot?.html === "string" ? relaxedSnapshot.html : "";
133
+ if (relaxedHtml.includes("/backend-api/estuary/content?id=file_")) {
134
+ snapshot = relaxedSnapshot;
135
+ }
136
+ }
137
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
138
+ const html = typeof snapshot?.html === "string" ? snapshot.html : "";
139
+ const hasGeneratedImage = html.includes("/backend-api/estuary/content?id=file_");
140
+ if (text && (hasGeneratedImage || !isImageOnlyUiChromeText(text))) {
141
+ return {
142
+ text,
143
+ html,
144
+ meta: {
145
+ turnId: snapshot?.turnId ?? undefined,
146
+ messageId: snapshot?.messageId ?? undefined,
147
+ },
148
+ };
149
+ }
150
+ await delay(750);
151
+ }
152
+ return null;
153
+ }
154
+ function isImageOnlyUiChromeText(text) {
155
+ const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
156
+ return (normalized.length === 0 ||
157
+ normalized === "edit" ||
158
+ normalized === "stopped thinking" ||
159
+ normalized === "stopped thinking edit");
160
+ }
161
+ function normalizeBrowserFollowUpPrompts(values) {
162
+ return (values ?? []).map((entry) => entry.trim()).filter(Boolean);
163
+ }
164
+ export function formatBrowserTurnTranscript(turns) {
165
+ if (turns.length <= 1) {
166
+ const turn = turns[0];
167
+ return {
168
+ answerText: turn?.answerText ?? "",
169
+ answerMarkdown: turn?.answerMarkdown ?? turn?.answerText ?? "",
170
+ };
171
+ }
172
+ const answerMarkdown = turns
173
+ .map((turn, index) => {
174
+ const label = turn.label.trim() || `Turn ${index + 1}`;
175
+ const prompt = turn.prompt?.trim();
176
+ const promptBlock = prompt ? `\n\n### Prompt\n\n${prompt}` : "";
177
+ const answer = (turn.answerMarkdown || turn.answerText).trim() || "_No text captured._";
178
+ return `## ${label}${promptBlock}\n\n### Answer\n\n${answer}`;
179
+ })
180
+ .join("\n\n")
181
+ .trim();
182
+ return {
183
+ answerText: answerMarkdown,
184
+ answerMarkdown,
185
+ };
186
+ }
187
+ async function maybeArchiveCompletedConversation({ Runtime, logger, config, conversationUrl, followUpCount, requiredArtifactsSaved, }) {
188
+ const decision = resolveBrowserArchiveDecision({
189
+ mode: config.archiveConversations,
190
+ chatgptUrl: config.chatgptUrl ?? config.url,
191
+ conversationUrl,
192
+ researchMode: config.researchMode,
193
+ followUpCount,
194
+ });
195
+ if (!decision.shouldArchive) {
196
+ logger(`[browser] ChatGPT archive skipped (${decision.reason}).`);
197
+ return {
198
+ mode: decision.mode,
199
+ attempted: false,
200
+ archived: false,
201
+ reason: decision.reason,
202
+ conversationUrl: conversationUrl ?? undefined,
203
+ };
204
+ }
205
+ if (!requiredArtifactsSaved) {
206
+ logger("[browser] ChatGPT archive skipped (artifact-save-failed).");
207
+ return {
208
+ mode: decision.mode,
209
+ attempted: false,
210
+ archived: false,
211
+ reason: "artifact-save-failed",
212
+ conversationUrl: conversationUrl ?? undefined,
213
+ };
214
+ }
215
+ return archiveChatGptConversation(Runtime, logger, {
216
+ mode: decision.mode,
217
+ conversationUrl,
218
+ }).catch((error) => {
219
+ const message = error instanceof Error ? error.message : String(error);
220
+ logger(`[browser] ChatGPT archive failed (${message}).`);
221
+ return {
222
+ mode: decision.mode,
223
+ attempted: true,
224
+ archived: false,
225
+ reason: "archive-failed",
226
+ conversationUrl: conversationUrl ?? undefined,
227
+ error: message,
228
+ };
229
+ });
230
+ }
231
+ export function maybeArchiveCompletedConversationForTest(args) {
232
+ return maybeArchiveCompletedConversation(args);
233
+ }
48
234
  async function runSubmissionWithRecovery({ prompt, attachments, fallbackSubmission, submit, reloadPromptComposer, prepareFallbackSubmission, logger, }) {
49
235
  let currentPrompt = prompt;
50
236
  let currentAttachments = attachments;
@@ -77,6 +263,33 @@ async function runSubmissionWithRecovery({ prompt, attachments, fallbackSubmissi
77
263
  export async function runSubmissionWithRecoveryForTest(args) {
78
264
  return runSubmissionWithRecovery(args);
79
265
  }
266
+ function resolveRemoteTabLeaseProfileDir(config) {
267
+ if (!config.remoteChrome || !config.manualLogin || !config.manualLoginProfileDir) {
268
+ return null;
269
+ }
270
+ return path.resolve(config.manualLoginProfileDir);
271
+ }
272
+ export function resolveRemoteTabLeaseProfileDirForTest(config) {
273
+ return resolveRemoteTabLeaseProfileDir(config);
274
+ }
275
+ async function closeRemoteConnectionAfterRun(options) {
276
+ if (options.connectionClosedUnexpectedly) {
277
+ return;
278
+ }
279
+ if (!options.connection) {
280
+ await options.client?.close();
281
+ return;
282
+ }
283
+ if (options.runStatus === "complete") {
284
+ await options.connection.close();
285
+ }
286
+ else {
287
+ await options.client?.close();
288
+ }
289
+ }
290
+ function shouldCloseOwnedRunTargetAfterRun(options) {
291
+ return options.runStatus === "complete" && options.ownsTarget && !options.keepBrowser;
292
+ }
80
293
  export async function runBrowserMode(options) {
81
294
  const promptText = options.prompt?.trim();
82
295
  if (!promptText) {
@@ -85,6 +298,13 @@ export async function runBrowserMode(options) {
85
298
  const attachments = options.attachments ?? [];
86
299
  const fallbackSubmission = options.fallbackSubmission;
87
300
  let config = resolveBrowserConfig(options.config);
301
+ const followUpPrompts = normalizeBrowserFollowUpPrompts(options.followUpPrompts);
302
+ if (config.researchMode === "deep" && followUpPrompts.length > 0) {
303
+ throw new BrowserAutomationError("Browser follow-ups are not supported with Deep Research mode. Put the full research plan into the initial prompt or run a normal browser consult for multi-turn review.", {
304
+ stage: "browser-follow-ups",
305
+ details: { researchMode: "deep", followUps: followUpPrompts.length },
306
+ });
307
+ }
88
308
  const logger = options.log ?? ((_message) => { });
89
309
  if (logger.verbose === undefined) {
90
310
  logger.verbose = Boolean(config.debug);
@@ -95,8 +315,9 @@ export async function runBrowserMode(options) {
95
315
  const runtimeHintCb = options.runtimeHintCb;
96
316
  let lastTargetId;
97
317
  let lastUrl;
318
+ let tabLease = null;
98
319
  const emitRuntimeHint = async () => {
99
- if (!runtimeHintCb || !chrome?.port) {
320
+ if (!chrome?.port) {
100
321
  return;
101
322
  }
102
323
  const conversationId = lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -111,7 +332,13 @@ export async function runBrowserMode(options) {
111
332
  controllerPid: process.pid,
112
333
  };
113
334
  try {
114
- await runtimeHintCb(hint);
335
+ await runtimeHintCb?.(hint);
336
+ await tabLease?.update({
337
+ chromeHost,
338
+ chromePort: chrome.port,
339
+ chromeTargetId: lastTargetId,
340
+ tabUrl: lastUrl,
341
+ });
115
342
  }
116
343
  catch (error) {
117
344
  const message = error instanceof Error ? error.message : String(error);
@@ -124,6 +351,18 @@ export async function runBrowserMode(options) {
124
351
  promptLength: promptText.length,
125
352
  })}`);
126
353
  }
354
+ for (const line of formatBrowserControlPlan(describeBrowserControlPlan(config), "browser")) {
355
+ logger(line);
356
+ }
357
+ if (config.attachRunning) {
358
+ const attached = await resolveAttachRunningConnection(config, logger);
359
+ config = {
360
+ ...config,
361
+ remoteChrome: { host: attached.host, port: attached.port },
362
+ remoteChromeBrowserWSEndpoint: attached.browserWSEndpoint,
363
+ remoteChromeProfileRoot: attached.profileRoot,
364
+ };
365
+ }
127
366
  if (!config.remoteChrome && !config.manualLogin) {
128
367
  const preferredPort = config.debugPort ?? DEFAULT_DEBUG_PORT;
129
368
  const availablePort = await pickAvailableDebugPort(preferredPort, logger);
@@ -135,9 +374,9 @@ export async function runBrowserMode(options) {
135
374
  // Remote Chrome mode - connect to existing browser
136
375
  if (config.remoteChrome) {
137
376
  // Warn about ignored local-only options
138
- if (config.headless || config.hideWindow || config.keepBrowser || config.chromePath) {
139
- logger("Note: --remote-chrome ignores local Chrome flags " +
140
- "(--browser-headless, --browser-hide-window, --browser-keep-browser, --browser-chrome-path).");
377
+ const ignoredFlags = listIgnoredRemoteChromeFlags(config);
378
+ if (ignoredFlags.length > 0) {
379
+ logger(`Note: --remote-chrome ignores local Chrome flags (${ignoredFlags.join(", ")}).`);
141
380
  }
142
381
  return runRemoteBrowserMode(promptText, attachments, config, logger, options);
143
382
  }
@@ -156,24 +395,42 @@ export async function runBrowserMode(options) {
156
395
  else {
157
396
  logger(`Created temporary Chrome profile at ${userDataDir}`);
158
397
  }
398
+ if (manualLogin) {
399
+ tabLease = await acquireBrowserTabLease(userDataDir, {
400
+ maxConcurrentTabs: config.maxConcurrentTabs,
401
+ timeoutMs: config.timeoutMs,
402
+ logger,
403
+ sessionId: options.sessionId,
404
+ });
405
+ }
159
406
  const effectiveKeepBrowser = Boolean(config.keepBrowser);
160
- const reusedChrome = manualLogin
161
- ? await maybeReuseRunningChrome(userDataDir, logger, {
162
- waitForPortMs: config.reuseChromeWaitMs,
163
- })
164
- : null;
165
- const chrome = reusedChrome ??
166
- (await launchChrome({
167
- ...config,
168
- remoteChrome: config.remoteChrome,
169
- }, userDataDir, logger));
170
- const chromeHost = chrome.host ?? "127.0.0.1";
171
- // Persist profile state so future manual-login runs can reuse this Chrome.
172
- if (manualLogin && chrome.port) {
173
- await writeDevToolsActivePort(userDataDir, chrome.port);
174
- if (!reusedChrome && chrome.pid) {
175
- await writeChromePid(userDataDir, chrome.pid);
407
+ let acquiredChrome;
408
+ try {
409
+ acquiredChrome = manualLogin
410
+ ? await acquireManualLoginChromeForRun(userDataDir, config, logger, options.sessionId)
411
+ : {
412
+ chrome: await launchChrome({
413
+ ...config,
414
+ remoteChrome: config.remoteChrome,
415
+ }, userDataDir, logger),
416
+ reusedChrome: null,
417
+ };
418
+ }
419
+ catch (error) {
420
+ if (tabLease) {
421
+ const handle = tabLease;
422
+ tabLease = null;
423
+ await handle.release().catch(() => undefined);
176
424
  }
425
+ throw error;
426
+ }
427
+ const { chrome, reusedChrome } = acquiredChrome;
428
+ const chromeHost = chrome.host ?? "127.0.0.1";
429
+ if (tabLease) {
430
+ await tabLease.update({
431
+ chromeHost,
432
+ chromePort: chrome.port,
433
+ });
177
434
  }
178
435
  let removeTerminationHooks = null;
179
436
  try {
@@ -188,6 +445,7 @@ export async function runBrowserMode(options) {
188
445
  }
189
446
  let client = null;
190
447
  let isolatedTargetId = null;
448
+ let ownsTarget = true;
191
449
  const startedAt = Date.now();
192
450
  let answerText = "";
193
451
  let answerMarkdown = "";
@@ -200,14 +458,37 @@ export async function runBrowserMode(options) {
200
458
  let preserveBrowserOnError = false;
201
459
  try {
202
460
  try {
203
- const strictTabIsolation = Boolean(manualLogin && reusedChrome);
204
- const connection = await connectWithNewTab(chrome.port, logger, config.url, chromeHost, {
205
- fallbackToDefault: !strictTabIsolation,
206
- retries: strictTabIsolation ? 3 : 0,
207
- retryDelayMs: 500,
208
- });
209
- client = connection.client;
210
- isolatedTargetId = connection.targetId ?? null;
461
+ if (config.browserTabRef) {
462
+ const attached = await connectToExistingChatGptTab({
463
+ host: chromeHost,
464
+ port: chrome.port,
465
+ ref: config.browserTabRef,
466
+ });
467
+ client = attached.client;
468
+ isolatedTargetId = attached.targetId ?? null;
469
+ lastTargetId = attached.targetId ?? undefined;
470
+ lastUrl = attached.tab.url || lastUrl;
471
+ ownsTarget = false;
472
+ logger(`Attached to existing ChatGPT tab ${attached.targetId}${attached.tab.url ? ` (${attached.tab.url})` : ""}`);
473
+ }
474
+ else {
475
+ const strictTabIsolation = Boolean(manualLogin && reusedChrome);
476
+ const connection = await connectWithNewTab(chrome.port, logger, config.url, chromeHost, {
477
+ fallbackToDefault: !strictTabIsolation,
478
+ retries: strictTabIsolation ? 3 : 0,
479
+ retryDelayMs: 500,
480
+ });
481
+ client = connection.client;
482
+ isolatedTargetId = connection.targetId ?? null;
483
+ ownsTarget = true;
484
+ }
485
+ if (tabLease && isolatedTargetId) {
486
+ await tabLease.update({
487
+ chromeHost,
488
+ chromePort: chrome.port,
489
+ chromeTargetId: isolatedTargetId,
490
+ });
491
+ }
211
492
  }
212
493
  catch (error) {
213
494
  const hint = describeDevtoolsFirewallHint(chromeHost, chrome.port);
@@ -288,30 +569,37 @@ export async function runBrowserMode(options) {
288
569
  },
289
570
  });
290
571
  }
291
- const baseUrl = CHATGPT_URL;
292
- // First load the base ChatGPT homepage to satisfy potential interstitials,
293
- // then hop to the requested URL if it differs.
294
- await raceWithDisconnect(navigateToChatGPT(Page, Runtime, baseUrl, logger));
295
- await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
296
- // Learned: login checks must happen on the base domain before jumping into project URLs.
297
- await raceWithDisconnect(waitForLogin({
298
- runtime: Runtime,
299
- logger,
300
- appliedCookies,
301
- manualLogin,
302
- timeoutMs: config.timeoutMs,
303
- }));
304
- if (config.url !== baseUrl) {
305
- await raceWithDisconnect(navigateToPromptReadyWithFallback(Page, Runtime, {
306
- url: config.url,
307
- fallbackUrl: baseUrl,
308
- timeoutMs: config.inputTimeoutMs,
309
- headless: config.headless,
310
- logger,
311
- }));
572
+ if (config.browserTabRef) {
573
+ await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
574
+ await raceWithDisconnect(ensureLoggedIn(Runtime, logger));
575
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
312
576
  }
313
577
  else {
314
- await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
578
+ const baseUrl = CHATGPT_URL;
579
+ // First load the base ChatGPT homepage to satisfy potential interstitials,
580
+ // then hop to the requested URL if it differs.
581
+ await raceWithDisconnect(navigateToChatGPT(Page, Runtime, baseUrl, logger));
582
+ await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
583
+ // Learned: login checks must happen on the base domain before jumping into project URLs.
584
+ await raceWithDisconnect(waitForLogin({
585
+ runtime: Runtime,
586
+ logger,
587
+ appliedCookies,
588
+ manualLogin,
589
+ timeoutMs: config.timeoutMs,
590
+ }));
591
+ if (config.url !== baseUrl) {
592
+ await raceWithDisconnect(navigateToPromptReadyWithFallback(Page, Runtime, {
593
+ url: config.url,
594
+ fallbackUrl: baseUrl,
595
+ timeoutMs: config.inputTimeoutMs,
596
+ headless: config.headless,
597
+ logger,
598
+ }));
599
+ }
600
+ else {
601
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
602
+ }
315
603
  }
316
604
  logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`);
317
605
  const captureRuntimeSnapshot = async () => {
@@ -413,9 +701,10 @@ export async function runBrowserMode(options) {
413
701
  else if (modelStrategy === "ignore") {
414
702
  logger("Model picker: skipped (strategy=ignore)");
415
703
  }
416
- // Handle thinking time selection if specified
704
+ const deepResearch = config.researchMode === "deep";
705
+ // Handle thinking time selection if specified. Deep Research owns its own effort flow.
417
706
  const thinkingTime = config.thinkingTime;
418
- if (thinkingTime) {
707
+ if (thinkingTime && !deepResearch) {
419
708
  await raceWithDisconnect(withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
420
709
  retries: 2,
421
710
  delayMs: 300,
@@ -426,6 +715,19 @@ export async function runBrowserMode(options) {
426
715
  },
427
716
  }));
428
717
  }
718
+ if (deepResearch) {
719
+ await raceWithDisconnect(withRetries(() => activateDeepResearch(Runtime, Input, logger), {
720
+ retries: 2,
721
+ delayMs: 500,
722
+ onRetry: (attempt, error) => {
723
+ if (options.verbose) {
724
+ logger(`[retry] Deep Research activation attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
725
+ }
726
+ },
727
+ }));
728
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
729
+ logger(`Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`);
730
+ }
429
731
  const profileLockTimeoutMs = manualLogin ? (config.profileLockTimeoutMs ?? 0) : 0;
430
732
  let profileLock = null;
431
733
  const acquireProfileLockIfNeeded = async () => {
@@ -448,6 +750,8 @@ export async function runBrowserMode(options) {
448
750
  const baselineAssistantText = typeof baselineSnapshot?.text === "string" ? baselineSnapshot.text.trim() : "";
449
751
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
450
752
  let inputOnlyAttachments = false;
753
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
754
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
451
755
  if (submissionAttachments.length > 0) {
452
756
  if (!DOM) {
453
757
  throw new Error("Chrome DOM domain unavailable while uploading attachments.");
@@ -540,7 +844,56 @@ export async function runBrowserMode(options) {
540
844
  finally {
541
845
  await releaseProfileLockIfHeld();
542
846
  }
543
- stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
847
+ const imageArtifactMinTurnIndex = baselineTurns;
848
+ if (deepResearch) {
849
+ await raceWithDisconnect(waitForResearchPlanAutoConfirm(Runtime, logger));
850
+ const researchResult = await raceWithDisconnect(waitForDeepResearchCompletion(Runtime, logger, config.timeoutMs, baselineTurns, Page, client));
851
+ await updateConversationHint("post-deep-research", 15_000).catch(() => false);
852
+ runStatus = "complete";
853
+ const durationMs = Date.now() - startedAt;
854
+ const tokens = estimateTokenCount(researchResult.text);
855
+ const reportArtifact = await saveOptionalArtifact(() => saveDeepResearchReportArtifact({
856
+ sessionId: options.sessionId,
857
+ reportMarkdown: researchResult.text,
858
+ conversationUrl: lastUrl,
859
+ logger,
860
+ }), logger);
861
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
862
+ sessionId: options.sessionId,
863
+ prompt: promptText,
864
+ answerMarkdown: researchResult.text,
865
+ conversationUrl: lastUrl,
866
+ artifacts: appendArtifacts(undefined, [reportArtifact]),
867
+ logger,
868
+ }), logger);
869
+ const savedArtifacts = appendArtifacts(undefined, [reportArtifact, transcriptArtifact]);
870
+ const archive = await maybeArchiveCompletedConversation({
871
+ Runtime,
872
+ logger,
873
+ config,
874
+ conversationUrl: lastUrl,
875
+ followUpCount: 0,
876
+ requiredArtifactsSaved: Boolean(reportArtifact && transcriptArtifact),
877
+ });
878
+ return {
879
+ answerText: researchResult.text,
880
+ answerMarkdown: researchResult.text,
881
+ answerHtml: researchResult.html,
882
+ artifacts: savedArtifacts,
883
+ archive,
884
+ tookMs: durationMs,
885
+ answerTokens: tokens,
886
+ answerChars: researchResult.text.length,
887
+ chromePid: chrome.pid,
888
+ chromePort: chrome.port,
889
+ chromeHost,
890
+ userDataDir,
891
+ chromeTargetId: lastTargetId,
892
+ tabUrl: lastUrl,
893
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
894
+ controllerPid: process.pid,
895
+ };
896
+ }
544
897
  // Helper to normalize text for echo detection (collapse whitespace, lowercase)
545
898
  const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, " ").trim();
546
899
  const expectedConversationId = () => lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -571,7 +924,19 @@ export async function runBrowserMode(options) {
571
924
  }
572
925
  return null;
573
926
  };
574
- let answer;
927
+ const waitWithThinkingMonitor = async (operation) => {
928
+ stopThinkingMonitor?.();
929
+ stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, {
930
+ intervalMs: options.heartbeatIntervalMs,
931
+ });
932
+ try {
933
+ return await operation();
934
+ }
935
+ finally {
936
+ stopThinkingMonitor?.();
937
+ stopThinkingMonitor = null;
938
+ }
939
+ };
575
940
  const recheckDelayMs = Math.max(0, config.assistantRecheckDelayMs ?? 0);
576
941
  const recheckTimeoutMs = Math.max(0, config.assistantRecheckTimeoutMs ?? 0);
577
942
  const attemptAssistantRecheck = async () => {
@@ -615,172 +980,280 @@ export async function runBrowserMode(options) {
615
980
  });
616
981
  }
617
982
  const timeoutMs = recheckTimeoutMs > 0 ? recheckTimeoutMs : config.timeoutMs;
618
- const rechecked = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()));
983
+ const rechecked = await waitWithThinkingMonitor(() => raceWithDisconnect(waitForAssistantOrGeneratedImageResponse({
984
+ Runtime,
985
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
986
+ timeoutMs,
987
+ logger,
988
+ minTurnIndex: baselineTurns ?? undefined,
989
+ expectedConversationId: expectedConversationId(),
990
+ imageOutputRequested,
991
+ })));
619
992
  logger("Recovered assistant response after delayed recheck");
620
993
  return rechecked;
621
994
  };
622
- try {
623
- await updateConversationHint("assistant-wait", 15_000).catch(() => false);
624
- answer = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()));
625
- }
626
- catch (error) {
627
- if (isAssistantResponseTimeoutError(error)) {
628
- const rechecked = await attemptAssistantRecheck().catch(() => null);
629
- if (rechecked) {
630
- answer = rechecked;
995
+ const imageOutputRequested = Boolean(options.generateImagePath ||
996
+ options.outputPath ||
997
+ options.generateImage);
998
+ const captureAssistantTurn = async (turnPrompt, label) => {
999
+ let turnAnswer;
1000
+ try {
1001
+ await updateConversationHint("assistant-wait", 15_000).catch(() => false);
1002
+ turnAnswer = await waitWithThinkingMonitor(() => raceWithDisconnect(waitForAssistantOrGeneratedImageResponse({
1003
+ Runtime,
1004
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
1005
+ timeoutMs: config.timeoutMs,
1006
+ logger,
1007
+ minTurnIndex: baselineTurns ?? undefined,
1008
+ expectedConversationId: expectedConversationId(),
1009
+ imageOutputRequested,
1010
+ })));
1011
+ }
1012
+ catch (error) {
1013
+ if (isAssistantResponseTimeoutError(error)) {
1014
+ const rechecked = await attemptAssistantRecheckOrRethrow(attemptAssistantRecheck);
1015
+ if (rechecked) {
1016
+ turnAnswer = rechecked;
1017
+ }
1018
+ else {
1019
+ await updateConversationHint("assistant-timeout", 15_000).catch(() => false);
1020
+ await captureRuntimeSnapshot().catch(() => undefined);
1021
+ const diagnostics = await captureBrowserDiagnostics(Runtime, logger, "assistant-timeout", {
1022
+ Page,
1023
+ sessionId: options.sessionId,
1024
+ }).catch(() => undefined);
1025
+ const runtime = {
1026
+ chromePid: chrome.pid,
1027
+ chromePort: chrome.port,
1028
+ chromeHost,
1029
+ userDataDir,
1030
+ chromeTargetId: lastTargetId,
1031
+ tabUrl: lastUrl,
1032
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1033
+ controllerPid: process.pid,
1034
+ };
1035
+ throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime, diagnostics }, error);
1036
+ }
631
1037
  }
632
1038
  else {
633
- await updateConversationHint("assistant-timeout", 15_000).catch(() => false);
634
- await captureRuntimeSnapshot().catch(() => undefined);
635
- const runtime = {
636
- chromePid: chrome.pid,
637
- chromePort: chrome.port,
638
- chromeHost,
639
- userDataDir,
640
- chromeTargetId: lastTargetId,
641
- tabUrl: lastUrl,
642
- conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
643
- controllerPid: process.pid,
644
- };
645
- throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime }, error);
1039
+ throw error;
646
1040
  }
647
1041
  }
648
- else {
649
- throw error;
650
- }
651
- }
652
- // Ensure we store the final conversation URL even if the UI updated late.
653
- await updateConversationHint("post-response", 15_000);
654
- const baselineNormalized = baselineAssistantText
655
- ? normalizeForComparison(baselineAssistantText)
656
- : "";
657
- if (baselineNormalized) {
658
- const normalizedAnswer = normalizeForComparison(answer.text ?? "");
659
- const baselinePrefix = baselineNormalized.length >= 80
660
- ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
1042
+ // Ensure we store the final conversation URL even if the UI updated late.
1043
+ await updateConversationHint("post-response", 15_000);
1044
+ const baselineNormalized = baselineAssistantText
1045
+ ? normalizeForComparison(baselineAssistantText)
661
1046
  : "";
662
- const isBaseline = normalizedAnswer === baselineNormalized ||
663
- (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
664
- if (isBaseline) {
665
- logger("Detected stale assistant response; waiting for new response...");
666
- const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
667
- if (refreshed) {
668
- answer = refreshed;
1047
+ if (baselineNormalized) {
1048
+ const normalizedAnswer = normalizeForComparison(turnAnswer.text ?? "");
1049
+ const baselinePrefix = baselineNormalized.length >= 80
1050
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
1051
+ : "";
1052
+ const isBaseline = normalizedAnswer === baselineNormalized ||
1053
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
1054
+ if (isBaseline) {
1055
+ logger("Detected stale assistant response; waiting for new response...");
1056
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
1057
+ if (refreshed) {
1058
+ turnAnswer = refreshed;
1059
+ }
669
1060
  }
670
1061
  }
671
- }
672
- answerText = answer.text;
673
- answerHtml = answer.html ?? "";
674
- const copiedMarkdown = await raceWithDisconnect(withRetries(async () => {
675
- const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger);
676
- if (!attempt) {
677
- throw new Error("copy-missing");
678
- }
679
- return attempt;
680
- }, {
681
- retries: 2,
682
- delayMs: 350,
683
- onRetry: (attempt, error) => {
684
- if (options.verbose) {
685
- logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1062
+ let turnAnswerText = turnAnswer.text;
1063
+ const turnAnswerHtml = turnAnswer.html ?? "";
1064
+ const copiedMarkdown = await raceWithDisconnect(withRetries(async () => {
1065
+ const attempt = await captureAssistantMarkdown(Runtime, turnAnswer.meta, logger);
1066
+ if (!attempt) {
1067
+ throw new Error("copy-missing");
1068
+ }
1069
+ return attempt;
1070
+ }, {
1071
+ retries: 2,
1072
+ delayMs: 350,
1073
+ onRetry: (attempt, error) => {
1074
+ if (options.verbose) {
1075
+ logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1076
+ }
1077
+ },
1078
+ })).catch(() => null);
1079
+ let turnAnswerMarkdown = copiedMarkdown ?? turnAnswerText;
1080
+ const promptEchoMatcher = buildPromptEchoMatcher(turnPrompt);
1081
+ ({ answerText: turnAnswerText, answerMarkdown: turnAnswerMarkdown } =
1082
+ await maybeRecoverLongAssistantResponse({
1083
+ runtime: Runtime,
1084
+ baselineTurns,
1085
+ answerText: turnAnswerText,
1086
+ answerMarkdown: turnAnswerMarkdown,
1087
+ logger,
1088
+ allowMarkdownUpdate: !copiedMarkdown,
1089
+ }));
1090
+ // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
1091
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1092
+ const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
1093
+ if (finalText && finalText !== turnPrompt.trim()) {
1094
+ const trimmedMarkdown = turnAnswerMarkdown.trim();
1095
+ const finalIsEcho = promptEchoMatcher ? promptEchoMatcher.isEcho(finalText) : false;
1096
+ const lengthDelta = finalText.length - trimmedMarkdown.length;
1097
+ const missingCopy = !copiedMarkdown && lengthDelta >= 0;
1098
+ const likelyTruncatedCopy = copiedMarkdown &&
1099
+ trimmedMarkdown.length > 0 &&
1100
+ lengthDelta >= Math.max(12, Math.floor(trimmedMarkdown.length * 0.75));
1101
+ if ((missingCopy || likelyTruncatedCopy) && !finalIsEcho && finalText !== trimmedMarkdown) {
1102
+ logger("Refreshed assistant response via final DOM snapshot");
1103
+ turnAnswerText = finalText;
1104
+ turnAnswerMarkdown = finalText;
686
1105
  }
687
- },
688
- })).catch(() => null);
689
- answerMarkdown = copiedMarkdown ?? answerText;
690
- const promptEchoMatcher = buildPromptEchoMatcher(promptText);
691
- ({ answerText, answerMarkdown } = await maybeRecoverLongAssistantResponse({
692
- runtime: Runtime,
693
- baselineTurns,
694
- answerText,
695
- answerMarkdown,
696
- logger,
697
- allowMarkdownUpdate: !copiedMarkdown,
698
- }));
699
- // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
700
- const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
701
- const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
702
- if (finalText && finalText !== promptText.trim()) {
703
- const trimmedMarkdown = answerMarkdown.trim();
704
- const finalIsEcho = promptEchoMatcher ? promptEchoMatcher.isEcho(finalText) : false;
705
- const lengthDelta = finalText.length - trimmedMarkdown.length;
706
- const missingCopy = !copiedMarkdown && lengthDelta >= 0;
707
- const likelyTruncatedCopy = copiedMarkdown &&
708
- trimmedMarkdown.length > 0 &&
709
- lengthDelta >= Math.max(12, Math.floor(trimmedMarkdown.length * 0.75));
710
- if ((missingCopy || likelyTruncatedCopy) && !finalIsEcho && finalText !== trimmedMarkdown) {
711
- logger("Refreshed assistant response via final DOM snapshot");
712
- answerText = finalText;
713
- answerMarkdown = finalText;
714
1106
  }
715
- }
716
- // Detect prompt echo using normalized comparison (whitespace-insensitive).
717
- const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
718
- text: "Aligned assistant response text to copied markdown after prompt echo",
719
- markdown: "Aligned assistant markdown to response text after prompt echo",
720
- });
721
- answerText = alignedEcho.answerText;
722
- answerMarkdown = alignedEcho.answerMarkdown;
723
- const isPromptEcho = alignedEcho.isEcho;
724
- if (isPromptEcho) {
725
- logger("Detected prompt echo in response; waiting for actual assistant response...");
726
- const deadline = Date.now() + 15_000;
727
- let bestText = null;
728
- let stableCount = 0;
729
- while (Date.now() < deadline) {
730
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
731
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
732
- const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
733
- if (!isStillEcho) {
734
- if (!bestText || text.length > bestText.length) {
1107
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
1108
+ const alignedEcho = alignPromptEchoPair(turnAnswerText, turnAnswerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
1109
+ text: "Aligned assistant response text to copied markdown after prompt echo",
1110
+ markdown: "Aligned assistant markdown to response text after prompt echo",
1111
+ });
1112
+ turnAnswerText = alignedEcho.answerText;
1113
+ turnAnswerMarkdown = alignedEcho.answerMarkdown;
1114
+ const isPromptEcho = alignedEcho.isEcho;
1115
+ if (isPromptEcho) {
1116
+ logger("Detected prompt echo in response; waiting for actual assistant response...");
1117
+ const deadline = Date.now() + 15_000;
1118
+ let bestText = null;
1119
+ let stableCount = 0;
1120
+ while (Date.now() < deadline) {
1121
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1122
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1123
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
1124
+ if (!isStillEcho) {
1125
+ if (!bestText || text.length > bestText.length) {
1126
+ bestText = text;
1127
+ stableCount = 0;
1128
+ }
1129
+ else if (text === bestText) {
1130
+ stableCount += 1;
1131
+ }
1132
+ if (stableCount >= 2) {
1133
+ break;
1134
+ }
1135
+ }
1136
+ await new Promise((resolve) => setTimeout(resolve, 300));
1137
+ }
1138
+ if (bestText) {
1139
+ logger("Recovered assistant response after detecting prompt echo");
1140
+ turnAnswerText = bestText;
1141
+ turnAnswerMarkdown = bestText;
1142
+ }
1143
+ }
1144
+ const minAnswerChars = 16;
1145
+ if (turnAnswerText.trim().length > 0 && turnAnswerText.trim().length < minAnswerChars) {
1146
+ const deadline = Date.now() + 12_000;
1147
+ let bestText = turnAnswerText.trim();
1148
+ let stableCycles = 0;
1149
+ while (Date.now() < deadline) {
1150
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1151
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1152
+ if (text && text.length > bestText.length) {
735
1153
  bestText = text;
736
- stableCount = 0;
1154
+ stableCycles = 0;
737
1155
  }
738
- else if (text === bestText) {
739
- stableCount += 1;
1156
+ else {
1157
+ stableCycles += 1;
740
1158
  }
741
- if (stableCount >= 2) {
1159
+ if (stableCycles >= 3 && bestText.length >= minAnswerChars) {
742
1160
  break;
743
1161
  }
1162
+ await delay(400);
744
1163
  }
745
- await new Promise((resolve) => setTimeout(resolve, 300));
746
- }
747
- if (bestText) {
748
- logger("Recovered assistant response after detecting prompt echo");
749
- answerText = bestText;
750
- answerMarkdown = bestText;
751
- }
752
- }
753
- const minAnswerChars = 16;
754
- if (answerText.trim().length > 0 && answerText.trim().length < minAnswerChars) {
755
- const deadline = Date.now() + 12_000;
756
- let bestText = answerText.trim();
757
- let stableCycles = 0;
758
- while (Date.now() < deadline) {
759
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
760
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
761
- if (text && text.length > bestText.length) {
762
- bestText = text;
763
- stableCycles = 0;
764
- }
765
- else {
766
- stableCycles += 1;
767
- }
768
- if (stableCycles >= 3 && bestText.length >= minAnswerChars) {
769
- break;
1164
+ if (bestText.length > turnAnswerText.trim().length) {
1165
+ logger("Refreshed short assistant response from latest DOM snapshot");
1166
+ turnAnswerText = bestText;
1167
+ turnAnswerMarkdown = bestText;
770
1168
  }
771
- await delay(400);
772
1169
  }
773
- if (bestText.length > answerText.trim().length) {
774
- logger("Refreshed short assistant response from latest DOM snapshot");
775
- answerText = bestText;
776
- answerMarkdown = bestText;
1170
+ return {
1171
+ label,
1172
+ answerText: turnAnswerText,
1173
+ answerMarkdown: turnAnswerMarkdown,
1174
+ answerHtml: turnAnswerHtml,
1175
+ };
1176
+ };
1177
+ const turns = [];
1178
+ const initialTurn = await captureAssistantTurn(promptText, "Initial response");
1179
+ turns.push(initialTurn);
1180
+ answerText = initialTurn.answerText;
1181
+ answerMarkdown = initialTurn.answerMarkdown;
1182
+ answerHtml = initialTurn.answerHtml;
1183
+ for (let index = 0; index < followUpPrompts.length; index += 1) {
1184
+ const followUpPrompt = followUpPrompts[index];
1185
+ logger(`[browser] Sending follow-up ${index + 1}/${followUpPrompts.length}`);
1186
+ await acquireProfileLockIfNeeded();
1187
+ try {
1188
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
1189
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
1190
+ const submission = await runSubmissionWithRecovery({
1191
+ prompt: followUpPrompt,
1192
+ attachments: [],
1193
+ submit: (submissionPrompt, submissionAttachments) => raceWithDisconnect(submitOnce(submissionPrompt, submissionAttachments)),
1194
+ reloadPromptComposer,
1195
+ prepareFallbackSubmission: async () => {
1196
+ await raceWithDisconnect(clearPromptComposer(Runtime, logger));
1197
+ await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
1198
+ },
1199
+ logger,
1200
+ });
1201
+ baselineTurns = submission.baselineTurns;
1202
+ baselineAssistantText = submission.baselineAssistantText;
1203
+ }
1204
+ finally {
1205
+ await releaseProfileLockIfHeld();
777
1206
  }
1207
+ const turn = await captureAssistantTurn(followUpPrompt, `Follow-up ${index + 1}`);
1208
+ turns.push({ ...turn, prompt: followUpPrompt });
1209
+ answerText = turn.answerText;
1210
+ answerMarkdown = turn.answerMarkdown;
1211
+ answerHtml = turn.answerHtml;
1212
+ }
1213
+ if (turns.length > 1) {
1214
+ const formatted = formatBrowserTurnTranscript(turns);
1215
+ answerText = formatted.answerText;
1216
+ answerMarkdown = formatted.answerMarkdown;
1217
+ answerHtml = "";
778
1218
  }
779
1219
  if (connectionClosedUnexpectedly) {
780
1220
  // Bail out on mid-run disconnects so the session stays reattachable.
781
1221
  throw new Error("Chrome disconnected before completion");
782
1222
  }
783
- stopThinkingMonitor?.();
1223
+ const imageArtifacts = await collectGeneratedImageArtifacts({
1224
+ Runtime,
1225
+ Network,
1226
+ logger,
1227
+ minTurnIndex: imageArtifactMinTurnIndex,
1228
+ sessionId: options.sessionId,
1229
+ generateImagePath: options.generateImagePath,
1230
+ outputPath: options.outputPath,
1231
+ answerText,
1232
+ waitTimeoutMs: options.config?.timeoutMs,
1233
+ });
1234
+ answerText = imageArtifacts.answerText || answerText;
1235
+ if (imageArtifacts.markdownSuffix) {
1236
+ answerMarkdown += imageArtifacts.markdownSuffix;
1237
+ }
1238
+ const savedImageArtifacts = appendArtifacts(undefined, imageArtifacts.savedImages);
1239
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
1240
+ sessionId: options.sessionId,
1241
+ prompt: promptText,
1242
+ answerMarkdown,
1243
+ conversationUrl: lastUrl,
1244
+ artifacts: savedImageArtifacts,
1245
+ logger,
1246
+ }), logger);
1247
+ const savedArtifacts = appendArtifacts(savedImageArtifacts, [transcriptArtifact]);
1248
+ const archive = await maybeArchiveCompletedConversation({
1249
+ Runtime,
1250
+ logger,
1251
+ config,
1252
+ conversationUrl: lastUrl,
1253
+ followUpCount: followUpPrompts.length,
1254
+ requiredArtifactsSaved: Boolean(transcriptArtifact) &&
1255
+ imageArtifacts.savedImages.length === imageArtifacts.imageCount,
1256
+ });
784
1257
  runStatus = "complete";
785
1258
  const durationMs = Date.now() - startedAt;
786
1259
  const answerChars = answerText.length;
@@ -789,6 +1262,10 @@ export async function runBrowserMode(options) {
789
1262
  answerText,
790
1263
  answerMarkdown,
791
1264
  answerHtml: answerHtml.length > 0 ? answerHtml : undefined,
1265
+ artifacts: savedArtifacts,
1266
+ generatedImages: imageArtifacts.generatedImages,
1267
+ savedImages: imageArtifacts.savedImages,
1268
+ archive,
792
1269
  tookMs: durationMs,
793
1270
  answerTokens,
794
1271
  answerChars,
@@ -798,15 +1275,16 @@ export async function runBrowserMode(options) {
798
1275
  userDataDir,
799
1276
  chromeTargetId: lastTargetId,
800
1277
  tabUrl: lastUrl,
1278
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
801
1279
  controllerPid: process.pid,
802
1280
  };
803
1281
  }
804
1282
  catch (error) {
805
1283
  const normalizedError = error instanceof Error ? error : new Error(String(error));
806
- stopThinkingMonitor?.();
807
1284
  const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError);
808
1285
  connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed;
809
- if (shouldPreserveBrowserOnError(normalizedError, config.headless)) {
1286
+ const preservedErrorKind = classifyPreservedBrowserError(normalizedError, config.headless);
1287
+ if (preservedErrorKind === "cloudflare-challenge") {
810
1288
  preserveBrowserOnError = true;
811
1289
  const runtime = {
812
1290
  chromePid: chrome.pid,
@@ -828,6 +1306,12 @@ export async function runBrowserMode(options) {
828
1306
  reuseProfileHint,
829
1307
  }, normalizedError);
830
1308
  }
1309
+ if (preservedErrorKind === "reattachable-capture") {
1310
+ preserveBrowserOnError = true;
1311
+ await emitRuntimeHint();
1312
+ logger("Assistant capture incomplete; leaving browser open for reattach.");
1313
+ throw normalizedError;
1314
+ }
831
1315
  if (!socketClosed) {
832
1316
  logger(`Failed to complete ChatGPT run: ${normalizedError.message}`);
833
1317
  if ((config.debug || process.env.CHATGPT_DEVTOOLS_TRACE === "1") && normalizedError.stack) {
@@ -865,16 +1349,70 @@ export async function runBrowserMode(options) {
865
1349
  // Close the isolated tab once the response has been fully captured to prevent
866
1350
  // tab accumulation across repeated runs. Keep the tab open on incomplete runs
867
1351
  // so reattach can recover the response.
868
- if (runStatus === "complete" && isolatedTargetId && chrome?.port) {
1352
+ if (shouldCloseOwnedRunTargetAfterRun({
1353
+ runStatus,
1354
+ ownsTarget,
1355
+ keepBrowser: effectiveKeepBrowser,
1356
+ }) &&
1357
+ isolatedTargetId &&
1358
+ chrome?.port) {
869
1359
  await closeTab(chrome.port, isolatedTargetId, logger, chromeHost).catch(() => undefined);
870
1360
  }
1361
+ let keepBrowserOpen = effectiveKeepBrowser || preserveBrowserOnError;
1362
+ let cleanupProfileLock = null;
1363
+ let terminatedRecordedChrome = false;
1364
+ let otherActiveBrowserTabLeases = null;
1365
+ const hasOtherActiveLeases = async () => {
1366
+ if (!manualLogin || !tabLease) {
1367
+ return false;
1368
+ }
1369
+ if (otherActiveBrowserTabLeases === null) {
1370
+ otherActiveBrowserTabLeases = await hasOtherActiveBrowserTabLeases(userDataDir, tabLease.id);
1371
+ }
1372
+ return otherActiveBrowserTabLeases;
1373
+ };
1374
+ if (runStatus === "complete" &&
1375
+ manualLogin &&
1376
+ !connectionClosedUnexpectedly &&
1377
+ chrome?.port &&
1378
+ ownsTarget) {
1379
+ const otherLeasesActive = await hasOtherActiveLeases().catch(() => true);
1380
+ if (!otherLeasesActive) {
1381
+ await closeBlankChromeTabs(chrome.port, logger, chromeHost, {
1382
+ excludeTargetIds: [isolatedTargetId, lastTargetId],
1383
+ }).catch(() => undefined);
1384
+ }
1385
+ }
1386
+ if (!keepBrowserOpen && manualLogin && tabLease) {
1387
+ const cleanupLockTimeoutMs = Math.max(0, config.profileLockTimeoutMs ?? 0);
1388
+ if (cleanupLockTimeoutMs > 0) {
1389
+ cleanupProfileLock = await acquireProfileRunLock(userDataDir, {
1390
+ timeoutMs: cleanupLockTimeoutMs,
1391
+ logger,
1392
+ sessionId: options.sessionId,
1393
+ }).catch(() => null);
1394
+ }
1395
+ keepBrowserOpen = await hasOtherActiveLeases().catch(() => false);
1396
+ if (keepBrowserOpen) {
1397
+ logger("[browser] Other ChatGPT tab leases still active; leaving shared Chrome running.");
1398
+ }
1399
+ else if (reusedChrome && !connectionClosedUnexpectedly) {
1400
+ terminatedRecordedChrome = await terminateRecordedChromeForProfile(userDataDir, logger).catch(() => false);
1401
+ }
1402
+ }
1403
+ if (tabLease) {
1404
+ const handle = tabLease;
1405
+ tabLease = null;
1406
+ await handle.release().catch(() => undefined);
1407
+ }
871
1408
  removeDialogHandler?.();
872
1409
  removeTerminationHooks?.();
873
- const keepBrowserOpen = effectiveKeepBrowser || preserveBrowserOnError;
874
1410
  if (!keepBrowserOpen) {
875
1411
  if (!connectionClosedUnexpectedly) {
876
1412
  try {
877
- await chrome.kill();
1413
+ if (!terminatedRecordedChrome) {
1414
+ await chrome.kill();
1415
+ }
878
1416
  }
879
1417
  catch {
880
1418
  // ignore kill failures
@@ -898,8 +1436,16 @@ export async function runBrowserMode(options) {
898
1436
  logger(`Cleanup ${runStatus} • ${totalSeconds.toFixed(1)}s total`);
899
1437
  }
900
1438
  }
901
- else if (!connectionClosedUnexpectedly) {
902
- logger(`Chrome left running on port ${chrome.port} with profile ${userDataDir}`);
1439
+ else {
1440
+ detachKeptChromeProcess(chrome);
1441
+ if (!connectionClosedUnexpectedly) {
1442
+ logger(`Chrome left running on port ${chrome.port} with profile ${userDataDir}`);
1443
+ }
1444
+ }
1445
+ if (cleanupProfileLock) {
1446
+ const handle = cleanupProfileLock;
1447
+ cleanupProfileLock = null;
1448
+ await handle.release().catch(() => undefined);
903
1449
  }
904
1450
  }
905
1451
  }
@@ -1025,6 +1571,51 @@ async function _assertNavigatedToHttp(runtime, _logger, timeoutMs = 10_000) {
1025
1571
  details: { url: lastUrl || "(empty)" },
1026
1572
  });
1027
1573
  }
1574
+ function detachKeptChromeProcess(chrome) {
1575
+ try {
1576
+ chrome.process?.unref();
1577
+ }
1578
+ catch {
1579
+ // Best-effort only; cleanup should not mask the original browser result.
1580
+ }
1581
+ }
1582
+ async function acquireManualLoginChromeForRun(userDataDir, config, logger, sessionId, deps = {}) {
1583
+ const maybeReuse = deps.maybeReuse ?? maybeReuseRunningChrome;
1584
+ const launch = deps.launch ?? launchChrome;
1585
+ const lockTimeoutMs = Math.max(0, config.profileLockTimeoutMs ?? 0);
1586
+ let launchLock = null;
1587
+ if (lockTimeoutMs > 0) {
1588
+ launchLock = await acquireProfileRunLock(userDataDir, {
1589
+ timeoutMs: lockTimeoutMs,
1590
+ logger,
1591
+ sessionId,
1592
+ });
1593
+ }
1594
+ try {
1595
+ const reusedChrome = await maybeReuse(userDataDir, logger, {
1596
+ waitForPortMs: config.reuseChromeWaitMs,
1597
+ });
1598
+ const chrome = reusedChrome ??
1599
+ (await launch({
1600
+ ...config,
1601
+ remoteChrome: config.remoteChrome,
1602
+ }, userDataDir, logger));
1603
+ // Persist while the launch lock is still held so parallel callers reuse
1604
+ // this Chrome instead of racing to start another one on the same profile.
1605
+ if (chrome.port) {
1606
+ await writeDevToolsActivePort(userDataDir, chrome.port);
1607
+ if (!reusedChrome && chrome.pid) {
1608
+ await writeChromePid(userDataDir, chrome.pid);
1609
+ }
1610
+ }
1611
+ return { chrome, reusedChrome };
1612
+ }
1613
+ finally {
1614
+ if (launchLock) {
1615
+ await launchLock.release().catch(() => undefined);
1616
+ }
1617
+ }
1618
+ }
1028
1619
  async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1029
1620
  const waitForPortMs = Math.max(0, options.waitForPortMs ?? 0);
1030
1621
  let port = await readDevToolsPort(userDataDir);
@@ -1036,8 +1627,30 @@ async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1036
1627
  port = await readDevToolsPort(userDataDir);
1037
1628
  }
1038
1629
  }
1039
- if (!port)
1040
- return null;
1630
+ let pid = await readChromePid(userDataDir);
1631
+ if (!port) {
1632
+ const discovered = await findRunningChromeDebugTargetForProfile(userDataDir);
1633
+ if (!discovered)
1634
+ return null;
1635
+ const discoveredProbe = await (options.probe ?? verifyDevToolsReachable)({
1636
+ port: discovered.port,
1637
+ });
1638
+ if (!discoveredProbe.ok) {
1639
+ logger(`Discovered Chrome for ${userDataDir} on port ${discovered.port} but it was unreachable (${discoveredProbe.error}); launching new Chrome.`);
1640
+ return null;
1641
+ }
1642
+ await writeDevToolsActivePort(userDataDir, discovered.port);
1643
+ await writeChromePid(userDataDir, discovered.pid);
1644
+ port = discovered.port;
1645
+ pid = discovered.pid;
1646
+ logger(`Discovered running Chrome for ${userDataDir}; reusing (DevTools port ${port}, pid ${pid})`);
1647
+ return {
1648
+ port,
1649
+ pid,
1650
+ kill: async () => { },
1651
+ process: undefined,
1652
+ };
1653
+ }
1041
1654
  const probe = await (options.probe ?? verifyDevToolsReachable)({ port });
1042
1655
  if (!probe.ok) {
1043
1656
  logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${probe.error}); launching new Chrome.`);
@@ -1045,7 +1658,6 @@ async function maybeReuseRunningChrome(userDataDir, logger, options = {}) {
1045
1658
  await cleanupStaleProfileState(userDataDir, logger, { lockRemovalMode: "if_oracle_pid_dead" });
1046
1659
  return null;
1047
1660
  }
1048
- const pid = await readChromePid(userDataDir);
1049
1661
  logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ""})`);
1050
1662
  return {
1051
1663
  port,
@@ -1063,7 +1675,10 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1063
1675
  logger(`Connecting to remote Chrome at ${host}:${port}`);
1064
1676
  let client = null;
1065
1677
  let remoteTargetId = null;
1678
+ let tabLease = null;
1066
1679
  let lastUrl;
1680
+ let attachedExistingTab = false;
1681
+ let ownsTarget = true;
1067
1682
  const runtimeHintCb = options.runtimeHintCb;
1068
1683
  const emitRuntimeHint = async () => {
1069
1684
  if (!runtimeHintCb)
@@ -1072,10 +1687,19 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1072
1687
  await runtimeHintCb({
1073
1688
  chromePort: port,
1074
1689
  chromeHost: host,
1690
+ chromeBrowserWSEndpoint: browserWSEndpoint,
1691
+ chromeProfileRoot,
1075
1692
  chromeTargetId: remoteTargetId ?? undefined,
1076
1693
  tabUrl: lastUrl,
1694
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1077
1695
  controllerPid: process.pid,
1078
1696
  });
1697
+ await tabLease?.update({
1698
+ chromeHost: host,
1699
+ chromePort: port,
1700
+ chromeTargetId: remoteTargetId ?? undefined,
1701
+ tabUrl: lastUrl,
1702
+ });
1079
1703
  }
1080
1704
  catch (error) {
1081
1705
  const message = error instanceof Error ? error.message : String(error);
@@ -1087,12 +1711,55 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1087
1711
  let answerMarkdown = "";
1088
1712
  let answerHtml = "";
1089
1713
  let connectionClosedUnexpectedly = false;
1714
+ let runStatus = "attempted";
1090
1715
  let stopThinkingMonitor = null;
1091
1716
  let removeDialogHandler = null;
1717
+ let connection = null;
1718
+ const browserWSEndpoint = config.remoteChromeBrowserWSEndpoint ?? undefined;
1719
+ const chromeProfileRoot = config.remoteChromeProfileRoot ?? undefined;
1092
1720
  try {
1093
- const connection = await connectToRemoteChrome(host, port, logger, config.url);
1094
- client = connection.client;
1095
- remoteTargetId = connection.targetId ?? null;
1721
+ const remoteLeaseProfileDir = config.browserTabRef
1722
+ ? null
1723
+ : resolveRemoteTabLeaseProfileDir(config);
1724
+ if (remoteLeaseProfileDir) {
1725
+ await mkdir(remoteLeaseProfileDir, { recursive: true });
1726
+ tabLease = await acquireBrowserTabLease(remoteLeaseProfileDir, {
1727
+ maxConcurrentTabs: config.maxConcurrentTabs,
1728
+ timeoutMs: config.timeoutMs,
1729
+ logger,
1730
+ sessionId: options.sessionId,
1731
+ chromeHost: host,
1732
+ chromePort: port,
1733
+ });
1734
+ }
1735
+ if (config.browserTabRef) {
1736
+ const attached = await connectToExistingChatGptTab({
1737
+ host,
1738
+ port,
1739
+ ref: config.browserTabRef,
1740
+ });
1741
+ client = attached.client;
1742
+ remoteTargetId = attached.targetId ?? null;
1743
+ lastUrl = attached.tab.url || lastUrl;
1744
+ attachedExistingTab = true;
1745
+ ownsTarget = false;
1746
+ logger(`Attached to existing remote ChatGPT tab ${attached.targetId}${attached.tab.url ? ` (${attached.tab.url})` : ""}`);
1747
+ }
1748
+ else {
1749
+ connection = await connectToRemoteChrome(host, port, logger, config.url, browserWSEndpoint, {
1750
+ approvalWaitMs: config.attachRunning && browserWSEndpoint ? 20_000 : undefined,
1751
+ });
1752
+ client = connection.client;
1753
+ remoteTargetId = connection.targetId ?? null;
1754
+ ownsTarget = true;
1755
+ }
1756
+ if (tabLease && remoteTargetId) {
1757
+ await tabLease.update({
1758
+ chromeHost: host,
1759
+ chromePort: port,
1760
+ chromeTargetId: remoteTargetId,
1761
+ });
1762
+ }
1096
1763
  await emitRuntimeHint();
1097
1764
  const markConnectionLost = () => {
1098
1765
  connectionClosedUnexpectedly = true;
@@ -1107,10 +1774,17 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1107
1774
  removeDialogHandler = installJavaScriptDialogAutoDismissal(Page, logger);
1108
1775
  // Skip cookie sync for remote Chrome - it already has cookies
1109
1776
  logger("Skipping cookie sync for remote Chrome (using existing session)");
1110
- await navigateToChatGPT(Page, Runtime, config.url, logger);
1111
- await ensureNotBlocked(Runtime, config.headless, logger);
1112
- await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1113
- await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1777
+ if (!attachedExistingTab) {
1778
+ await navigateToChatGPT(Page, Runtime, config.url, logger);
1779
+ await ensureNotBlocked(Runtime, config.headless, logger);
1780
+ await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1781
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1782
+ }
1783
+ else {
1784
+ await ensureNotBlocked(Runtime, config.headless, logger);
1785
+ await ensureLoggedIn(Runtime, logger, { remoteSession: true });
1786
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1787
+ }
1114
1788
  logger(`Prompt textarea ready (initial focus, ${promptText.length.toLocaleString()} chars queued)`);
1115
1789
  try {
1116
1790
  const { result } = await Runtime.evaluate({
@@ -1142,9 +1816,10 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1142
1816
  else if (modelStrategy === "ignore") {
1143
1817
  logger("Model picker: skipped (strategy=ignore)");
1144
1818
  }
1145
- // Handle thinking time selection if specified
1819
+ const deepResearch = config.researchMode === "deep";
1820
+ // Handle thinking time selection if specified. Deep Research owns its own effort flow.
1146
1821
  const thinkingTime = config.thinkingTime;
1147
- if (thinkingTime) {
1822
+ if (thinkingTime && !deepResearch) {
1148
1823
  await withRetries(() => ensureThinkingTime(Runtime, thinkingTime, logger), {
1149
1824
  retries: 2,
1150
1825
  delayMs: 300,
@@ -1155,10 +1830,25 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1155
1830
  },
1156
1831
  });
1157
1832
  }
1833
+ if (deepResearch) {
1834
+ await withRetries(() => activateDeepResearch(Runtime, Input, logger), {
1835
+ retries: 2,
1836
+ delayMs: 500,
1837
+ onRetry: (attempt, error) => {
1838
+ if (options.verbose) {
1839
+ logger(`[retry] Deep Research activation attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1840
+ }
1841
+ },
1842
+ });
1843
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1844
+ logger(`Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`);
1845
+ }
1158
1846
  const submitOnce = async (prompt, submissionAttachments) => {
1159
1847
  const baselineSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
1160
1848
  const baselineAssistantText = typeof baselineSnapshot?.text === "string" ? baselineSnapshot.text.trim() : "";
1161
1849
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
1850
+ await clearPromptComposer(Runtime, logger);
1851
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
1162
1852
  if (submissionAttachments.length > 0) {
1163
1853
  if (!DOM) {
1164
1854
  throw new Error("Chrome DOM domain unavailable while uploading attachments.");
@@ -1221,7 +1911,54 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1221
1911
  });
1222
1912
  baselineTurns = submission.baselineTurns;
1223
1913
  baselineAssistantText = submission.baselineAssistantText;
1224
- stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
1914
+ const imageArtifactMinTurnIndex = baselineTurns;
1915
+ if (deepResearch) {
1916
+ await waitForResearchPlanAutoConfirm(Runtime, logger);
1917
+ const researchResult = await waitForDeepResearchCompletion(Runtime, logger, config.timeoutMs, baselineTurns, Page, client);
1918
+ await emitRuntimeHint();
1919
+ const durationMs = Date.now() - startedAt;
1920
+ const tokens = estimateTokenCount(researchResult.text);
1921
+ const reportArtifact = await saveOptionalArtifact(() => saveDeepResearchReportArtifact({
1922
+ sessionId: options.sessionId,
1923
+ reportMarkdown: researchResult.text,
1924
+ conversationUrl: lastUrl,
1925
+ logger,
1926
+ }), logger);
1927
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
1928
+ sessionId: options.sessionId,
1929
+ prompt: promptText,
1930
+ answerMarkdown: researchResult.text,
1931
+ conversationUrl: lastUrl,
1932
+ artifacts: appendArtifacts(undefined, [reportArtifact]),
1933
+ logger,
1934
+ }), logger);
1935
+ const savedArtifacts = appendArtifacts(undefined, [reportArtifact, transcriptArtifact]);
1936
+ const archive = await maybeArchiveCompletedConversation({
1937
+ Runtime,
1938
+ logger,
1939
+ config,
1940
+ conversationUrl: lastUrl,
1941
+ followUpCount: 0,
1942
+ requiredArtifactsSaved: Boolean(reportArtifact && transcriptArtifact),
1943
+ });
1944
+ runStatus = "complete";
1945
+ return {
1946
+ answerText: researchResult.text,
1947
+ answerMarkdown: researchResult.text,
1948
+ answerHtml: researchResult.html,
1949
+ artifacts: savedArtifacts,
1950
+ archive,
1951
+ tookMs: durationMs,
1952
+ answerTokens: tokens,
1953
+ answerChars: researchResult.text.length,
1954
+ chromePort: port,
1955
+ chromeHost: host,
1956
+ chromeTargetId: remoteTargetId ?? undefined,
1957
+ tabUrl: lastUrl,
1958
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1959
+ controllerPid: process.pid,
1960
+ };
1961
+ }
1225
1962
  // Helper to normalize text for echo detection (collapse whitespace, lowercase)
1226
1963
  const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, " ").trim();
1227
1964
  const expectedConversationId = () => lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined;
@@ -1252,7 +1989,19 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1252
1989
  }
1253
1990
  return null;
1254
1991
  };
1255
- let answer;
1992
+ const waitWithThinkingMonitor = async (operation) => {
1993
+ stopThinkingMonitor?.();
1994
+ stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, {
1995
+ intervalMs: options.heartbeatIntervalMs,
1996
+ });
1997
+ try {
1998
+ return await operation();
1999
+ }
2000
+ finally {
2001
+ stopThinkingMonitor?.();
2002
+ stopThinkingMonitor = null;
2003
+ }
2004
+ };
1256
2005
  const recheckDelayMs = Math.max(0, config.assistantRecheckDelayMs ?? 0);
1257
2006
  const recheckTimeoutMs = Math.max(0, config.assistantRecheckTimeoutMs ?? 0);
1258
2007
  const attemptAssistantRecheck = async () => {
@@ -1285,6 +2034,8 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1285
2034
  runtime: {
1286
2035
  chromeHost: host,
1287
2036
  chromePort: port,
2037
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2038
+ chromeProfileRoot,
1288
2039
  chromeTargetId: remoteTargetId ?? undefined,
1289
2040
  tabUrl: lastUrl,
1290
2041
  conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
@@ -1294,147 +2045,253 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1294
2045
  }
1295
2046
  await emitRuntimeHint();
1296
2047
  const timeoutMs = recheckTimeoutMs > 0 ? recheckTimeoutMs : config.timeoutMs;
1297
- const rechecked = await waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId());
2048
+ const rechecked = await waitWithThinkingMonitor(() => waitForAssistantOrGeneratedImageResponse({
2049
+ Runtime,
2050
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
2051
+ timeoutMs,
2052
+ logger,
2053
+ minTurnIndex: baselineTurns ?? undefined,
2054
+ expectedConversationId: expectedConversationId(),
2055
+ imageOutputRequested,
2056
+ }));
1298
2057
  logger("Recovered assistant response after delayed recheck");
1299
2058
  return rechecked;
1300
2059
  };
1301
- try {
1302
- const conversationUrl = await readConversationUrl(Runtime).catch(() => null);
1303
- if (conversationUrl && isConversationUrl(conversationUrl)) {
1304
- lastUrl = conversationUrl;
1305
- await emitRuntimeHint();
1306
- }
1307
- answer = await waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId());
1308
- }
1309
- catch (error) {
1310
- if (isAssistantResponseTimeoutError(error)) {
1311
- const rechecked = await attemptAssistantRecheck().catch(() => null);
1312
- if (rechecked) {
1313
- answer = rechecked;
2060
+ const imageOutputRequested = Boolean(options.generateImagePath ||
2061
+ options.outputPath ||
2062
+ options.generateImage);
2063
+ const captureAssistantTurn = async (turnPrompt, label) => {
2064
+ let turnAnswer;
2065
+ try {
2066
+ const conversationUrl = await readConversationUrl(Runtime).catch(() => null);
2067
+ if (conversationUrl && isConversationUrl(conversationUrl)) {
2068
+ lastUrl = conversationUrl;
2069
+ await emitRuntimeHint();
1314
2070
  }
1315
- else {
1316
- try {
1317
- const conversationUrl = await readConversationUrl(Runtime);
1318
- if (conversationUrl) {
1319
- lastUrl = conversationUrl;
1320
- }
2071
+ turnAnswer = await waitWithThinkingMonitor(() => waitForAssistantOrGeneratedImageResponse({
2072
+ Runtime,
2073
+ waitForText: () => waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined, expectedConversationId()),
2074
+ timeoutMs: config.timeoutMs,
2075
+ logger,
2076
+ minTurnIndex: baselineTurns ?? undefined,
2077
+ expectedConversationId: expectedConversationId(),
2078
+ imageOutputRequested,
2079
+ }));
2080
+ }
2081
+ catch (error) {
2082
+ if (isAssistantResponseTimeoutError(error)) {
2083
+ const rechecked = await attemptAssistantRecheckOrRethrow(attemptAssistantRecheck);
2084
+ if (rechecked) {
2085
+ turnAnswer = rechecked;
1321
2086
  }
1322
- catch {
1323
- // ignore
2087
+ else {
2088
+ try {
2089
+ const conversationUrl = await readConversationUrl(Runtime);
2090
+ if (conversationUrl) {
2091
+ lastUrl = conversationUrl;
2092
+ }
2093
+ }
2094
+ catch {
2095
+ // ignore
2096
+ }
2097
+ await emitRuntimeHint();
2098
+ const diagnostics = await captureBrowserDiagnostics(Runtime, logger, "assistant-timeout", {
2099
+ Page,
2100
+ sessionId: options.sessionId,
2101
+ }).catch(() => undefined);
2102
+ const runtime = {
2103
+ chromePort: port,
2104
+ chromeHost: host,
2105
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2106
+ chromeProfileRoot,
2107
+ chromeTargetId: remoteTargetId ?? undefined,
2108
+ tabUrl: lastUrl,
2109
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
2110
+ controllerPid: process.pid,
2111
+ };
2112
+ throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime, diagnostics }, error);
1324
2113
  }
1325
- await emitRuntimeHint();
1326
- const runtime = {
1327
- chromePort: port,
1328
- chromeHost: host,
1329
- chromeTargetId: remoteTargetId ?? undefined,
1330
- tabUrl: lastUrl,
1331
- conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
1332
- controllerPid: process.pid,
1333
- };
1334
- throw new BrowserAutomationError("Assistant response timed out before completion; reattach later to capture the answer.", { stage: "assistant-timeout", runtime }, error);
2114
+ }
2115
+ else {
2116
+ throw error;
1335
2117
  }
1336
2118
  }
1337
- else {
1338
- throw error;
1339
- }
1340
- }
1341
- const baselineNormalized = baselineAssistantText
1342
- ? normalizeForComparison(baselineAssistantText)
1343
- : "";
1344
- if (baselineNormalized) {
1345
- const normalizedAnswer = normalizeForComparison(answer.text ?? "");
1346
- const baselinePrefix = baselineNormalized.length >= 80
1347
- ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
2119
+ const baselineNormalized = baselineAssistantText
2120
+ ? normalizeForComparison(baselineAssistantText)
1348
2121
  : "";
1349
- const isBaseline = normalizedAnswer === baselineNormalized ||
1350
- (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
1351
- if (isBaseline) {
1352
- logger("Detected stale assistant response; waiting for new response...");
1353
- const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
1354
- if (refreshed) {
1355
- answer = refreshed;
2122
+ if (baselineNormalized) {
2123
+ const normalizedAnswer = normalizeForComparison(turnAnswer.text ?? "");
2124
+ const baselinePrefix = baselineNormalized.length >= 80
2125
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
2126
+ : "";
2127
+ const isBaseline = normalizedAnswer === baselineNormalized ||
2128
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
2129
+ if (isBaseline) {
2130
+ logger("Detected stale assistant response; waiting for new response...");
2131
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
2132
+ if (refreshed) {
2133
+ turnAnswer = refreshed;
2134
+ }
1356
2135
  }
1357
2136
  }
1358
- }
1359
- answerText = answer.text;
1360
- answerHtml = answer.html ?? "";
1361
- const copiedMarkdown = await withRetries(async () => {
1362
- const attempt = await captureAssistantMarkdown(Runtime, answer.meta, logger);
1363
- if (!attempt) {
1364
- throw new Error("copy-missing");
1365
- }
1366
- return attempt;
1367
- }, {
1368
- retries: 2,
1369
- delayMs: 350,
1370
- onRetry: (attempt, error) => {
1371
- if (options.verbose) {
1372
- logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
2137
+ let turnAnswerText = turnAnswer.text;
2138
+ const turnAnswerHtml = turnAnswer.html ?? "";
2139
+ const copiedMarkdown = await withRetries(async () => {
2140
+ const attempt = await captureAssistantMarkdown(Runtime, turnAnswer.meta, logger);
2141
+ if (!attempt) {
2142
+ throw new Error("copy-missing");
1373
2143
  }
1374
- },
1375
- }).catch(() => null);
1376
- answerMarkdown = copiedMarkdown ?? answerText;
1377
- ({ answerText, answerMarkdown } = await maybeRecoverLongAssistantResponse({
1378
- runtime: Runtime,
1379
- baselineTurns,
1380
- answerText,
1381
- answerMarkdown,
1382
- logger,
1383
- allowMarkdownUpdate: !copiedMarkdown,
1384
- }));
1385
- // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
1386
- const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1387
- const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
1388
- if (finalText &&
1389
- finalText !== answerMarkdown.trim() &&
1390
- finalText !== promptText.trim() &&
1391
- finalText.length >= answerMarkdown.trim().length) {
1392
- logger("Refreshed assistant response via final DOM snapshot");
1393
- answerText = finalText;
1394
- answerMarkdown = finalText;
1395
- }
1396
- // Detect prompt echo using normalized comparison (whitespace-insensitive).
1397
- const promptEchoMatcher = buildPromptEchoMatcher(promptText);
1398
- const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
1399
- text: "Aligned assistant response text to copied markdown after prompt echo",
1400
- markdown: "Aligned assistant markdown to response text after prompt echo",
1401
- });
1402
- answerText = alignedEcho.answerText;
1403
- answerMarkdown = alignedEcho.answerMarkdown;
1404
- const isPromptEcho = alignedEcho.isEcho;
1405
- if (isPromptEcho) {
1406
- logger("Detected prompt echo in response; waiting for actual assistant response...");
1407
- const deadline = Date.now() + 15_000;
1408
- let bestText = null;
1409
- let stableCount = 0;
1410
- while (Date.now() < deadline) {
1411
- const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
1412
- const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
1413
- const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
1414
- if (!isStillEcho) {
1415
- if (!bestText || text.length > bestText.length) {
1416
- bestText = text;
1417
- stableCount = 0;
1418
- }
1419
- else if (text === bestText) {
1420
- stableCount += 1;
2144
+ return attempt;
2145
+ }, {
2146
+ retries: 2,
2147
+ delayMs: 350,
2148
+ onRetry: (attempt, error) => {
2149
+ if (options.verbose) {
2150
+ logger(`[retry] Markdown capture attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`);
1421
2151
  }
1422
- if (stableCount >= 2) {
1423
- break;
2152
+ },
2153
+ }).catch(() => null);
2154
+ let turnAnswerMarkdown = copiedMarkdown ?? turnAnswerText;
2155
+ ({ answerText: turnAnswerText, answerMarkdown: turnAnswerMarkdown } =
2156
+ await maybeRecoverLongAssistantResponse({
2157
+ runtime: Runtime,
2158
+ baselineTurns,
2159
+ answerText: turnAnswerText,
2160
+ answerMarkdown: turnAnswerMarkdown,
2161
+ logger,
2162
+ allowMarkdownUpdate: !copiedMarkdown,
2163
+ }));
2164
+ // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
2165
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
2166
+ const finalText = typeof finalSnapshot?.text === "string" ? finalSnapshot.text.trim() : "";
2167
+ if (finalText &&
2168
+ finalText !== turnAnswerMarkdown.trim() &&
2169
+ finalText !== turnPrompt.trim() &&
2170
+ finalText.length >= turnAnswerMarkdown.trim().length) {
2171
+ logger("Refreshed assistant response via final DOM snapshot");
2172
+ turnAnswerText = finalText;
2173
+ turnAnswerMarkdown = finalText;
2174
+ }
2175
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
2176
+ const promptEchoMatcher = buildPromptEchoMatcher(turnPrompt);
2177
+ const alignedEcho = alignPromptEchoPair(turnAnswerText, turnAnswerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
2178
+ text: "Aligned assistant response text to copied markdown after prompt echo",
2179
+ markdown: "Aligned assistant markdown to response text after prompt echo",
2180
+ });
2181
+ turnAnswerText = alignedEcho.answerText;
2182
+ turnAnswerMarkdown = alignedEcho.answerMarkdown;
2183
+ const isPromptEcho = alignedEcho.isEcho;
2184
+ if (isPromptEcho) {
2185
+ logger("Detected prompt echo in response; waiting for actual assistant response...");
2186
+ const deadline = Date.now() + 15_000;
2187
+ let bestText = null;
2188
+ let stableCount = 0;
2189
+ while (Date.now() < deadline) {
2190
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined, expectedConversationId()).catch(() => null);
2191
+ const text = typeof snapshot?.text === "string" ? snapshot.text.trim() : "";
2192
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
2193
+ if (!isStillEcho) {
2194
+ if (!bestText || text.length > bestText.length) {
2195
+ bestText = text;
2196
+ stableCount = 0;
2197
+ }
2198
+ else if (text === bestText) {
2199
+ stableCount += 1;
2200
+ }
2201
+ if (stableCount >= 2) {
2202
+ break;
2203
+ }
1424
2204
  }
2205
+ await new Promise((resolve) => setTimeout(resolve, 300));
2206
+ }
2207
+ if (bestText) {
2208
+ logger("Recovered assistant response after detecting prompt echo");
2209
+ turnAnswerText = bestText;
2210
+ turnAnswerMarkdown = bestText;
1425
2211
  }
1426
- await new Promise((resolve) => setTimeout(resolve, 300));
1427
- }
1428
- if (bestText) {
1429
- logger("Recovered assistant response after detecting prompt echo");
1430
- answerText = bestText;
1431
- answerMarkdown = bestText;
1432
2212
  }
2213
+ return {
2214
+ label,
2215
+ answerText: turnAnswerText,
2216
+ answerMarkdown: turnAnswerMarkdown,
2217
+ answerHtml: turnAnswerHtml,
2218
+ };
2219
+ };
2220
+ const followUpPrompts = normalizeBrowserFollowUpPrompts(options.followUpPrompts);
2221
+ const turns = [];
2222
+ const initialTurn = await captureAssistantTurn(promptText, "Initial response");
2223
+ turns.push(initialTurn);
2224
+ answerText = initialTurn.answerText;
2225
+ answerMarkdown = initialTurn.answerMarkdown;
2226
+ answerHtml = initialTurn.answerHtml;
2227
+ for (let index = 0; index < followUpPrompts.length; index += 1) {
2228
+ const followUpPrompt = followUpPrompts[index];
2229
+ logger(`[browser] Sending follow-up ${index + 1}/${followUpPrompts.length}`);
2230
+ await clearPromptComposer(Runtime, logger);
2231
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
2232
+ const submission = await runSubmissionWithRecovery({
2233
+ prompt: followUpPrompt,
2234
+ attachments: [],
2235
+ submit: submitOnce,
2236
+ reloadPromptComposer,
2237
+ prepareFallbackSubmission: async () => {
2238
+ await clearPromptComposer(Runtime, logger);
2239
+ await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
2240
+ },
2241
+ logger,
2242
+ });
2243
+ baselineTurns = submission.baselineTurns;
2244
+ baselineAssistantText = submission.baselineAssistantText;
2245
+ const turn = await captureAssistantTurn(followUpPrompt, `Follow-up ${index + 1}`);
2246
+ turns.push({ ...turn, prompt: followUpPrompt });
2247
+ answerText = turn.answerText;
2248
+ answerMarkdown = turn.answerMarkdown;
2249
+ answerHtml = turn.answerHtml;
2250
+ }
2251
+ if (turns.length > 1) {
2252
+ const formatted = formatBrowserTurnTranscript(turns);
2253
+ answerText = formatted.answerText;
2254
+ answerMarkdown = formatted.answerMarkdown;
2255
+ answerHtml = "";
2256
+ }
2257
+ const imageArtifacts = await collectGeneratedImageArtifacts({
2258
+ Runtime,
2259
+ Network,
2260
+ logger,
2261
+ minTurnIndex: imageArtifactMinTurnIndex,
2262
+ sessionId: options.sessionId,
2263
+ generateImagePath: options.generateImagePath,
2264
+ outputPath: options.outputPath,
2265
+ answerText,
2266
+ waitTimeoutMs: options.config?.timeoutMs,
2267
+ });
2268
+ answerText = imageArtifacts.answerText || answerText;
2269
+ if (imageArtifacts.markdownSuffix) {
2270
+ answerMarkdown += imageArtifacts.markdownSuffix;
1433
2271
  }
1434
- stopThinkingMonitor?.();
2272
+ const savedImageArtifacts = appendArtifacts(undefined, imageArtifacts.savedImages);
2273
+ const transcriptArtifact = await saveOptionalArtifact(() => saveBrowserTranscriptArtifact({
2274
+ sessionId: options.sessionId,
2275
+ prompt: promptText,
2276
+ answerMarkdown,
2277
+ conversationUrl: lastUrl,
2278
+ artifacts: savedImageArtifacts,
2279
+ logger,
2280
+ }), logger);
2281
+ const savedArtifacts = appendArtifacts(savedImageArtifacts, [transcriptArtifact]);
2282
+ const archive = await maybeArchiveCompletedConversation({
2283
+ Runtime,
2284
+ logger,
2285
+ config,
2286
+ conversationUrl: lastUrl,
2287
+ followUpCount: followUpPrompts.length,
2288
+ requiredArtifactsSaved: Boolean(transcriptArtifact) &&
2289
+ imageArtifacts.savedImages.length === imageArtifacts.imageCount,
2290
+ });
1435
2291
  const durationMs = Date.now() - startedAt;
1436
2292
  const answerChars = answerText.length;
1437
2293
  const answerTokens = estimateTokenCount(answerMarkdown);
2294
+ runStatus = "complete";
1438
2295
  return {
1439
2296
  answerText,
1440
2297
  answerMarkdown,
@@ -1442,18 +2299,23 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1442
2299
  tookMs: durationMs,
1443
2300
  answerTokens,
1444
2301
  answerChars,
2302
+ browserTransport: "cdp",
1445
2303
  chromePid: undefined,
1446
2304
  chromePort: port,
1447
2305
  chromeHost: host,
2306
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2307
+ chromeProfileRoot,
1448
2308
  userDataDir: undefined,
1449
2309
  chromeTargetId: remoteTargetId ?? undefined,
1450
2310
  tabUrl: lastUrl,
2311
+ conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined,
2312
+ artifacts: savedArtifacts,
2313
+ archive,
1451
2314
  controllerPid: process.pid,
1452
2315
  };
1453
2316
  }
1454
2317
  catch (error) {
1455
2318
  const normalizedError = error instanceof Error ? error : new Error(String(error));
1456
- stopThinkingMonitor?.();
1457
2319
  const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError);
1458
2320
  connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed;
1459
2321
  if (!socketClosed) {
@@ -1468,6 +2330,8 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1468
2330
  runtime: {
1469
2331
  chromeHost: host,
1470
2332
  chromePort: port,
2333
+ chromeBrowserWSEndpoint: browserWSEndpoint,
2334
+ chromeProfileRoot,
1471
2335
  chromeTargetId: remoteTargetId ?? undefined,
1472
2336
  tabUrl: lastUrl,
1473
2337
  controllerPid: process.pid,
@@ -1476,15 +2340,29 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1476
2340
  }
1477
2341
  finally {
1478
2342
  try {
1479
- if (!connectionClosedUnexpectedly && client) {
1480
- await client.close();
1481
- }
2343
+ await closeRemoteConnectionAfterRun({
2344
+ connectionClosedUnexpectedly,
2345
+ connection,
2346
+ client,
2347
+ runStatus,
2348
+ });
1482
2349
  }
1483
2350
  catch {
1484
2351
  // ignore
1485
2352
  }
1486
2353
  removeDialogHandler?.();
1487
- await closeRemoteChromeTarget(host, port, remoteTargetId ?? undefined, logger);
2354
+ if (tabLease) {
2355
+ const handle = tabLease;
2356
+ tabLease = null;
2357
+ await handle.release().catch(() => undefined);
2358
+ }
2359
+ if (shouldCloseOwnedRunTargetAfterRun({
2360
+ runStatus,
2361
+ ownsTarget,
2362
+ keepBrowser: Boolean(config.keepBrowser),
2363
+ })) {
2364
+ await closeRemoteChromeTarget(host, port, remoteTargetId ?? undefined, logger);
2365
+ }
1488
2366
  // Don't kill remote Chrome - it's not ours to manage
1489
2367
  const totalSeconds = (Date.now() - startedAt) / 1000;
1490
2368
  logger(`Remote session complete • ${totalSeconds.toFixed(1)}s total`);
@@ -1492,11 +2370,22 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1492
2370
  }
1493
2371
  export { estimateTokenCount } from "./utils.js";
1494
2372
  export { resolveBrowserConfig, DEFAULT_BROWSER_CONFIG } from "./config.js";
2373
+ // biome-ignore lint/style/useNamingConvention: test-only export used in vitest suite
2374
+ export const __test__ = {
2375
+ closeRemoteConnectionAfterRun,
2376
+ detachKeptChromeProcess,
2377
+ isImageOnlyUiChromeText,
2378
+ listIgnoredRemoteChromeFlags,
2379
+ shouldCloseOwnedRunTargetAfterRun,
2380
+ };
1495
2381
  export { syncCookies } from "./cookies.js";
1496
2382
  export { navigateToChatGPT, ensureNotBlocked, ensurePromptReady, ensureModelSelection, submitPrompt, waitForAssistantResponse, captureAssistantMarkdown, uploadAttachmentFile, waitForAttachmentCompletion, } from "./pageActions.js";
1497
2383
  export async function maybeReuseRunningChromeForTest(userDataDir, logger, options = {}) {
1498
2384
  return maybeReuseRunningChrome(userDataDir, logger, options);
1499
2385
  }
2386
+ export async function acquireManualLoginChromeForRunForTest(userDataDir, config, logger, sessionId, deps) {
2387
+ return acquireManualLoginChromeForRun(userDataDir, config, logger, sessionId, deps);
2388
+ }
1500
2389
  export function isWebSocketClosureError(error) {
1501
2390
  const message = error.message.toLowerCase();
1502
2391
  return (message.includes("websocket connection closed") ||
@@ -1505,16 +2394,6 @@ export function isWebSocketClosureError(error) {
1505
2394
  message.includes("inspected target navigated or closed") ||
1506
2395
  message.includes("target closed"));
1507
2396
  }
1508
- export function formatThinkingLog(startedAt, now, message, locatorSuffix) {
1509
- const elapsedMs = now - startedAt;
1510
- const elapsedText = formatElapsed(elapsedMs);
1511
- const progress = Math.min(1, elapsedMs / 600_000); // soft target: 10 minutes
1512
- const pct = Math.round(progress * 100)
1513
- .toString()
1514
- .padStart(3, " ");
1515
- const statusLabel = message ? ` — ${message}` : "";
1516
- return `${pct}% [${elapsedText} / ~10m]${statusLabel}${locatorSuffix}`;
1517
- }
1518
2397
  async function waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, minTurnIndex, expectedConversationId) {
1519
2398
  try {
1520
2399
  return await waitForAssistantResponse(Runtime, timeoutMs, logger, minTurnIndex, expectedConversationId);
@@ -1697,74 +2576,6 @@ async function readConversationTurnCount(Runtime, logger) {
1697
2576
  function isConversationUrl(url) {
1698
2577
  return /\/c\/[a-z0-9-]+/i.test(url);
1699
2578
  }
1700
- function startThinkingStatusMonitor(Runtime, logger, includeDiagnostics = false) {
1701
- let stopped = false;
1702
- let pending = false;
1703
- let lastMessage = null;
1704
- const startedAt = Date.now();
1705
- const interval = setInterval(async () => {
1706
- // stop flag flips asynchronously
1707
- if (stopped || pending) {
1708
- return;
1709
- }
1710
- pending = true;
1711
- try {
1712
- const nextMessage = await readThinkingStatus(Runtime);
1713
- if (nextMessage && nextMessage !== lastMessage) {
1714
- lastMessage = nextMessage;
1715
- let locatorSuffix = "";
1716
- if (includeDiagnostics) {
1717
- try {
1718
- const snapshot = await readAssistantSnapshot(Runtime);
1719
- locatorSuffix = ` | assistant-turn=${snapshot ? "present" : "missing"}`;
1720
- }
1721
- catch {
1722
- locatorSuffix = " | assistant-turn=error";
1723
- }
1724
- }
1725
- logger(formatThinkingLog(startedAt, Date.now(), nextMessage, locatorSuffix));
1726
- }
1727
- }
1728
- catch {
1729
- // ignore DOM polling errors
1730
- }
1731
- finally {
1732
- pending = false;
1733
- }
1734
- }, 1500);
1735
- interval.unref?.();
1736
- return () => {
1737
- // multiple callers may race to stop
1738
- if (stopped) {
1739
- return;
1740
- }
1741
- stopped = true;
1742
- clearInterval(interval);
1743
- };
1744
- }
1745
- async function readThinkingStatus(Runtime) {
1746
- const expression = buildThinkingStatusExpression();
1747
- try {
1748
- const { result } = await Runtime.evaluate({ expression, returnByValue: true });
1749
- const value = typeof result.value === "string" ? result.value.trim() : "";
1750
- const sanitized = sanitizeThinkingText(value);
1751
- return sanitized || null;
1752
- }
1753
- catch {
1754
- return null;
1755
- }
1756
- }
1757
- function sanitizeThinkingText(raw) {
1758
- if (!raw) {
1759
- return "";
1760
- }
1761
- const trimmed = raw.trim();
1762
- const prefixPattern = /^(pro thinking)\s*[•:\-–—]*\s*/i;
1763
- if (prefixPattern.test(trimmed)) {
1764
- return trimmed.replace(prefixPattern, "").trim();
1765
- }
1766
- return trimmed;
1767
- }
1768
2579
  function describeDevtoolsFirewallHint(host, port) {
1769
2580
  if (!isWsl())
1770
2581
  return null;
@@ -1832,59 +2643,3 @@ function shouldPreferSystemTmpDir(platform, tmpDir, homeDir) {
1832
2643
  export function shouldPreferSystemTmpDirForTest(platform, tmpDir, homeDir) {
1833
2644
  return shouldPreferSystemTmpDir(platform, tmpDir, homeDir);
1834
2645
  }
1835
- function buildThinkingStatusExpression() {
1836
- const selectors = [
1837
- "span.loading-shimmer",
1838
- "span.flex.items-center.gap-1.truncate.text-start.align-middle.text-token-text-tertiary",
1839
- '[data-testid*="thinking"]',
1840
- '[data-testid*="reasoning"]',
1841
- '[role="status"]',
1842
- '[aria-live="polite"]',
1843
- ];
1844
- const keywords = [
1845
- "pro thinking",
1846
- "thinking",
1847
- "reasoning",
1848
- "clarifying",
1849
- "planning",
1850
- "drafting",
1851
- "summarizing",
1852
- ];
1853
- const selectorLiteral = JSON.stringify(selectors);
1854
- const keywordsLiteral = JSON.stringify(keywords);
1855
- return `(() => {
1856
- const selectors = ${selectorLiteral};
1857
- const keywords = ${keywordsLiteral};
1858
- const nodes = new Set();
1859
- for (const selector of selectors) {
1860
- document.querySelectorAll(selector).forEach((node) => nodes.add(node));
1861
- }
1862
- document.querySelectorAll('[data-testid]').forEach((node) => nodes.add(node));
1863
- for (const node of nodes) {
1864
- if (!(node instanceof HTMLElement)) {
1865
- continue;
1866
- }
1867
- const text = node.textContent?.trim();
1868
- if (!text) {
1869
- continue;
1870
- }
1871
- const classLabel = (node.className || '').toLowerCase();
1872
- const dataLabel = ((node.getAttribute('data-testid') || '') + ' ' + (node.getAttribute('aria-label') || ''))
1873
- .toLowerCase();
1874
- const normalizedText = text.toLowerCase();
1875
- const matches = keywords.some((keyword) =>
1876
- normalizedText.includes(keyword) || classLabel.includes(keyword) || dataLabel.includes(keyword)
1877
- );
1878
- if (matches) {
1879
- const shimmerChild = node.querySelector(
1880
- 'span.flex.items-center.gap-1.truncate.text-start.align-middle.text-token-text-tertiary',
1881
- );
1882
- if (shimmerChild?.textContent?.trim()) {
1883
- return shimmerChild.textContent.trim();
1884
- }
1885
- return text.trim();
1886
- }
1887
- }
1888
- return null;
1889
- })()`;
1890
- }