@steipete/oracle 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,15 +5,16 @@ import net from 'node:net';
5
5
  import { resolveBrowserConfig } from './config.js';
6
6
  import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToChrome, connectToRemoteChrome, closeRemoteChromeTarget, } from './chromeLifecycle.js';
7
7
  import { syncCookies } from './cookies.js';
8
- import { navigateToChatGPT, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, ensureModelSelection, submitPrompt, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from './pageActions.js';
8
+ import { navigateToChatGPT, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, ensureModelSelection, submitPrompt, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from './pageActions.js';
9
9
  import { uploadAttachmentViaDataTransfer } from './actions/remoteFileTransfer.js';
10
10
  import { ensureThinkingTime } from './actions/thinkingTime.js';
11
11
  import { estimateTokenCount, withRetries, delay } from './utils.js';
12
12
  import { formatElapsed } from '../oracle/format.js';
13
- import { CHATGPT_URL } from './constants.js';
13
+ import { CHATGPT_URL, CONVERSATION_TURN_SELECTOR, DEFAULT_MODEL_STRATEGY } from './constants.js';
14
14
  import { BrowserAutomationError } from '../oracle/errors.js';
15
+ import { alignPromptEchoPair, buildPromptEchoMatcher } from './reattachHelpers.js';
15
16
  import { cleanupStaleProfileState, readChromePid, readDevToolsPort, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from './profileState.js';
16
- export { CHATGPT_URL, DEFAULT_MODEL_TARGET } from './constants.js';
17
+ export { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from './constants.js';
17
18
  export { parseDuration, delay, normalizeChatgptUrl, isTemporaryChatUrl } from './utils.js';
18
19
  export async function runBrowserMode(options) {
19
20
  const promptText = options.prompt?.trim();
@@ -87,6 +88,7 @@ export async function runBrowserMode(options) {
87
88
  ? manualProfileDir
88
89
  : await mkdtemp(path.join(await resolveUserDataBaseDir(), 'oracle-browser-'));
89
90
  if (manualLogin) {
91
+ // Learned: manual login reuses a persistent profile so cookies/SSO survive.
90
92
  await mkdir(userDataDir, { recursive: true });
91
93
  logger(`Manual login mode enabled; reusing persistent profile at ${userDataDir}`);
92
94
  }
@@ -166,6 +168,7 @@ export async function runBrowserMode(options) {
166
168
  else {
167
169
  logger('Applying inline cookies (skipping Chrome profile read and Keychain prompt)');
168
170
  }
171
+ // Learned: always sync cookies before the first navigation so /backend-api/me succeeds.
169
172
  const cookieCount = await syncCookies(Network, config.url, config.chromeProfile, logger, {
170
173
  allowErrors: config.allowCookieErrors ?? false,
171
174
  filterNames: config.cookieNames ?? undefined,
@@ -190,13 +193,15 @@ export async function runBrowserMode(options) {
190
193
  : 'Skipping Chrome cookie sync (--browser-no-cookie-sync)');
191
194
  }
192
195
  if (cookieSyncEnabled && !manualLogin && (appliedCookies ?? 0) === 0 && !config.inlineCookies) {
196
+ // Learned: if the profile has no ChatGPT cookies, browser mode will just bounce to login.
197
+ // Fail early so the user knows to sign in.
193
198
  throw new BrowserAutomationError('No ChatGPT cookies were applied from your Chrome profile; cannot proceed in browser mode. ' +
194
- 'Make sure ChatGPT is signed in in the selected profile or rebuild the keytar native module if it failed to load.', {
199
+ 'Make sure ChatGPT is signed in in the selected profile, or use --browser-manual-login / inline cookies.', {
195
200
  stage: 'execute-browser',
196
201
  details: {
197
202
  profile: config.chromeProfile ?? 'Default',
198
203
  cookiePath: config.chromeCookiePath ?? null,
199
- hint: 'Rebuild keytar: PYTHON=/usr/bin/python3 /Users/steipete/Projects/oracle/runner npx node-gyp rebuild (run inside the keytar path from the error), then retry.',
204
+ hint: 'If macOS Keychain prompts or denies access, run oracle from a GUI session or use --copy/--render for the manual flow.',
200
205
  },
201
206
  });
202
207
  }
@@ -205,6 +210,7 @@ export async function runBrowserMode(options) {
205
210
  // then hop to the requested URL if it differs.
206
211
  await raceWithDisconnect(navigateToChatGPT(Page, Runtime, baseUrl, logger));
207
212
  await raceWithDisconnect(ensureNotBlocked(Runtime, config.headless, logger));
213
+ // Learned: login checks must happen on the base domain before jumping into project URLs.
208
214
  await raceWithDisconnect(waitForLogin({ runtime: Runtime, logger, appliedCookies, manualLogin, timeoutMs: config.timeoutMs }));
209
215
  if (config.url !== baseUrl) {
210
216
  await raceWithDisconnect(navigateToChatGPT(Page, Runtime, config.url, logger));
@@ -235,6 +241,9 @@ export async function runBrowserMode(options) {
235
241
  catch {
236
242
  // ignore
237
243
  }
244
+ if (lastUrl) {
245
+ logger(`[browser] url = ${lastUrl}`);
246
+ }
238
247
  if (chrome?.port) {
239
248
  const suffix = lastTargetId ? ` target=${lastTargetId}` : '';
240
249
  if (lastUrl) {
@@ -246,9 +255,45 @@ export async function runBrowserMode(options) {
246
255
  await emitRuntimeHint();
247
256
  }
248
257
  };
258
+ let conversationHintInFlight = null;
259
+ const updateConversationHint = async (label, timeoutMs = 10_000) => {
260
+ if (!chrome?.port) {
261
+ return false;
262
+ }
263
+ const start = Date.now();
264
+ while (Date.now() - start < timeoutMs) {
265
+ try {
266
+ const { result } = await Runtime.evaluate({ expression: 'location.href', returnByValue: true });
267
+ if (typeof result?.value === 'string' && result.value.includes('/c/')) {
268
+ lastUrl = result.value;
269
+ logger(`[browser] conversation url (${label}) = ${lastUrl}`);
270
+ await emitRuntimeHint();
271
+ return true;
272
+ }
273
+ }
274
+ catch {
275
+ // ignore; keep polling until timeout
276
+ }
277
+ await delay(250);
278
+ }
279
+ return false;
280
+ };
281
+ const scheduleConversationHint = (label, timeoutMs) => {
282
+ if (conversationHintInFlight) {
283
+ return;
284
+ }
285
+ // Learned: the /c/ URL can update after the answer; emit hints in the background.
286
+ // Run in the background so prompt submission/streaming isn't blocked by slow URL updates.
287
+ conversationHintInFlight = updateConversationHint(label, timeoutMs)
288
+ .catch(() => false)
289
+ .finally(() => {
290
+ conversationHintInFlight = null;
291
+ });
292
+ };
249
293
  await captureRuntimeSnapshot();
250
- if (config.desiredModel) {
251
- await raceWithDisconnect(withRetries(() => ensureModelSelection(Runtime, config.desiredModel, logger), {
294
+ const modelStrategy = config.modelStrategy ?? DEFAULT_MODEL_STRATEGY;
295
+ if (config.desiredModel && modelStrategy !== 'ignore') {
296
+ await raceWithDisconnect(withRetries(() => ensureModelSelection(Runtime, config.desiredModel, logger, modelStrategy), {
252
297
  retries: 2,
253
298
  delayMs: 300,
254
299
  onRetry: (attempt, error) => {
@@ -266,6 +311,9 @@ export async function runBrowserMode(options) {
266
311
  await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
267
312
  logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`);
268
313
  }
314
+ else if (modelStrategy === 'ignore') {
315
+ logger('Model picker: skipped (strategy=ignore)');
316
+ }
269
317
  // Handle thinking time selection if specified
270
318
  const thinkingTime = config.thinkingTime;
271
319
  if (thinkingTime) {
@@ -280,14 +328,22 @@ export async function runBrowserMode(options) {
280
328
  }));
281
329
  }
282
330
  const submitOnce = async (prompt, submissionAttachments) => {
331
+ const baselineSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
332
+ const baselineAssistantText = typeof baselineSnapshot?.text === 'string' ? baselineSnapshot.text.trim() : '';
283
333
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
334
+ let inputOnlyAttachments = false;
284
335
  if (submissionAttachments.length > 0) {
285
336
  if (!DOM) {
286
337
  throw new Error('Chrome DOM domain unavailable while uploading attachments.');
287
338
  }
288
- for (const attachment of submissionAttachments) {
339
+ await clearComposerAttachments(Runtime, 5_000, logger);
340
+ for (let attachmentIndex = 0; attachmentIndex < submissionAttachments.length; attachmentIndex += 1) {
341
+ const attachment = submissionAttachments[attachmentIndex];
289
342
  logger(`Uploading attachment: ${attachment.displayPath}`);
290
- await uploadAttachmentFile({ runtime: Runtime, dom: DOM }, attachment, logger);
343
+ const uiConfirmed = await uploadAttachmentFile({ runtime: Runtime, dom: DOM }, attachment, logger, { expectedCount: attachmentIndex + 1 });
344
+ if (!uiConfirmed) {
345
+ inputOnlyAttachments = true;
346
+ }
291
347
  await delay(500);
292
348
  }
293
349
  // Scale timeout based on number of files: base 30s + 15s per additional file
@@ -297,30 +353,103 @@ export async function runBrowserMode(options) {
297
353
  await waitForAttachmentCompletion(Runtime, waitBudget, attachmentNames, logger);
298
354
  logger('All attachments uploaded');
299
355
  }
300
- await submitPrompt({ runtime: Runtime, input: Input, attachmentNames }, prompt, logger);
356
+ let baselineTurns = await readConversationTurnCount(Runtime, logger);
357
+ // Learned: return baselineTurns so assistant polling can ignore earlier content.
358
+ const committedTurns = await submitPrompt({
359
+ runtime: Runtime,
360
+ input: Input,
361
+ attachmentNames,
362
+ baselineTurns: baselineTurns ?? undefined,
363
+ inputTimeoutMs: config.inputTimeoutMs ?? undefined,
364
+ }, prompt, logger);
365
+ if (typeof committedTurns === 'number' && Number.isFinite(committedTurns)) {
366
+ if (baselineTurns === null || committedTurns > baselineTurns) {
367
+ baselineTurns = Math.max(0, committedTurns - 1);
368
+ }
369
+ }
301
370
  if (attachmentNames.length > 0) {
302
- await waitForUserTurnAttachments(Runtime, attachmentNames, 20_000, logger);
303
- logger('Verified attachments present on sent user message');
371
+ if (inputOnlyAttachments) {
372
+ logger('Attachment UI did not render before send; skipping user-turn attachment verification.');
373
+ }
374
+ else {
375
+ const verified = await waitForUserTurnAttachments(Runtime, attachmentNames, 20_000, logger);
376
+ if (verified) {
377
+ logger('Verified attachments present on sent user message');
378
+ }
379
+ }
304
380
  }
381
+ // Reattach needs a /c/ URL; ChatGPT can update it late, so poll in the background.
382
+ scheduleConversationHint('post-submit', config.timeoutMs ?? 120_000);
383
+ return { baselineTurns, baselineAssistantText };
305
384
  };
385
+ let baselineTurns = null;
386
+ let baselineAssistantText = null;
306
387
  try {
307
- await raceWithDisconnect(submitOnce(promptText, attachments));
388
+ const submission = await raceWithDisconnect(submitOnce(promptText, attachments));
389
+ baselineTurns = submission.baselineTurns;
390
+ baselineAssistantText = submission.baselineAssistantText;
308
391
  }
309
392
  catch (error) {
310
393
  const isPromptTooLarge = error instanceof BrowserAutomationError &&
311
394
  error.details?.code === 'prompt-too-large';
312
395
  if (fallbackSubmission && isPromptTooLarge) {
396
+ // Learned: when prompts truncate, retry with file uploads so the UI receives the full content.
313
397
  logger('[browser] Inline prompt too large; retrying with file uploads.');
314
398
  await raceWithDisconnect(clearPromptComposer(Runtime, logger));
315
399
  await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
316
- await raceWithDisconnect(submitOnce(fallbackSubmission.prompt, fallbackSubmission.attachments));
400
+ const submission = await raceWithDisconnect(submitOnce(fallbackSubmission.prompt, fallbackSubmission.attachments));
401
+ baselineTurns = submission.baselineTurns;
402
+ baselineAssistantText = submission.baselineAssistantText;
317
403
  }
318
404
  else {
319
405
  throw error;
320
406
  }
321
407
  }
322
408
  stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
323
- const answer = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger));
409
+ // Helper to normalize text for echo detection (collapse whitespace, lowercase)
410
+ const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, ' ').trim();
411
+ const waitForFreshAssistantResponse = async (baselineNormalized, timeoutMs) => {
412
+ const baselinePrefix = baselineNormalized.length >= 80
413
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
414
+ : '';
415
+ const deadline = Date.now() + timeoutMs;
416
+ while (Date.now() < deadline) {
417
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
418
+ const text = typeof snapshot?.text === 'string' ? snapshot.text.trim() : '';
419
+ if (text) {
420
+ const normalized = normalizeForComparison(text);
421
+ const isBaseline = normalized === baselineNormalized || (baselinePrefix.length > 0 && normalized.startsWith(baselinePrefix));
422
+ if (!isBaseline) {
423
+ return {
424
+ text,
425
+ html: snapshot?.html ?? undefined,
426
+ meta: { turnId: snapshot?.turnId ?? undefined, messageId: snapshot?.messageId ?? undefined },
427
+ };
428
+ }
429
+ }
430
+ await delay(350);
431
+ }
432
+ return null;
433
+ };
434
+ let answer = await raceWithDisconnect(waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined));
435
+ // Ensure we store the final conversation URL even if the UI updated late.
436
+ await updateConversationHint('post-response', 15_000);
437
+ const baselineNormalized = baselineAssistantText ? normalizeForComparison(baselineAssistantText) : '';
438
+ if (baselineNormalized) {
439
+ const normalizedAnswer = normalizeForComparison(answer.text ?? '');
440
+ const baselinePrefix = baselineNormalized.length >= 80
441
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
442
+ : '';
443
+ const isBaseline = normalizedAnswer === baselineNormalized ||
444
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
445
+ if (isBaseline) {
446
+ logger('Detected stale assistant response; waiting for new response...');
447
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
448
+ if (refreshed) {
449
+ answer = refreshed;
450
+ }
451
+ }
452
+ }
324
453
  answerText = answer.text;
325
454
  answerHtml = answer.html ?? '';
326
455
  const copiedMarkdown = await raceWithDisconnect(withRetries(async () => {
@@ -339,39 +468,41 @@ export async function runBrowserMode(options) {
339
468
  },
340
469
  })).catch(() => null);
341
470
  answerMarkdown = copiedMarkdown ?? answerText;
342
- // Helper to normalize text for echo detection (collapse whitespace, lowercase)
343
- const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, ' ').trim();
471
+ const promptEchoMatcher = buildPromptEchoMatcher(promptText);
344
472
  // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
345
- const finalSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
473
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
346
474
  const finalText = typeof finalSnapshot?.text === 'string' ? finalSnapshot.text.trim() : '';
347
- if (!copiedMarkdown &&
348
- finalText &&
349
- finalText !== answerMarkdown.trim() &&
350
- finalText !== promptText.trim() &&
351
- finalText.length >= answerMarkdown.trim().length) {
352
- logger('Refreshed assistant response via final DOM snapshot');
353
- answerText = finalText;
354
- answerMarkdown = finalText;
475
+ if (finalText && finalText !== promptText.trim()) {
476
+ const trimmedMarkdown = answerMarkdown.trim();
477
+ const finalIsEcho = promptEchoMatcher ? promptEchoMatcher.isEcho(finalText) : false;
478
+ const lengthDelta = finalText.length - trimmedMarkdown.length;
479
+ const missingCopy = !copiedMarkdown && lengthDelta >= 0;
480
+ const likelyTruncatedCopy = copiedMarkdown &&
481
+ trimmedMarkdown.length > 0 &&
482
+ lengthDelta >= Math.max(12, Math.floor(trimmedMarkdown.length * 0.75));
483
+ if ((missingCopy || likelyTruncatedCopy) && !finalIsEcho && finalText !== trimmedMarkdown) {
484
+ logger('Refreshed assistant response via final DOM snapshot');
485
+ answerText = finalText;
486
+ answerMarkdown = finalText;
487
+ }
355
488
  }
356
- // Detect prompt echo using normalized comparison (whitespace-insensitive)
357
- const normalizedAnswer = normalizeForComparison(answerMarkdown);
358
- const normalizedPrompt = normalizeForComparison(promptText);
359
- const promptPrefix = normalizedPrompt.length >= 80
360
- ? normalizedPrompt.slice(0, Math.min(200, normalizedPrompt.length))
361
- : '';
362
- const isPromptEcho = normalizedAnswer === normalizedPrompt || (promptPrefix.length > 0 && normalizedAnswer.startsWith(promptPrefix));
489
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
490
+ const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
491
+ text: 'Aligned assistant response text to copied markdown after prompt echo',
492
+ markdown: 'Aligned assistant markdown to response text after prompt echo',
493
+ });
494
+ answerText = alignedEcho.answerText;
495
+ answerMarkdown = alignedEcho.answerMarkdown;
496
+ const isPromptEcho = alignedEcho.isEcho;
363
497
  if (isPromptEcho) {
364
498
  logger('Detected prompt echo in response; waiting for actual assistant response...');
365
- const deadline = Date.now() + 8_000;
499
+ const deadline = Date.now() + 15_000;
366
500
  let bestText = null;
367
501
  let stableCount = 0;
368
502
  while (Date.now() < deadline) {
369
- const snapshot = await readAssistantSnapshot(Runtime).catch(() => null);
503
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
370
504
  const text = typeof snapshot?.text === 'string' ? snapshot.text.trim() : '';
371
- const normalizedText = normalizeForComparison(text);
372
- const isStillEcho = !text ||
373
- normalizedText === normalizedPrompt ||
374
- (promptPrefix.length > 0 && normalizedText.startsWith(promptPrefix));
505
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
375
506
  if (!isStillEcho) {
376
507
  if (!bestText || text.length > bestText.length) {
377
508
  bestText = text;
@@ -392,6 +523,36 @@ export async function runBrowserMode(options) {
392
523
  answerMarkdown = bestText;
393
524
  }
394
525
  }
526
+ const minAnswerChars = 16;
527
+ if (answerText.trim().length > 0 && answerText.trim().length < minAnswerChars) {
528
+ const deadline = Date.now() + 12_000;
529
+ let bestText = answerText.trim();
530
+ let stableCycles = 0;
531
+ while (Date.now() < deadline) {
532
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
533
+ const text = typeof snapshot?.text === 'string' ? snapshot.text.trim() : '';
534
+ if (text && text.length > bestText.length) {
535
+ bestText = text;
536
+ stableCycles = 0;
537
+ }
538
+ else {
539
+ stableCycles += 1;
540
+ }
541
+ if (stableCycles >= 3 && bestText.length >= minAnswerChars) {
542
+ break;
543
+ }
544
+ await delay(400);
545
+ }
546
+ if (bestText.length > answerText.trim().length) {
547
+ logger('Refreshed short assistant response from latest DOM snapshot');
548
+ answerText = bestText;
549
+ answerMarkdown = bestText;
550
+ }
551
+ }
552
+ if (connectionClosedUnexpectedly) {
553
+ // Bail out on mid-run disconnects so the session stays reattachable.
554
+ throw new Error('Chrome disconnected before completion');
555
+ }
395
556
  stopThinkingMonitor?.();
396
557
  runStatus = 'complete';
397
558
  const durationMs = Date.now() - startedAt;
@@ -653,8 +814,9 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
653
814
  catch {
654
815
  // ignore
655
816
  }
656
- if (config.desiredModel) {
657
- await withRetries(() => ensureModelSelection(Runtime, config.desiredModel, logger), {
817
+ const modelStrategy = config.modelStrategy ?? DEFAULT_MODEL_STRATEGY;
818
+ if (config.desiredModel && modelStrategy !== 'ignore') {
819
+ await withRetries(() => ensureModelSelection(Runtime, config.desiredModel, logger, modelStrategy), {
658
820
  retries: 2,
659
821
  delayMs: 300,
660
822
  onRetry: (attempt, error) => {
@@ -666,6 +828,9 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
666
828
  await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
667
829
  logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`);
668
830
  }
831
+ else if (modelStrategy === 'ignore') {
832
+ logger('Model picker: skipped (strategy=ignore)');
833
+ }
669
834
  // Handle thinking time selection if specified
670
835
  const thinkingTime = config.thinkingTime;
671
836
  if (thinkingTime) {
@@ -680,11 +845,14 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
680
845
  });
681
846
  }
682
847
  const submitOnce = async (prompt, submissionAttachments) => {
848
+ const baselineSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
849
+ const baselineAssistantText = typeof baselineSnapshot?.text === 'string' ? baselineSnapshot.text.trim() : '';
683
850
  const attachmentNames = submissionAttachments.map((a) => path.basename(a.path));
684
851
  if (submissionAttachments.length > 0) {
685
852
  if (!DOM) {
686
853
  throw new Error('Chrome DOM domain unavailable while uploading attachments.');
687
854
  }
855
+ await clearComposerAttachments(Runtime, 5_000, logger);
688
856
  // Use remote file transfer for remote Chrome (reads local files and injects via CDP)
689
857
  for (const attachment of submissionAttachments) {
690
858
  logger(`Uploading attachment: ${attachment.displayPath}`);
@@ -698,10 +866,27 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
698
866
  await waitForAttachmentCompletion(Runtime, waitBudget, attachmentNames, logger);
699
867
  logger('All attachments uploaded');
700
868
  }
701
- await submitPrompt({ runtime: Runtime, input: Input, attachmentNames }, prompt, logger);
869
+ let baselineTurns = await readConversationTurnCount(Runtime, logger);
870
+ const committedTurns = await submitPrompt({
871
+ runtime: Runtime,
872
+ input: Input,
873
+ attachmentNames,
874
+ baselineTurns: baselineTurns ?? undefined,
875
+ inputTimeoutMs: config.inputTimeoutMs ?? undefined,
876
+ }, prompt, logger);
877
+ if (typeof committedTurns === 'number' && Number.isFinite(committedTurns)) {
878
+ if (baselineTurns === null || committedTurns > baselineTurns) {
879
+ baselineTurns = Math.max(0, committedTurns - 1);
880
+ }
881
+ }
882
+ return { baselineTurns, baselineAssistantText };
702
883
  };
884
+ let baselineTurns = null;
885
+ let baselineAssistantText = null;
703
886
  try {
704
- await submitOnce(promptText, attachments);
887
+ const submission = await submitOnce(promptText, attachments);
888
+ baselineTurns = submission.baselineTurns;
889
+ baselineAssistantText = submission.baselineAssistantText;
705
890
  }
706
891
  catch (error) {
707
892
  const isPromptTooLarge = error instanceof BrowserAutomationError &&
@@ -710,14 +895,57 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
710
895
  logger('[browser] Inline prompt too large; retrying with file uploads.');
711
896
  await clearPromptComposer(Runtime, logger);
712
897
  await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
713
- await submitOnce(options.fallbackSubmission.prompt, options.fallbackSubmission.attachments);
898
+ const submission = await submitOnce(options.fallbackSubmission.prompt, options.fallbackSubmission.attachments);
899
+ baselineTurns = submission.baselineTurns;
900
+ baselineAssistantText = submission.baselineAssistantText;
714
901
  }
715
902
  else {
716
903
  throw error;
717
904
  }
718
905
  }
719
906
  stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
720
- const answer = await waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger);
907
+ // Helper to normalize text for echo detection (collapse whitespace, lowercase)
908
+ const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, ' ').trim();
909
+ const waitForFreshAssistantResponse = async (baselineNormalized, timeoutMs) => {
910
+ const baselinePrefix = baselineNormalized.length >= 80
911
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
912
+ : '';
913
+ const deadline = Date.now() + timeoutMs;
914
+ while (Date.now() < deadline) {
915
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
916
+ const text = typeof snapshot?.text === 'string' ? snapshot.text.trim() : '';
917
+ if (text) {
918
+ const normalized = normalizeForComparison(text);
919
+ const isBaseline = normalized === baselineNormalized || (baselinePrefix.length > 0 && normalized.startsWith(baselinePrefix));
920
+ if (!isBaseline) {
921
+ return {
922
+ text,
923
+ html: snapshot?.html ?? undefined,
924
+ meta: { turnId: snapshot?.turnId ?? undefined, messageId: snapshot?.messageId ?? undefined },
925
+ };
926
+ }
927
+ }
928
+ await delay(350);
929
+ }
930
+ return null;
931
+ };
932
+ let answer = await waitForAssistantResponseWithReload(Runtime, Page, config.timeoutMs, logger, baselineTurns ?? undefined);
933
+ const baselineNormalized = baselineAssistantText ? normalizeForComparison(baselineAssistantText) : '';
934
+ if (baselineNormalized) {
935
+ const normalizedAnswer = normalizeForComparison(answer.text ?? '');
936
+ const baselinePrefix = baselineNormalized.length >= 80
937
+ ? baselineNormalized.slice(0, Math.min(200, baselineNormalized.length))
938
+ : '';
939
+ const isBaseline = normalizedAnswer === baselineNormalized ||
940
+ (baselinePrefix.length > 0 && normalizedAnswer.startsWith(baselinePrefix));
941
+ if (isBaseline) {
942
+ logger('Detected stale assistant response; waiting for new response...');
943
+ const refreshed = await waitForFreshAssistantResponse(baselineNormalized, 15_000);
944
+ if (refreshed) {
945
+ answer = refreshed;
946
+ }
947
+ }
948
+ }
721
949
  answerText = answer.text;
722
950
  answerHtml = answer.html ?? '';
723
951
  const copiedMarkdown = await withRetries(async () => {
@@ -736,10 +964,8 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
736
964
  },
737
965
  }).catch(() => null);
738
966
  answerMarkdown = copiedMarkdown ?? answerText;
739
- // Helper to normalize text for echo detection (collapse whitespace, lowercase)
740
- const normalizeForComparison = (text) => text.toLowerCase().replace(/\s+/g, ' ').trim();
741
967
  // Final sanity check: ensure we didn't accidentally capture the user prompt instead of the assistant turn.
742
- const finalSnapshot = await readAssistantSnapshot(Runtime).catch(() => null);
968
+ const finalSnapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
743
969
  const finalText = typeof finalSnapshot?.text === 'string' ? finalSnapshot.text.trim() : '';
744
970
  if (finalText &&
745
971
  finalText !== answerMarkdown.trim() &&
@@ -749,25 +975,24 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
749
975
  answerText = finalText;
750
976
  answerMarkdown = finalText;
751
977
  }
752
- // Detect prompt echo using normalized comparison (whitespace-insensitive)
753
- const normalizedAnswer = normalizeForComparison(answerMarkdown);
754
- const normalizedPrompt = normalizeForComparison(promptText);
755
- const promptPrefix = normalizedPrompt.length >= 80
756
- ? normalizedPrompt.slice(0, Math.min(200, normalizedPrompt.length))
757
- : '';
758
- const isPromptEcho = normalizedAnswer === normalizedPrompt || (promptPrefix.length > 0 && normalizedAnswer.startsWith(promptPrefix));
978
+ // Detect prompt echo using normalized comparison (whitespace-insensitive).
979
+ const promptEchoMatcher = buildPromptEchoMatcher(promptText);
980
+ const alignedEcho = alignPromptEchoPair(answerText, answerMarkdown, promptEchoMatcher, copiedMarkdown ? logger : undefined, {
981
+ text: 'Aligned assistant response text to copied markdown after prompt echo',
982
+ markdown: 'Aligned assistant markdown to response text after prompt echo',
983
+ });
984
+ answerText = alignedEcho.answerText;
985
+ answerMarkdown = alignedEcho.answerMarkdown;
986
+ const isPromptEcho = alignedEcho.isEcho;
759
987
  if (isPromptEcho) {
760
988
  logger('Detected prompt echo in response; waiting for actual assistant response...');
761
- const deadline = Date.now() + 8_000;
989
+ const deadline = Date.now() + 15_000;
762
990
  let bestText = null;
763
991
  let stableCount = 0;
764
992
  while (Date.now() < deadline) {
765
- const snapshot = await readAssistantSnapshot(Runtime).catch(() => null);
993
+ const snapshot = await readAssistantSnapshot(Runtime, baselineTurns ?? undefined).catch(() => null);
766
994
  const text = typeof snapshot?.text === 'string' ? snapshot.text.trim() : '';
767
- const normalizedText = normalizeForComparison(text);
768
- const isStillEcho = !text ||
769
- normalizedText === normalizedPrompt ||
770
- (promptPrefix.length > 0 && normalizedText.startsWith(promptPrefix));
995
+ const isStillEcho = !text || Boolean(promptEchoMatcher?.isEcho(text));
771
996
  if (!isStillEcho) {
772
997
  if (!bestText || text.length > bestText.length) {
773
998
  bestText = text;
@@ -867,9 +1092,9 @@ export function formatThinkingLog(startedAt, now, message, locatorSuffix) {
867
1092
  const statusLabel = message ? ` — ${message}` : '';
868
1093
  return `${pct}% [${elapsedText} / ~10m]${statusLabel}${locatorSuffix}`;
869
1094
  }
870
- async function waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger) {
1095
+ async function waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logger, minTurnIndex) {
871
1096
  try {
872
- return await waitForAssistantResponse(Runtime, timeoutMs, logger);
1097
+ return await waitForAssistantResponse(Runtime, timeoutMs, logger, minTurnIndex);
873
1098
  }
874
1099
  catch (error) {
875
1100
  if (!shouldReloadAfterAssistantError(error)) {
@@ -882,14 +1107,17 @@ async function waitForAssistantResponseWithReload(Runtime, Page, timeoutMs, logg
882
1107
  logger('Assistant response stalled; reloading conversation and retrying once');
883
1108
  await Page.navigate({ url: conversationUrl });
884
1109
  await delay(1000);
885
- return await waitForAssistantResponse(Runtime, timeoutMs, logger);
1110
+ return await waitForAssistantResponse(Runtime, timeoutMs, logger, minTurnIndex);
886
1111
  }
887
1112
  }
/**
 * Decide whether a failure while waiting for the assistant warrants reloading
 * the conversation and retrying.
 *
 * @param {unknown} error - Whatever was thrown by the wait.
 * @returns {boolean} `true` when `error` is an `Error` whose lowercased message
 *   contains one of the known retryable markers; `false` otherwise.
 */
function shouldReloadAfterAssistantError(error) {
    if (!(error instanceof Error)) {
        return false;
    }
    // Matching is case-insensitive: the message is lowercased once up front.
    const lowered = error.message.toLowerCase();
    const retryableMarkers = [
        'assistant-response',
        'watchdog',
        'timeout',
        'capture assistant response',
    ];
    return retryableMarkers.some((marker) => lowered.includes(marker));
}
894
1122
  async function readConversationUrl(Runtime) {
895
1123
  try {
@@ -900,6 +1128,34 @@ async function readConversationUrl(Runtime) {
900
1128
  return null;
901
1129
  }
902
1130
  }
/**
 * Count the elements in the page that match CONVERSATION_TURN_SELECTOR.
 *
 * The count is read through `Runtime.evaluate`. A failed or non-numeric read
 * is retried after a 150 ms pause, for up to four attempts in total.
 *
 * @param {object} Runtime - Object exposing an async `evaluate({ expression, returnByValue })`.
 * @param {Function} [logger] - Optional log function; the final failure is only
 *   reported when `logger.verbose` is truthy.
 * @returns {Promise<number | null>} A non-negative integer count, or `null`
 *   when every attempt failed.
 */
async function readConversationTurnCount(Runtime, logger) {
    const expression = `document.querySelectorAll(${JSON.stringify(CONVERSATION_TURN_SELECTOR)}).length`;
    for (let attemptsLeft = 4; attemptsLeft > 0; attemptsLeft -= 1) {
        try {
            const { result } = await Runtime.evaluate({ expression, returnByValue: true });
            // Number() leaves numbers untouched and coerces anything else.
            const count = Number(result?.value);
            if (!Number.isFinite(count)) {
                throw new Error('Turn count not numeric');
            }
            return Math.max(0, Math.floor(count));
        }
        catch (error) {
            const isFinalAttempt = attemptsLeft === 1;
            if (isFinalAttempt) {
                if (logger?.verbose) {
                    const reason = error instanceof Error ? error.message : String(error);
                    logger(`Failed to read conversation turn count: ${reason}`);
                }
                return null;
            }
            await delay(150);
        }
    }
    return null;
}
/**
 * Report whether a URL contains a conversation path segment, i.e. `/c/`
 * followed by at least one letter, digit, or hyphen (case-insensitive).
 *
 * @param {string} url - URL text to inspect.
 * @returns {boolean} `true` when such a segment appears anywhere in `url`.
 */
function isConversationUrl(url) {
    const conversationPathPattern = /\/c\/[a-z0-9-]+/i;
    return conversationPathPattern.test(url);
}
@@ -0,0 +1,13 @@
/**
 * Normalize a browser model strategy value.
 *
 * The value is trimmed and lowercased before being checked against the
 * accepted strategies.
 *
 * @param {unknown} value - Raw strategy value.
 * @returns {'select' | 'current' | 'ignore' | undefined} The normalized
 *   strategy, or `undefined` when `value` is null, undefined, or blank.
 * @throws {Error} When `value` is not a string, or is a string that does not
 *   name one of the accepted strategies.
 */
export function normalizeBrowserModelStrategy(value) {
    if (value == null) {
        return undefined;
    }
    const invalidStrategyError = () => new Error(`Invalid browser model strategy: "${String(value)}". Expected "select", "current", or "ignore".`);
    if (typeof value !== 'string') {
        // Non-string input would otherwise fail with an opaque TypeError from
        // `.trim()`; report it with the same message as any other bad value.
        throw invalidStrategyError();
    }
    const normalized = value.trim().toLowerCase();
    if (!normalized) {
        return undefined;
    }
    if (normalized === 'select' || normalized === 'current' || normalized === 'ignore') {
        return normalized;
    }
    throw invalidStrategyError();
}
@@ -1,5 +1,5 @@
1
1
  export { navigateToChatGPT, ensureNotBlocked, ensureLoggedIn, ensurePromptReady } from './actions/navigation.js';
2
2
  export { ensureModelSelection } from './actions/modelSelection.js';
3
3
  export { submitPrompt, clearPromptComposer } from './actions/promptComposer.js';
4
- export { uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments } from './actions/attachments.js';
5
- export { waitForAssistantResponse, readAssistantSnapshot, captureAssistantMarkdown, buildAssistantExtractorForTest, buildConversationDebugExpressionForTest, } from './actions/assistantResponse.js';
4
+ export { clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, } from './actions/attachments.js';
5
+ export { waitForAssistantResponse, readAssistantSnapshot, captureAssistantMarkdown, buildAssistantExtractorForTest, buildConversationDebugExpressionForTest, buildMarkdownFallbackExtractorForTest, buildCopyExpressionForTest, } from './actions/assistantResponse.js';