@steipete/oracle 0.6.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/README.md +16 -8
  2. package/dist/bin/oracle-cli.js +37 -17
  3. package/dist/src/browser/actions/assistantResponse.js +81 -49
  4. package/dist/src/browser/actions/attachments.js +37 -3
  5. package/dist/src/browser/actions/modelSelection.js +94 -5
  6. package/dist/src/browser/actions/promptComposer.js +22 -14
  7. package/dist/src/browser/constants.js +6 -2
  8. package/dist/src/browser/index.js +78 -5
  9. package/dist/src/browser/prompt.js +30 -6
  10. package/dist/src/browser/sessionRunner.js +0 -5
  11. package/dist/src/cli/browserConfig.js +34 -8
  12. package/dist/src/cli/help.js +3 -3
  13. package/dist/src/cli/options.js +20 -8
  14. package/dist/src/cli/runOptions.js +10 -8
  15. package/dist/src/cli/sessionRunner.js +0 -3
  16. package/dist/src/gemini-web/client.js +328 -0
  17. package/dist/src/gemini-web/executor.js +224 -0
  18. package/dist/src/gemini-web/index.js +1 -0
  19. package/dist/src/gemini-web/types.js +1 -0
  20. package/dist/src/mcp/tools/consult.js +4 -1
  21. package/dist/src/oracle/config.js +1 -1
  22. package/dist/src/oracle/run.js +15 -4
  23. package/package.json +17 -17
  24. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
  25. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Info.plist +0 -20
  26. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
  27. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Resources/OracleIcon.icns +0 -0
  28. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/_CodeSignature/CodeResources +0 -128
  29. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.swift +0 -45
  30. package/dist/vendor/oracle-notifier/oracle-notifier/README.md +0 -24
  31. package/dist/vendor/oracle-notifier/oracle-notifier/build-notifier.sh +0 -93
package/README.md CHANGED
@@ -21,26 +21,29 @@ Use `npx -y @steipete/oracle …` (not `pnpx`)—pnpx's sandboxed cache can’t
21
21
 
22
22
  ```bash
23
23
  # Copy the bundle and paste into ChatGPT
24
- npx @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
24
+ npx -y @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
25
25
 
26
26
  # Minimal API run (expects OPENAI_API_KEY in your env)
27
- npx @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
27
+ npx -y @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
28
28
 
29
29
  # Multi-model API run
30
- npx @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
30
+ npx -y @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
31
31
 
32
32
  # Preview without spending tokens
33
- npx @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
33
+ npx -y @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
34
34
 
35
35
  # Browser run (no API key, will open ChatGPT)
36
- npx @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
36
+ npx -y @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
37
+
38
+ # Gemini browser mode (no API key; uses Chrome cookies from gemini.google.com)
39
+ npx -y @steipete/oracle --engine browser --model gemini-3-pro --prompt "a cute robot holding a banana" --generate-image out.jpg --aspect 1:1
37
40
 
38
41
  # Sessions (list and replay)
39
- npx @steipete/oracle status --hours 72
40
- npx @steipete/oracle session <id> --render
42
+ npx -y @steipete/oracle status --hours 72
43
+ npx -y @steipete/oracle session <id> --render
41
44
 
42
45
  # TUI (interactive, only for humans)
43
- npx @steipete/oracle tui
46
+ npx -y @steipete/oracle tui
44
47
  ```
45
48
 
46
49
  Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser is stable on macOS and works on Linux and Windows. On Linux pass `--browser-chrome-path/--browser-cookie-path` if detection fails; on Windows prefer `--browser-manual-login` or inline cookies if decryption is blocked.
@@ -49,6 +52,8 @@ Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser i
49
52
 
50
53
  **CLI**
51
54
  - API mode expects API keys in your environment: `OPENAI_API_KEY` (GPT-5.x), `GEMINI_API_KEY` (Gemini 3 Pro), `ANTHROPIC_API_KEY` (Claude Sonnet 4.5 / Opus 4.1).
55
+ - Gemini browser mode uses Chrome cookies instead of an API key—just be logged into `gemini.google.com` in Chrome (no Python/venv required).
56
+ - If your Gemini account can’t access “Pro”, Oracle auto-falls back to a supported model for web runs (and logs the fallback in verbose mode).
52
57
  - Prefer API mode or `--copy` + manual paste; browser automation is experimental.
53
58
  - Browser support: stable on macOS; works on Linux (add `--browser-chrome-path/--browser-cookie-path` when needed) and Windows (manual-login or inline cookies recommended when app-bound cookies block decryption).
54
59
  - Remote browser service: `oracle serve` on a signed-in host; clients use `--remote-host/--remote-token`.
@@ -109,6 +114,9 @@ npx -y @steipete/oracle oracle-mcp
109
114
  | `--dry-run [summary\|json\|full]` | Preview without sending. |
110
115
  | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
111
116
  | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser). |
117
+ | `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
118
+ | `--generate-image <file>` | Generate image and save to file (Gemini browser mode). |
119
+ | `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). |
112
120
  | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
113
121
 
114
122
  ## Configuration
package/dist/bin/oracle-cli.js CHANGED
@@ -18,6 +18,7 @@ import { DEFAULT_MODEL, MODEL_CONFIGS, readFiles, estimateRequestTokens, buildRe
18
18
  import { isKnownModel } from '../src/oracle/modelResolver.js';
19
19
  import { CHATGPT_URL } from '../src/browserMode.js';
20
20
  import { createRemoteBrowserExecutor } from '../src/remote/client.js';
21
+ import { createGeminiWebExecutor } from '../src/gemini-web/index.js';
21
22
  import { applyHelpStyling } from '../src/cli/help.js';
22
23
  import { collectPaths, collectModelList, parseFloatOption, parseIntOption, parseSearchOption, usesDefaultStatusFilters, resolvePreviewMode, normalizeModelOption, normalizeBaseUrl, resolveApiModel, inferModelFromLabel, parseHeartbeatOption, parseTimeoutOption, mergePathLikeOptions, } from '../src/cli/options.js';
23
24
  import { copyToClipboard } from '../src/cli/clipboard.js';
@@ -26,6 +27,7 @@ import { shouldDetachSession } from '../src/cli/detach.js';
26
27
  import { applyHiddenAliases } from '../src/cli/hiddenAliases.js';
27
28
  import { buildBrowserConfig, resolveBrowserModelLabel } from '../src/cli/browserConfig.js';
28
29
  import { performSessionRun } from '../src/cli/sessionRunner.js';
30
+ import { isMediaFile } from '../src/browser/prompt.js';
29
31
  import { attachSession, showStatus, formatCompletionSummary } from '../src/cli/sessionDisplay.js';
30
32
  import { formatCompactNumber } from '../src/cli/format.js';
31
33
  import { formatIntroLine } from '../src/cli/tagline.js';
@@ -85,7 +87,7 @@ program.hook('preAction', (thisCommand) => {
85
87
  });
86
88
  program
87
89
  .name('oracle')
88
- .description('One-shot GPT-5.1 Pro / GPT-5.1 / GPT-5.1 Codex tool for hard questions that benefit from large file context and server-side search.')
90
+ .description('One-shot GPT-5.2 Pro / GPT-5.2 / GPT-5.1 Codex tool for hard questions that benefit from large file context and server-side search.')
89
91
  .version(VERSION)
90
92
  .argument('[prompt]', 'Prompt text (shorthand for --prompt).')
91
93
  .option('-p, --prompt <text>', 'User prompt to send to the model.')
@@ -110,18 +112,18 @@ program
110
112
  .addOption(new Option('--copy-markdown', 'Copy the assembled markdown bundle to the clipboard; pair with --render to print it too.').default(false))
111
113
  .addOption(new Option('--copy').hideHelp().default(false))
112
114
  .option('-s, --slug <words>', 'Custom session slug (3-5 words).')
113
- .option('-m, --model <model>', 'Model to target (gpt-5.1-pro default; aliases to gpt-5.2-pro on API. Also gpt-5-pro, gpt-5.1, gpt-5.1-codex API-only, gpt-5.2, gpt-5.2-instant, gpt-5.2-pro, gemini-3-pro, claude-4.5-sonnet, claude-4.1-opus, or ChatGPT labels like "5.2 Thinking" for browser runs).', normalizeModelOption)
114
- .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.1-pro,gemini-3-pro").')
115
+ .option('-m, --model <model>', 'Model to target (gpt-5.2-pro default; also supports gpt-5.1-pro alias). Also gpt-5-pro, gpt-5.1, gpt-5.1-codex API-only, gpt-5.2, gpt-5.2-instant, gpt-5.2-pro, gemini-3-pro, claude-4.5-sonnet, claude-4.1-opus, or ChatGPT labels like "5.2 Thinking" for browser runs).', normalizeModelOption)
116
+ .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.2-pro,gemini-3-pro").')
115
117
  .argParser(collectModelList)
116
118
  .default([]))
117
- .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Engine is preferred; --mode is a legacy alias. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
119
+ .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Browser engine: GPT models automate ChatGPT; Gemini models use a cookie-based client for gemini.google.com. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
118
120
  .addOption(new Option('--mode <mode>', 'Alias for --engine (api | browser).').choices(['api', 'browser']).hideHelp())
119
121
  .option('--files-report', 'Show token usage per attached file (also prints automatically when files exceed the token budget).', false)
120
122
  .option('-v, --verbose', 'Enable verbose logging for all operations.', false)
121
123
  .addOption(new Option('--[no-]notify', 'Desktop notification when a session finishes (default on unless CI/SSH).')
122
124
  .default(undefined))
123
125
  .addOption(new Option('--[no-]notify-sound', 'Play a notification sound on completion (default off).').default(undefined))
124
- .addOption(new Option('--timeout <seconds|auto>', 'Overall timeout before aborting the API call (auto = 60m for gpt-5.1-pro, 120s otherwise).')
126
+ .addOption(new Option('--timeout <seconds|auto>', 'Overall timeout before aborting the API call (auto = 60m for gpt-5.2-pro, 120s otherwise).')
125
127
  .argParser(parseTimeoutOption)
126
128
  .default('auto'))
127
129
  .addOption(new Option('--preview [mode]', '(alias) Preview the request without calling the model (summary | json | full). Deprecated: use --dry-run instead.')
@@ -182,6 +184,12 @@ program
182
184
  .addOption(new Option('--remote-token <token>', 'Access token for the remote `oracle serve` instance.'))
183
185
  .addOption(new Option('--browser-inline-files', 'Alias for --browser-attachments never (force pasting file contents inline).').default(false))
184
186
  .addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
187
+ .addOption(new Option('--youtube <url>', 'YouTube video URL to analyze (Gemini web/cookie mode only; uses your signed-in Chrome cookies for gemini.google.com).'))
188
+ .addOption(new Option('--generate-image <file>', 'Generate image and save to file (Gemini web/cookie mode only; requires gemini.google.com Chrome cookies).'))
189
+ .addOption(new Option('--edit-image <file>', 'Edit existing image (use with --output, Gemini web/cookie mode only).'))
190
+ .addOption(new Option('--output <file>', 'Output file path for image operations (Gemini web/cookie mode only).'))
191
+ .addOption(new Option('--aspect <ratio>', 'Aspect ratio for image generation: 16:9, 1:1, 4:3, 3:4 (Gemini web/cookie mode only).'))
192
+ .addOption(new Option('--gemini-show-thoughts', 'Display Gemini thinking process (Gemini web/cookie mode only).').default(false))
185
193
  .option('--retain-hours <hours>', 'Prune stored sessions older than this many hours before running (set 0 to disable).', parseFloatOption)
186
194
  .option('--force', 'Force start a new session even if an identical prompt is already running.', false)
187
195
  .option('--debug-help', 'Show the advanced/debug option set and exit.', false)
@@ -512,18 +520,13 @@ async function runRootCommand(options) {
512
520
  const isCodex = primaryModelCandidate.startsWith('gpt-5.1-codex');
513
521
  const isClaude = primaryModelCandidate.startsWith('claude');
514
522
  const userForcedBrowser = options.browser || options.engine === 'browser';
515
- const hasNonGptBrowserTarget = (engine === 'browser' || userForcedBrowser) &&
523
+ const isBrowserCompatible = (model) => model.startsWith('gpt-') || model.startsWith('gemini');
524
+ const hasNonBrowserCompatibleTarget = (engine === 'browser' || userForcedBrowser) &&
516
525
  (normalizedMultiModels.length > 0
517
- ? normalizedMultiModels.some((model) => !model.startsWith('gpt-'))
518
- : !resolvedModelCandidate.startsWith('gpt-'));
519
- if (hasNonGptBrowserTarget) {
520
- throw new Error('Browser engine only supports GPT-series ChatGPT models. Re-run with --engine api for Grok, Claude, Gemini, or other non-GPT models.');
521
- }
522
- if (isGemini && userForcedBrowser) {
523
- throw new Error('Gemini is only supported via API. Use --engine api.');
524
- }
525
- if (isGemini && engine === 'browser') {
526
- engine = 'api';
526
+ ? normalizedMultiModels.some((model) => !isBrowserCompatible(model))
527
+ : !isBrowserCompatible(resolvedModelCandidate));
528
+ if (hasNonBrowserCompatibleTarget) {
529
+ throw new Error('Browser engine only supports GPT and Gemini models. Re-run with --engine api for Grok, Claude, or other models.');
527
530
  }
528
531
  if (isClaude && engine === 'browser') {
529
532
  console.log(chalk.dim('Browser engine is not supported for Claude models; switching to API.'));
@@ -672,7 +675,11 @@ async function runRootCommand(options) {
672
675
  return;
673
676
  }
674
677
  if (options.file && options.file.length > 0) {
675
- await readFiles(options.file, { cwd: process.cwd() });
678
+ const isBrowserMode = engine === 'browser' || userForcedBrowser;
679
+ const filesToValidate = isBrowserMode ? options.file.filter((f) => !isMediaFile(f)) : options.file;
680
+ if (filesToValidate.length > 0) {
681
+ await readFiles(filesToValidate, { cwd: process.cwd() });
682
+ }
676
683
  }
677
684
  const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
678
685
  applyBrowserDefaultsFromConfig(options, userConfig, getSource);
@@ -698,6 +705,19 @@ async function runRootCommand(options) {
698
705
  };
699
706
  console.log(chalk.dim(`Routing browser automation to remote host ${remoteHost}`));
700
707
  }
708
+ else if (browserConfig && resolvedModel.startsWith('gemini')) {
709
+ browserDeps = {
710
+ executeBrowser: createGeminiWebExecutor({
711
+ youtube: options.youtube,
712
+ generateImage: options.generateImage,
713
+ editImage: options.editImage,
714
+ outputPath: options.output,
715
+ aspectRatio: options.aspect,
716
+ showThoughts: options.geminiShowThoughts,
717
+ }),
718
+ };
719
+ console.log(chalk.dim('Using Gemini web client for browser automation'));
720
+ }
701
721
  const remoteExecutionActive = Boolean(browserDeps);
702
722
  if (options.dryRun) {
703
723
  const baseRunOptions = buildRunOptions(resolvedOptions, {
package/dist/src/browser/actions/assistantResponse.js CHANGED
@@ -218,6 +218,8 @@ async function isCompletionVisible(Runtime) {
218
218
  const ASSISTANT_SELECTOR = '${ASSISTANT_ROLE_SELECTOR}';
219
219
  const isAssistantTurn = (node) => {
220
220
  if (!(node instanceof HTMLElement)) return false;
221
+ const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
222
+ if (turnAttr === 'assistant') return true;
221
223
  const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
222
224
  if (role === 'assistant') return true;
223
225
  const testId = (node.getAttribute('data-testid') || '').toLowerCase();
@@ -257,6 +259,12 @@ function normalizeAssistantSnapshot(snapshot) {
257
259
  if (!text.trim()) {
258
260
  return null;
259
261
  }
262
+ const normalized = text.toLowerCase();
263
+ // "Pro thinking" often renders a placeholder turn containing an "Answer now" gate.
264
+ // Treat it as incomplete so browser mode keeps waiting (and can click the gate).
265
+ if (normalized.includes('answer now') && (normalized.includes('pro thinking') || normalized.includes('chatgpt said'))) {
266
+ return null;
267
+ }
260
268
  return {
261
269
  text,
262
270
  html: snapshot?.html ?? undefined,
@@ -295,10 +303,13 @@ function buildResponseObserverExpression(timeoutMs) {
295
303
  const CONVERSATION_SELECTOR = ${conversationLiteral};
296
304
  const ASSISTANT_SELECTOR = ${assistantLiteral};
297
305
  const settleDelayMs = 800;
306
+ const ANSWER_NOW_LABEL = 'answer now';
298
307
 
299
308
  // Helper to detect assistant turns - matches buildAssistantExtractor logic
300
309
  const isAssistantTurn = (node) => {
301
310
  if (!(node instanceof HTMLElement)) return false;
311
+ const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
312
+ if (turnAttr === 'assistant') return true;
302
313
  const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
303
314
  if (role === 'assistant') return true;
304
315
  const testId = (node.getAttribute('data-testid') || '').toLowerCase();
@@ -330,6 +341,11 @@ function buildResponseObserverExpression(timeoutMs) {
330
341
  });
331
342
  observer.observe(document.body, { childList: true, subtree: true, characterData: true });
332
343
  stopInterval = setInterval(() => {
344
+ // Pro thinking can gate the response behind an "Answer now" button. Keep clicking it while present.
345
+ const answerNow = Array.from(document.querySelectorAll('button,span')).find((el) => (el?.textContent || '').trim().toLowerCase() === ANSWER_NOW_LABEL);
346
+ if (answerNow) {
347
+ dispatchClickSequence(answerNow.closest('button') ?? answerNow);
348
+ }
333
349
  const stop = document.querySelector(STOP_SELECTOR);
334
350
  if (!stop) {
335
351
  return;
@@ -382,9 +398,10 @@ function buildResponseObserverExpression(timeoutMs) {
382
398
  lastLength = refreshed.text?.length ?? lastLength;
383
399
  }
384
400
  const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
401
+ const answerNowVisible = Boolean(Array.from(document.querySelectorAll('button,span')).find((el) => (el?.textContent || '').trim().toLowerCase() === ANSWER_NOW_LABEL));
385
402
  const finishedVisible = isLastAssistantTurnFinished();
386
403
 
387
- if (!stopVisible || finishedVisible) {
404
+ if ((!stopVisible && !answerNowVisible) || finishedVisible) {
388
405
  break;
389
406
  }
390
407
  }
@@ -407,6 +424,10 @@ function buildAssistantExtractor(functionName) {
407
424
  const ASSISTANT_SELECTOR = ${assistantLiteral};
408
425
  const isAssistantTurn = (node) => {
409
426
  if (!(node instanceof HTMLElement)) return false;
427
+ const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
428
+ if (turnAttr === 'assistant') {
429
+ return true;
430
+ }
410
431
  const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
411
432
  if (role === 'assistant') {
412
433
  return true;
@@ -443,11 +464,13 @@ function buildAssistantExtractor(functionName) {
443
464
  }
444
465
  const messageRoot = turn.querySelector(ASSISTANT_SELECTOR) ?? turn;
445
466
  expandCollapsibles(messageRoot);
446
- const preferred =
447
- messageRoot.querySelector('.markdown') ||
448
- messageRoot.querySelector('[data-message-content]') ||
449
- messageRoot;
450
- const text = preferred?.innerText ?? '';
467
+ const preferred = messageRoot.querySelector('.markdown') || messageRoot.querySelector('[data-message-content]');
468
+ if (!preferred) {
469
+ continue;
470
+ }
471
+ const innerText = preferred?.innerText ?? '';
472
+ const textContent = preferred?.textContent ?? '';
473
+ const text = innerText.trim().length > 0 ? innerText : textContent;
451
474
  const html = preferred?.innerHTML ?? '';
452
475
  const messageId = messageRoot.getAttribute('data-message-id');
453
476
  const turnId = messageRoot.getAttribute('data-testid');
@@ -462,7 +485,7 @@ function buildCopyExpression(meta) {
462
485
  return `(() => {
463
486
  ${buildClickDispatcher()}
464
487
  const BUTTON_SELECTOR = '${COPY_BUTTON_SELECTOR}';
465
- const TIMEOUT_MS = 5000;
488
+ const TIMEOUT_MS = 10000;
466
489
 
467
490
  const locateButton = () => {
468
491
  const hint = ${JSON.stringify(meta ?? {})};
@@ -526,53 +549,62 @@ function buildCopyExpression(meta) {
526
549
  };
527
550
 
528
551
  return new Promise((resolve) => {
529
- const button = locateButton();
530
- if (!button) {
531
- resolve({ success: false, status: 'missing-button' });
532
- return;
533
- }
534
- const interception = interceptClipboard();
535
- let settled = false;
536
- let pollId = null;
537
- let timeoutId = null;
538
- const finish = (payload) => {
539
- if (settled) {
552
+ const deadline = Date.now() + TIMEOUT_MS;
553
+ const waitForButton = () => {
554
+ const button = locateButton();
555
+ if (button) {
556
+ const interception = interceptClipboard();
557
+ let settled = false;
558
+ let pollId = null;
559
+ let timeoutId = null;
560
+ const finish = (payload) => {
561
+ if (settled) {
562
+ return;
563
+ }
564
+ settled = true;
565
+ if (pollId) {
566
+ clearInterval(pollId);
567
+ }
568
+ if (timeoutId) {
569
+ clearTimeout(timeoutId);
570
+ }
571
+ button.removeEventListener('copy', handleCopy, true);
572
+ interception.restore?.();
573
+ resolve(payload);
574
+ };
575
+
576
+ const readIntercepted = () => {
577
+ const markdown = interception.state.text ?? '';
578
+ return { success: Boolean(markdown.trim()), markdown };
579
+ };
580
+
581
+ const handleCopy = () => {
582
+ finish(readIntercepted());
583
+ };
584
+
585
+ button.addEventListener('copy', handleCopy, true);
586
+ button.scrollIntoView({ block: 'center', behavior: 'instant' });
587
+ dispatchClickSequence(button);
588
+ pollId = setInterval(() => {
589
+ const payload = readIntercepted();
590
+ if (payload.success) {
591
+ finish(payload);
592
+ }
593
+ }, 100);
594
+ timeoutId = setTimeout(() => {
595
+ button.removeEventListener('copy', handleCopy, true);
596
+ finish({ success: false, status: 'timeout' });
597
+ }, TIMEOUT_MS);
540
598
  return;
541
599
  }
542
- settled = true;
543
- if (pollId) {
544
- clearInterval(pollId);
545
- }
546
- if (timeoutId) {
547
- clearTimeout(timeoutId);
600
+ if (Date.now() > deadline) {
601
+ resolve({ success: false, status: 'missing-button' });
602
+ return;
548
603
  }
549
- button.removeEventListener('copy', handleCopy, true);
550
- interception.restore?.();
551
- resolve(payload);
552
- };
553
-
554
- const readIntercepted = () => {
555
- const markdown = interception.state.text ?? '';
556
- return { success: Boolean(markdown.trim()), markdown };
604
+ setTimeout(waitForButton, 120);
557
605
  };
558
606
 
559
- const handleCopy = () => {
560
- finish(readIntercepted());
561
- };
562
-
563
- button.addEventListener('copy', handleCopy, true);
564
- button.scrollIntoView({ block: 'center', behavior: 'instant' });
565
- dispatchClickSequence(button);
566
- pollId = setInterval(() => {
567
- const payload = readIntercepted();
568
- if (payload.success) {
569
- finish(payload);
570
- }
571
- }, 100);
572
- timeoutId = setTimeout(() => {
573
- button.removeEventListener('copy', handleCopy, true);
574
- finish({ success: false, status: 'timeout' });
575
- }, TIMEOUT_MS);
607
+ waitForButton();
576
608
  });
577
609
  })()`;
578
610
  }
package/dist/src/browser/actions/attachments.js CHANGED
@@ -214,8 +214,33 @@ export async function waitForAttachmentCompletion(Runtime, timeoutMs, expectedNa
214
214
  const { result } = await Runtime.evaluate({ expression, returnByValue: true });
215
215
  const value = result?.value;
216
216
  if (value && !value.uploading) {
217
- const attached = new Set((value.attachedNames ?? []).map((name) => name.toLowerCase()));
218
- const missing = expectedNormalized.filter((name) => !attached.has(name));
217
+ const attachedNames = (value.attachedNames ?? [])
218
+ .map((name) => name.toLowerCase().replace(/\s+/g, ' ').trim())
219
+ .filter(Boolean);
220
+ const matchesExpected = (expected) => {
221
+ const baseName = expected.split('/').pop()?.split('\\').pop() ?? expected;
222
+ const normalizedExpected = baseName.toLowerCase().replace(/\s+/g, ' ').trim();
223
+ const expectedNoExt = normalizedExpected.replace(/\.[a-z0-9]{1,10}$/i, '');
224
+ return attachedNames.some((raw) => {
225
+ if (raw.includes(normalizedExpected))
226
+ return true;
227
+ if (expectedNoExt.length >= 6 && raw.includes(expectedNoExt))
228
+ return true;
229
+ if (raw.includes('…') || raw.includes('...')) {
230
+ const escaped = raw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
231
+ const pattern = escaped.replace(/\\…|\\\.\\\.\\\./g, '.*');
232
+ try {
233
+ const re = new RegExp(pattern);
234
+ return re.test(normalizedExpected) || (expectedNoExt.length >= 6 && re.test(expectedNoExt));
235
+ }
236
+ catch {
237
+ return false;
238
+ }
239
+ }
240
+ return false;
241
+ });
242
+ };
243
+ const missing = expectedNormalized.filter((expected) => !matchesExpected(expected));
219
244
  if (missing.length === 0) {
220
245
  if (value.state === 'ready') {
221
246
  return;
@@ -223,6 +248,11 @@ export async function waitForAttachmentCompletion(Runtime, timeoutMs, expectedNa
223
248
  if (value.state === 'missing' && value.filesAttached) {
224
249
  return;
225
250
  }
251
+ // If files are attached but button isn't ready yet, give it more time but don't fail immediately
252
+ if (value.filesAttached) {
253
+ await delay(500);
254
+ continue;
255
+ }
226
256
  }
227
257
  }
228
258
  await delay(250);
@@ -249,7 +279,11 @@ export async function waitForAttachmentVisible(Runtime, expectedName, timeoutMs,
249
279
  };
250
280
 
251
281
  const turns = Array.from(document.querySelectorAll('article[data-testid^="conversation-turn"]'));
252
- const userTurns = turns.filter((node) => node.querySelector('[data-message-author-role="user"]'));
282
+ const userTurns = turns.filter((node) => {
283
+ const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
284
+ if (turnAttr === 'user') return true;
285
+ return Boolean(node.querySelector('[data-message-author-role="user"]'));
286
+ });
253
287
  const lastUser = userTurns[userTurns.length - 1];
254
288
  if (lastUser) {
255
289
  const turnMatch = Array.from(lastUser.querySelectorAll('*')).some(matchNode);
package/dist/src/browser/actions/modelSelection.js CHANGED
@@ -63,12 +63,41 @@ function buildModelSelectionExpression(targetModel) {
63
63
  .map((token) => normalizeText(token))
64
64
  .filter(Boolean);
65
65
  const targetWords = normalizedTarget.split(' ').filter(Boolean);
66
+ const desiredVersion = normalizedTarget.includes('5 2')
67
+ ? '5-2'
68
+ : normalizedTarget.includes('5 1')
69
+ ? '5-1'
70
+ : normalizedTarget.includes('5 0')
71
+ ? '5-0'
72
+ : null;
73
+ const wantsPro = normalizedTarget.includes(' pro') || normalizedTarget.endsWith(' pro') || normalizedTokens.includes('pro');
74
+ const wantsInstant = normalizedTarget.includes('instant');
75
+ const wantsThinking = normalizedTarget.includes('thinking');
66
76
 
67
77
  const button = document.querySelector(BUTTON_SELECTOR);
68
78
  if (!button) {
69
79
  return { status: 'button-missing' };
70
80
  }
71
81
 
82
+ const getButtonLabel = () => (button.textContent ?? '').trim();
83
+ const buttonMatchesTarget = () => {
84
+ const normalizedLabel = normalizeText(getButtonLabel());
85
+ if (!normalizedLabel) return false;
86
+ if (desiredVersion) {
87
+ if (desiredVersion === '5-2' && !normalizedLabel.includes('5 2')) return false;
88
+ if (desiredVersion === '5-1' && !normalizedLabel.includes('5 1')) return false;
89
+ if (desiredVersion === '5-0' && !normalizedLabel.includes('5 0')) return false;
90
+ }
91
+ if (wantsPro && !normalizedLabel.includes(' pro')) return false;
92
+ if (wantsInstant && !normalizedLabel.includes('instant')) return false;
93
+ if (wantsThinking && !normalizedLabel.includes('thinking')) return false;
94
+ return true;
95
+ };
96
+
97
+ if (buttonMatchesTarget()) {
98
+ return { status: 'already-selected', label: getButtonLabel() };
99
+ }
100
+
72
101
  let lastPointerClick = 0;
73
102
  const pointerClick = () => {
74
103
  if (dispatchClickSequence(button)) {
@@ -106,8 +135,46 @@ function buildModelSelectionExpression(targetModel) {
106
135
  }
107
136
  let score = 0;
108
137
  const normalizedTestId = (testid ?? '').toLowerCase();
109
- if (normalizedTestId && TEST_IDS.some((id) => normalizedTestId.includes(id))) {
110
- score += 1000;
138
+ if (normalizedTestId) {
139
+ if (desiredVersion) {
140
+ // data-testid strings have been observed with both dotted and dashed versions (e.g. gpt-5.2-pro vs gpt-5-2-pro).
141
+ const has52 =
142
+ normalizedTestId.includes('5-2') ||
143
+ normalizedTestId.includes('5.2') ||
144
+ normalizedTestId.includes('gpt-5-2') ||
145
+ normalizedTestId.includes('gpt-5.2') ||
146
+ normalizedTestId.includes('gpt52');
147
+ const has51 =
148
+ normalizedTestId.includes('5-1') ||
149
+ normalizedTestId.includes('5.1') ||
150
+ normalizedTestId.includes('gpt-5-1') ||
151
+ normalizedTestId.includes('gpt-5.1') ||
152
+ normalizedTestId.includes('gpt51');
153
+ const has50 =
154
+ normalizedTestId.includes('5-0') ||
155
+ normalizedTestId.includes('5.0') ||
156
+ normalizedTestId.includes('gpt-5-0') ||
157
+ normalizedTestId.includes('gpt-5.0') ||
158
+ normalizedTestId.includes('gpt50');
159
+ const candidateVersion = has52 ? '5-2' : has51 ? '5-1' : has50 ? '5-0' : null;
160
+ // If a candidate advertises a different version, ignore it entirely.
161
+ if (candidateVersion && candidateVersion !== desiredVersion) {
162
+ return 0;
163
+ }
164
+ // When targeting an explicit version, avoid selecting submenu wrappers that can contain legacy models.
165
+ if (normalizedTestId.includes('submenu') && candidateVersion === null) {
166
+ return 0;
167
+ }
168
+ }
169
+ const matches = TEST_IDS.filter((id) => id && normalizedTestId.includes(id));
170
+ if (matches.length > 0) {
171
+ // Prefer the most specific match (longest token) instead of treating any hit as equal.
172
+ // This prevents generic tokens (e.g. "pro") from outweighing version-specific targets.
173
+ const best = matches.reduce((acc, token) => (token.length > acc.length ? token : acc), '');
174
+ score += 200 + Math.min(900, best.length * 25);
175
+ if (best.startsWith('model-switcher-')) score += 120;
176
+ if (best.includes('gpt-')) score += 60;
177
+ }
111
178
  }
112
179
  if (normalizedText && normalizedTarget) {
113
180
  if (normalizedText === normalizedTarget) {
@@ -134,6 +201,14 @@ function buildModelSelectionExpression(targetModel) {
134
201
  }
135
202
  score -= missing * 12;
136
203
  }
204
+ // Bias scoring toward the requested tier: when Pro is wanted, penalize labels without " pro"; otherwise penalize " pro" labels so a non-Pro option wins when both exist.
205
+ if (wantsPro) {
206
+ if (!normalizedText.includes(' pro')) {
207
+ score -= 80;
208
+ }
209
+ } else if (normalizedText.includes(' pro')) {
210
+ score -= 40;
211
+ }
137
212
  return Math.max(score, 0);
138
213
  };
139
214
 
@@ -153,7 +228,7 @@ function buildModelSelectionExpression(targetModel) {
153
228
  }
154
229
  const label = getOptionLabel(option);
155
230
  if (!bestMatch || score > bestMatch.score) {
156
- bestMatch = { node: option, label, score };
231
+ bestMatch = { node: option, label, score, testid, normalizedText };
157
232
  }
158
233
  }
159
234
  }
@@ -182,11 +257,25 @@ function buildModelSelectionExpression(targetModel) {
182
257
  const match = findBestOption();
183
258
  if (match) {
184
259
  if (optionIsSelected(match.node)) {
185
- resolve({ status: 'already-selected', label: match.label });
260
+ resolve({ status: 'already-selected', label: getButtonLabel() || match.label });
186
261
  return;
187
262
  }
188
263
  dispatchClickSequence(match.node);
189
- resolve({ status: 'switched', label: match.label });
264
+ // Submenus (e.g. "Legacy models") need a second pass to pick the actual model option.
265
+ // Keep scanning once the submenu opens instead of treating the submenu click as a final switch.
266
+ const isSubmenu = (match.testid ?? '').toLowerCase().includes('submenu');
267
+ if (isSubmenu) {
268
+ setTimeout(attempt, REOPEN_INTERVAL_MS / 2);
269
+ return;
270
+ }
271
+ // Wait for the top bar label to reflect the requested model; otherwise keep scanning.
272
+ setTimeout(() => {
273
+ if (buttonMatchesTarget()) {
274
+ resolve({ status: 'switched', label: getButtonLabel() || match.label });
275
+ return;
276
+ }
277
+ attempt();
278
+ }, Math.max(120, INITIAL_WAIT_MS));
190
279
  return;
191
280
  }
192
281
  if (performance.now() - start > MAX_WAIT_MS) {
@@ -283,20 +283,28 @@ async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger) {
283
283
  const primarySelectorLiteral = JSON.stringify(PROMPT_PRIMARY_SELECTOR);
284
284
  const fallbackSelectorLiteral = JSON.stringify(PROMPT_FALLBACK_SELECTOR);
285
285
  const script = `(() => {
286
- const editor = document.querySelector(${primarySelectorLiteral});
287
- const fallback = document.querySelector(${fallbackSelectorLiteral});
288
- const normalize = (value) => value?.toLowerCase?.().replace(/\\s+/g, ' ').trim() ?? '';
289
- const normalizedPrompt = normalize(${encodedPrompt});
290
- const normalizedPromptPrefix = normalizedPrompt.slice(0, 120);
291
- const CONVERSATION_SELECTOR = ${JSON.stringify(CONVERSATION_TURN_SELECTOR)};
292
- const articles = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
293
- const normalizedTurns = articles.map((node) => normalize(node?.innerText));
294
- const userMatched = normalizedTurns.some((text) => text.includes(normalizedPrompt));
295
- const prefixMatched =
296
- normalizedPromptPrefix.length > 30 &&
297
- normalizedTurns.some((text) => text.includes(normalizedPromptPrefix));
298
- const lastTurn = normalizedTurns[normalizedTurns.length - 1] ?? '';
299
- return {
286
+ const editor = document.querySelector(${primarySelectorLiteral});
287
+ const fallback = document.querySelector(${fallbackSelectorLiteral});
288
+ const normalize = (value) => {
289
+ let text = value?.toLowerCase?.() ?? '';
290
+ // Strip markdown *markers* but keep content (ChatGPT renders fence markers differently).
291
+ text = text.replace(/\`\`\`[^\\n]*\\n([\\s\\S]*?)\`\`\`/g, ' $1 ');
292
+ text = text.replace(/\`\`\`/g, ' ');
293
+ text = text.replace(/\`([^\`]*)\`/g, '$1');
294
+ return text.replace(/\\s+/g, ' ').trim();
295
+ };
296
+ const normalizedPrompt = normalize(${encodedPrompt});
297
+ const normalizedPromptPrefix = normalizedPrompt.slice(0, 120);
298
+ const CONVERSATION_SELECTOR = ${JSON.stringify(CONVERSATION_TURN_SELECTOR)};
299
+ const articles = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
300
+ const normalizedTurns = articles.map((node) => normalize(node?.innerText));
301
+ const userMatched =
302
+ normalizedPrompt.length > 0 && normalizedTurns.some((text) => text.includes(normalizedPrompt));
303
+ const prefixMatched =
304
+ normalizedPromptPrefix.length > 30 &&
305
+ normalizedTurns.some((text) => text.includes(normalizedPromptPrefix));
306
+ const lastTurn = normalizedTurns[normalizedTurns.length - 1] ?? '';
307
+ return {
300
308
  userMatched,
301
309
  prefixMatched,
302
310
  fallbackValue: fallback?.value ?? '',