npm - @steipete/oracle - Versions diffs - 0.6.1 → 0.7.1 - Mend

@steipete/oracle 0.6.1 → 0.7.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (31)

package/README.md +16 -8
package/dist/bin/oracle-cli.js +37 -17
package/dist/src/browser/actions/assistantResponse.js +81 -49
package/dist/src/browser/actions/attachments.js +37 -3
package/dist/src/browser/actions/modelSelection.js +94 -5
package/dist/src/browser/actions/promptComposer.js +22 -14
package/dist/src/browser/constants.js +6 -2
package/dist/src/browser/index.js +78 -5
package/dist/src/browser/prompt.js +30 -6
package/dist/src/browser/sessionRunner.js +0 -5
package/dist/src/cli/browserConfig.js +34 -8
package/dist/src/cli/help.js +3 -3
package/dist/src/cli/options.js +20 -8
package/dist/src/cli/runOptions.js +10 -8
package/dist/src/cli/sessionRunner.js +0 -3
package/dist/src/gemini-web/client.js +328 -0
package/dist/src/gemini-web/executor.js +224 -0
package/dist/src/gemini-web/index.js +1 -0
package/dist/src/gemini-web/types.js +1 -0
package/dist/src/mcp/tools/consult.js +4 -1
package/dist/src/oracle/config.js +1 -1
package/dist/src/oracle/run.js +15 -4
package/package.json +17 -17
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Info.plist +0 -20
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Resources/OracleIcon.icns +0 -0
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/_CodeSignature/CodeResources +0 -128
package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.swift +0 -45
package/dist/vendor/oracle-notifier/oracle-notifier/README.md +0 -24
package/dist/vendor/oracle-notifier/oracle-notifier/build-notifier.sh +0 -93

package/README.md CHANGED

@@ -21,26 +21,29 @@ Use `npx -y @steipete/oracle …` (not `pnpx`)—pnpx's sandboxed cache can’t
 ```bash
 # Copy the bundle and paste into ChatGPT
-npx @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
+npx -y @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
 # Minimal API run (expects OPENAI_API_KEY in your env)
-npx @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
+npx -y @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
 # Multi-model API run
-npx @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
+npx -y @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
 # Preview without spending tokens
-npx @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
+npx -y @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
 # Browser run (no API key, will open ChatGPT)
-npx @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
+npx -y @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
+# Gemini browser mode (no API key; uses Chrome cookies from gemini.google.com)
+npx -y @steipete/oracle --engine browser --model gemini-3-pro --prompt "a cute robot holding a banana" --generate-image out.jpg --aspect 1:1
 # Sessions (list and replay)
-npx @steipete/oracle status --hours 72
-npx @steipete/oracle session <id> --render
+npx -y @steipete/oracle status --hours 72
+npx -y @steipete/oracle session <id> --render
 # TUI (interactive, only for humans)
-npx @steipete/oracle tui
+npx -y @steipete/oracle tui
 ```
 Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser is stable on macOS and works on Linux and Windows. On Linux pass `--browser-chrome-path/--browser-cookie-path` if detection fails; on Windows prefer `--browser-manual-login` or inline cookies if decryption is blocked.
@@ -49,6 +52,8 @@ Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser i
 **CLI**
 - API mode expects API keys in your environment: `OPENAI_API_KEY` (GPT-5.x), `GEMINI_API_KEY` (Gemini 3 Pro), `ANTHROPIC_API_KEY` (Claude Sonnet 4.5 / Opus 4.1).
+- Gemini browser mode uses Chrome cookies instead of an API key—just be logged into `gemini.google.com` in Chrome (no Python/venv required).
+- If your Gemini account can’t access “Pro”, Oracle auto-falls back to a supported model for web runs (and logs the fallback in verbose mode).
 - Prefer API mode or `--copy` + manual paste; browser automation is experimental.
 - Browser support: stable on macOS; works on Linux (add `--browser-chrome-path/--browser-cookie-path` when needed) and Windows (manual-login or inline cookies recommended when app-bound cookies block decryption).
 - Remote browser service: `oracle serve` on a signed-in host; clients use `--remote-host/--remote-token`.
@@ -109,6 +114,9 @@ npx -y @steipete/oracle oracle-mcp
 | `--dry-run [summary\|json\|full]` | Preview without sending. |
 | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
 | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser). |
+| `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
+| `--generate-image <file>` | Generate image and save to file (Gemini browser mode). |
+| `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). |
 | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
 ## Configuration

package/dist/bin/oracle-cli.js CHANGED

@@ -18,6 +18,7 @@ import { DEFAULT_MODEL, MODEL_CONFIGS, readFiles, estimateRequestTokens, buildRe
 import { isKnownModel } from '../src/oracle/modelResolver.js';
 import { CHATGPT_URL } from '../src/browserMode.js';
 import { createRemoteBrowserExecutor } from '../src/remote/client.js';
+import { createGeminiWebExecutor } from '../src/gemini-web/index.js';
 import { applyHelpStyling } from '../src/cli/help.js';
 import { collectPaths, collectModelList, parseFloatOption, parseIntOption, parseSearchOption, usesDefaultStatusFilters, resolvePreviewMode, normalizeModelOption, normalizeBaseUrl, resolveApiModel, inferModelFromLabel, parseHeartbeatOption, parseTimeoutOption, mergePathLikeOptions, } from '../src/cli/options.js';
 import { copyToClipboard } from '../src/cli/clipboard.js';
@@ -26,6 +27,7 @@ import { shouldDetachSession } from '../src/cli/detach.js';
 import { applyHiddenAliases } from '../src/cli/hiddenAliases.js';
 import { buildBrowserConfig, resolveBrowserModelLabel } from '../src/cli/browserConfig.js';
 import { performSessionRun } from '../src/cli/sessionRunner.js';
+import { isMediaFile } from '../src/browser/prompt.js';
 import { attachSession, showStatus, formatCompletionSummary } from '../src/cli/sessionDisplay.js';
 import { formatCompactNumber } from '../src/cli/format.js';
 import { formatIntroLine } from '../src/cli/tagline.js';
@@ -85,7 +87,7 @@ program.hook('preAction', (thisCommand) => {
 });
 program
     .name('oracle')
-    .description('One-shot GPT-5.1 Pro / GPT-5.1 / GPT-5.1 Codex tool for hard questions that benefit from large file context and server-side search.')
+    .description('One-shot GPT-5.2 Pro / GPT-5.2 / GPT-5.1 Codex tool for hard questions that benefit from large file context and server-side search.')
     .version(VERSION)
     .argument('[prompt]', 'Prompt text (shorthand for --prompt).')
     .option('-p, --prompt <text>', 'User prompt to send to the model.')
@@ -110,18 +112,18 @@ program
     .addOption(new Option('--copy-markdown', 'Copy the assembled markdown bundle to the clipboard; pair with --render to print it too.').default(false))
     .addOption(new Option('--copy').hideHelp().default(false))
     .option('-s, --slug <words>', 'Custom session slug (3-5 words).')
-    .option('-m, --model <model>', 'Model to target (gpt-5.1-pro default; aliases to gpt-5.2-pro on API. Also gpt-5-pro, gpt-5.1, gpt-5.1-codex API-only, gpt-5.2, gpt-5.2-instant, gpt-5.2-pro, gemini-3-pro, claude-4.5-sonnet, claude-4.1-opus, or ChatGPT labels like "5.2 Thinking" for browser runs).', normalizeModelOption)
-    .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.1-pro,gemini-3-pro").')
+    .option('-m, --model <model>', 'Model to target (gpt-5.2-pro default; also supports gpt-5.1-pro alias). Also gpt-5-pro, gpt-5.1, gpt-5.1-codex API-only, gpt-5.2, gpt-5.2-instant, gpt-5.2-pro, gemini-3-pro, claude-4.5-sonnet, claude-4.1-opus, or ChatGPT labels like "5.2 Thinking" for browser runs).', normalizeModelOption)
+    .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.2-pro,gemini-3-pro").')
     .argParser(collectModelList)
     .default([]))
-    .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Engine is preferred; --mode is a legacy alias. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
+    .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Browser engine: GPT models automate ChatGPT; Gemini models use a cookie-based client for gemini.google.com. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
     .addOption(new Option('--mode <mode>', 'Alias for --engine (api | browser).').choices(['api', 'browser']).hideHelp())
     .option('--files-report', 'Show token usage per attached file (also prints automatically when files exceed the token budget).', false)
     .option('-v, --verbose', 'Enable verbose logging for all operations.', false)
     .addOption(new Option('--[no-]notify', 'Desktop notification when a session finishes (default on unless CI/SSH).')
     .default(undefined))
     .addOption(new Option('--[no-]notify-sound', 'Play a notification sound on completion (default off).').default(undefined))
-    .addOption(new Option('--timeout <seconds|auto>', 'Overall timeout before aborting the API call (auto = 60m for gpt-5.1-pro, 120s otherwise).')
+    .addOption(new Option('--timeout <seconds|auto>', 'Overall timeout before aborting the API call (auto = 60m for gpt-5.2-pro, 120s otherwise).')
     .argParser(parseTimeoutOption)
     .default('auto'))
     .addOption(new Option('--preview [mode]', '(alias) Preview the request without calling the model (summary | json | full). Deprecated: use --dry-run instead.')
@@ -182,6 +184,12 @@ program
     .addOption(new Option('--remote-token <token>', 'Access token for the remote `oracle serve` instance.'))
     .addOption(new Option('--browser-inline-files', 'Alias for --browser-attachments never (force pasting file contents inline).').default(false))
     .addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
+    .addOption(new Option('--youtube <url>', 'YouTube video URL to analyze (Gemini web/cookie mode only; uses your signed-in Chrome cookies for gemini.google.com).'))
+    .addOption(new Option('--generate-image <file>', 'Generate image and save to file (Gemini web/cookie mode only; requires gemini.google.com Chrome cookies).'))
+    .addOption(new Option('--edit-image <file>', 'Edit existing image (use with --output, Gemini web/cookie mode only).'))
+    .addOption(new Option('--output <file>', 'Output file path for image operations (Gemini web/cookie mode only).'))
+    .addOption(new Option('--aspect <ratio>', 'Aspect ratio for image generation: 16:9, 1:1, 4:3, 3:4 (Gemini web/cookie mode only).'))
+    .addOption(new Option('--gemini-show-thoughts', 'Display Gemini thinking process (Gemini web/cookie mode only).').default(false))
     .option('--retain-hours <hours>', 'Prune stored sessions older than this many hours before running (set 0 to disable).', parseFloatOption)
     .option('--force', 'Force start a new session even if an identical prompt is already running.', false)
     .option('--debug-help', 'Show the advanced/debug option set and exit.', false)
@@ -512,18 +520,13 @@ async function runRootCommand(options) {
     const isCodex = primaryModelCandidate.startsWith('gpt-5.1-codex');
     const isClaude = primaryModelCandidate.startsWith('claude');
     const userForcedBrowser = options.browser || options.engine === 'browser';
-    const hasNonGptBrowserTarget = (engine === 'browser' || userForcedBrowser) &&
+    const isBrowserCompatible = (model) => model.startsWith('gpt-') || model.startsWith('gemini');
+    const hasNonBrowserCompatibleTarget = (engine === 'browser' || userForcedBrowser) &&
         (normalizedMultiModels.length > 0
-            ? normalizedMultiModels.some((model) => !model.startsWith('gpt-'))
-            : !resolvedModelCandidate.startsWith('gpt-'));
-    if (hasNonGptBrowserTarget) {
-        throw new Error('Browser engine only supports GPT-series ChatGPT models. Re-run with --engine api for Grok, Claude, Gemini, or other non-GPT models.');
-    }
-    if (isGemini && userForcedBrowser) {
-        throw new Error('Gemini is only supported via API. Use --engine api.');
-    }
-    if (isGemini && engine === 'browser') {
-        engine = 'api';
+            ? normalizedMultiModels.some((model) => !isBrowserCompatible(model))
+            : !isBrowserCompatible(resolvedModelCandidate));
+    if (hasNonBrowserCompatibleTarget) {
+        throw new Error('Browser engine only supports GPT and Gemini models. Re-run with --engine api for Grok, Claude, or other models.');
     }
     if (isClaude && engine === 'browser') {
         console.log(chalk.dim('Browser engine is not supported for Claude models; switching to API.'));
@@ -672,7 +675,11 @@ async function runRootCommand(options) {
         return;
     }
     if (options.file && options.file.length > 0) {
-        await readFiles(options.file, { cwd: process.cwd() });
+        const isBrowserMode = engine === 'browser' || userForcedBrowser;
+        const filesToValidate = isBrowserMode ? options.file.filter((f) => !isMediaFile(f)) : options.file;
+        if (filesToValidate.length > 0) {
+            await readFiles(filesToValidate, { cwd: process.cwd() });
+        }
     }
     const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
     applyBrowserDefaultsFromConfig(options, userConfig, getSource);
@@ -698,6 +705,19 @@ async function runRootCommand(options) {
         };
         console.log(chalk.dim(`Routing browser automation to remote host ${remoteHost}`));
     }
+    else if (browserConfig && resolvedModel.startsWith('gemini')) {
+        browserDeps = {
+            executeBrowser: createGeminiWebExecutor({
+                youtube: options.youtube,
+                generateImage: options.generateImage,
+                editImage: options.editImage,
+                outputPath: options.output,
+                aspectRatio: options.aspect,
+                showThoughts: options.geminiShowThoughts,
+            }),
+        };
+        console.log(chalk.dim('Using Gemini web client for browser automation'));
+    }
     const remoteExecutionActive = Boolean(browserDeps);
     if (options.dryRun) {
         const baseRunOptions = buildRunOptions(resolvedOptions, {

package/dist/src/browser/actions/assistantResponse.js CHANGED

@@ -218,6 +218,8 @@ async function isCompletionVisible(Runtime) {
         const ASSISTANT_SELECTOR = '${ASSISTANT_ROLE_SELECTOR}';
         const isAssistantTurn = (node) => {
           if (!(node instanceof HTMLElement)) return false;
+          const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
+          if (turnAttr === 'assistant') return true;
           const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
           if (role === 'assistant') return true;
           const testId = (node.getAttribute('data-testid') || '').toLowerCase();
@@ -257,6 +259,12 @@ function normalizeAssistantSnapshot(snapshot) {
     if (!text.trim()) {
         return null;
     }
+    const normalized = text.toLowerCase();
+    // "Pro thinking" often renders a placeholder turn containing an "Answer now" gate.
+    // Treat it as incomplete so browser mode keeps waiting (and can click the gate).
+    if (normalized.includes('answer now') && (normalized.includes('pro thinking') || normalized.includes('chatgpt said'))) {
+        return null;
+    }
     return {
         text,
         html: snapshot?.html ?? undefined,
@@ -295,10 +303,13 @@ function buildResponseObserverExpression(timeoutMs) {
     const CONVERSATION_SELECTOR = ${conversationLiteral};
     const ASSISTANT_SELECTOR = ${assistantLiteral};
     const settleDelayMs = 800;
+    const ANSWER_NOW_LABEL = 'answer now';
     // Helper to detect assistant turns - matches buildAssistantExtractor logic
     const isAssistantTurn = (node) => {
       if (!(node instanceof HTMLElement)) return false;
+      const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
+      if (turnAttr === 'assistant') return true;
       const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
       if (role === 'assistant') return true;
       const testId = (node.getAttribute('data-testid') || '').toLowerCase();
@@ -330,6 +341,11 @@ function buildResponseObserverExpression(timeoutMs) {
         });
         observer.observe(document.body, { childList: true, subtree: true, characterData: true });
         stopInterval = setInterval(() => {
+          // Pro thinking can gate the response behind an "Answer now" button. Keep clicking it while present.
+          const answerNow = Array.from(document.querySelectorAll('button,span')).find((el) => (el?.textContent || '').trim().toLowerCase() === ANSWER_NOW_LABEL);
+          if (answerNow) {
+            dispatchClickSequence(answerNow.closest('button') ?? answerNow);
+          }
           const stop = document.querySelector(STOP_SELECTOR);
           if (!stop) {
             return;
@@ -382,9 +398,10 @@ function buildResponseObserverExpression(timeoutMs) {
           lastLength = refreshed.text?.length ?? lastLength;
         }
         const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
+        const answerNowVisible = Boolean(Array.from(document.querySelectorAll('button,span')).find((el) => (el?.textContent || '').trim().toLowerCase() === ANSWER_NOW_LABEL));
         const finishedVisible = isLastAssistantTurnFinished();
-        if (!stopVisible || finishedVisible) {
+        if ((!stopVisible && !answerNowVisible) || finishedVisible) {
           break;
         }
       }
@@ -407,6 +424,10 @@ function buildAssistantExtractor(functionName) {
     const ASSISTANT_SELECTOR = ${assistantLiteral};
     const isAssistantTurn = (node) => {
       if (!(node instanceof HTMLElement)) return false;
+      const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
+      if (turnAttr === 'assistant') {
+        return true;
+      }
       const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
       if (role === 'assistant') {
         return true;
@@ -443,11 +464,13 @@ function buildAssistantExtractor(functionName) {
       }
       const messageRoot = turn.querySelector(ASSISTANT_SELECTOR) ?? turn;
       expandCollapsibles(messageRoot);
-      const preferred =
-        messageRoot.querySelector('.markdown') ||
-        messageRoot.querySelector('[data-message-content]') ||
-        messageRoot;
-      const text = preferred?.innerText ?? '';
+      const preferred = messageRoot.querySelector('.markdown') || messageRoot.querySelector('[data-message-content]');
+      if (!preferred) {
+        continue;
+      }
+      const innerText = preferred?.innerText ?? '';
+      const textContent = preferred?.textContent ?? '';
+      const text = innerText.trim().length > 0 ? innerText : textContent;
       const html = preferred?.innerHTML ?? '';
       const messageId = messageRoot.getAttribute('data-message-id');
       const turnId = messageRoot.getAttribute('data-testid');
@@ -462,7 +485,7 @@ function buildCopyExpression(meta) {
     return `(() => {
     ${buildClickDispatcher()}
     const BUTTON_SELECTOR = '${COPY_BUTTON_SELECTOR}';
-    const TIMEOUT_MS = 5000;
+    const TIMEOUT_MS = 10000;
     const locateButton = () => {
       const hint = ${JSON.stringify(meta ?? {})};
@@ -526,53 +549,62 @@ function buildCopyExpression(meta) {
     };
     return new Promise((resolve) => {
-      const button = locateButton();
-      if (!button) {
-        resolve({ success: false, status: 'missing-button' });
-        return;
-      }
-      const interception = interceptClipboard();
-      let settled = false;
-      let pollId = null;
-      let timeoutId = null;
-      const finish = (payload) => {
-        if (settled) {
+      const deadline = Date.now() + TIMEOUT_MS;
+      const waitForButton = () => {
+        const button = locateButton();
+        if (button) {
+          const interception = interceptClipboard();
+          let settled = false;
+          let pollId = null;
+          let timeoutId = null;
+          const finish = (payload) => {
+            if (settled) {
+              return;
+            }
+            settled = true;
+            if (pollId) {
+              clearInterval(pollId);
+            }
+            if (timeoutId) {
+              clearTimeout(timeoutId);
+            }
+            button.removeEventListener('copy', handleCopy, true);
+            interception.restore?.();
+            resolve(payload);
+          };
+          const readIntercepted = () => {
+            const markdown = interception.state.text ?? '';
+            return { success: Boolean(markdown.trim()), markdown };
+          };
+          const handleCopy = () => {
+            finish(readIntercepted());
+          };
+          button.addEventListener('copy', handleCopy, true);
+          button.scrollIntoView({ block: 'center', behavior: 'instant' });
+          dispatchClickSequence(button);
+          pollId = setInterval(() => {
+            const payload = readIntercepted();
+            if (payload.success) {
+              finish(payload);
+            }
+          }, 100);
+          timeoutId = setTimeout(() => {
+            button.removeEventListener('copy', handleCopy, true);
+            finish({ success: false, status: 'timeout' });
+          }, TIMEOUT_MS);
           return;
         }
-        settled = true;
-        if (pollId) {
-          clearInterval(pollId);
-        }
-        if (timeoutId) {
-          clearTimeout(timeoutId);
+        if (Date.now() > deadline) {
+          resolve({ success: false, status: 'missing-button' });
+          return;
         }
-        button.removeEventListener('copy', handleCopy, true);
-        interception.restore?.();
-        resolve(payload);
-      };
-      const readIntercepted = () => {
-        const markdown = interception.state.text ?? '';
-        return { success: Boolean(markdown.trim()), markdown };
+        setTimeout(waitForButton, 120);
       };
-      const handleCopy = () => {
-        finish(readIntercepted());
-      };
-      button.addEventListener('copy', handleCopy, true);
-      button.scrollIntoView({ block: 'center', behavior: 'instant' });
-      dispatchClickSequence(button);
-      pollId = setInterval(() => {
-        const payload = readIntercepted();
-        if (payload.success) {
-          finish(payload);
-        }
-      }, 100);
-      timeoutId = setTimeout(() => {
-        button.removeEventListener('copy', handleCopy, true);
-        finish({ success: false, status: 'timeout' });
-      }, TIMEOUT_MS);
+      waitForButton();
     });
   })()`;
 }

package/dist/src/browser/actions/attachments.js CHANGED

@@ -214,8 +214,33 @@ export async function waitForAttachmentCompletion(Runtime, timeoutMs, expectedNa
         const { result } = await Runtime.evaluate({ expression, returnByValue: true });
         const value = result?.value;
         if (value && !value.uploading) {
-            const attached = new Set((value.attachedNames ?? []).map((name) => name.toLowerCase()));
-            const missing = expectedNormalized.filter((name) => !attached.has(name));
+            const attachedNames = (value.attachedNames ?? [])
+                .map((name) => name.toLowerCase().replace(/\s+/g, ' ').trim())
+                .filter(Boolean);
+            const matchesExpected = (expected) => {
+                const baseName = expected.split('/').pop()?.split('\\').pop() ?? expected;
+                const normalizedExpected = baseName.toLowerCase().replace(/\s+/g, ' ').trim();
+                const expectedNoExt = normalizedExpected.replace(/\.[a-z0-9]{1,10}$/i, '');
+                return attachedNames.some((raw) => {
+                    if (raw.includes(normalizedExpected))
+                        return true;
+                    if (expectedNoExt.length >= 6 && raw.includes(expectedNoExt))
+                        return true;
+                    if (raw.includes('…') || raw.includes('...')) {
+                        const escaped = raw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+                        const pattern = escaped.replace(/\\…|\\\.\\\.\\\./g, '.*');
+                        try {
+                            const re = new RegExp(pattern);
+                            return re.test(normalizedExpected) || (expectedNoExt.length >= 6 && re.test(expectedNoExt));
+                        }
+                        catch {
+                            return false;
+                        }
+                    }
+                    return false;
+                });
+            };
+            const missing = expectedNormalized.filter((expected) => !matchesExpected(expected));
             if (missing.length === 0) {
                 if (value.state === 'ready') {
                     return;
@@ -223,6 +248,11 @@ export async function waitForAttachmentCompletion(Runtime, timeoutMs, expectedNa
                 if (value.state === 'missing' && value.filesAttached) {
                     return;
                 }
+                // If files are attached but button isn't ready yet, give it more time but don't fail immediately
+                if (value.filesAttached) {
+                    await delay(500);
+                    continue;
+                }
             }
         }
         await delay(250);
@@ -249,7 +279,11 @@ export async function waitForAttachmentVisible(Runtime, expectedName, timeoutMs,
     };
     const turns = Array.from(document.querySelectorAll('article[data-testid^="conversation-turn"]'));
-    const userTurns = turns.filter((node) => node.querySelector('[data-message-author-role="user"]'));
+    const userTurns = turns.filter((node) => {
+      const turnAttr = (node.getAttribute('data-turn') || node.dataset?.turn || '').toLowerCase();
+      if (turnAttr === 'user') return true;
+      return Boolean(node.querySelector('[data-message-author-role="user"]'));
+    });
     const lastUser = userTurns[userTurns.length - 1];
     if (lastUser) {
       const turnMatch = Array.from(lastUser.querySelectorAll('*')).some(matchNode);

package/dist/src/browser/actions/modelSelection.js CHANGED

@@ -63,12 +63,41 @@ function buildModelSelectionExpression(targetModel) {
       .map((token) => normalizeText(token))
       .filter(Boolean);
     const targetWords = normalizedTarget.split(' ').filter(Boolean);
+    const desiredVersion = normalizedTarget.includes('5 2')
+      ? '5-2'
+      : normalizedTarget.includes('5 1')
+        ? '5-1'
+        : normalizedTarget.includes('5 0')
+          ? '5-0'
+          : null;
+    const wantsPro = normalizedTarget.includes(' pro') || normalizedTarget.endsWith(' pro') || normalizedTokens.includes('pro');
+    const wantsInstant = normalizedTarget.includes('instant');
+    const wantsThinking = normalizedTarget.includes('thinking');
     const button = document.querySelector(BUTTON_SELECTOR);
     if (!button) {
       return { status: 'button-missing' };
     }
+    const getButtonLabel = () => (button.textContent ?? '').trim();
+    const buttonMatchesTarget = () => {
+      const normalizedLabel = normalizeText(getButtonLabel());
+      if (!normalizedLabel) return false;
+      if (desiredVersion) {
+        if (desiredVersion === '5-2' && !normalizedLabel.includes('5 2')) return false;
+        if (desiredVersion === '5-1' && !normalizedLabel.includes('5 1')) return false;
+        if (desiredVersion === '5-0' && !normalizedLabel.includes('5 0')) return false;
+      }
+      if (wantsPro && !normalizedLabel.includes(' pro')) return false;
+      if (wantsInstant && !normalizedLabel.includes('instant')) return false;
+      if (wantsThinking && !normalizedLabel.includes('thinking')) return false;
+      return true;
+    };
+    if (buttonMatchesTarget()) {
+      return { status: 'already-selected', label: getButtonLabel() };
+    }
     let lastPointerClick = 0;
     const pointerClick = () => {
       if (dispatchClickSequence(button)) {
@@ -106,8 +135,46 @@ function buildModelSelectionExpression(targetModel) {
       }
       let score = 0;
       const normalizedTestId = (testid ?? '').toLowerCase();
-      if (normalizedTestId && TEST_IDS.some((id) => normalizedTestId.includes(id))) {
-        score += 1000;
+      if (normalizedTestId) {
+        if (desiredVersion) {
+          // data-testid strings have been observed with both dotted and dashed versions (e.g. gpt-5.2-pro vs gpt-5-2-pro).
+          const has52 =
+            normalizedTestId.includes('5-2') ||
+            normalizedTestId.includes('5.2') ||
+            normalizedTestId.includes('gpt-5-2') ||
+            normalizedTestId.includes('gpt-5.2') ||
+            normalizedTestId.includes('gpt52');
+          const has51 =
+            normalizedTestId.includes('5-1') ||
+            normalizedTestId.includes('5.1') ||
+            normalizedTestId.includes('gpt-5-1') ||
+            normalizedTestId.includes('gpt-5.1') ||
+            normalizedTestId.includes('gpt51');
+          const has50 =
+            normalizedTestId.includes('5-0') ||
+            normalizedTestId.includes('5.0') ||
+            normalizedTestId.includes('gpt-5-0') ||
+            normalizedTestId.includes('gpt-5.0') ||
+            normalizedTestId.includes('gpt50');
+          const candidateVersion = has52 ? '5-2' : has51 ? '5-1' : has50 ? '5-0' : null;
+          // If a candidate advertises a different version, ignore it entirely.
+          if (candidateVersion && candidateVersion !== desiredVersion) {
+            return 0;
+          }
+          // When targeting an explicit version, avoid selecting submenu wrappers that can contain legacy models.
+          if (normalizedTestId.includes('submenu') && candidateVersion === null) {
+            return 0;
+          }
+        }
+        const matches = TEST_IDS.filter((id) => id && normalizedTestId.includes(id));
+        if (matches.length > 0) {
+          // Prefer the most specific match (longest token) instead of treating any hit as equal.
+          // This prevents generic tokens (e.g. "pro") from outweighing version-specific targets.
+          const best = matches.reduce((acc, token) => (token.length > acc.length ? token : acc), '');
+          score += 200 + Math.min(900, best.length * 25);
+          if (best.startsWith('model-switcher-')) score += 120;
+          if (best.includes('gpt-')) score += 60;
+        }
       }
       if (normalizedText && normalizedTarget) {
         if (normalizedText === normalizedTarget) {
@@ -134,6 +201,14 @@ function buildModelSelectionExpression(targetModel) {
         }
         score -= missing * 12;
       }
+      // Bias toward the requested tier: penalize non-Pro labels when Pro is wanted; otherwise penalize Pro labels so non-Pro wins when both exist.
+      if (wantsPro) {
+        if (!normalizedText.includes(' pro')) {
+          score -= 80;
+        }
+      } else if (normalizedText.includes(' pro')) {
+        score -= 40;
+      }
       return Math.max(score, 0);
     };
@@ -153,7 +228,7 @@ function buildModelSelectionExpression(targetModel) {
           }
           const label = getOptionLabel(option);
           if (!bestMatch || score > bestMatch.score) {
-            bestMatch = { node: option, label, score };
+            bestMatch = { node: option, label, score, testid, normalizedText };
           }
         }
       }
@@ -182,11 +257,25 @@ function buildModelSelectionExpression(targetModel) {
         const match = findBestOption();
         if (match) {
           if (optionIsSelected(match.node)) {
-            resolve({ status: 'already-selected', label: match.label });
+            resolve({ status: 'already-selected', label: getButtonLabel() || match.label });
             return;
           }
           dispatchClickSequence(match.node);
-          resolve({ status: 'switched', label: match.label });
+          // Submenus (e.g. "Legacy models") need a second pass to pick the actual model option.
+          // Keep scanning once the submenu opens instead of treating the submenu click as a final switch.
+          const isSubmenu = (match.testid ?? '').toLowerCase().includes('submenu');
+          if (isSubmenu) {
+            setTimeout(attempt, REOPEN_INTERVAL_MS / 2);
+            return;
+          }
+          // Wait for the top bar label to reflect the requested model; otherwise keep scanning.
+          setTimeout(() => {
+            if (buttonMatchesTarget()) {
+              resolve({ status: 'switched', label: getButtonLabel() || match.label });
+              return;
+            }
+            attempt();
+          }, Math.max(120, INITIAL_WAIT_MS));
           return;
         }
         if (performance.now() - start > MAX_WAIT_MS) {

package/dist/src/browser/actions/promptComposer.js CHANGED Viewed

@@ -283,20 +283,28 @@ async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger) {
     const primarySelectorLiteral = JSON.stringify(PROMPT_PRIMARY_SELECTOR);
     const fallbackSelectorLiteral = JSON.stringify(PROMPT_FALLBACK_SELECTOR);
     const script = `(() => {
-    const editor = document.querySelector(${primarySelectorLiteral});
-    const fallback = document.querySelector(${fallbackSelectorLiteral});
-    const normalize = (value) => value?.toLowerCase?.().replace(/\\s+/g, ' ').trim() ?? '';
-    const normalizedPrompt = normalize(${encodedPrompt});
-    const normalizedPromptPrefix = normalizedPrompt.slice(0, 120);
-    const CONVERSATION_SELECTOR = ${JSON.stringify(CONVERSATION_TURN_SELECTOR)};
-    const articles = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
-    const normalizedTurns = articles.map((node) => normalize(node?.innerText));
-    const userMatched = normalizedTurns.some((text) => text.includes(normalizedPrompt));
-    const prefixMatched =
-      normalizedPromptPrefix.length > 30 &&
-      normalizedTurns.some((text) => text.includes(normalizedPromptPrefix));
-    const lastTurn = normalizedTurns[normalizedTurns.length - 1] ?? '';
-    return {
+	    const editor = document.querySelector(${primarySelectorLiteral});
+	    const fallback = document.querySelector(${fallbackSelectorLiteral});
+	    const normalize = (value) => {
+	      let text = value?.toLowerCase?.() ?? '';
+	      // Strip markdown *markers* but keep content (ChatGPT renders fence markers differently).
+	      text = text.replace(/\`\`\`[^\\n]*\\n([\\s\\S]*?)\`\`\`/g, ' $1 ');
+	      text = text.replace(/\`\`\`/g, ' ');
+	      text = text.replace(/\`([^\`]*)\`/g, '$1');
+	      return text.replace(/\\s+/g, ' ').trim();
+	    };
+	    const normalizedPrompt = normalize(${encodedPrompt});
+	    const normalizedPromptPrefix = normalizedPrompt.slice(0, 120);
+	    const CONVERSATION_SELECTOR = ${JSON.stringify(CONVERSATION_TURN_SELECTOR)};
+	    const articles = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
+	    const normalizedTurns = articles.map((node) => normalize(node?.innerText));
+	    const userMatched =
+	      normalizedPrompt.length > 0 && normalizedTurns.some((text) => text.includes(normalizedPrompt));
+	    const prefixMatched =
+	      normalizedPromptPrefix.length > 30 &&
+	      normalizedTurns.some((text) => text.includes(normalizedPromptPrefix));
+	    const lastTurn = normalizedTurns[normalizedTurns.length - 1] ?? '';
+	    return {
       userMatched,
       prefixMatched,
       fallbackValue: fallback?.value ?? '',