npm - @steipete/oracle - Version diff - 0.6.0 → 0.7.0 - Mend

@steipete/oracle 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/README.md CHANGED Viewed

@@ -21,26 +21,29 @@ Use `npx -y @steipete/oracle …` (not `pnpx`)—pnpx's sandboxed cache can’t
 ```bash
 # Copy the bundle and paste into ChatGPT
-npx @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
+npx -y @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
 # Minimal API run (expects OPENAI_API_KEY in your env)
-npx @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
+npx -y @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
 # Multi-model API run
-npx @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
+npx -y @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
 # Preview without spending tokens
-npx @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
+npx -y @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
 # Browser run (no API key, will open ChatGPT)
-npx @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
+npx -y @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
+# Gemini browser mode (no API key; uses Chrome cookies from gemini.google.com)
+npx -y @steipete/oracle --engine browser --model gemini-3-pro --prompt "a cute robot holding a banana" --generate-image out.jpg --aspect 1:1
 # Sessions (list and replay)
-npx @steipete/oracle status --hours 72
-npx @steipete/oracle session <id> --render
+npx -y @steipete/oracle status --hours 72
+npx -y @steipete/oracle session <id> --render
 # TUI (interactive, only for humans)
-npx @steipete/oracle tui
+npx -y @steipete/oracle tui
 ```
 Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser is stable on macOS and works on Linux and Windows. On Linux pass `--browser-chrome-path/--browser-cookie-path` if detection fails; on Windows prefer `--browser-manual-login` or inline cookies if decryption is blocked.
@@ -49,6 +52,8 @@ Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser i
 **CLI**
 - API mode expects API keys in your environment: `OPENAI_API_KEY` (GPT-5.x), `GEMINI_API_KEY` (Gemini 3 Pro), `ANTHROPIC_API_KEY` (Claude Sonnet 4.5 / Opus 4.1).
+- Gemini browser mode uses Chrome cookies instead of an API key—just be logged into `gemini.google.com` in Chrome (no Python/venv required).
+- If your Gemini account can’t access “Pro”, Oracle auto-falls back to a supported model for web runs (and logs the fallback in verbose mode).
 - Prefer API mode or `--copy` + manual paste; browser automation is experimental.
 - Browser support: stable on macOS; works on Linux (add `--browser-chrome-path/--browser-cookie-path` when needed) and Windows (manual-login or inline cookies recommended when app-bound cookies block decryption).
 - Remote browser service: `oracle serve` on a signed-in host; clients use `--remote-host/--remote-token`.
@@ -109,6 +114,9 @@ npx -y @steipete/oracle oracle-mcp
 | `--dry-run [summary\|json\|full]` | Preview without sending. |
 | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
 | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser). |
+| `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
+| `--generate-image <file>` | Generate image and save to file (Gemini browser mode). |
+| `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). |
 | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
 ## Configuration

package/dist/bin/oracle-cli.js CHANGED Viewed

@@ -18,6 +18,7 @@ import { DEFAULT_MODEL, MODEL_CONFIGS, readFiles, estimateRequestTokens, buildRe
 import { isKnownModel } from '../src/oracle/modelResolver.js';
 import { CHATGPT_URL } from '../src/browserMode.js';
 import { createRemoteBrowserExecutor } from '../src/remote/client.js';
+import { createGeminiWebExecutor } from '../src/gemini-web/index.js';
 import { applyHelpStyling } from '../src/cli/help.js';
 import { collectPaths, collectModelList, parseFloatOption, parseIntOption, parseSearchOption, usesDefaultStatusFilters, resolvePreviewMode, normalizeModelOption, normalizeBaseUrl, resolveApiModel, inferModelFromLabel, parseHeartbeatOption, parseTimeoutOption, mergePathLikeOptions, } from '../src/cli/options.js';
 import { copyToClipboard } from '../src/cli/clipboard.js';
@@ -26,6 +27,7 @@ import { shouldDetachSession } from '../src/cli/detach.js';
 import { applyHiddenAliases } from '../src/cli/hiddenAliases.js';
 import { buildBrowserConfig, resolveBrowserModelLabel } from '../src/cli/browserConfig.js';
 import { performSessionRun } from '../src/cli/sessionRunner.js';
+import { isMediaFile } from '../src/browser/prompt.js';
 import { attachSession, showStatus, formatCompletionSummary } from '../src/cli/sessionDisplay.js';
 import { formatCompactNumber } from '../src/cli/format.js';
 import { formatIntroLine } from '../src/cli/tagline.js';
@@ -114,7 +116,7 @@ program
     .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.1-pro,gemini-3-pro").')
     .argParser(collectModelList)
     .default([]))
-    .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Engine is preferred; --mode is a legacy alias. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
+    .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Browser engine: GPT models automate ChatGPT; Gemini models use a cookie-based client for gemini.google.com. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
     .addOption(new Option('--mode <mode>', 'Alias for --engine (api | browser).').choices(['api', 'browser']).hideHelp())
     .option('--files-report', 'Show token usage per attached file (also prints automatically when files exceed the token budget).', false)
     .option('-v, --verbose', 'Enable verbose logging for all operations.', false)
@@ -182,6 +184,12 @@ program
     .addOption(new Option('--remote-token <token>', 'Access token for the remote `oracle serve` instance.'))
     .addOption(new Option('--browser-inline-files', 'Alias for --browser-attachments never (force pasting file contents inline).').default(false))
     .addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
+    .addOption(new Option('--youtube <url>', 'YouTube video URL to analyze (Gemini web/cookie mode only; uses your signed-in Chrome cookies for gemini.google.com).'))
+    .addOption(new Option('--generate-image <file>', 'Generate image and save to file (Gemini web/cookie mode only; requires gemini.google.com Chrome cookies).'))
+    .addOption(new Option('--edit-image <file>', 'Edit existing image (use with --output, Gemini web/cookie mode only).'))
+    .addOption(new Option('--output <file>', 'Output file path for image operations (Gemini web/cookie mode only).'))
+    .addOption(new Option('--aspect <ratio>', 'Aspect ratio for image generation: 16:9, 1:1, 4:3, 3:4 (Gemini web/cookie mode only).'))
+    .addOption(new Option('--gemini-show-thoughts', 'Display Gemini thinking process (Gemini web/cookie mode only).').default(false))
     .option('--retain-hours <hours>', 'Prune stored sessions older than this many hours before running (set 0 to disable).', parseFloatOption)
     .option('--force', 'Force start a new session even if an identical prompt is already running.', false)
     .option('--debug-help', 'Show the advanced/debug option set and exit.', false)
@@ -512,18 +520,13 @@ async function runRootCommand(options) {
     const isCodex = primaryModelCandidate.startsWith('gpt-5.1-codex');
     const isClaude = primaryModelCandidate.startsWith('claude');
     const userForcedBrowser = options.browser || options.engine === 'browser';
-    const hasNonGptBrowserTarget = (engine === 'browser' || userForcedBrowser) &&
+    const isBrowserCompatible = (model) => model.startsWith('gpt-') || model.startsWith('gemini');
+    const hasNonBrowserCompatibleTarget = (engine === 'browser' || userForcedBrowser) &&
         (normalizedMultiModels.length > 0
-            ? normalizedMultiModels.some((model) => !model.startsWith('gpt-'))
-            : !resolvedModelCandidate.startsWith('gpt-'));
-    if (hasNonGptBrowserTarget) {
-        throw new Error('Browser engine only supports GPT-series ChatGPT models. Re-run with --engine api for Grok, Claude, Gemini, or other non-GPT models.');
-    }
-    if (isGemini && userForcedBrowser) {
-        throw new Error('Gemini is only supported via API. Use --engine api.');
-    }
-    if (isGemini && engine === 'browser') {
-        engine = 'api';
+            ? normalizedMultiModels.some((model) => !isBrowserCompatible(model))
+            : !isBrowserCompatible(resolvedModelCandidate));
+    if (hasNonBrowserCompatibleTarget) {
+        throw new Error('Browser engine only supports GPT and Gemini models. Re-run with --engine api for Grok, Claude, or other models.');
     }
     if (isClaude && engine === 'browser') {
         console.log(chalk.dim('Browser engine is not supported for Claude models; switching to API.'));
@@ -672,7 +675,11 @@ async function runRootCommand(options) {
         return;
     }
     if (options.file && options.file.length > 0) {
-        await readFiles(options.file, { cwd: process.cwd() });
+        const isBrowserMode = engine === 'browser' || userForcedBrowser;
+        const filesToValidate = isBrowserMode ? options.file.filter((f) => !isMediaFile(f)) : options.file;
+        if (filesToValidate.length > 0) {
+            await readFiles(filesToValidate, { cwd: process.cwd() });
+        }
     }
     const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
     applyBrowserDefaultsFromConfig(options, userConfig, getSource);
@@ -698,6 +705,19 @@ async function runRootCommand(options) {
         };
         console.log(chalk.dim(`Routing browser automation to remote host ${remoteHost}`));
     }
+    else if (browserConfig && resolvedModel.startsWith('gemini')) {
+        browserDeps = {
+            executeBrowser: createGeminiWebExecutor({
+                youtube: options.youtube,
+                generateImage: options.generateImage,
+                editImage: options.editImage,
+                outputPath: options.output,
+                aspectRatio: options.aspect,
+                showThoughts: options.geminiShowThoughts,
+            }),
+        };
+        console.log(chalk.dim('Using Gemini web client for browser automation'));
+    }
     const remoteExecutionActive = Boolean(browserDeps);
     if (options.dryRun) {
         const baseRunOptions = buildRunOptions(resolvedOptions, {

package/dist/src/browser/actions/assistantResponse.js CHANGED Viewed

@@ -183,7 +183,9 @@ async function pollAssistantCompletion(Runtime, timeoutMs) {
                 isStopButtonVisible(Runtime),
                 isCompletionVisible(Runtime),
             ]);
-            if (completionVisible || (!stopVisible && stableCycles >= requiredStableCycles)) {
+            // Require at least 2 stable cycles even when completion buttons are visible
+            // to ensure DOM text has fully rendered (buttons can appear before text settles)
+            if ((completionVisible && stableCycles >= 2) || (!stopVisible && stableCycles >= requiredStableCycles)) {
                 return normalized;
             }
         }
@@ -211,10 +213,36 @@ async function isCompletionVisible(Runtime) {
     try {
         const { result } = await Runtime.evaluate({
             expression: `(() => {
-        if (document.querySelector('${FINISHED_ACTIONS_SELECTOR}')) {
+        // Find the LAST assistant turn to check completion status
+        // Must match the same logic as buildAssistantExtractor for consistency
+        const ASSISTANT_SELECTOR = '${ASSISTANT_ROLE_SELECTOR}';
+        const isAssistantTurn = (node) => {
+          if (!(node instanceof HTMLElement)) return false;
+          const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
+          if (role === 'assistant') return true;
+          const testId = (node.getAttribute('data-testid') || '').toLowerCase();
+          if (testId.includes('assistant')) return true;
+          return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
+        };
+        const turns = Array.from(document.querySelectorAll('${CONVERSATION_TURN_SELECTOR}'));
+        let lastAssistantTurn = null;
+        for (let i = turns.length - 1; i >= 0; i--) {
+          if (isAssistantTurn(turns[i])) {
+            lastAssistantTurn = turns[i];
+            break;
+          }
+        }
+        if (!lastAssistantTurn) {
+          return false;
+        }
+        // Check if the last assistant turn has finished action buttons (copy, thumbs up/down, share)
+        if (lastAssistantTurn.querySelector('${FINISHED_ACTIONS_SELECTOR}')) {
           return true;
         }
-        return Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
+        // Also check for "Done" text in the last assistant turn's markdown
+        const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
+        return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
       })()`,
             returnByValue: true,
         });
@@ -257,12 +285,27 @@ function buildAssistantSnapshotExpression() {
 }
 function buildResponseObserverExpression(timeoutMs) {
     const selectorsLiteral = JSON.stringify(ANSWER_SELECTORS);
+    const conversationLiteral = JSON.stringify(CONVERSATION_TURN_SELECTOR);
+    const assistantLiteral = JSON.stringify(ASSISTANT_ROLE_SELECTOR);
     return `(() => {
     ${buildClickDispatcher()}
     const SELECTORS = ${selectorsLiteral};
     const STOP_SELECTOR = '${STOP_BUTTON_SELECTOR}';
     const FINISHED_SELECTOR = '${FINISHED_ACTIONS_SELECTOR}';
+    const CONVERSATION_SELECTOR = ${conversationLiteral};
+    const ASSISTANT_SELECTOR = ${assistantLiteral};
     const settleDelayMs = 800;
+    // Helper to detect assistant turns - matches buildAssistantExtractor logic
+    const isAssistantTurn = (node) => {
+      if (!(node instanceof HTMLElement)) return false;
+      const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
+      if (role === 'assistant') return true;
+      const testId = (node.getAttribute('data-testid') || '').toLowerCase();
+      if (testId.includes('assistant')) return true;
+      return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
+    };
     ${buildAssistantExtractor('extractFromTurns')}
     const captureViaObserver = () =>
@@ -307,6 +350,24 @@ function buildResponseObserverExpression(timeoutMs) {
         }, ${timeoutMs});
       });
+    // Check if the last assistant turn has finished (scoped to avoid detecting old turns)
+    const isLastAssistantTurnFinished = () => {
+      const turns = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
+      let lastAssistantTurn = null;
+      for (let i = turns.length - 1; i >= 0; i--) {
+        if (isAssistantTurn(turns[i])) {
+          lastAssistantTurn = turns[i];
+          break;
+        }
+      }
+      if (!lastAssistantTurn) return false;
+      // Check for action buttons in this specific turn
+      if (lastAssistantTurn.querySelector(FINISHED_SELECTOR)) return true;
+      // Check for "Done" text in this turn's markdown
+      const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
+      return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
+    };
     const waitForSettle = async (snapshot) => {
       const settleWindowMs = 5000;
       const settleIntervalMs = 400;
@@ -321,9 +382,7 @@ function buildResponseObserverExpression(timeoutMs) {
           lastLength = refreshed.text?.length ?? lastLength;
         }
         const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
-        const finishedVisible =
-          Boolean(document.querySelector(FINISHED_SELECTOR)) ||
-          Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
+        const finishedVisible = isLastAssistantTurnFinished();
         if (!stopVisible || finishedVisible) {
           break;

package/dist/src/browser/constants.js CHANGED Viewed

@@ -1,5 +1,5 @@
 export const CHATGPT_URL = 'https://chatgpt.com/';
-export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.1';
+export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.2';
 export const COOKIE_URLS = ['https://chatgpt.com', 'https://chat.openai.com', 'https://atlas.openai.com'];
 export const INPUT_SELECTORS = [
     'textarea[data-id="prompt-textarea"]',

package/dist/src/browser/index.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { mkdtemp, rm, mkdir, readFile } from 'node:fs/promises';
+import { mkdtemp, rm, mkdir } from 'node:fs/promises';
 import path from 'node:path';
 import os from 'node:os';
 import net from 'node:net';
@@ -12,6 +12,7 @@ import { estimateTokenCount, withRetries, delay } from './utils.js';
 import { formatElapsed } from '../oracle/format.js';
 import { CHATGPT_URL } from './constants.js';
 import { BrowserAutomationError } from '../oracle/errors.js';
+import { cleanupStaleProfileState, readChromePid, readDevToolsPort, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from './profileState.js';
 export { CHATGPT_URL, DEFAULT_MODEL_TARGET } from './constants.js';
 export { parseDuration, delay, normalizeChatgptUrl } from './utils.js';
 export async function runBrowserMode(options) {
@@ -98,6 +99,13 @@ export async function runBrowserMode(options) {
             remoteChrome: config.remoteChrome,
         }, userDataDir, logger));
     const chromeHost = chrome.host ?? '127.0.0.1';
+    // Persist profile state so future manual-login runs can reuse this Chrome.
+    if (manualLogin && chrome.port) {
+        await writeDevToolsActivePort(userDataDir, chrome.port);
+        if (!reusedChrome && chrome.pid) {
+            await writeChromePid(userDataDir, chrome.pid);
+        }
+    }
     let removeTerminationHooks = null;
     try {
         removeTerminationHooks = registerTerminationHooks(chrome, userDataDir, effectiveKeepBrowser, logger, {
@@ -533,57 +541,21 @@ async function maybeReuseRunningChrome(userDataDir, logger) {
     const port = await readDevToolsPort(userDataDir);
     if (!port)
         return null;
-    const versionUrl = `http://127.0.0.1:${port}/json/version`;
-    try {
-        const controller = new AbortController();
-        const timeout = setTimeout(() => controller.abort(), 1500);
-        const response = await fetch(versionUrl, { signal: controller.signal });
-        clearTimeout(timeout);
-        if (!response.ok)
-            throw new Error(`HTTP ${response.status}`);
-        const pidPath = path.join(userDataDir, 'chrome.pid');
-        let pid;
-        try {
-            const rawPid = (await readFile(pidPath, 'utf8')).trim();
-            pid = Number.parseInt(rawPid, 10);
-            if (Number.isNaN(pid))
-                pid = undefined;
-        }
-        catch {
-            pid = undefined;
-        }
-        logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
-        return {
-            port,
-            pid,
-            kill: async () => { },
-            process: undefined,
-        };
-    }
-    catch (error) {
-        const message = error instanceof Error ? error.message : String(error);
-        logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${message}); launching new Chrome.`);
+    const probe = await verifyDevToolsReachable({ port });
+    if (!probe.ok) {
+        logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${probe.error}); launching new Chrome.`);
+        // Safe cleanup: remove stale DevToolsActivePort; only remove lock files if this was an Oracle-owned pid that died.
+        await cleanupStaleProfileState(userDataDir, logger, { lockRemovalMode: 'if_oracle_pid_dead' });
         return null;
     }
-}
-async function readDevToolsPort(userDataDir) {
-    const candidates = [
-        path.join(userDataDir, 'DevToolsActivePort'),
-        path.join(userDataDir, 'Default', 'DevToolsActivePort'),
-    ];
-    for (const candidate of candidates) {
-        try {
-            const raw = await readFile(candidate, 'utf8');
-            const firstLine = raw.split(/\r?\n/u)[0]?.trim();
-            const port = Number.parseInt(firstLine ?? '', 10);
-            if (Number.isFinite(port)) {
-                return port;
-            }
-        }
-        catch {
-        }
-    }
-    return null;
+    const pid = await readChromePid(userDataDir);
+    logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
+    return {
+        port,
+        pid: pid ?? undefined,
+        kill: async () => { },
+        process: undefined,
+    };
 }
 async function runRemoteBrowserMode(promptText, attachments, config, logger, options) {
     const remoteChromeConfig = config.remoteChrome;

package/dist/src/browser/profileState.js ADDED Viewed

@@ -0,0 +1,171 @@
+import path from 'node:path';
+import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+const DEVTOOLS_ACTIVE_PORT_FILENAME = 'DevToolsActivePort';
+const DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS = [
+    DEVTOOLS_ACTIVE_PORT_FILENAME,
+    path.join('Default', DEVTOOLS_ACTIVE_PORT_FILENAME),
+];
+const CHROME_PID_FILENAME = 'chrome.pid';
+const execFileAsync = promisify(execFile);
+export function getDevToolsActivePortPaths(userDataDir) {
+    return DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS.map((relative) => path.join(userDataDir, relative));
+}
+export async function readDevToolsPort(userDataDir) {
+    for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
+        try {
+            const raw = await readFile(candidate, 'utf8');
+            const firstLine = raw.split(/\r?\n/u)[0]?.trim();
+            const port = Number.parseInt(firstLine ?? '', 10);
+            if (Number.isFinite(port)) {
+                return port;
+            }
+        }
+        catch {
+            // ignore missing/unreadable candidates
+        }
+    }
+    return null;
+}
+export async function writeDevToolsActivePort(userDataDir, port) {
+    const contents = `${port}\n/devtools/browser`;
+    for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
+        try {
+            await mkdir(path.dirname(candidate), { recursive: true });
+            await writeFile(candidate, contents, 'utf8');
+        }
+        catch {
+            // best effort
+        }
+    }
+}
+export async function readChromePid(userDataDir) {
+    const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
+    try {
+        const raw = (await readFile(pidPath, 'utf8')).trim();
+        const pid = Number.parseInt(raw, 10);
+        if (!Number.isFinite(pid) || pid <= 0) {
+            return null;
+        }
+        return pid;
+    }
+    catch {
+        return null;
+    }
+}
+export async function writeChromePid(userDataDir, pid) {
+    if (!Number.isFinite(pid) || pid <= 0)
+        return;
+    const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
+    try {
+        await mkdir(path.dirname(pidPath), { recursive: true });
+        await writeFile(pidPath, `${Math.trunc(pid)}\n`, 'utf8');
+    }
+    catch {
+        // best effort
+    }
+}
+export function isProcessAlive(pid) {
+    if (!Number.isFinite(pid) || pid <= 0)
+        return false;
+    try {
+        process.kill(pid, 0);
+        return true;
+    }
+    catch (error) {
+        // EPERM means "exists but no permission"; treat as alive.
+        if (error && typeof error === 'object' && 'code' in error && error.code === 'EPERM') {
+            return true;
+        }
+        return false;
+    }
+}
+export async function verifyDevToolsReachable({ port, host = '127.0.0.1', attempts = 3, timeoutMs = 3000, }) {
+    const versionUrl = `http://${host}:${port}/json/version`;
+    for (let attempt = 0; attempt < attempts; attempt++) {
+        try {
+            const controller = new AbortController();
+            const timeout = setTimeout(() => controller.abort(), timeoutMs);
+            const response = await fetch(versionUrl, { signal: controller.signal });
+            clearTimeout(timeout);
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}`);
+            }
+            return { ok: true };
+        }
+        catch (error) {
+            if (attempt < attempts - 1) {
+                await new Promise((resolve) => setTimeout(resolve, 500 * (attempt + 1)));
+                continue;
+            }
+            const message = error instanceof Error ? error.message : String(error);
+            return { ok: false, error: message };
+        }
+    }
+    return { ok: false, error: 'unreachable' };
+}
+export async function cleanupStaleProfileState(userDataDir, logger, options = {}) {
+    for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
+        try {
+            await rm(candidate, { force: true });
+            logger?.(`Removed stale DevToolsActivePort: ${candidate}`);
+        }
+        catch {
+            // ignore cleanup errors
+        }
+    }
+    const lockRemovalMode = options.lockRemovalMode ?? 'never';
+    if (lockRemovalMode === 'never') {
+        return;
+    }
+    const pid = await readChromePid(userDataDir);
+    if (!pid) {
+        return;
+    }
+    if (isProcessAlive(pid)) {
+        logger?.(`Chrome pid ${pid} still alive; skipping profile lock cleanup`);
+        return;
+    }
+    // Extra safety: if Chrome is running with this profile (but with a different PID, e.g. user relaunched
+    // without remote debugging), never delete lock files.
+    if (await isChromeUsingUserDataDir(userDataDir)) {
+        logger?.('Detected running Chrome using this profile; skipping profile lock cleanup');
+        return;
+    }
+    const lockFiles = [
+        path.join(userDataDir, 'lockfile'),
+        path.join(userDataDir, 'SingletonLock'),
+        path.join(userDataDir, 'SingletonSocket'),
+        path.join(userDataDir, 'SingletonCookie'),
+    ];
+    for (const lock of lockFiles) {
+        await rm(lock, { force: true }).catch(() => undefined);
+    }
+    logger?.('Cleaned up stale Chrome profile locks');
+}
+async function isChromeUsingUserDataDir(userDataDir) {
+    if (process.platform === 'win32') {
+        // On Windows, lockfiles are typically held open and removal should fail anyway; avoid expensive process scans.
+        return false;
+    }
+    try {
+        const { stdout } = await execFileAsync('ps', ['-ax', '-o', 'command='], { maxBuffer: 10 * 1024 * 1024 });
+        const lines = String(stdout ?? '').split('\n');
+        const needle = userDataDir;
+        for (const line of lines) {
+            if (!line)
+                continue;
+            const lower = line.toLowerCase();
+            if (!lower.includes('chrome') && !lower.includes('chromium'))
+                continue;
+            if (line.includes(needle) && lower.includes('user-data-dir')) {
+                return true;
+            }
+        }
+    }
+    catch {
+        // best effort
+    }
+    return false;
+}

package/dist/src/browser/prompt.js CHANGED Viewed

@@ -6,10 +6,32 @@ import { isKnownModel } from '../oracle/modelResolver.js';
 import { buildPromptMarkdown } from '../oracle/promptAssembly.js';
 import { buildAttachmentPlan } from './policies.js';
 const DEFAULT_BROWSER_INLINE_CHAR_BUDGET = 60_000;
+const MEDIA_EXTENSIONS = new Set([
+    '.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v',
+    '.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a',
+    '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.heic', '.heif',
+    '.pdf',
+]);
+export function isMediaFile(filePath) {
+    const ext = path.extname(filePath).toLowerCase();
+    return MEDIA_EXTENSIONS.has(ext);
+}
 export async function assembleBrowserPrompt(runOptions, deps = {}) {
     const cwd = deps.cwd ?? process.cwd();
     const readFilesFn = deps.readFilesImpl ?? readFiles;
-    const files = await readFilesFn(runOptions.file ?? [], { cwd });
+    const allFilePaths = runOptions.file ?? [];
+    const textFilePaths = allFilePaths.filter((f) => !isMediaFile(f));
+    const mediaFilePaths = allFilePaths.filter((f) => isMediaFile(f));
+    const mediaAttachments = await Promise.all(mediaFilePaths.map(async (filePath) => {
+        const resolvedPath = path.isAbsolute(filePath) ? filePath : path.resolve(cwd, filePath);
+        const stats = await fs.stat(resolvedPath);
+        return {
+            path: resolvedPath,
+            displayPath: path.relative(cwd, resolvedPath) || path.basename(resolvedPath),
+            sizeBytes: stats.size,
+        };
+    }));
+    const files = await readFilesFn(textFilePaths, { cwd });
     const basePrompt = (runOptions.prompt ?? '').trim();
     const userPrompt = basePrompt;
     const systemPrompt = runOptions.system?.trim() || '';
@@ -40,9 +62,10 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
         .filter(Boolean)
         .join('\n\n')
         .trim();
-    const attachments = selectedPlan.attachments.slice();
+    const attachments = [...selectedPlan.attachments, ...mediaAttachments];
     const shouldBundle = selectedPlan.shouldBundle;
     let bundleText = null;
+    let bundled = null;
     if (shouldBundle) {
         const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
         const bundlePath = path.join(bundleDir, 'attachments-bundle.txt');
@@ -59,6 +82,8 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
             displayPath: bundlePath,
             sizeBytes: Buffer.byteLength(bundleText, 'utf8'),
         });
+        attachments.push(...mediaAttachments);
+        bundled = { originalCount: sections.length, bundlePath };
     }
     const inlineFileCount = selectedPlan.inlineFileCount;
     const modelConfig = isKnownModel(runOptions.model) ? MODEL_CONFIGS[runOptions.model] : MODEL_CONFIGS['gpt-5.1'];
@@ -85,7 +110,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
     let fallback = null;
     if (attachmentsPolicy === 'auto' && selectedPlan.mode === 'inline' && sections.length > 0) {
         const fallbackComposerText = baseComposerSections.join('\n\n').trim();
-        const fallbackAttachments = uploadPlan.attachments.slice();
+        const fallbackAttachments = [...uploadPlan.attachments, ...mediaAttachments];
         let fallbackBundled = null;
         if (uploadPlan.shouldBundle) {
             const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
@@ -103,6 +128,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
                 displayPath: bundlePath,
                 sizeBytes: Buffer.byteLength(fallbackBundleText, 'utf8'),
             });
+            fallbackAttachments.push(...mediaAttachments);
             fallbackBundled = { originalCount: sections.length, bundlePath };
         }
         fallback = {
@@ -121,8 +147,6 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
         attachmentsPolicy,
         attachmentMode: selectedPlan.mode,
         fallback,
-        bundled: shouldBundle && attachments.length === 1 && attachments[0]?.displayPath
-            ? { originalCount: sections.length, bundlePath: attachments[0].displayPath }
-            : null,
+        bundled,
     };
 }

package/dist/src/browser/sessionRunner.js CHANGED Viewed

@@ -5,11 +5,6 @@ import { runBrowserMode } from '../browserMode.js';
 import { assembleBrowserPrompt } from './prompt.js';
 import { BrowserAutomationError } from '../oracle/errors.js';
 export async function runBrowserSessionExecution({ runOptions, browserConfig, cwd, log }, deps = {}) {
-    if (runOptions.model.startsWith('gemini')) {
-        throw new BrowserAutomationError('Gemini models are not available in browser mode. Re-run with --engine api.', {
-            stage: 'preflight',
-        });
-    }
     const assemblePrompt = deps.assemblePrompt ?? assembleBrowserPrompt;
     const executeBrowser = deps.executeBrowser ?? runBrowserMode;
     const promptArtifacts = await assemblePrompt(runOptions, { cwd });