@steipete/oracle 0.6.0 → 0.7.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/README.md +16 -8
  2. package/dist/bin/oracle-cli.js +33 -13
  3. package/dist/src/browser/actions/assistantResponse.js +65 -6
  4. package/dist/src/browser/constants.js +1 -1
  5. package/dist/src/browser/index.js +22 -50
  6. package/dist/src/browser/profileState.js +171 -0
  7. package/dist/src/browser/prompt.js +30 -6
  8. package/dist/src/browser/sessionRunner.js +0 -5
  9. package/dist/src/cli/runOptions.js +6 -7
  10. package/dist/src/cli/sessionDisplay.js +8 -1
  11. package/dist/src/cli/sessionRunner.js +0 -8
  12. package/dist/src/gemini-web/client.js +322 -0
  13. package/dist/src/gemini-web/executor.js +204 -0
  14. package/dist/src/gemini-web/index.js +1 -0
  15. package/dist/src/gemini-web/types.js +1 -0
  16. package/dist/src/remote/server.js +17 -11
  17. package/package.json +2 -2
  18. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
  19. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Info.plist +0 -20
  20. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
  21. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Resources/OracleIcon.icns +0 -0
  22. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/_CodeSignature/CodeResources +0 -128
  23. package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.swift +0 -45
  24. package/dist/vendor/oracle-notifier/oracle-notifier/README.md +0 -24
  25. package/dist/vendor/oracle-notifier/oracle-notifier/build-notifier.sh +0 -93
package/README.md CHANGED
@@ -21,26 +21,29 @@ Use `npx -y @steipete/oracle …` (not `pnpx`)—pnpx's sandboxed cache can’t
21
21
 
22
22
  ```bash
23
23
  # Copy the bundle and paste into ChatGPT
24
- npx @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
24
+ npx -y @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
25
25
 
26
26
  # Minimal API run (expects OPENAI_API_KEY in your env)
27
- npx @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
27
+ npx -y @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
28
28
 
29
29
  # Multi-model API run
30
- npx @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
30
+ npx -y @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
31
31
 
32
32
  # Preview without spending tokens
33
- npx @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
33
+ npx -y @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
34
34
 
35
35
  # Browser run (no API key, will open ChatGPT)
36
- npx @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
36
+ npx -y @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
37
+
38
+ # Gemini browser mode (no API key; uses Chrome cookies from gemini.google.com)
39
+ npx -y @steipete/oracle --engine browser --model gemini-3-pro --prompt "a cute robot holding a banana" --generate-image out.jpg --aspect 1:1
37
40
 
38
41
  # Sessions (list and replay)
39
- npx @steipete/oracle status --hours 72
40
- npx @steipete/oracle session <id> --render
42
+ npx -y @steipete/oracle status --hours 72
43
+ npx -y @steipete/oracle session <id> --render
41
44
 
42
45
  # TUI (interactive, only for humans)
43
- npx @steipete/oracle tui
46
+ npx -y @steipete/oracle tui
44
47
  ```
45
48
 
46
49
  Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser is stable on macOS and works on Linux and Windows. On Linux pass `--browser-chrome-path/--browser-cookie-path` if detection fails; on Windows prefer `--browser-manual-login` or inline cookies if decryption is blocked.
@@ -49,6 +52,8 @@ Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser i
49
52
 
50
53
  **CLI**
51
54
  - API mode expects API keys in your environment: `OPENAI_API_KEY` (GPT-5.x), `GEMINI_API_KEY` (Gemini 3 Pro), `ANTHROPIC_API_KEY` (Claude Sonnet 4.5 / Opus 4.1).
55
+ - Gemini browser mode uses Chrome cookies instead of an API key—just be logged into `gemini.google.com` in Chrome (no Python/venv required).
56
+ - If your Gemini account can’t access “Pro”, Oracle auto-falls back to a supported model for web runs (and logs the fallback in verbose mode).
52
57
  - Prefer API mode or `--copy` + manual paste; browser automation is experimental.
53
58
  - Browser support: stable on macOS; works on Linux (add `--browser-chrome-path/--browser-cookie-path` when needed) and Windows (manual-login or inline cookies recommended when app-bound cookies block decryption).
54
59
  - Remote browser service: `oracle serve` on a signed-in host; clients use `--remote-host/--remote-token`.
@@ -109,6 +114,9 @@ npx -y @steipete/oracle oracle-mcp
109
114
  | `--dry-run [summary\|json\|full]` | Preview without sending. |
110
115
  | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
111
116
  | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser). |
117
+ | `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
118
+ | `--generate-image <file>` | Generate image and save to file (Gemini browser mode). |
119
+ | `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). |
112
120
  | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
113
121
 
114
122
  ## Configuration
package/dist/bin/oracle-cli.js CHANGED
@@ -18,6 +18,7 @@ import { DEFAULT_MODEL, MODEL_CONFIGS, readFiles, estimateRequestTokens, buildRe
18
18
  import { isKnownModel } from '../src/oracle/modelResolver.js';
19
19
  import { CHATGPT_URL } from '../src/browserMode.js';
20
20
  import { createRemoteBrowserExecutor } from '../src/remote/client.js';
21
+ import { createGeminiWebExecutor } from '../src/gemini-web/index.js';
21
22
  import { applyHelpStyling } from '../src/cli/help.js';
22
23
  import { collectPaths, collectModelList, parseFloatOption, parseIntOption, parseSearchOption, usesDefaultStatusFilters, resolvePreviewMode, normalizeModelOption, normalizeBaseUrl, resolveApiModel, inferModelFromLabel, parseHeartbeatOption, parseTimeoutOption, mergePathLikeOptions, } from '../src/cli/options.js';
23
24
  import { copyToClipboard } from '../src/cli/clipboard.js';
@@ -26,6 +27,7 @@ import { shouldDetachSession } from '../src/cli/detach.js';
26
27
  import { applyHiddenAliases } from '../src/cli/hiddenAliases.js';
27
28
  import { buildBrowserConfig, resolveBrowserModelLabel } from '../src/cli/browserConfig.js';
28
29
  import { performSessionRun } from '../src/cli/sessionRunner.js';
30
+ import { isMediaFile } from '../src/browser/prompt.js';
29
31
  import { attachSession, showStatus, formatCompletionSummary } from '../src/cli/sessionDisplay.js';
30
32
  import { formatCompactNumber } from '../src/cli/format.js';
31
33
  import { formatIntroLine } from '../src/cli/tagline.js';
@@ -114,7 +116,7 @@ program
114
116
  .addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.1-pro,gemini-3-pro").')
115
117
  .argParser(collectModelList)
116
118
  .default([]))
117
- .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Engine is preferred; --mode is a legacy alias. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
119
+ .addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Browser engine: GPT models automate ChatGPT; Gemini models use a cookie-based client for gemini.google.com. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
118
120
  .addOption(new Option('--mode <mode>', 'Alias for --engine (api | browser).').choices(['api', 'browser']).hideHelp())
119
121
  .option('--files-report', 'Show token usage per attached file (also prints automatically when files exceed the token budget).', false)
120
122
  .option('-v, --verbose', 'Enable verbose logging for all operations.', false)
@@ -182,6 +184,12 @@ program
182
184
  .addOption(new Option('--remote-token <token>', 'Access token for the remote `oracle serve` instance.'))
183
185
  .addOption(new Option('--browser-inline-files', 'Alias for --browser-attachments never (force pasting file contents inline).').default(false))
184
186
  .addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
187
+ .addOption(new Option('--youtube <url>', 'YouTube video URL to analyze (Gemini web/cookie mode only; uses your signed-in Chrome cookies for gemini.google.com).'))
188
+ .addOption(new Option('--generate-image <file>', 'Generate image and save to file (Gemini web/cookie mode only; requires gemini.google.com Chrome cookies).'))
189
+ .addOption(new Option('--edit-image <file>', 'Edit existing image (use with --output, Gemini web/cookie mode only).'))
190
+ .addOption(new Option('--output <file>', 'Output file path for image operations (Gemini web/cookie mode only).'))
191
+ .addOption(new Option('--aspect <ratio>', 'Aspect ratio for image generation: 16:9, 1:1, 4:3, 3:4 (Gemini web/cookie mode only).'))
192
+ .addOption(new Option('--gemini-show-thoughts', 'Display Gemini thinking process (Gemini web/cookie mode only).').default(false))
185
193
  .option('--retain-hours <hours>', 'Prune stored sessions older than this many hours before running (set 0 to disable).', parseFloatOption)
186
194
  .option('--force', 'Force start a new session even if an identical prompt is already running.', false)
187
195
  .option('--debug-help', 'Show the advanced/debug option set and exit.', false)
@@ -512,18 +520,13 @@ async function runRootCommand(options) {
512
520
  const isCodex = primaryModelCandidate.startsWith('gpt-5.1-codex');
513
521
  const isClaude = primaryModelCandidate.startsWith('claude');
514
522
  const userForcedBrowser = options.browser || options.engine === 'browser';
515
- const hasNonGptBrowserTarget = (engine === 'browser' || userForcedBrowser) &&
523
+ const isBrowserCompatible = (model) => model.startsWith('gpt-') || model.startsWith('gemini');
524
+ const hasNonBrowserCompatibleTarget = (engine === 'browser' || userForcedBrowser) &&
516
525
  (normalizedMultiModels.length > 0
517
- ? normalizedMultiModels.some((model) => !model.startsWith('gpt-'))
518
- : !resolvedModelCandidate.startsWith('gpt-'));
519
- if (hasNonGptBrowserTarget) {
520
- throw new Error('Browser engine only supports GPT-series ChatGPT models. Re-run with --engine api for Grok, Claude, Gemini, or other non-GPT models.');
521
- }
522
- if (isGemini && userForcedBrowser) {
523
- throw new Error('Gemini is only supported via API. Use --engine api.');
524
- }
525
- if (isGemini && engine === 'browser') {
526
- engine = 'api';
526
+ ? normalizedMultiModels.some((model) => !isBrowserCompatible(model))
527
+ : !isBrowserCompatible(resolvedModelCandidate));
528
+ if (hasNonBrowserCompatibleTarget) {
529
+ throw new Error('Browser engine only supports GPT and Gemini models. Re-run with --engine api for Grok, Claude, or other models.');
527
530
  }
528
531
  if (isClaude && engine === 'browser') {
529
532
  console.log(chalk.dim('Browser engine is not supported for Claude models; switching to API.'));
@@ -672,7 +675,11 @@ async function runRootCommand(options) {
672
675
  return;
673
676
  }
674
677
  if (options.file && options.file.length > 0) {
675
- await readFiles(options.file, { cwd: process.cwd() });
678
+ const isBrowserMode = engine === 'browser' || userForcedBrowser;
679
+ const filesToValidate = isBrowserMode ? options.file.filter((f) => !isMediaFile(f)) : options.file;
680
+ if (filesToValidate.length > 0) {
681
+ await readFiles(filesToValidate, { cwd: process.cwd() });
682
+ }
676
683
  }
677
684
  const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
678
685
  applyBrowserDefaultsFromConfig(options, userConfig, getSource);
@@ -698,6 +705,19 @@ async function runRootCommand(options) {
698
705
  };
699
706
  console.log(chalk.dim(`Routing browser automation to remote host ${remoteHost}`));
700
707
  }
708
+ else if (browserConfig && resolvedModel.startsWith('gemini')) {
709
+ browserDeps = {
710
+ executeBrowser: createGeminiWebExecutor({
711
+ youtube: options.youtube,
712
+ generateImage: options.generateImage,
713
+ editImage: options.editImage,
714
+ outputPath: options.output,
715
+ aspectRatio: options.aspect,
716
+ showThoughts: options.geminiShowThoughts,
717
+ }),
718
+ };
719
+ console.log(chalk.dim('Using Gemini web client for browser automation'));
720
+ }
701
721
  const remoteExecutionActive = Boolean(browserDeps);
702
722
  if (options.dryRun) {
703
723
  const baseRunOptions = buildRunOptions(resolvedOptions, {
package/dist/src/browser/actions/assistantResponse.js CHANGED
@@ -183,7 +183,9 @@ async function pollAssistantCompletion(Runtime, timeoutMs) {
183
183
  isStopButtonVisible(Runtime),
184
184
  isCompletionVisible(Runtime),
185
185
  ]);
186
- if (completionVisible || (!stopVisible && stableCycles >= requiredStableCycles)) {
186
+ // Require at least 2 stable cycles even when completion buttons are visible
187
+ // to ensure DOM text has fully rendered (buttons can appear before text settles)
188
+ if ((completionVisible && stableCycles >= 2) || (!stopVisible && stableCycles >= requiredStableCycles)) {
187
189
  return normalized;
188
190
  }
189
191
  }
@@ -211,10 +213,36 @@ async function isCompletionVisible(Runtime) {
211
213
  try {
212
214
  const { result } = await Runtime.evaluate({
213
215
  expression: `(() => {
214
- if (document.querySelector('${FINISHED_ACTIONS_SELECTOR}')) {
216
+ // Find the LAST assistant turn to check completion status
217
+ // Must match the same logic as buildAssistantExtractor for consistency
218
+ const ASSISTANT_SELECTOR = '${ASSISTANT_ROLE_SELECTOR}';
219
+ const isAssistantTurn = (node) => {
220
+ if (!(node instanceof HTMLElement)) return false;
221
+ const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
222
+ if (role === 'assistant') return true;
223
+ const testId = (node.getAttribute('data-testid') || '').toLowerCase();
224
+ if (testId.includes('assistant')) return true;
225
+ return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
226
+ };
227
+
228
+ const turns = Array.from(document.querySelectorAll('${CONVERSATION_TURN_SELECTOR}'));
229
+ let lastAssistantTurn = null;
230
+ for (let i = turns.length - 1; i >= 0; i--) {
231
+ if (isAssistantTurn(turns[i])) {
232
+ lastAssistantTurn = turns[i];
233
+ break;
234
+ }
235
+ }
236
+ if (!lastAssistantTurn) {
237
+ return false;
238
+ }
239
+ // Check if the last assistant turn has finished action buttons (copy, thumbs up/down, share)
240
+ if (lastAssistantTurn.querySelector('${FINISHED_ACTIONS_SELECTOR}')) {
215
241
  return true;
216
242
  }
217
- return Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
243
+ // Also check for "Done" text in the last assistant turn's markdown
244
+ const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
245
+ return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
218
246
  })()`,
219
247
  returnByValue: true,
220
248
  });
@@ -257,12 +285,27 @@ function buildAssistantSnapshotExpression() {
257
285
  }
258
286
  function buildResponseObserverExpression(timeoutMs) {
259
287
  const selectorsLiteral = JSON.stringify(ANSWER_SELECTORS);
288
+ const conversationLiteral = JSON.stringify(CONVERSATION_TURN_SELECTOR);
289
+ const assistantLiteral = JSON.stringify(ASSISTANT_ROLE_SELECTOR);
260
290
  return `(() => {
261
291
  ${buildClickDispatcher()}
262
292
  const SELECTORS = ${selectorsLiteral};
263
293
  const STOP_SELECTOR = '${STOP_BUTTON_SELECTOR}';
264
294
  const FINISHED_SELECTOR = '${FINISHED_ACTIONS_SELECTOR}';
295
+ const CONVERSATION_SELECTOR = ${conversationLiteral};
296
+ const ASSISTANT_SELECTOR = ${assistantLiteral};
265
297
  const settleDelayMs = 800;
298
+
299
+ // Helper to detect assistant turns - matches buildAssistantExtractor logic
300
+ const isAssistantTurn = (node) => {
301
+ if (!(node instanceof HTMLElement)) return false;
302
+ const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
303
+ if (role === 'assistant') return true;
304
+ const testId = (node.getAttribute('data-testid') || '').toLowerCase();
305
+ if (testId.includes('assistant')) return true;
306
+ return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
307
+ };
308
+
266
309
  ${buildAssistantExtractor('extractFromTurns')}
267
310
 
268
311
  const captureViaObserver = () =>
@@ -307,6 +350,24 @@ function buildResponseObserverExpression(timeoutMs) {
307
350
  }, ${timeoutMs});
308
351
  });
309
352
 
353
+ // Check if the last assistant turn has finished (scoped to avoid detecting old turns)
354
+ const isLastAssistantTurnFinished = () => {
355
+ const turns = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
356
+ let lastAssistantTurn = null;
357
+ for (let i = turns.length - 1; i >= 0; i--) {
358
+ if (isAssistantTurn(turns[i])) {
359
+ lastAssistantTurn = turns[i];
360
+ break;
361
+ }
362
+ }
363
+ if (!lastAssistantTurn) return false;
364
+ // Check for action buttons in this specific turn
365
+ if (lastAssistantTurn.querySelector(FINISHED_SELECTOR)) return true;
366
+ // Check for "Done" text in this turn's markdown
367
+ const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
368
+ return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
369
+ };
370
+
310
371
  const waitForSettle = async (snapshot) => {
311
372
  const settleWindowMs = 5000;
312
373
  const settleIntervalMs = 400;
@@ -321,9 +382,7 @@ function buildResponseObserverExpression(timeoutMs) {
321
382
  lastLength = refreshed.text?.length ?? lastLength;
322
383
  }
323
384
  const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
324
- const finishedVisible =
325
- Boolean(document.querySelector(FINISHED_SELECTOR)) ||
326
- Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
385
+ const finishedVisible = isLastAssistantTurnFinished();
327
386
 
328
387
  if (!stopVisible || finishedVisible) {
329
388
  break;
package/dist/src/browser/constants.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const CHATGPT_URL = 'https://chatgpt.com/';
2
- export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.1';
2
+ export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.2';
3
3
  export const COOKIE_URLS = ['https://chatgpt.com', 'https://chat.openai.com', 'https://atlas.openai.com'];
4
4
  export const INPUT_SELECTORS = [
5
5
  'textarea[data-id="prompt-textarea"]',
package/dist/src/browser/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { mkdtemp, rm, mkdir, readFile } from 'node:fs/promises';
1
+ import { mkdtemp, rm, mkdir } from 'node:fs/promises';
2
2
  import path from 'node:path';
3
3
  import os from 'node:os';
4
4
  import net from 'node:net';
@@ -12,6 +12,7 @@ import { estimateTokenCount, withRetries, delay } from './utils.js';
12
12
  import { formatElapsed } from '../oracle/format.js';
13
13
  import { CHATGPT_URL } from './constants.js';
14
14
  import { BrowserAutomationError } from '../oracle/errors.js';
15
+ import { cleanupStaleProfileState, readChromePid, readDevToolsPort, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from './profileState.js';
15
16
  export { CHATGPT_URL, DEFAULT_MODEL_TARGET } from './constants.js';
16
17
  export { parseDuration, delay, normalizeChatgptUrl } from './utils.js';
17
18
  export async function runBrowserMode(options) {
@@ -98,6 +99,13 @@ export async function runBrowserMode(options) {
98
99
  remoteChrome: config.remoteChrome,
99
100
  }, userDataDir, logger));
100
101
  const chromeHost = chrome.host ?? '127.0.0.1';
102
+ // Persist profile state so future manual-login runs can reuse this Chrome.
103
+ if (manualLogin && chrome.port) {
104
+ await writeDevToolsActivePort(userDataDir, chrome.port);
105
+ if (!reusedChrome && chrome.pid) {
106
+ await writeChromePid(userDataDir, chrome.pid);
107
+ }
108
+ }
101
109
  let removeTerminationHooks = null;
102
110
  try {
103
111
  removeTerminationHooks = registerTerminationHooks(chrome, userDataDir, effectiveKeepBrowser, logger, {
@@ -533,57 +541,21 @@ async function maybeReuseRunningChrome(userDataDir, logger) {
533
541
  const port = await readDevToolsPort(userDataDir);
534
542
  if (!port)
535
543
  return null;
536
- const versionUrl = `http://127.0.0.1:${port}/json/version`;
537
- try {
538
- const controller = new AbortController();
539
- const timeout = setTimeout(() => controller.abort(), 1500);
540
- const response = await fetch(versionUrl, { signal: controller.signal });
541
- clearTimeout(timeout);
542
- if (!response.ok)
543
- throw new Error(`HTTP ${response.status}`);
544
- const pidPath = path.join(userDataDir, 'chrome.pid');
545
- let pid;
546
- try {
547
- const rawPid = (await readFile(pidPath, 'utf8')).trim();
548
- pid = Number.parseInt(rawPid, 10);
549
- if (Number.isNaN(pid))
550
- pid = undefined;
551
- }
552
- catch {
553
- pid = undefined;
554
- }
555
- logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
556
- return {
557
- port,
558
- pid,
559
- kill: async () => { },
560
- process: undefined,
561
- };
562
- }
563
- catch (error) {
564
- const message = error instanceof Error ? error.message : String(error);
565
- logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${message}); launching new Chrome.`);
544
+ const probe = await verifyDevToolsReachable({ port });
545
+ if (!probe.ok) {
546
+ logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${probe.error}); launching new Chrome.`);
547
+ // Safe cleanup: remove stale DevToolsActivePort; only remove lock files if this was an Oracle-owned pid that died.
548
+ await cleanupStaleProfileState(userDataDir, logger, { lockRemovalMode: 'if_oracle_pid_dead' });
566
549
  return null;
567
550
  }
568
- }
569
- async function readDevToolsPort(userDataDir) {
570
- const candidates = [
571
- path.join(userDataDir, 'DevToolsActivePort'),
572
- path.join(userDataDir, 'Default', 'DevToolsActivePort'),
573
- ];
574
- for (const candidate of candidates) {
575
- try {
576
- const raw = await readFile(candidate, 'utf8');
577
- const firstLine = raw.split(/\r?\n/u)[0]?.trim();
578
- const port = Number.parseInt(firstLine ?? '', 10);
579
- if (Number.isFinite(port)) {
580
- return port;
581
- }
582
- }
583
- catch {
584
- }
585
- }
586
- return null;
551
+ const pid = await readChromePid(userDataDir);
552
+ logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
553
+ return {
554
+ port,
555
+ pid: pid ?? undefined,
556
+ kill: async () => { },
557
+ process: undefined,
558
+ };
587
559
  }
588
560
  async function runRemoteBrowserMode(promptText, attachments, config, logger, options) {
589
561
  const remoteChromeConfig = config.remoteChrome;
package/dist/src/browser/profileState.js ADDED
@@ -0,0 +1,171 @@
1
+ import path from 'node:path';
2
+ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
3
+ import { execFile } from 'node:child_process';
4
+ import { promisify } from 'node:util';
5
+ const DEVTOOLS_ACTIVE_PORT_FILENAME = 'DevToolsActivePort';
6
+ const DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS = [
7
+ DEVTOOLS_ACTIVE_PORT_FILENAME,
8
+ path.join('Default', DEVTOOLS_ACTIVE_PORT_FILENAME),
9
+ ];
10
+ const CHROME_PID_FILENAME = 'chrome.pid';
11
+ const execFileAsync = promisify(execFile);
12
+ export function getDevToolsActivePortPaths(userDataDir) {
13
+ return DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS.map((relative) => path.join(userDataDir, relative));
14
+ }
15
+ export async function readDevToolsPort(userDataDir) {
16
+ for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
17
+ try {
18
+ const raw = await readFile(candidate, 'utf8');
19
+ const firstLine = raw.split(/\r?\n/u)[0]?.trim();
20
+ const port = Number.parseInt(firstLine ?? '', 10);
21
+ if (Number.isFinite(port)) {
22
+ return port;
23
+ }
24
+ }
25
+ catch {
26
+ // ignore missing/unreadable candidates
27
+ }
28
+ }
29
+ return null;
30
+ }
31
+ export async function writeDevToolsActivePort(userDataDir, port) {
32
+ const contents = `${port}\n/devtools/browser`;
33
+ for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
34
+ try {
35
+ await mkdir(path.dirname(candidate), { recursive: true });
36
+ await writeFile(candidate, contents, 'utf8');
37
+ }
38
+ catch {
39
+ // best effort
40
+ }
41
+ }
42
+ }
43
+ export async function readChromePid(userDataDir) {
44
+ const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
45
+ try {
46
+ const raw = (await readFile(pidPath, 'utf8')).trim();
47
+ const pid = Number.parseInt(raw, 10);
48
+ if (!Number.isFinite(pid) || pid <= 0) {
49
+ return null;
50
+ }
51
+ return pid;
52
+ }
53
+ catch {
54
+ return null;
55
+ }
56
+ }
57
+ export async function writeChromePid(userDataDir, pid) {
58
+ if (!Number.isFinite(pid) || pid <= 0)
59
+ return;
60
+ const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
61
+ try {
62
+ await mkdir(path.dirname(pidPath), { recursive: true });
63
+ await writeFile(pidPath, `${Math.trunc(pid)}\n`, 'utf8');
64
+ }
65
+ catch {
66
+ // best effort
67
+ }
68
+ }
69
+ export function isProcessAlive(pid) {
70
+ if (!Number.isFinite(pid) || pid <= 0)
71
+ return false;
72
+ try {
73
+ process.kill(pid, 0);
74
+ return true;
75
+ }
76
+ catch (error) {
77
+ // EPERM means "exists but no permission"; treat as alive.
78
+ if (error && typeof error === 'object' && 'code' in error && error.code === 'EPERM') {
79
+ return true;
80
+ }
81
+ return false;
82
+ }
83
+ }
84
+ export async function verifyDevToolsReachable({ port, host = '127.0.0.1', attempts = 3, timeoutMs = 3000, }) {
85
+ const versionUrl = `http://${host}:${port}/json/version`;
86
+ for (let attempt = 0; attempt < attempts; attempt++) {
87
+ try {
88
+ const controller = new AbortController();
89
+ const timeout = setTimeout(() => controller.abort(), timeoutMs);
90
+ const response = await fetch(versionUrl, { signal: controller.signal });
91
+ clearTimeout(timeout);
92
+ if (!response.ok) {
93
+ throw new Error(`HTTP ${response.status}`);
94
+ }
95
+ return { ok: true };
96
+ }
97
+ catch (error) {
98
+ if (attempt < attempts - 1) {
99
+ await new Promise((resolve) => setTimeout(resolve, 500 * (attempt + 1)));
100
+ continue;
101
+ }
102
+ const message = error instanceof Error ? error.message : String(error);
103
+ return { ok: false, error: message };
104
+ }
105
+ }
106
+ return { ok: false, error: 'unreachable' };
107
+ }
108
+ export async function cleanupStaleProfileState(userDataDir, logger, options = {}) {
109
+ for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
110
+ try {
111
+ await rm(candidate, { force: true });
112
+ logger?.(`Removed stale DevToolsActivePort: ${candidate}`);
113
+ }
114
+ catch {
115
+ // ignore cleanup errors
116
+ }
117
+ }
118
+ const lockRemovalMode = options.lockRemovalMode ?? 'never';
119
+ if (lockRemovalMode === 'never') {
120
+ return;
121
+ }
122
+ const pid = await readChromePid(userDataDir);
123
+ if (!pid) {
124
+ return;
125
+ }
126
+ if (isProcessAlive(pid)) {
127
+ logger?.(`Chrome pid ${pid} still alive; skipping profile lock cleanup`);
128
+ return;
129
+ }
130
+ // Extra safety: if Chrome is running with this profile (but with a different PID, e.g. user relaunched
131
+ // without remote debugging), never delete lock files.
132
+ if (await isChromeUsingUserDataDir(userDataDir)) {
133
+ logger?.('Detected running Chrome using this profile; skipping profile lock cleanup');
134
+ return;
135
+ }
136
+ const lockFiles = [
137
+ path.join(userDataDir, 'lockfile'),
138
+ path.join(userDataDir, 'SingletonLock'),
139
+ path.join(userDataDir, 'SingletonSocket'),
140
+ path.join(userDataDir, 'SingletonCookie'),
141
+ ];
142
+ for (const lock of lockFiles) {
143
+ await rm(lock, { force: true }).catch(() => undefined);
144
+ }
145
+ logger?.('Cleaned up stale Chrome profile locks');
146
+ }
147
+ async function isChromeUsingUserDataDir(userDataDir) {
148
+ if (process.platform === 'win32') {
149
+ // On Windows, lockfiles are typically held open and removal should fail anyway; avoid expensive process scans.
150
+ return false;
151
+ }
152
+ try {
153
+ const { stdout } = await execFileAsync('ps', ['-ax', '-o', 'command='], { maxBuffer: 10 * 1024 * 1024 });
154
+ const lines = String(stdout ?? '').split('\n');
155
+ const needle = userDataDir;
156
+ for (const line of lines) {
157
+ if (!line)
158
+ continue;
159
+ const lower = line.toLowerCase();
160
+ if (!lower.includes('chrome') && !lower.includes('chromium'))
161
+ continue;
162
+ if (line.includes(needle) && lower.includes('user-data-dir')) {
163
+ return true;
164
+ }
165
+ }
166
+ }
167
+ catch {
168
+ // best effort
169
+ }
170
+ return false;
171
+ }
package/dist/src/browser/prompt.js CHANGED
@@ -6,10 +6,32 @@ import { isKnownModel } from '../oracle/modelResolver.js';
6
6
  import { buildPromptMarkdown } from '../oracle/promptAssembly.js';
7
7
  import { buildAttachmentPlan } from './policies.js';
8
8
  const DEFAULT_BROWSER_INLINE_CHAR_BUDGET = 60_000;
9
+ const MEDIA_EXTENSIONS = new Set([
10
+ '.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v',
11
+ '.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a',
12
+ '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.heic', '.heif',
13
+ '.pdf',
14
+ ]);
15
+ export function isMediaFile(filePath) {
16
+ const ext = path.extname(filePath).toLowerCase();
17
+ return MEDIA_EXTENSIONS.has(ext);
18
+ }
9
19
  export async function assembleBrowserPrompt(runOptions, deps = {}) {
10
20
  const cwd = deps.cwd ?? process.cwd();
11
21
  const readFilesFn = deps.readFilesImpl ?? readFiles;
12
- const files = await readFilesFn(runOptions.file ?? [], { cwd });
22
+ const allFilePaths = runOptions.file ?? [];
23
+ const textFilePaths = allFilePaths.filter((f) => !isMediaFile(f));
24
+ const mediaFilePaths = allFilePaths.filter((f) => isMediaFile(f));
25
+ const mediaAttachments = await Promise.all(mediaFilePaths.map(async (filePath) => {
26
+ const resolvedPath = path.isAbsolute(filePath) ? filePath : path.resolve(cwd, filePath);
27
+ const stats = await fs.stat(resolvedPath);
28
+ return {
29
+ path: resolvedPath,
30
+ displayPath: path.relative(cwd, resolvedPath) || path.basename(resolvedPath),
31
+ sizeBytes: stats.size,
32
+ };
33
+ }));
34
+ const files = await readFilesFn(textFilePaths, { cwd });
13
35
  const basePrompt = (runOptions.prompt ?? '').trim();
14
36
  const userPrompt = basePrompt;
15
37
  const systemPrompt = runOptions.system?.trim() || '';
@@ -40,9 +62,10 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
40
62
  .filter(Boolean)
41
63
  .join('\n\n')
42
64
  .trim();
43
- const attachments = selectedPlan.attachments.slice();
65
+ const attachments = [...selectedPlan.attachments, ...mediaAttachments];
44
66
  const shouldBundle = selectedPlan.shouldBundle;
45
67
  let bundleText = null;
68
+ let bundled = null;
46
69
  if (shouldBundle) {
47
70
  const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
48
71
  const bundlePath = path.join(bundleDir, 'attachments-bundle.txt');
@@ -59,6 +82,8 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
59
82
  displayPath: bundlePath,
60
83
  sizeBytes: Buffer.byteLength(bundleText, 'utf8'),
61
84
  });
85
+ attachments.push(...mediaAttachments);
86
+ bundled = { originalCount: sections.length, bundlePath };
62
87
  }
63
88
  const inlineFileCount = selectedPlan.inlineFileCount;
64
89
  const modelConfig = isKnownModel(runOptions.model) ? MODEL_CONFIGS[runOptions.model] : MODEL_CONFIGS['gpt-5.1'];
@@ -85,7 +110,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
85
110
  let fallback = null;
86
111
  if (attachmentsPolicy === 'auto' && selectedPlan.mode === 'inline' && sections.length > 0) {
87
112
  const fallbackComposerText = baseComposerSections.join('\n\n').trim();
88
- const fallbackAttachments = uploadPlan.attachments.slice();
113
+ const fallbackAttachments = [...uploadPlan.attachments, ...mediaAttachments];
89
114
  let fallbackBundled = null;
90
115
  if (uploadPlan.shouldBundle) {
91
116
  const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
@@ -103,6 +128,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
103
128
  displayPath: bundlePath,
104
129
  sizeBytes: Buffer.byteLength(fallbackBundleText, 'utf8'),
105
130
  });
131
+ fallbackAttachments.push(...mediaAttachments);
106
132
  fallbackBundled = { originalCount: sections.length, bundlePath };
107
133
  }
108
134
  fallback = {
@@ -121,8 +147,6 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
121
147
  attachmentsPolicy,
122
148
  attachmentMode: selectedPlan.mode,
123
149
  fallback,
124
- bundled: shouldBundle && attachments.length === 1 && attachments[0]?.displayPath
125
- ? { originalCount: sections.length, bundlePath: attachments[0].displayPath }
126
- : null,
150
+ bundled,
127
151
  };
128
152
  }
package/dist/src/browser/sessionRunner.js CHANGED
@@ -5,11 +5,6 @@ import { runBrowserMode } from '../browserMode.js';
5
5
  import { assembleBrowserPrompt } from './prompt.js';
6
6
  import { BrowserAutomationError } from '../oracle/errors.js';
7
7
  export async function runBrowserSessionExecution({ runOptions, browserConfig, cwd, log }, deps = {}) {
8
- if (runOptions.model.startsWith('gemini')) {
9
- throw new BrowserAutomationError('Gemini models are not available in browser mode. Re-run with --engine api.', {
10
- stage: 'preflight',
11
- });
12
- }
13
8
  const assemblePrompt = deps.assemblePrompt ?? assembleBrowserPrompt;
14
9
  const executeBrowser = deps.executeBrowser ?? runBrowserMode;
15
10
  const promptArtifacts = await assemblePrompt(runOptions, { cwd });