@steipete/oracle 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -8
- package/dist/bin/oracle-cli.js +33 -13
- package/dist/src/browser/actions/assistantResponse.js +65 -6
- package/dist/src/browser/constants.js +1 -1
- package/dist/src/browser/index.js +22 -50
- package/dist/src/browser/profileState.js +171 -0
- package/dist/src/browser/prompt.js +30 -6
- package/dist/src/browser/sessionRunner.js +0 -5
- package/dist/src/cli/runOptions.js +6 -7
- package/dist/src/cli/sessionDisplay.js +8 -1
- package/dist/src/cli/sessionRunner.js +0 -8
- package/dist/src/gemini-web/client.js +322 -0
- package/dist/src/gemini-web/executor.js +204 -0
- package/dist/src/gemini-web/index.js +1 -0
- package/dist/src/gemini-web/types.js +1 -0
- package/dist/src/remote/server.js +17 -11
- package/package.json +2 -2
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Info.plist +0 -20
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/Resources/OracleIcon.icns +0 -0
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.app/Contents/_CodeSignature/CodeResources +0 -128
- package/dist/vendor/oracle-notifier/oracle-notifier/OracleNotifier.swift +0 -45
- package/dist/vendor/oracle-notifier/oracle-notifier/README.md +0 -24
- package/dist/vendor/oracle-notifier/oracle-notifier/build-notifier.sh +0 -93
package/README.md
CHANGED
|
@@ -21,26 +21,29 @@ Use `npx -y @steipete/oracle …` (not `pnpx`)—pnpx's sandboxed cache can’t
|
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
23
|
# Copy the bundle and paste into ChatGPT
|
|
24
|
-
npx @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
|
|
24
|
+
npx -y @steipete/oracle --render --copy -p "Review the TS data layer for schema drift" --file "src/**/*.ts,*/*.test.ts"
|
|
25
25
|
|
|
26
26
|
# Minimal API run (expects OPENAI_API_KEY in your env)
|
|
27
|
-
npx @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
|
|
27
|
+
npx -y @steipete/oracle -p "Write a concise architecture note for the storage adapters" --file src/storage/README.md
|
|
28
28
|
|
|
29
29
|
# Multi-model API run
|
|
30
|
-
npx @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
|
|
30
|
+
npx -y @steipete/oracle -p "Cross-check the data layer assumptions" --models gpt-5.1-pro,gemini-3-pro --file "src/**/*.ts"
|
|
31
31
|
|
|
32
32
|
# Preview without spending tokens
|
|
33
|
-
npx @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
|
|
33
|
+
npx -y @steipete/oracle --dry-run summary -p "Check release notes" --file docs/release-notes.md
|
|
34
34
|
|
|
35
35
|
# Browser run (no API key, will open ChatGPT)
|
|
36
|
-
npx @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
|
|
36
|
+
npx -y @steipete/oracle --engine browser -p "Walk through the UI smoke test" --file "src/**/*.ts"
|
|
37
|
+
|
|
38
|
+
# Gemini browser mode (no API key; uses Chrome cookies from gemini.google.com)
|
|
39
|
+
npx -y @steipete/oracle --engine browser --model gemini-3-pro --prompt "a cute robot holding a banana" --generate-image out.jpg --aspect 1:1
|
|
37
40
|
|
|
38
41
|
# Sessions (list and replay)
|
|
39
|
-
npx @steipete/oracle status --hours 72
|
|
40
|
-
npx @steipete/oracle session <id> --render
|
|
42
|
+
npx -y @steipete/oracle status --hours 72
|
|
43
|
+
npx -y @steipete/oracle session <id> --render
|
|
41
44
|
|
|
42
45
|
# TUI (interactive, only for humans)
|
|
43
|
-
npx @steipete/oracle tui
|
|
46
|
+
npx -y @steipete/oracle tui
|
|
44
47
|
```
|
|
45
48
|
|
|
46
49
|
Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser is stable on macOS and works on Linux and Windows. On Linux pass `--browser-chrome-path/--browser-cookie-path` if detection fails; on Windows prefer `--browser-manual-login` or inline cookies if decryption is blocked.
|
|
@@ -49,6 +52,8 @@ Engine auto-picks API when `OPENAI_API_KEY` is set, otherwise browser; browser i
|
|
|
49
52
|
|
|
50
53
|
**CLI**
|
|
51
54
|
- API mode expects API keys in your environment: `OPENAI_API_KEY` (GPT-5.x), `GEMINI_API_KEY` (Gemini 3 Pro), `ANTHROPIC_API_KEY` (Claude Sonnet 4.5 / Opus 4.1).
|
|
55
|
+
- Gemini browser mode uses Chrome cookies instead of an API key—just be logged into `gemini.google.com` in Chrome (no Python/venv required).
|
|
56
|
+
- If your Gemini account can’t access “Pro”, Oracle auto-falls back to a supported model for web runs (and logs the fallback in verbose mode).
|
|
52
57
|
- Prefer API mode or `--copy` + manual paste; browser automation is experimental.
|
|
53
58
|
- Browser support: stable on macOS; works on Linux (add `--browser-chrome-path/--browser-cookie-path` when needed) and Windows (manual-login or inline cookies recommended when app-bound cookies block decryption).
|
|
54
59
|
- Remote browser service: `oracle serve` on a signed-in host; clients use `--remote-host/--remote-token`.
|
|
@@ -109,6 +114,9 @@ npx -y @steipete/oracle oracle-mcp
|
|
|
109
114
|
| `--dry-run [summary\|json\|full]` | Preview without sending. |
|
|
110
115
|
| `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
|
|
111
116
|
| `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser). |
|
|
117
|
+
| `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
|
|
118
|
+
| `--generate-image <file>` | Generate image and save to file (Gemini browser mode). |
|
|
119
|
+
| `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). |
|
|
112
120
|
| `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
|
|
113
121
|
|
|
114
122
|
## Configuration
|
package/dist/bin/oracle-cli.js
CHANGED
|
@@ -18,6 +18,7 @@ import { DEFAULT_MODEL, MODEL_CONFIGS, readFiles, estimateRequestTokens, buildRe
|
|
|
18
18
|
import { isKnownModel } from '../src/oracle/modelResolver.js';
|
|
19
19
|
import { CHATGPT_URL } from '../src/browserMode.js';
|
|
20
20
|
import { createRemoteBrowserExecutor } from '../src/remote/client.js';
|
|
21
|
+
import { createGeminiWebExecutor } from '../src/gemini-web/index.js';
|
|
21
22
|
import { applyHelpStyling } from '../src/cli/help.js';
|
|
22
23
|
import { collectPaths, collectModelList, parseFloatOption, parseIntOption, parseSearchOption, usesDefaultStatusFilters, resolvePreviewMode, normalizeModelOption, normalizeBaseUrl, resolveApiModel, inferModelFromLabel, parseHeartbeatOption, parseTimeoutOption, mergePathLikeOptions, } from '../src/cli/options.js';
|
|
23
24
|
import { copyToClipboard } from '../src/cli/clipboard.js';
|
|
@@ -26,6 +27,7 @@ import { shouldDetachSession } from '../src/cli/detach.js';
|
|
|
26
27
|
import { applyHiddenAliases } from '../src/cli/hiddenAliases.js';
|
|
27
28
|
import { buildBrowserConfig, resolveBrowserModelLabel } from '../src/cli/browserConfig.js';
|
|
28
29
|
import { performSessionRun } from '../src/cli/sessionRunner.js';
|
|
30
|
+
import { isMediaFile } from '../src/browser/prompt.js';
|
|
29
31
|
import { attachSession, showStatus, formatCompletionSummary } from '../src/cli/sessionDisplay.js';
|
|
30
32
|
import { formatCompactNumber } from '../src/cli/format.js';
|
|
31
33
|
import { formatIntroLine } from '../src/cli/tagline.js';
|
|
@@ -114,7 +116,7 @@ program
|
|
|
114
116
|
.addOption(new Option('--models <models>', 'Comma-separated API model list to query in parallel (e.g., "gpt-5.1-pro,gemini-3-pro").')
|
|
115
117
|
.argParser(collectModelList)
|
|
116
118
|
.default([]))
|
|
117
|
-
.addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser).
|
|
119
|
+
.addOption(new Option('-e, --engine <mode>', 'Execution engine (api | browser). Browser engine: GPT models automate ChatGPT; Gemini models use a cookie-based client for gemini.google.com. If omitted, oracle picks api when OPENAI_API_KEY is set, otherwise browser.').choices(['api', 'browser']))
|
|
118
120
|
.addOption(new Option('--mode <mode>', 'Alias for --engine (api | browser).').choices(['api', 'browser']).hideHelp())
|
|
119
121
|
.option('--files-report', 'Show token usage per attached file (also prints automatically when files exceed the token budget).', false)
|
|
120
122
|
.option('-v, --verbose', 'Enable verbose logging for all operations.', false)
|
|
@@ -182,6 +184,12 @@ program
|
|
|
182
184
|
.addOption(new Option('--remote-token <token>', 'Access token for the remote `oracle serve` instance.'))
|
|
183
185
|
.addOption(new Option('--browser-inline-files', 'Alias for --browser-attachments never (force pasting file contents inline).').default(false))
|
|
184
186
|
.addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
|
|
187
|
+
.addOption(new Option('--youtube <url>', 'YouTube video URL to analyze (Gemini web/cookie mode only; uses your signed-in Chrome cookies for gemini.google.com).'))
|
|
188
|
+
.addOption(new Option('--generate-image <file>', 'Generate image and save to file (Gemini web/cookie mode only; requires gemini.google.com Chrome cookies).'))
|
|
189
|
+
.addOption(new Option('--edit-image <file>', 'Edit existing image (use with --output, Gemini web/cookie mode only).'))
|
|
190
|
+
.addOption(new Option('--output <file>', 'Output file path for image operations (Gemini web/cookie mode only).'))
|
|
191
|
+
.addOption(new Option('--aspect <ratio>', 'Aspect ratio for image generation: 16:9, 1:1, 4:3, 3:4 (Gemini web/cookie mode only).'))
|
|
192
|
+
.addOption(new Option('--gemini-show-thoughts', 'Display Gemini thinking process (Gemini web/cookie mode only).').default(false))
|
|
185
193
|
.option('--retain-hours <hours>', 'Prune stored sessions older than this many hours before running (set 0 to disable).', parseFloatOption)
|
|
186
194
|
.option('--force', 'Force start a new session even if an identical prompt is already running.', false)
|
|
187
195
|
.option('--debug-help', 'Show the advanced/debug option set and exit.', false)
|
|
@@ -512,18 +520,13 @@ async function runRootCommand(options) {
|
|
|
512
520
|
const isCodex = primaryModelCandidate.startsWith('gpt-5.1-codex');
|
|
513
521
|
const isClaude = primaryModelCandidate.startsWith('claude');
|
|
514
522
|
const userForcedBrowser = options.browser || options.engine === 'browser';
|
|
515
|
-
const
|
|
523
|
+
const isBrowserCompatible = (model) => model.startsWith('gpt-') || model.startsWith('gemini');
|
|
524
|
+
const hasNonBrowserCompatibleTarget = (engine === 'browser' || userForcedBrowser) &&
|
|
516
525
|
(normalizedMultiModels.length > 0
|
|
517
|
-
? normalizedMultiModels.some((model) => !model
|
|
518
|
-
: !resolvedModelCandidate
|
|
519
|
-
if (
|
|
520
|
-
throw new Error('Browser engine only supports GPT
|
|
521
|
-
}
|
|
522
|
-
if (isGemini && userForcedBrowser) {
|
|
523
|
-
throw new Error('Gemini is only supported via API. Use --engine api.');
|
|
524
|
-
}
|
|
525
|
-
if (isGemini && engine === 'browser') {
|
|
526
|
-
engine = 'api';
|
|
526
|
+
? normalizedMultiModels.some((model) => !isBrowserCompatible(model))
|
|
527
|
+
: !isBrowserCompatible(resolvedModelCandidate));
|
|
528
|
+
if (hasNonBrowserCompatibleTarget) {
|
|
529
|
+
throw new Error('Browser engine only supports GPT and Gemini models. Re-run with --engine api for Grok, Claude, or other models.');
|
|
527
530
|
}
|
|
528
531
|
if (isClaude && engine === 'browser') {
|
|
529
532
|
console.log(chalk.dim('Browser engine is not supported for Claude models; switching to API.'));
|
|
@@ -672,7 +675,11 @@ async function runRootCommand(options) {
|
|
|
672
675
|
return;
|
|
673
676
|
}
|
|
674
677
|
if (options.file && options.file.length > 0) {
|
|
675
|
-
|
|
678
|
+
const isBrowserMode = engine === 'browser' || userForcedBrowser;
|
|
679
|
+
const filesToValidate = isBrowserMode ? options.file.filter((f) => !isMediaFile(f)) : options.file;
|
|
680
|
+
if (filesToValidate.length > 0) {
|
|
681
|
+
await readFiles(filesToValidate, { cwd: process.cwd() });
|
|
682
|
+
}
|
|
676
683
|
}
|
|
677
684
|
const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
|
|
678
685
|
applyBrowserDefaultsFromConfig(options, userConfig, getSource);
|
|
@@ -698,6 +705,19 @@ async function runRootCommand(options) {
|
|
|
698
705
|
};
|
|
699
706
|
console.log(chalk.dim(`Routing browser automation to remote host ${remoteHost}`));
|
|
700
707
|
}
|
|
708
|
+
else if (browserConfig && resolvedModel.startsWith('gemini')) {
|
|
709
|
+
browserDeps = {
|
|
710
|
+
executeBrowser: createGeminiWebExecutor({
|
|
711
|
+
youtube: options.youtube,
|
|
712
|
+
generateImage: options.generateImage,
|
|
713
|
+
editImage: options.editImage,
|
|
714
|
+
outputPath: options.output,
|
|
715
|
+
aspectRatio: options.aspect,
|
|
716
|
+
showThoughts: options.geminiShowThoughts,
|
|
717
|
+
}),
|
|
718
|
+
};
|
|
719
|
+
console.log(chalk.dim('Using Gemini web client for browser automation'));
|
|
720
|
+
}
|
|
701
721
|
const remoteExecutionActive = Boolean(browserDeps);
|
|
702
722
|
if (options.dryRun) {
|
|
703
723
|
const baseRunOptions = buildRunOptions(resolvedOptions, {
|
|
@@ -183,7 +183,9 @@ async function pollAssistantCompletion(Runtime, timeoutMs) {
|
|
|
183
183
|
isStopButtonVisible(Runtime),
|
|
184
184
|
isCompletionVisible(Runtime),
|
|
185
185
|
]);
|
|
186
|
-
|
|
186
|
+
// Require at least 2 stable cycles even when completion buttons are visible
|
|
187
|
+
// to ensure DOM text has fully rendered (buttons can appear before text settles)
|
|
188
|
+
if ((completionVisible && stableCycles >= 2) || (!stopVisible && stableCycles >= requiredStableCycles)) {
|
|
187
189
|
return normalized;
|
|
188
190
|
}
|
|
189
191
|
}
|
|
@@ -211,10 +213,36 @@ async function isCompletionVisible(Runtime) {
|
|
|
211
213
|
try {
|
|
212
214
|
const { result } = await Runtime.evaluate({
|
|
213
215
|
expression: `(() => {
|
|
214
|
-
|
|
216
|
+
// Find the LAST assistant turn to check completion status
|
|
217
|
+
// Must match the same logic as buildAssistantExtractor for consistency
|
|
218
|
+
const ASSISTANT_SELECTOR = '${ASSISTANT_ROLE_SELECTOR}';
|
|
219
|
+
const isAssistantTurn = (node) => {
|
|
220
|
+
if (!(node instanceof HTMLElement)) return false;
|
|
221
|
+
const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
|
|
222
|
+
if (role === 'assistant') return true;
|
|
223
|
+
const testId = (node.getAttribute('data-testid') || '').toLowerCase();
|
|
224
|
+
if (testId.includes('assistant')) return true;
|
|
225
|
+
return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
|
|
226
|
+
};
|
|
227
|
+
|
|
228
|
+
const turns = Array.from(document.querySelectorAll('${CONVERSATION_TURN_SELECTOR}'));
|
|
229
|
+
let lastAssistantTurn = null;
|
|
230
|
+
for (let i = turns.length - 1; i >= 0; i--) {
|
|
231
|
+
if (isAssistantTurn(turns[i])) {
|
|
232
|
+
lastAssistantTurn = turns[i];
|
|
233
|
+
break;
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
if (!lastAssistantTurn) {
|
|
237
|
+
return false;
|
|
238
|
+
}
|
|
239
|
+
// Check if the last assistant turn has finished action buttons (copy, thumbs up/down, share)
|
|
240
|
+
if (lastAssistantTurn.querySelector('${FINISHED_ACTIONS_SELECTOR}')) {
|
|
215
241
|
return true;
|
|
216
242
|
}
|
|
217
|
-
|
|
243
|
+
// Also check for "Done" text in the last assistant turn's markdown
|
|
244
|
+
const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
|
|
245
|
+
return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
|
|
218
246
|
})()`,
|
|
219
247
|
returnByValue: true,
|
|
220
248
|
});
|
|
@@ -257,12 +285,27 @@ function buildAssistantSnapshotExpression() {
|
|
|
257
285
|
}
|
|
258
286
|
function buildResponseObserverExpression(timeoutMs) {
|
|
259
287
|
const selectorsLiteral = JSON.stringify(ANSWER_SELECTORS);
|
|
288
|
+
const conversationLiteral = JSON.stringify(CONVERSATION_TURN_SELECTOR);
|
|
289
|
+
const assistantLiteral = JSON.stringify(ASSISTANT_ROLE_SELECTOR);
|
|
260
290
|
return `(() => {
|
|
261
291
|
${buildClickDispatcher()}
|
|
262
292
|
const SELECTORS = ${selectorsLiteral};
|
|
263
293
|
const STOP_SELECTOR = '${STOP_BUTTON_SELECTOR}';
|
|
264
294
|
const FINISHED_SELECTOR = '${FINISHED_ACTIONS_SELECTOR}';
|
|
295
|
+
const CONVERSATION_SELECTOR = ${conversationLiteral};
|
|
296
|
+
const ASSISTANT_SELECTOR = ${assistantLiteral};
|
|
265
297
|
const settleDelayMs = 800;
|
|
298
|
+
|
|
299
|
+
// Helper to detect assistant turns - matches buildAssistantExtractor logic
|
|
300
|
+
const isAssistantTurn = (node) => {
|
|
301
|
+
if (!(node instanceof HTMLElement)) return false;
|
|
302
|
+
const role = (node.getAttribute('data-message-author-role') || node.dataset?.messageAuthorRole || '').toLowerCase();
|
|
303
|
+
if (role === 'assistant') return true;
|
|
304
|
+
const testId = (node.getAttribute('data-testid') || '').toLowerCase();
|
|
305
|
+
if (testId.includes('assistant')) return true;
|
|
306
|
+
return Boolean(node.querySelector(ASSISTANT_SELECTOR) || node.querySelector('[data-testid*="assistant"]'));
|
|
307
|
+
};
|
|
308
|
+
|
|
266
309
|
${buildAssistantExtractor('extractFromTurns')}
|
|
267
310
|
|
|
268
311
|
const captureViaObserver = () =>
|
|
@@ -307,6 +350,24 @@ function buildResponseObserverExpression(timeoutMs) {
|
|
|
307
350
|
}, ${timeoutMs});
|
|
308
351
|
});
|
|
309
352
|
|
|
353
|
+
// Check if the last assistant turn has finished (scoped to avoid detecting old turns)
|
|
354
|
+
const isLastAssistantTurnFinished = () => {
|
|
355
|
+
const turns = Array.from(document.querySelectorAll(CONVERSATION_SELECTOR));
|
|
356
|
+
let lastAssistantTurn = null;
|
|
357
|
+
for (let i = turns.length - 1; i >= 0; i--) {
|
|
358
|
+
if (isAssistantTurn(turns[i])) {
|
|
359
|
+
lastAssistantTurn = turns[i];
|
|
360
|
+
break;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
if (!lastAssistantTurn) return false;
|
|
364
|
+
// Check for action buttons in this specific turn
|
|
365
|
+
if (lastAssistantTurn.querySelector(FINISHED_SELECTOR)) return true;
|
|
366
|
+
// Check for "Done" text in this turn's markdown
|
|
367
|
+
const markdowns = lastAssistantTurn.querySelectorAll('.markdown');
|
|
368
|
+
return Array.from(markdowns).some((n) => (n.textContent || '').trim() === 'Done');
|
|
369
|
+
};
|
|
370
|
+
|
|
310
371
|
const waitForSettle = async (snapshot) => {
|
|
311
372
|
const settleWindowMs = 5000;
|
|
312
373
|
const settleIntervalMs = 400;
|
|
@@ -321,9 +382,7 @@ function buildResponseObserverExpression(timeoutMs) {
|
|
|
321
382
|
lastLength = refreshed.text?.length ?? lastLength;
|
|
322
383
|
}
|
|
323
384
|
const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
|
|
324
|
-
const finishedVisible =
|
|
325
|
-
Boolean(document.querySelector(FINISHED_SELECTOR)) ||
|
|
326
|
-
Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
|
|
385
|
+
const finishedVisible = isLastAssistantTurnFinished();
|
|
327
386
|
|
|
328
387
|
if (!stopVisible || finishedVisible) {
|
|
329
388
|
break;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export const CHATGPT_URL = 'https://chatgpt.com/';
|
|
2
|
-
export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.
|
|
2
|
+
export const DEFAULT_MODEL_TARGET = 'ChatGPT 5.2';
|
|
3
3
|
export const COOKIE_URLS = ['https://chatgpt.com', 'https://chat.openai.com', 'https://atlas.openai.com'];
|
|
4
4
|
export const INPUT_SELECTORS = [
|
|
5
5
|
'textarea[data-id="prompt-textarea"]',
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { mkdtemp, rm, mkdir
|
|
1
|
+
import { mkdtemp, rm, mkdir } from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import os from 'node:os';
|
|
4
4
|
import net from 'node:net';
|
|
@@ -12,6 +12,7 @@ import { estimateTokenCount, withRetries, delay } from './utils.js';
|
|
|
12
12
|
import { formatElapsed } from '../oracle/format.js';
|
|
13
13
|
import { CHATGPT_URL } from './constants.js';
|
|
14
14
|
import { BrowserAutomationError } from '../oracle/errors.js';
|
|
15
|
+
import { cleanupStaleProfileState, readChromePid, readDevToolsPort, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from './profileState.js';
|
|
15
16
|
export { CHATGPT_URL, DEFAULT_MODEL_TARGET } from './constants.js';
|
|
16
17
|
export { parseDuration, delay, normalizeChatgptUrl } from './utils.js';
|
|
17
18
|
export async function runBrowserMode(options) {
|
|
@@ -98,6 +99,13 @@ export async function runBrowserMode(options) {
|
|
|
98
99
|
remoteChrome: config.remoteChrome,
|
|
99
100
|
}, userDataDir, logger));
|
|
100
101
|
const chromeHost = chrome.host ?? '127.0.0.1';
|
|
102
|
+
// Persist profile state so future manual-login runs can reuse this Chrome.
|
|
103
|
+
if (manualLogin && chrome.port) {
|
|
104
|
+
await writeDevToolsActivePort(userDataDir, chrome.port);
|
|
105
|
+
if (!reusedChrome && chrome.pid) {
|
|
106
|
+
await writeChromePid(userDataDir, chrome.pid);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
101
109
|
let removeTerminationHooks = null;
|
|
102
110
|
try {
|
|
103
111
|
removeTerminationHooks = registerTerminationHooks(chrome, userDataDir, effectiveKeepBrowser, logger, {
|
|
@@ -533,57 +541,21 @@ async function maybeReuseRunningChrome(userDataDir, logger) {
|
|
|
533
541
|
const port = await readDevToolsPort(userDataDir);
|
|
534
542
|
if (!port)
|
|
535
543
|
return null;
|
|
536
|
-
const
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
clearTimeout(timeout);
|
|
542
|
-
if (!response.ok)
|
|
543
|
-
throw new Error(`HTTP ${response.status}`);
|
|
544
|
-
const pidPath = path.join(userDataDir, 'chrome.pid');
|
|
545
|
-
let pid;
|
|
546
|
-
try {
|
|
547
|
-
const rawPid = (await readFile(pidPath, 'utf8')).trim();
|
|
548
|
-
pid = Number.parseInt(rawPid, 10);
|
|
549
|
-
if (Number.isNaN(pid))
|
|
550
|
-
pid = undefined;
|
|
551
|
-
}
|
|
552
|
-
catch {
|
|
553
|
-
pid = undefined;
|
|
554
|
-
}
|
|
555
|
-
logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
|
|
556
|
-
return {
|
|
557
|
-
port,
|
|
558
|
-
pid,
|
|
559
|
-
kill: async () => { },
|
|
560
|
-
process: undefined,
|
|
561
|
-
};
|
|
562
|
-
}
|
|
563
|
-
catch (error) {
|
|
564
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
565
|
-
logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${message}); launching new Chrome.`);
|
|
544
|
+
const probe = await verifyDevToolsReachable({ port });
|
|
545
|
+
if (!probe.ok) {
|
|
546
|
+
logger(`DevToolsActivePort found for ${userDataDir} but unreachable (${probe.error}); launching new Chrome.`);
|
|
547
|
+
// Safe cleanup: remove stale DevToolsActivePort; only remove lock files if this was an Oracle-owned pid that died.
|
|
548
|
+
await cleanupStaleProfileState(userDataDir, logger, { lockRemovalMode: 'if_oracle_pid_dead' });
|
|
566
549
|
return null;
|
|
567
550
|
}
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
const raw = await readFile(candidate, 'utf8');
|
|
577
|
-
const firstLine = raw.split(/\r?\n/u)[0]?.trim();
|
|
578
|
-
const port = Number.parseInt(firstLine ?? '', 10);
|
|
579
|
-
if (Number.isFinite(port)) {
|
|
580
|
-
return port;
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
catch {
|
|
584
|
-
}
|
|
585
|
-
}
|
|
586
|
-
return null;
|
|
551
|
+
const pid = await readChromePid(userDataDir);
|
|
552
|
+
logger(`Found running Chrome for ${userDataDir}; reusing (DevTools port ${port}${pid ? `, pid ${pid}` : ''})`);
|
|
553
|
+
return {
|
|
554
|
+
port,
|
|
555
|
+
pid: pid ?? undefined,
|
|
556
|
+
kill: async () => { },
|
|
557
|
+
process: undefined,
|
|
558
|
+
};
|
|
587
559
|
}
|
|
588
560
|
async function runRemoteBrowserMode(promptText, attachments, config, logger, options) {
|
|
589
561
|
const remoteChromeConfig = config.remoteChrome;
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
3
|
+
import { execFile } from 'node:child_process';
|
|
4
|
+
import { promisify } from 'node:util';
|
|
5
|
+
const DEVTOOLS_ACTIVE_PORT_FILENAME = 'DevToolsActivePort';
|
|
6
|
+
const DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS = [
|
|
7
|
+
DEVTOOLS_ACTIVE_PORT_FILENAME,
|
|
8
|
+
path.join('Default', DEVTOOLS_ACTIVE_PORT_FILENAME),
|
|
9
|
+
];
|
|
10
|
+
const CHROME_PID_FILENAME = 'chrome.pid';
|
|
11
|
+
const execFileAsync = promisify(execFile);
|
|
12
|
+
export function getDevToolsActivePortPaths(userDataDir) {
|
|
13
|
+
return DEVTOOLS_ACTIVE_PORT_RELATIVE_PATHS.map((relative) => path.join(userDataDir, relative));
|
|
14
|
+
}
|
|
15
|
+
export async function readDevToolsPort(userDataDir) {
|
|
16
|
+
for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
|
|
17
|
+
try {
|
|
18
|
+
const raw = await readFile(candidate, 'utf8');
|
|
19
|
+
const firstLine = raw.split(/\r?\n/u)[0]?.trim();
|
|
20
|
+
const port = Number.parseInt(firstLine ?? '', 10);
|
|
21
|
+
if (Number.isFinite(port)) {
|
|
22
|
+
return port;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
catch {
|
|
26
|
+
// ignore missing/unreadable candidates
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return null;
|
|
30
|
+
}
|
|
31
|
+
export async function writeDevToolsActivePort(userDataDir, port) {
|
|
32
|
+
const contents = `${port}\n/devtools/browser`;
|
|
33
|
+
for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
|
|
34
|
+
try {
|
|
35
|
+
await mkdir(path.dirname(candidate), { recursive: true });
|
|
36
|
+
await writeFile(candidate, contents, 'utf8');
|
|
37
|
+
}
|
|
38
|
+
catch {
|
|
39
|
+
// best effort
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
export async function readChromePid(userDataDir) {
|
|
44
|
+
const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
|
|
45
|
+
try {
|
|
46
|
+
const raw = (await readFile(pidPath, 'utf8')).trim();
|
|
47
|
+
const pid = Number.parseInt(raw, 10);
|
|
48
|
+
if (!Number.isFinite(pid) || pid <= 0) {
|
|
49
|
+
return null;
|
|
50
|
+
}
|
|
51
|
+
return pid;
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
export async function writeChromePid(userDataDir, pid) {
|
|
58
|
+
if (!Number.isFinite(pid) || pid <= 0)
|
|
59
|
+
return;
|
|
60
|
+
const pidPath = path.join(userDataDir, CHROME_PID_FILENAME);
|
|
61
|
+
try {
|
|
62
|
+
await mkdir(path.dirname(pidPath), { recursive: true });
|
|
63
|
+
await writeFile(pidPath, `${Math.trunc(pid)}\n`, 'utf8');
|
|
64
|
+
}
|
|
65
|
+
catch {
|
|
66
|
+
// best effort
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
export function isProcessAlive(pid) {
|
|
70
|
+
if (!Number.isFinite(pid) || pid <= 0)
|
|
71
|
+
return false;
|
|
72
|
+
try {
|
|
73
|
+
process.kill(pid, 0);
|
|
74
|
+
return true;
|
|
75
|
+
}
|
|
76
|
+
catch (error) {
|
|
77
|
+
// EPERM means "exists but no permission"; treat as alive.
|
|
78
|
+
if (error && typeof error === 'object' && 'code' in error && error.code === 'EPERM') {
|
|
79
|
+
return true;
|
|
80
|
+
}
|
|
81
|
+
return false;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
export async function verifyDevToolsReachable({ port, host = '127.0.0.1', attempts = 3, timeoutMs = 3000, }) {
|
|
85
|
+
const versionUrl = `http://${host}:${port}/json/version`;
|
|
86
|
+
for (let attempt = 0; attempt < attempts; attempt++) {
|
|
87
|
+
try {
|
|
88
|
+
const controller = new AbortController();
|
|
89
|
+
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
|
90
|
+
const response = await fetch(versionUrl, { signal: controller.signal });
|
|
91
|
+
clearTimeout(timeout);
|
|
92
|
+
if (!response.ok) {
|
|
93
|
+
throw new Error(`HTTP ${response.status}`);
|
|
94
|
+
}
|
|
95
|
+
return { ok: true };
|
|
96
|
+
}
|
|
97
|
+
catch (error) {
|
|
98
|
+
if (attempt < attempts - 1) {
|
|
99
|
+
await new Promise((resolve) => setTimeout(resolve, 500 * (attempt + 1)));
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
103
|
+
return { ok: false, error: message };
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
return { ok: false, error: 'unreachable' };
|
|
107
|
+
}
|
|
108
|
+
export async function cleanupStaleProfileState(userDataDir, logger, options = {}) {
|
|
109
|
+
for (const candidate of getDevToolsActivePortPaths(userDataDir)) {
|
|
110
|
+
try {
|
|
111
|
+
await rm(candidate, { force: true });
|
|
112
|
+
logger?.(`Removed stale DevToolsActivePort: ${candidate}`);
|
|
113
|
+
}
|
|
114
|
+
catch {
|
|
115
|
+
// ignore cleanup errors
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
const lockRemovalMode = options.lockRemovalMode ?? 'never';
|
|
119
|
+
if (lockRemovalMode === 'never') {
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
122
|
+
const pid = await readChromePid(userDataDir);
|
|
123
|
+
if (!pid) {
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
if (isProcessAlive(pid)) {
|
|
127
|
+
logger?.(`Chrome pid ${pid} still alive; skipping profile lock cleanup`);
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
// Extra safety: if Chrome is running with this profile (but with a different PID, e.g. user relaunched
|
|
131
|
+
// without remote debugging), never delete lock files.
|
|
132
|
+
if (await isChromeUsingUserDataDir(userDataDir)) {
|
|
133
|
+
logger?.('Detected running Chrome using this profile; skipping profile lock cleanup');
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
const lockFiles = [
|
|
137
|
+
path.join(userDataDir, 'lockfile'),
|
|
138
|
+
path.join(userDataDir, 'SingletonLock'),
|
|
139
|
+
path.join(userDataDir, 'SingletonSocket'),
|
|
140
|
+
path.join(userDataDir, 'SingletonCookie'),
|
|
141
|
+
];
|
|
142
|
+
for (const lock of lockFiles) {
|
|
143
|
+
await rm(lock, { force: true }).catch(() => undefined);
|
|
144
|
+
}
|
|
145
|
+
logger?.('Cleaned up stale Chrome profile locks');
|
|
146
|
+
}
|
|
147
|
+
async function isChromeUsingUserDataDir(userDataDir) {
|
|
148
|
+
if (process.platform === 'win32') {
|
|
149
|
+
// On Windows, lockfiles are typically held open and removal should fail anyway; avoid expensive process scans.
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
152
|
+
try {
|
|
153
|
+
const { stdout } = await execFileAsync('ps', ['-ax', '-o', 'command='], { maxBuffer: 10 * 1024 * 1024 });
|
|
154
|
+
const lines = String(stdout ?? '').split('\n');
|
|
155
|
+
const needle = userDataDir;
|
|
156
|
+
for (const line of lines) {
|
|
157
|
+
if (!line)
|
|
158
|
+
continue;
|
|
159
|
+
const lower = line.toLowerCase();
|
|
160
|
+
if (!lower.includes('chrome') && !lower.includes('chromium'))
|
|
161
|
+
continue;
|
|
162
|
+
if (line.includes(needle) && lower.includes('user-data-dir')) {
|
|
163
|
+
return true;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
catch {
|
|
168
|
+
// best effort
|
|
169
|
+
}
|
|
170
|
+
return false;
|
|
171
|
+
}
|
|
@@ -6,10 +6,32 @@ import { isKnownModel } from '../oracle/modelResolver.js';
|
|
|
6
6
|
import { buildPromptMarkdown } from '../oracle/promptAssembly.js';
|
|
7
7
|
import { buildAttachmentPlan } from './policies.js';
|
|
8
8
|
const DEFAULT_BROWSER_INLINE_CHAR_BUDGET = 60_000;
|
|
9
|
+
const MEDIA_EXTENSIONS = new Set([
|
|
10
|
+
'.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v',
|
|
11
|
+
'.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a',
|
|
12
|
+
'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.heic', '.heif',
|
|
13
|
+
'.pdf',
|
|
14
|
+
]);
|
|
15
|
+
export function isMediaFile(filePath) {
|
|
16
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
17
|
+
return MEDIA_EXTENSIONS.has(ext);
|
|
18
|
+
}
|
|
9
19
|
export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
10
20
|
const cwd = deps.cwd ?? process.cwd();
|
|
11
21
|
const readFilesFn = deps.readFilesImpl ?? readFiles;
|
|
12
|
-
const
|
|
22
|
+
const allFilePaths = runOptions.file ?? [];
|
|
23
|
+
const textFilePaths = allFilePaths.filter((f) => !isMediaFile(f));
|
|
24
|
+
const mediaFilePaths = allFilePaths.filter((f) => isMediaFile(f));
|
|
25
|
+
const mediaAttachments = await Promise.all(mediaFilePaths.map(async (filePath) => {
|
|
26
|
+
const resolvedPath = path.isAbsolute(filePath) ? filePath : path.resolve(cwd, filePath);
|
|
27
|
+
const stats = await fs.stat(resolvedPath);
|
|
28
|
+
return {
|
|
29
|
+
path: resolvedPath,
|
|
30
|
+
displayPath: path.relative(cwd, resolvedPath) || path.basename(resolvedPath),
|
|
31
|
+
sizeBytes: stats.size,
|
|
32
|
+
};
|
|
33
|
+
}));
|
|
34
|
+
const files = await readFilesFn(textFilePaths, { cwd });
|
|
13
35
|
const basePrompt = (runOptions.prompt ?? '').trim();
|
|
14
36
|
const userPrompt = basePrompt;
|
|
15
37
|
const systemPrompt = runOptions.system?.trim() || '';
|
|
@@ -40,9 +62,10 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
|
40
62
|
.filter(Boolean)
|
|
41
63
|
.join('\n\n')
|
|
42
64
|
.trim();
|
|
43
|
-
const attachments = selectedPlan.attachments
|
|
65
|
+
const attachments = [...selectedPlan.attachments, ...mediaAttachments];
|
|
44
66
|
const shouldBundle = selectedPlan.shouldBundle;
|
|
45
67
|
let bundleText = null;
|
|
68
|
+
let bundled = null;
|
|
46
69
|
if (shouldBundle) {
|
|
47
70
|
const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
|
|
48
71
|
const bundlePath = path.join(bundleDir, 'attachments-bundle.txt');
|
|
@@ -59,6 +82,8 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
|
59
82
|
displayPath: bundlePath,
|
|
60
83
|
sizeBytes: Buffer.byteLength(bundleText, 'utf8'),
|
|
61
84
|
});
|
|
85
|
+
attachments.push(...mediaAttachments);
|
|
86
|
+
bundled = { originalCount: sections.length, bundlePath };
|
|
62
87
|
}
|
|
63
88
|
const inlineFileCount = selectedPlan.inlineFileCount;
|
|
64
89
|
const modelConfig = isKnownModel(runOptions.model) ? MODEL_CONFIGS[runOptions.model] : MODEL_CONFIGS['gpt-5.1'];
|
|
@@ -85,7 +110,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
|
85
110
|
let fallback = null;
|
|
86
111
|
if (attachmentsPolicy === 'auto' && selectedPlan.mode === 'inline' && sections.length > 0) {
|
|
87
112
|
const fallbackComposerText = baseComposerSections.join('\n\n').trim();
|
|
88
|
-
const fallbackAttachments = uploadPlan.attachments
|
|
113
|
+
const fallbackAttachments = [...uploadPlan.attachments, ...mediaAttachments];
|
|
89
114
|
let fallbackBundled = null;
|
|
90
115
|
if (uploadPlan.shouldBundle) {
|
|
91
116
|
const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oracle-browser-bundle-'));
|
|
@@ -103,6 +128,7 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
|
103
128
|
displayPath: bundlePath,
|
|
104
129
|
sizeBytes: Buffer.byteLength(fallbackBundleText, 'utf8'),
|
|
105
130
|
});
|
|
131
|
+
fallbackAttachments.push(...mediaAttachments);
|
|
106
132
|
fallbackBundled = { originalCount: sections.length, bundlePath };
|
|
107
133
|
}
|
|
108
134
|
fallback = {
|
|
@@ -121,8 +147,6 @@ export async function assembleBrowserPrompt(runOptions, deps = {}) {
|
|
|
121
147
|
attachmentsPolicy,
|
|
122
148
|
attachmentMode: selectedPlan.mode,
|
|
123
149
|
fallback,
|
|
124
|
-
bundled
|
|
125
|
-
? { originalCount: sections.length, bundlePath: attachments[0].displayPath }
|
|
126
|
-
: null,
|
|
150
|
+
bundled,
|
|
127
151
|
};
|
|
128
152
|
}
|
|
@@ -5,11 +5,6 @@ import { runBrowserMode } from '../browserMode.js';
|
|
|
5
5
|
import { assembleBrowserPrompt } from './prompt.js';
|
|
6
6
|
import { BrowserAutomationError } from '../oracle/errors.js';
|
|
7
7
|
export async function runBrowserSessionExecution({ runOptions, browserConfig, cwd, log }, deps = {}) {
|
|
8
|
-
if (runOptions.model.startsWith('gemini')) {
|
|
9
|
-
throw new BrowserAutomationError('Gemini models are not available in browser mode. Re-run with --engine api.', {
|
|
10
|
-
stage: 'preflight',
|
|
11
|
-
});
|
|
12
|
-
}
|
|
13
8
|
const assemblePrompt = deps.assemblePrompt ?? assembleBrowserPrompt;
|
|
14
9
|
const executeBrowser = deps.executeBrowser ?? runBrowserMode;
|
|
15
10
|
const promptArtifacts = await assemblePrompt(runOptions, { cwd });
|