@steipete/oracle 0.8.6 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +76 -4
  2. package/dist/bin/oracle-cli.js +188 -7
  3. package/dist/src/browser/actions/modelSelection.js +60 -8
  4. package/dist/src/browser/actions/navigation.js +2 -1
  5. package/dist/src/browser/constants.js +1 -1
  6. package/dist/src/browser/index.js +73 -19
  7. package/dist/src/browser/providerDomFlow.js +17 -0
  8. package/dist/src/browser/providers/chatgptDomProvider.js +49 -0
  9. package/dist/src/browser/providers/geminiDeepThinkDomProvider.js +245 -0
  10. package/dist/src/browser/providers/index.js +2 -0
  11. package/dist/src/cli/browserConfig.js +12 -6
  12. package/dist/src/cli/detach.js +5 -2
  13. package/dist/src/cli/fileSize.js +11 -0
  14. package/dist/src/cli/help.js +3 -3
  15. package/dist/src/cli/markdownBundle.js +5 -1
  16. package/dist/src/cli/options.js +40 -3
  17. package/dist/src/cli/runOptions.js +11 -3
  18. package/dist/src/cli/sessionDisplay.js +91 -2
  19. package/dist/src/cli/sessionLineage.js +56 -0
  20. package/dist/src/cli/sessionRunner.js +20 -2
  21. package/dist/src/cli/sessionTable.js +2 -1
  22. package/dist/src/cli/tui/index.js +2 -0
  23. package/dist/src/gemini-web/browserSessionManager.js +76 -0
  24. package/dist/src/gemini-web/client.js +16 -5
  25. package/dist/src/gemini-web/executionClients.js +1 -0
  26. package/dist/src/gemini-web/executionMode.js +18 -0
  27. package/dist/src/gemini-web/executor.js +273 -120
  28. package/dist/src/mcp/tools/consult.js +34 -21
  29. package/dist/src/oracle/client.js +42 -13
  30. package/dist/src/oracle/config.js +43 -7
  31. package/dist/src/oracle/errors.js +2 -2
  32. package/dist/src/oracle/files.js +20 -5
  33. package/dist/src/oracle/gemini.js +3 -0
  34. package/dist/src/oracle/request.js +7 -2
  35. package/dist/src/oracle/run.js +22 -12
  36. package/dist/src/sessionManager.js +4 -0
  37. package/dist/vendor/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
  38. package/dist/vendor/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
  39. package/package.json +18 -18
  40. package/vendor/oracle-notifier/OracleNotifier.app/Contents/CodeResources +0 -0
  41. package/vendor/oracle-notifier/OracleNotifier.app/Contents/MacOS/OracleNotifier +0 -0
@@ -5,7 +5,7 @@ import net from 'node:net';
5
5
  import { resolveBrowserConfig } from './config.js';
6
6
  import { launchChrome, registerTerminationHooks, hideChromeWindow, connectToRemoteChrome, closeRemoteChromeTarget, connectWithNewTab, closeTab, } from './chromeLifecycle.js';
7
7
  import { syncCookies } from './cookies.js';
8
- import { navigateToChatGPT, navigateToPromptReadyWithFallback, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, installJavaScriptDialogAutoDismissal, ensureModelSelection, submitPrompt, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from './pageActions.js';
8
+ import { navigateToChatGPT, navigateToPromptReadyWithFallback, ensureNotBlocked, ensureLoggedIn, ensurePromptReady, installJavaScriptDialogAutoDismissal, ensureModelSelection, clearPromptComposer, waitForAssistantResponse, captureAssistantMarkdown, clearComposerAttachments, uploadAttachmentFile, waitForAttachmentCompletion, waitForUserTurnAttachments, readAssistantSnapshot, } from './pageActions.js';
9
9
  import { INPUT_SELECTORS } from './constants.js';
10
10
  import { uploadAttachmentViaDataTransfer } from './actions/remoteFileTransfer.js';
11
11
  import { ensureThinkingTime } from './actions/thinkingTime.js';
@@ -15,8 +15,21 @@ import { CHATGPT_URL, CONVERSATION_TURN_SELECTOR, DEFAULT_MODEL_STRATEGY } from
15
15
  import { BrowserAutomationError } from '../oracle/errors.js';
16
16
  import { alignPromptEchoPair, buildPromptEchoMatcher } from './reattachHelpers.js';
17
17
  import { cleanupStaleProfileState, acquireProfileRunLock, readChromePid, readDevToolsPort, shouldCleanupManualLoginProfileState, verifyDevToolsReachable, writeChromePid, writeDevToolsActivePort, } from './profileState.js';
18
+ import { runProviderSubmissionFlow } from './providerDomFlow.js';
19
+ import { chatgptDomProvider } from './providers/index.js';
18
20
  export { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from './constants.js';
19
21
  export { parseDuration, delay, normalizeChatgptUrl, isTemporaryChatUrl } from './utils.js';
22
/**
 * Reports whether `error` is a BrowserAutomationError tagged with the
 * 'cloudflare-challenge' stage in its details.
 *
 * @param {unknown} error - Value caught from a failed browser run.
 * @returns {boolean} True only for a BrowserAutomationError whose
 *   `details.stage` equals 'cloudflare-challenge'.
 */
function isCloudflareChallengeError(error) {
    const isAutomationError = error instanceof BrowserAutomationError;
    return isAutomationError && error.details?.stage === 'cloudflare-challenge';
}
27
/**
 * Decides whether the browser should be left open after a failure.
 *
 * @param {unknown} error - The normalized failure.
 * @param {unknown} headless - Truthy when the run is headless.
 * @returns {boolean} False for headless runs; otherwise whether the failure
 *   is a Cloudflare challenge error.
 */
function shouldPreserveBrowserOnError(error, headless) {
    if (headless) {
        return false;
    }
    return isCloudflareChallengeError(error);
}
30
/**
 * Exported pass-through to the module-private shouldPreserveBrowserOnError.
 *
 * @param {unknown} error - The failure to classify.
 * @param {unknown} headless - Truthy when the run is headless.
 * @returns {boolean} Whatever shouldPreserveBrowserOnError returns.
 */
export function shouldPreserveBrowserOnErrorForTest(error, headless) {
    const preserve = shouldPreserveBrowserOnError(error, headless);
    return preserve;
}
20
33
  export async function runBrowserMode(options) {
21
34
  const promptText = options.prompt?.trim();
22
35
  if (!promptText) {
@@ -135,6 +148,7 @@ export async function runBrowserMode(options) {
135
148
  let stopThinkingMonitor = null;
136
149
  let removeDialogHandler = null;
137
150
  let appliedCookies = 0;
151
+ let preserveBrowserOnError = false;
138
152
  try {
139
153
  try {
140
154
  const strictTabIsolation = Boolean(manualLogin && reusedChrome);
@@ -413,17 +427,25 @@ export async function runBrowserMode(options) {
413
427
  let baselineTurns = await readConversationTurnCount(Runtime, logger);
414
428
  // Learned: return baselineTurns so assistant polling can ignore earlier content.
415
429
  const sendAttachmentNames = attachmentWaitTimedOut ? [] : attachmentNames;
416
- const committedTurns = await submitPrompt({
430
+ const providerState = {
417
431
  runtime: Runtime,
418
432
  input: Input,
419
- attachmentNames: sendAttachmentNames,
420
- baselineTurns: baselineTurns ?? undefined,
433
+ logger,
434
+ timeoutMs: config.timeoutMs,
421
435
  inputTimeoutMs: config.inputTimeoutMs ?? undefined,
422
- }, prompt, logger);
423
- if (typeof committedTurns === 'number' && Number.isFinite(committedTurns)) {
424
- if (baselineTurns === null || committedTurns > baselineTurns) {
425
- baselineTurns = Math.max(0, committedTurns - 1);
426
- }
436
+ baselineTurns: baselineTurns ?? undefined,
437
+ attachmentNames: sendAttachmentNames,
438
+ };
439
+ await runProviderSubmissionFlow(chatgptDomProvider, {
440
+ prompt,
441
+ evaluate: async () => undefined,
442
+ delay,
443
+ log: logger,
444
+ state: providerState,
445
+ });
446
+ const providerBaselineTurns = providerState.baselineTurns;
447
+ if (typeof providerBaselineTurns === 'number' && Number.isFinite(providerBaselineTurns)) {
448
+ baselineTurns = providerBaselineTurns;
427
449
  }
428
450
  if (attachmentNames.length > 0) {
429
451
  if (attachmentWaitTimedOut) {
@@ -731,6 +753,28 @@ export async function runBrowserMode(options) {
731
753
  stopThinkingMonitor?.();
732
754
  const socketClosed = connectionClosedUnexpectedly || isWebSocketClosureError(normalizedError);
733
755
  connectionClosedUnexpectedly = connectionClosedUnexpectedly || socketClosed;
756
+ if (shouldPreserveBrowserOnError(normalizedError, config.headless)) {
757
+ preserveBrowserOnError = true;
758
+ const runtime = {
759
+ chromePid: chrome.pid,
760
+ chromePort: chrome.port,
761
+ chromeHost,
762
+ userDataDir,
763
+ chromeTargetId: lastTargetId,
764
+ tabUrl: lastUrl,
765
+ controllerPid: process.pid,
766
+ };
767
+ const reuseProfileHint = `oracle --engine browser --browser-manual-login ` +
768
+ `--browser-manual-login-profile-dir ${JSON.stringify(userDataDir)}`;
769
+ await emitRuntimeHint();
770
+ logger('Cloudflare challenge detected; leaving browser open so you can complete the check.');
771
+ logger(`Reuse this browser profile with: ${reuseProfileHint}`);
772
+ throw new BrowserAutomationError('Cloudflare challenge detected. Complete the “Just a moment…” check in the open browser, then rerun.', {
773
+ stage: 'cloudflare-challenge',
774
+ runtime,
775
+ reuseProfileHint,
776
+ }, normalizedError);
777
+ }
734
778
  if (!socketClosed) {
735
779
  logger(`Failed to complete ChatGPT run: ${normalizedError.message}`);
736
780
  if ((config.debug || process.env.CHATGPT_DEVTOOLS_TRACE === '1') && normalizedError.stack) {
@@ -773,7 +817,8 @@ export async function runBrowserMode(options) {
773
817
  }
774
818
  removeDialogHandler?.();
775
819
  removeTerminationHooks?.();
776
- if (!effectiveKeepBrowser) {
820
+ const keepBrowserOpen = effectiveKeepBrowser || preserveBrowserOnError;
821
+ if (!keepBrowserOpen) {
777
822
  if (!connectionClosedUnexpectedly) {
778
823
  try {
779
824
  await chrome.kill();
@@ -1080,17 +1125,25 @@ async function runRemoteBrowserMode(promptText, attachments, config, logger, opt
1080
1125
  logger('All attachments uploaded');
1081
1126
  }
1082
1127
  let baselineTurns = await readConversationTurnCount(Runtime, logger);
1083
- const committedTurns = await submitPrompt({
1128
+ const providerState = {
1084
1129
  runtime: Runtime,
1085
1130
  input: Input,
1086
- attachmentNames,
1087
- baselineTurns: baselineTurns ?? undefined,
1131
+ logger,
1132
+ timeoutMs: config.timeoutMs,
1088
1133
  inputTimeoutMs: config.inputTimeoutMs ?? undefined,
1089
- }, prompt, logger);
1090
- if (typeof committedTurns === 'number' && Number.isFinite(committedTurns)) {
1091
- if (baselineTurns === null || committedTurns > baselineTurns) {
1092
- baselineTurns = Math.max(0, committedTurns - 1);
1093
- }
1134
+ baselineTurns: baselineTurns ?? undefined,
1135
+ attachmentNames,
1136
+ };
1137
+ await runProviderSubmissionFlow(chatgptDomProvider, {
1138
+ prompt,
1139
+ evaluate: async () => undefined,
1140
+ delay,
1141
+ log: logger,
1142
+ state: providerState,
1143
+ });
1144
+ const providerBaselineTurns = providerState.baselineTurns;
1145
+ if (typeof providerBaselineTurns === 'number' && Number.isFinite(providerBaselineTurns)) {
1146
+ baselineTurns = providerBaselineTurns;
1094
1147
  }
1095
1148
  return { baselineTurns, baselineAssistantText };
1096
1149
  };
@@ -1380,11 +1433,12 @@ export { navigateToChatGPT, ensureNotBlocked, ensurePromptReady, ensureModelSele
1380
1433
  export async function maybeReuseRunningChromeForTest(userDataDir, logger, options = {}) {
1381
1434
  return maybeReuseRunningChrome(userDataDir, logger, options);
1382
1435
  }
1383
/**
 * Reports whether an error message indicates that the WebSocket connection or
 * the inspected target was closed.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * phrases. Values without a string `message` (including null/undefined) are
 * treated as "not a closure error" instead of throwing, so the function is
 * safe to call on anything caught in a `catch` clause.
 *
 * @param {unknown} error - Error-like value to classify.
 * @returns {boolean} True when the message contains a known closure phrase.
 */
export function isWebSocketClosureError(error) {
    const rawMessage = error?.message;
    if (typeof rawMessage !== 'string') {
        return false;
    }
    const message = rawMessage.toLowerCase();
    const closureMarkers = [
        'websocket connection closed',
        'websocket is closed',
        'websocket error',
        'inspected target navigated or closed',
        'target closed',
    ];
    return closureMarkers.some((marker) => message.includes(marker));
}
1390
1444
  export function formatThinkingLog(startedAt, now, message, locatorSuffix) {
@@ -0,0 +1,17 @@
1
/**
 * Drives the submission half of a provider adapter, awaiting each step in
 * order: waitForUi, selectMode (only when the adapter defines it),
 * typePrompt, submitPrompt.
 *
 * @param {object} adapter - Provider adapter exposing the step methods.
 * @param {object} ctx - Context object handed unchanged to every step.
 * @returns {Promise<void>}
 */
export async function runProviderSubmissionFlow(adapter, ctx) {
    await adapter.waitForUi(ctx);
    const hasModeStep = Boolean(adapter.selectMode);
    if (hasModeStep) {
        await adapter.selectMode(ctx);
    }
    await adapter.typePrompt(ctx);
    await adapter.submitPrompt(ctx);
}
9
/**
 * Runs the full adapter flow: submission steps, then waitForResponse, then
 * the optional extractThoughts step.
 *
 * @param {object} adapter - Provider adapter exposing the step methods.
 * @param {object} ctx - Context object handed unchanged to every step.
 * @returns {Promise<object>} The fields of the adapter's response plus a
 *   `thoughts` property (null when the adapter has no extractThoughts).
 */
export async function runProviderDomFlow(adapter, ctx) {
    await runProviderSubmissionFlow(adapter, ctx);
    const response = await adapter.waitForResponse(ctx);
    let thoughts = null;
    if (adapter.extractThoughts) {
        thoughts = await adapter.extractThoughts(ctx);
    }
    return { ...response, thoughts };
}
15
/**
 * Combines several selectors into one comma-separated selector list.
 *
 * @param {string[]} selectors - Individual selectors.
 * @returns {string} The selectors joined with ", ".
 */
export function joinSelectors(selectors) {
    const separator = ', ';
    return selectors.join(separator);
}
@@ -0,0 +1,49 @@
1
+ import { ensurePromptReady } from '../actions/navigation.js';
2
+ import { submitPrompt } from '../actions/promptComposer.js';
3
+ import { waitForAssistantResponse } from '../actions/assistantResponse.js';
4
/**
 * Returns `ctx.state` after checking that it carries the handles this
 * provider needs.
 *
 * @param {object} ctx - Flow context.
 * @returns {object} The same `ctx.state` object.
 * @throws {Error} When state, or its runtime, input or logger, is missing.
 */
function requireState(ctx) {
    const state = ctx.state;
    const isComplete = state?.runtime && state?.input && state?.logger;
    if (!isComplete) {
        throw new Error('chatgptDomProvider requires runtime/input/logger in context.state.');
    }
    return state;
}
11
/**
 * Waits for the prompt to be ready by delegating to ensurePromptReady, using
 * the state's inputTimeoutMs or 30 seconds when that is null/undefined.
 *
 * @param {object} ctx - Flow context; `ctx.state` must pass requireState.
 * @returns {Promise<void>}
 */
async function waitForUi(ctx) {
    const { runtime, inputTimeoutMs, logger } = requireState(ctx);
    const readyTimeoutMs = inputTimeoutMs ?? 30_000;
    await ensurePromptReady(runtime, readyTimeoutMs, logger);
}
15
/**
 * Intentionally empty adapter step: for ChatGPT, submitPrompt() handles both
 * typing and sending.
 *
 * @param {object} _ctx - Unused flow context.
 * @returns {Promise<void>}
 */
async function typePrompt(_ctx) {
    // Nothing to do here; see submitPromptViaAdapter.
}
18
/**
 * Submits the prompt through submitPrompt() and records the outcome on the
 * shared state object.
 *
 * Writes `state.committedTurns` (the returned turn count when it is a finite
 * number, otherwise null). When a count was recorded and there is no baseline
 * yet, or the count exceeds the baseline, `state.baselineTurns` becomes
 * `max(0, committedTurns - 1)`.
 *
 * @param {object} ctx - Flow context; reads `ctx.prompt` and `ctx.state`.
 * @returns {Promise<void>}
 */
async function submitPromptViaAdapter(ctx) {
    const state = requireState(ctx);
    const request = {
        runtime: state.runtime,
        input: state.input,
        attachmentNames: state.attachmentNames ?? [],
        baselineTurns: state.baselineTurns ?? undefined,
        inputTimeoutMs: state.inputTimeoutMs ?? undefined,
    };
    const submitted = await submitPrompt(request, ctx.prompt, state.logger);
    // Number.isFinite is false for every non-number, so no typeof check is needed.
    const turns = Number.isFinite(submitted) ? submitted : null;
    state.committedTurns = turns;
    if (turns === null) {
        return;
    }
    const baseline = state.baselineTurns;
    if (baseline == null || turns > baseline) {
        state.baselineTurns = Math.max(0, turns - 1);
    }
}
34
/**
 * Waits for the assistant answer via waitForAssistantResponse and returns its
 * text, html and meta fields.
 *
 * @param {object} ctx - Flow context; `ctx.state` must pass requireState.
 * @returns {Promise<{text: *, html: *, meta: *}>}
 */
async function waitForResponse(ctx) {
    const state = requireState(ctx);
    const baseline = state.baselineTurns ?? undefined;
    const answer = await waitForAssistantResponse(state.runtime, state.timeoutMs, state.logger, baseline);
    const { text, html, meta } = answer;
    return { text, html, meta };
}
43
/**
 * ChatGPT adapter object for the provider flow helpers. It defines no
 * selectMode or extractThoughts step.
 */
export const chatgptDomProvider = {
    providerName: 'chatgpt-web',
    waitForUi,
    // No-op step: typing happens inside the submit step.
    typePrompt,
    submitPrompt: submitPromptViaAdapter,
    waitForResponse,
};
@@ -0,0 +1,245 @@
1
+ import { joinSelectors } from '../providerDomFlow.js';
2
+ const UI_TIMEOUT_MS = 60_000;
3
+ const RESPONSE_TIMEOUT_MS = 10 * 60_000;
4
/**
 * Selector candidates used by the Gemini Deep Think provider, keyed by the UI
 * element they target. Each value is a list of alternative selectors.
 */
export const GEMINI_DEEP_THINK_SELECTORS = {
    // Prompt composer and send control.
    input: [
        'rich-textarea .ql-editor',
        '[role="textbox"][aria-label*="prompt" i]',
        'div[contenteditable="true"]',
    ],
    sendButton: [
        'button.send-button',
        'button[aria-label="Send message"]',
    ],
    // Tools menu and Deep Think toggle.
    toolsButton: [
        'button.toolbox-drawer-button',
        'button[aria-label="Tools"]',
    ],
    toolsMenuItem: [
        '[role="menuitemcheckbox"]',
        '.toolbox-drawer-item-list-button',
    ],
    deepThinkActive: [
        '.toolbox-drawer-item-deselect-button',
        'button[aria-label*="Deselect Deep Think"]',
    ],
    // File upload controls.
    uploadButton: [
        'button[aria-label="Open upload file menu"]',
        '.upload-card-button',
    ],
    uploadMenuItem: ['[role="menuitem"]'],
    uploadTrigger: [
        '.hidden-local-file-upload-button',
        '.hidden-local-upload-button',
    ],
    uploaderContainer: [
        '.uploader-button-container',
        '.file-uploader',
    ],
    uploaderElement: ['uploader.upload-button'],
    userTurnAttachment: ['.file-preview-container'],
    // Conversation turns and response state.
    responseTurn: ['model-response'],
    responseText: [
        'message-content',
        '.model-response-text message-content',
    ],
    responseComplete: ['.response-footer.complete'],
    userQuery: ['user-query'],
    userQueryText: [
        'user-query-content',
        '.query-text',
    ],
    spinner: ['[role="progressbar"]'],
    // Thoughts panel.
    thoughtsToggle: [
        '.thoughts-header-button',
        '[data-test-id="thoughts-header-button"]',
    ],
    thoughtsContent: [
        'model-thoughts',
        '[data-test-id="model-thoughts"]',
    ],
    hasThoughts: ['.has-thoughts'],
};
26
/**
 * Joins the selectors and JSON-encodes the result, producing a quoted string
 * literal that can be interpolated into page-script source text.
 *
 * @param {string[]} selectors - Alternative selectors.
 * @returns {string} JSON string literal of the joined selector list.
 */
function asSelectorLiteral(selectors) {
    const combined = joinSelectors(selectors);
    return JSON.stringify(combined);
}
29
/**
 * Resolves the UI and response timeouts for a flow context.
 *
 * `state.inputTimeoutMs` and `state.timeoutMs` are used when they are finite
 * numbers, raised to at least 1000 ms; otherwise UI_TIMEOUT_MS and
 * RESPONSE_TIMEOUT_MS apply respectively.
 *
 * @param {object} ctx - Flow context; `ctx.state` may be absent.
 * @returns {{uiTimeoutMs: number, responseTimeoutMs: number}}
 */
function readTimeouts(ctx) {
    const state = ctx.state;
    // Number.isFinite rejects non-numbers, NaN and infinities in one check.
    const clampIfFinite = (value) => (Number.isFinite(value) ? Math.max(1_000, value) : undefined);
    return {
        uiTimeoutMs: clampIfFinite(state?.inputTimeoutMs) ?? UI_TIMEOUT_MS,
        responseTimeoutMs: clampIfFinite(state?.timeoutMs) ?? RESPONSE_TIMEOUT_MS,
    };
}
39
/**
 * Polls the page once per second until a prompt editor matching the input
 * selectors exists, or the UI timeout elapses.
 *
 * @param {object} ctx - Flow context providing evaluate, delay and optional log.
 * @returns {Promise<void>} Resolves as soon as the editor is found.
 * @throws {Error} On timeout; the message mentions sign-in when any poll saw
 *   a Google sign-in page.
 */
async function waitForUi(ctx) {
    ctx.log?.('[gemini-web] Waiting for Gemini UI to load...');
    const inputSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.input);
    const { uiTimeoutMs } = readTimeouts(ctx);
    const deadline = Date.now() + uiTimeoutMs;
    const probeExpression = `(() => {
        const editor = document.querySelector(${inputSelector});
        const href = location.href || '';
        const bodyText = (document.body?.innerText || '').toLowerCase();
        const requiresLogin =
            href.includes('accounts.google.com') ||
            (bodyText.includes('sign in') && bodyText.includes('google'));
        return { ready: Boolean(editor), requiresLogin };
    })()`;
    let loginSeen = false;
    while (Date.now() < deadline) {
        const probe = await ctx.evaluate(probeExpression);
        if (probe?.ready) {
            return;
        }
        if (probe?.requiresLogin) {
            // Remembered so a later timeout can report the sign-in flow.
            loginSeen = true;
        }
        await ctx.delay(1_000);
    }
    if (loginSeen) {
        throw new Error('Gemini is showing a sign-in flow. Please sign in in Chrome and retry.');
    }
    throw new Error('Timed out waiting for Gemini UI prompt input to become ready.');
}
72
/**
 * Switches the composer into Deep Think mode in three steps: click the tools
 * button, click the menu item whose text contains "deep think", then confirm
 * that the active-mode control mentions Deep Think.
 *
 * @param {object} ctx - Flow context providing evaluate and delay.
 * @returns {Promise<void>}
 * @throws {Error} When the tools button or menu item cannot be found, or the
 *   mode does not appear selected afterwards.
 */
async function selectMode(ctx) {
    // Step 1: open the tools menu.
    const toolsSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.toolsButton);
    const menuOpened = await ctx.evaluate(`(() => {
        const btn = document.querySelector(${toolsSelector});
        if (btn instanceof HTMLElement) {
            btn.click();
            return 'clicked';
        }
        return 'not-found';
    })()`);
    if (menuOpened !== 'clicked') {
        throw new Error('Unable to open Gemini tools menu; Deep Think toggle is not accessible.');
    }
    await ctx.delay(1_000);

    // Step 2: pick the "Deep Think" entry from the menu.
    const menuItemSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.toolsMenuItem);
    const itemChosen = await ctx.evaluate(`(() => {
        const items = Array.from(document.querySelectorAll(${menuItemSelector}));
        for (const item of items) {
            const text = item.textContent?.trim().toLowerCase() ?? '';
            if (!text.includes('deep think')) continue;
            if (item instanceof HTMLElement) item.click();
            return 'clicked';
        }
        return 'not-found';
    })()`);
    if (itemChosen !== 'clicked') {
        throw new Error('Unable to select "Deep Think" from Gemini tools menu.');
    }
    await ctx.delay(1_500);

    // Step 3: verify the mode is shown as active.
    const activeSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.deepThinkActive);
    const isActive = await ctx.evaluate(`(() => {
        const active = document.querySelector(${activeSelector});
        if (!(active instanceof HTMLElement)) return false;
        const label = active.getAttribute('aria-label')?.toLowerCase() ?? '';
        const text = active.textContent?.toLowerCase() ?? '';
        return label.includes('deep think') || text.includes('deep think');
    })()`);
    if (!isActive) {
        throw new Error('Deep Think did not appear selected after clicking the tools menu item.');
    }
}
113
/**
 * Writes `ctx.prompt` into the prompt editor. The page script clears the
 * editor, then inserts the text with document.execCommand('insertText') when
 * that function exists, or by setting textContent and dispatching an input
 * event otherwise.
 *
 * @param {object} ctx - Flow context providing prompt, evaluate, delay and
 *   optional log.
 * @returns {Promise<void>}
 * @throws {Error} When the editor is missing or still empty after typing.
 */
async function typePrompt(ctx) {
    ctx.log?.('[gemini-web] Typing prompt...');
    const inputSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.input);
    // JSON-encode once so the prompt is embedded as a quoted literal.
    const promptLiteral = JSON.stringify(ctx.prompt);
    const status = await ctx.evaluate(`(() => {
        const editor = document.querySelector(${inputSelector});
        if (!(editor instanceof HTMLElement)) return 'no-editor';
        editor.focus();
        editor.textContent = '';
        if (typeof document.execCommand === 'function') {
            document.execCommand('insertText', false, ${promptLiteral});
        } else {
            editor.textContent = ${promptLiteral};
            editor.dispatchEvent(new InputEvent('input', { bubbles: true, data: ${promptLiteral} }));
        }
        const typed = (editor.textContent || '').trim().length > 0;
        return typed ? 'typed' : 'empty';
    })()`);
    if (status !== 'typed') {
        throw new Error(`Failed to type Gemini prompt (status=${status ?? 'unknown'}).`);
    }
    await ctx.delay(500);
}
135
/**
 * Sends the typed prompt. The page script clicks the send button when one is
 * found; otherwise it dispatches Enter keydown/keyup events on the editor.
 *
 * @param {object} ctx - Flow context providing evaluate and optional log.
 * @returns {Promise<void>}
 * @throws {Error} When neither the send button nor the editor is found.
 */
async function submitPrompt(ctx) {
    ctx.log?.('[gemini-web] Sending prompt...');
    const inputSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.input);
    const sendSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.sendButton);
    const outcome = await ctx.evaluate(`(() => {
        const btn = document.querySelector(${sendSelector});
        if (btn instanceof HTMLElement) {
            btn.click();
            return 'clicked';
        }
        const editor = document.querySelector(${inputSelector});
        if (editor instanceof HTMLElement) {
            editor.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true }));
            editor.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true }));
            return 'enter';
        }
        return 'not-found';
    })()`);
    const wasSent = outcome === 'clicked' || outcome === 'enter';
    if (!wasSent) {
        throw new Error('Failed to submit prompt in Gemini Deep Think mode (send control not found).');
    }
}
157
/**
 * Polls the last response turn every three seconds until the page script
 * reports it as done with non-empty text, or the response timeout elapses.
 *
 * The page script returns a JSON string whose status is 'waiting' (no
 * response turn yet), 'generating', 'streaming' (text present, no visible
 * spinner, no completion footer) or 'done' (completion footer plus text).
 * A progress line is logged at most once every ten seconds.
 *
 * @param {object} ctx - Flow context providing evaluate, delay and optional log.
 * @returns {Promise<{text: string}>} The completed response text.
 * @throws {Error} When no completed response arrives before the deadline.
 */
async function waitForResponse(ctx) {
    ctx.log?.('[gemini-web] Waiting for Deep Think response (this may take a while)...');
    const turnSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.responseTurn);
    const textSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.responseText);
    const completeSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.responseComplete);
    const spinnerSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.spinner);
    const { responseTimeoutMs } = readTimeouts(ctx);
    const deadline = Date.now() + responseTimeoutMs;
    const pollExpression = `(() => {
        const turns = document.querySelectorAll(${turnSelector});
        if (turns.length === 0) return JSON.stringify({ status: 'waiting' });
        const lastTurn = turns[turns.length - 1];
        const footer = lastTurn.querySelector(${completeSelector});
        const content = lastTurn.querySelector(${textSelector});
        const text = content?.textContent?.trim() ?? '';
        const lower = text.toLowerCase();
        if (lower.includes('generating your response') || lower.includes('check back later') || lower.includes("i'm on it")) {
            return JSON.stringify({ status: 'generating' });
        }
        if (footer && text.length > 0) {
            return JSON.stringify({ status: 'done', text });
        }
        const spinners = lastTurn.querySelectorAll(${spinnerSelector});
        const visibleSpinners = Array.from(spinners).filter((s) => s instanceof HTMLElement && s.offsetParent !== null);
        if (text.length > 0 && visibleSpinners.length === 0 && !footer) {
            return JSON.stringify({ status: 'streaming' });
        }
        return JSON.stringify({ status: 'generating' });
    })()`;
    let lastProgressLogAt = 0;
    while (Date.now() < deadline) {
        const payload = await ctx.evaluate(pollExpression);
        try {
            const snapshot = JSON.parse(payload ?? '{}');
            const isFinished = snapshot.status === 'done' &&
                typeof snapshot.text === 'string' &&
                snapshot.text.length > 0;
            if (isFinished) {
                return { text: snapshot.text };
            }
            const now = Date.now();
            if (now - lastProgressLogAt > 10_000) {
                ctx.log?.(`[gemini-web] Deep Think still generating... (${snapshot.status ?? 'unknown'})`);
                lastProgressLogAt = now;
            }
        }
        catch {
            // ignore parse errors while polling
        }
        await ctx.delay(3_000);
    }
    throw new Error(`Deep Think timed out waiting for response (${Math.ceil(responseTimeoutMs / 1000)} seconds).`);
}
211
/**
 * Clicks the thoughts toggle and reads the text of the thoughts panel.
 *
 * The page script strips the toggle button's own label from the start of the
 * panel text when it is present there.
 *
 * @param {object} ctx - Flow context providing evaluate and delay.
 * @returns {Promise<string|null>} The thoughts text, or null when there is no
 *   toggle or the panel yields no text.
 */
async function extractThoughts(ctx) {
    const toggleSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.thoughtsToggle);
    const contentSelector = asSelectorLiteral(GEMINI_DEEP_THINK_SELECTORS.thoughtsContent);
    const toggleOutcome = await ctx.evaluate(`(() => {
        const toggle = document.querySelector(${toggleSelector});
        if (!(toggle instanceof HTMLElement)) return 'no-toggle';
        toggle.click();
        return 'clicked';
    })()`);
    if (toggleOutcome !== 'clicked') {
        return null;
    }
    await ctx.delay(1_500);
    const thoughts = await ctx.evaluate(`(() => {
        const el = document.querySelector(${contentSelector});
        if (!el) return '';
        const full = el.textContent?.trim() ?? '';
        const btn = el.querySelector('.thoughts-header-button, [data-test-id="thoughts-header-button"]');
        const btnText = btn?.textContent?.trim() ?? '';
        if (btnText && full.startsWith(btnText)) {
            return full.slice(btnText.length).trim();
        }
        return full;
    })()`);
    if (typeof thoughts !== 'string' || thoughts.length === 0) {
        return null;
    }
    return thoughts;
}
237
/**
 * Gemini Deep Think adapter object for the provider flow helpers. It defines
 * the optional selectMode and extractThoughts steps as well as the required
 * ones.
 */
export const geminiDeepThinkDomProvider = {
    providerName: 'gemini-web',
    waitForUi,
    // Optional step: switches the composer into Deep Think mode.
    selectMode,
    typePrompt,
    submitPrompt,
    waitForResponse,
    // Optional step: reads the thoughts panel after the response.
    extractThoughts,
};
@@ -0,0 +1,2 @@
1
+ export { chatgptDomProvider } from './chatgptDomProvider.js';
2
+ export { geminiDeepThinkDomProvider, GEMINI_DEEP_THINK_SELECTORS, } from './geminiDeepThinkDomProvider.js';
@@ -12,24 +12,30 @@ const DEFAULT_CHROME_PROFILE = 'Default';
12
12
  // The browser label is passed to the model picker which fuzzy-matches against ChatGPT's UI.
13
13
/**
 * Ordered [model id, browser label] pairs.
 *
 * Ordering contract: more specific ids come before ids they extend (e.g.
 * "gpt-5.2-thinking" before "gpt-5.2"). "gemini-3-pro-deep-think" is therefore
 * listed before its prefix "gemini-3-pro".
 * NOTE(review): the lookup that consumes this table is not visible here; the
 * ordering matters if it matches by prefix/substring — confirm against the
 * consumer.
 */
const BROWSER_MODEL_LABELS = [
    // Most specific first (e.g., "gpt-5.2-thinking" before "gpt-5.2")
    ['gpt-5.4-pro', 'GPT-5.4 Pro'],
    ['gpt-5.2-thinking', 'GPT-5.2 Thinking'],
    ['gpt-5.2-instant', 'GPT-5.2 Instant'],
    ['gpt-5.2-pro', 'GPT-5.4 Pro'],
    ['gpt-5.1-pro', 'GPT-5.4 Pro'],
    ['gpt-5-pro', 'GPT-5.4 Pro'],
    ['gemini-3-pro-deep-think', 'gemini-3-deep-think'],
    // Base models last (least specific)
    ['gpt-5.4', 'Thinking 5.4'],
    ['gpt-5.2', 'GPT-5.2'], // Selects "Auto" in ChatGPT UI
    ['gpt-5.1', 'GPT-5.2'], // Legacy alias → Auto
    ['gemini-3-pro', 'Gemini 3 Pro'],
];
25
28
  export function normalizeChatGptModelForBrowser(model) {
26
29
  const normalized = model.toLowerCase();
27
30
  if (!normalized.startsWith('gpt-') || normalized.includes('codex')) {
28
31
  return model;
29
32
  }
30
- // Pro variants: always resolve to the latest Pro model in ChatGPT.
31
- if (normalized === 'gpt-5-pro' || normalized === 'gpt-5.1-pro' || normalized.endsWith('-pro')) {
32
- return 'gpt-5.2-pro';
33
+ if (normalized === 'gpt-5.4-pro' || normalized === 'gpt-5.4') {
34
+ return normalized;
35
+ }
36
+ // Pro variants: resolve to the latest Pro model in ChatGPT.
37
+ if (normalized === 'gpt-5-pro' || normalized === 'gpt-5.1-pro' || normalized === 'gpt-5.2-pro') {
38
+ return 'gpt-5.4-pro';
33
39
  }
34
40
  // Explicit model variants: keep as-is (they have their own browser labels)
35
41
  if (normalized === 'gpt-5.2-thinking' || normalized === 'gpt-5.2-instant') {
@@ -1,9 +1,12 @@
1
1
  import { isProModel } from '../oracle/modelResolver.js';
2
2
  export function shouldDetachSession({
3
- // Params kept for future policy tweaks; currently only model/disableDetachEnv matter.
4
- engine, model, waitPreference: _waitPreference, disableDetachEnv, }) {
3
+ // Params kept for policy tweaks.
4
+ engine, model, waitPreference, disableDetachEnv, }) {
5
5
  if (disableDetachEnv)
6
6
  return false;
7
+ // Explicit --wait means "stay attached", regardless of model defaults.
8
+ if (waitPreference)
9
+ return false;
7
10
  // Only Pro-tier API runs should start detached by default; browser runs stay inline so failures surface.
8
11
  if (isProModel(model) && engine === 'api')
9
12
  return true;
@@ -0,0 +1,11 @@
1
+ import { normalizeMaxFileSizeBytes } from '../oracle/files.js';
2
/**
 * Resolves the configured maximum file size, preferring the environment over
 * the user config.
 *
 * @param {object} [userConfig] - User config; `maxFileSizeBytes` is consulted.
 * @param {object} [env=process.env] - Environment map; a non-blank
 *   ORACLE_MAX_FILE_SIZE_BYTES wins.
 * @returns {*} The value produced by normalizeMaxFileSizeBytes for the chosen
 *   source, or undefined when neither source is set.
 */
export function resolveConfiguredMaxFileSizeBytes(userConfig, env = process.env) {
    const fromEnv = env.ORACLE_MAX_FILE_SIZE_BYTES?.trim();
    if (fromEnv) {
        return normalizeMaxFileSizeBytes(fromEnv, 'ORACLE_MAX_FILE_SIZE_BYTES');
    }
    const fromConfig = userConfig?.maxFileSizeBytes;
    if (fromConfig === undefined) {
        return undefined;
    }
    return normalizeMaxFileSizeBytes(fromConfig, 'config.maxFileSizeBytes');
}
@@ -38,7 +38,7 @@ export function applyHelpStyling(program, version, isTty) {
38
38
  program.addHelpText('after', () => renderHelpFooter(program, colors));
39
39
  }
40
40
/**
 * Builds the one-line help banner: the styled "Oracle CLI v<version>" title,
 * a space, the styled subtitle, and a trailing newline.
 *
 * @param {string} version - CLI version shown in the title.
 * @param {object} colors - Provides `banner` and `subtitle` styling functions.
 * @returns {string} The banner line.
 */
function renderHelpBanner(version, colors) {
    const subtitleText = 'Prompt + files required — GPT-5.4 Pro/GPT-5.4 for tough questions with code/file context.';
    const title = colors.banner(`Oracle CLI v${version}`);
    const subtitle = colors.subtitle(`— ${subtitleText}`);
    return `${title} ${subtitle}\n`;
}
44
44
  function renderHelpFooter(program, colors) {
@@ -49,9 +49,9 @@ function renderHelpFooter(program, colors) {
49
49
  `${colors.bullet('•')} Spell out the project + platform + version requirements (repo name, target OS/toolchain versions, API dependencies) so Oracle doesn’t guess defaults.`,
50
50
  `${colors.bullet('•')} When comparing multiple repos/files, spell out each repo + path + role (e.g., “Project A SettingsView → apps/project-a/Sources/SettingsView.swift; Project B SettingsView → ../project-b/mac/...”) so the model knows exactly which file is which.`,
51
51
  `${colors.bullet('•')} Best results: 6–30 sentences plus key source files; very short prompts often yield generic answers.`,
52
- `${colors.bullet('•')} Oracle is one-shot: it does not remember prior runs, so start fresh each time with full context.`,
52
+ `${colors.bullet('•')} Oracle is one-shot by default. For OpenAI/Azure API runs, you can chain follow-ups by passing ${colors.accent('--followup <sessionId|responseId>')} (continues via Responses API previous_response_id).`,
53
53
  `${colors.bullet('•')} Run ${colors.accent('--files-report')} to inspect token spend before hitting the API.`,
54
- `${colors.bullet('•')} Non-preview runs spawn detached sessions (especially gpt-5.2-pro API). If the CLI times out, do not re-run — reattach with ${colors.accent('oracle session <slug>')} to resume/inspect the existing run.`,
54
+ `${colors.bullet('•')} Non-preview runs spawn detached sessions (especially gpt-5.4-pro API). If the CLI times out, do not re-run — reattach with ${colors.accent('oracle session <slug>')} to resume/inspect the existing run.`,
55
55
  `${colors.bullet('•')} Set a memorable 3–5 word slug via ${colors.accent('--slug "<words>"')} to keep session IDs tidy.`,
56
56
  `${colors.bullet('•')} Finished sessions auto-hide preamble logs when reattached; raw timestamps remain in the saved log file.`,
57
57
  `${colors.bullet('•')} Need hidden flags? Run ${colors.accent(`${program.name()} --help --verbose`)} to list search/token/browser overrides.`,
@@ -7,7 +7,11 @@ import { buildPromptMarkdown } from '../oracle/promptAssembly.js';
7
7
  export async function buildMarkdownBundle(options, deps = {}) {
8
8
  const cwd = deps.cwd ?? process.cwd();
9
9
  const fsModule = deps.fs ?? createFsAdapter(fs);
10
- const files = await readFiles(options.file ?? [], { cwd, fsModule });
10
+ const files = await readFiles(options.file ?? [], {
11
+ cwd,
12
+ fsModule,
13
+ maxFileSizeBytes: options.maxFileSizeBytes,
14
+ });
11
15
  const sections = createFileSections(files, cwd);
12
16
  const systemPrompt = options.system?.trim() || DEFAULT_SYSTEM_PROMPT;
13
17
  const userPrompt = (options.prompt ?? '').trim();