@steipete/oracle 0.7.6 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  import { MENU_CONTAINER_SELECTOR, MENU_ITEM_SELECTOR, MODEL_BUTTON_SELECTOR, } from '../constants.js';
2
2
  import { logDomFailure } from '../domDebug.js';
3
3
  import { buildClickDispatcher } from './domEvents.js';
4
- export async function ensureModelSelection(Runtime, desiredModel, logger) {
4
+ export async function ensureModelSelection(Runtime, desiredModel, logger, strategy = 'select') {
5
5
  const outcome = await Runtime.evaluate({
6
- expression: buildModelSelectionExpression(desiredModel),
6
+ expression: buildModelSelectionExpression(desiredModel, strategy),
7
7
  awaitPromise: true,
8
8
  returnByValue: true,
9
9
  });
@@ -36,11 +36,12 @@ export async function ensureModelSelection(Runtime, desiredModel, logger) {
36
36
  * Builds the DOM expression that runs inside the ChatGPT tab to select a model.
37
37
  * The string is evaluated inside Chrome, so keep it self-contained and well-commented.
38
38
  */
39
- function buildModelSelectionExpression(targetModel) {
39
+ function buildModelSelectionExpression(targetModel, strategy) {
40
40
  const matchers = buildModelMatchersLiteral(targetModel);
41
41
  const labelLiteral = JSON.stringify(matchers.labelTokens);
42
42
  const idLiteral = JSON.stringify(matchers.testIdTokens);
43
43
  const primaryLabelLiteral = JSON.stringify(targetModel);
44
+ const strategyLiteral = JSON.stringify(strategy);
44
45
  const menuContainerLiteral = JSON.stringify(MENU_CONTAINER_SELECTOR);
45
46
  const menuItemLiteral = JSON.stringify(MENU_ITEM_SELECTOR);
46
47
  return `(() => {
@@ -50,6 +51,7 @@ function buildModelSelectionExpression(targetModel) {
50
51
  const LABEL_TOKENS = ${labelLiteral};
51
52
  const TEST_IDS = ${idLiteral};
52
53
  const PRIMARY_LABEL = ${primaryLabelLiteral};
54
+ const MODEL_STRATEGY = ${strategyLiteral};
53
55
  const INITIAL_WAIT_MS = 150;
54
56
  const REOPEN_INTERVAL_MS = 400;
55
57
  const MAX_WAIT_MS = 20000;
@@ -86,6 +88,9 @@ function buildModelSelectionExpression(targetModel) {
86
88
  }
87
89
 
88
90
  const getButtonLabel = () => (button.textContent ?? '').trim();
91
+ if (MODEL_STRATEGY === 'current') {
92
+ return { status: 'already-selected', label: getButtonLabel() };
93
+ }
89
94
  const buttonMatchesTarget = () => {
90
95
  const normalizedLabel = normalizeText(getButtonLabel());
91
96
  if (!normalizedLabel) return false;
@@ -476,5 +481,5 @@ function buildModelMatchersLiteral(targetModel) {
476
481
  };
477
482
  }
478
483
  export function buildModelSelectionExpressionForTest(targetModel) {
479
- return buildModelSelectionExpression(targetModel);
484
+ return buildModelSelectionExpression(targetModel, 'select');
480
485
  }
@@ -17,16 +17,36 @@ export async function ensureNotBlocked(Runtime, headless, logger) {
17
17
  }
18
18
  const LOGIN_CHECK_TIMEOUT_MS = 5_000;
19
19
  export async function ensureLoggedIn(Runtime, logger, options = {}) {
20
+ // Learned: ChatGPT can render the UI (project view) while auth silently failed.
21
+ // A backend-api probe plus DOM login CTA check catches both cases.
20
22
  const outcome = await Runtime.evaluate({
21
23
  expression: buildLoginProbeExpression(LOGIN_CHECK_TIMEOUT_MS),
22
24
  awaitPromise: true,
23
25
  returnByValue: true,
24
26
  });
25
27
  const probe = normalizeLoginProbe(outcome.result?.value);
26
- if (probe.ok && !probe.domLoginCta && !probe.onAuthPage) {
27
- logger('Login check passed (no login button detected on page)');
28
+ if (probe.ok) {
29
+ logger(`Login check passed (status=${probe.status}, domLoginCta=${Boolean(probe.domLoginCta)})`);
28
30
  return;
29
31
  }
32
+ const accepted = await attemptWelcomeBackLogin(Runtime, logger);
33
+ if (accepted) {
34
+ // Learned: "Welcome back" account picker needs a click even when cookies are valid,
35
+ // and the redirect can lag, so re-probe before failing hard.
36
+ await delay(1500);
37
+ const retryOutcome = await Runtime.evaluate({
38
+ expression: buildLoginProbeExpression(LOGIN_CHECK_TIMEOUT_MS),
39
+ awaitPromise: true,
40
+ returnByValue: true,
41
+ });
42
+ const retryProbe = normalizeLoginProbe(retryOutcome.result?.value);
43
+ if (retryProbe.ok) {
44
+ logger('Login restored via Welcome back account picker');
45
+ return;
46
+ }
47
+ logger(`Login retry after Welcome back failed (status=${retryProbe.status}, domLoginCta=${Boolean(retryProbe.domLoginCta)})`);
48
+ }
49
+ logger(`Login probe failed (status=${probe.status}, domLoginCta=${Boolean(probe.domLoginCta)}, onAuthPage=${Boolean(probe.onAuthPage)}, url=${probe.pageUrl ?? 'n/a'}, error=${probe.error ?? 'none'})`);
30
50
  const domLabel = probe.domLoginCta ? ' Login button detected on page.' : '';
31
51
  const cookieHint = options.remoteSession
32
52
  ? 'The remote Chrome session is not signed into ChatGPT. Sign in there, then rerun.'
@@ -35,9 +55,99 @@ export async function ensureLoggedIn(Runtime, logger, options = {}) {
35
55
  : 'ChatGPT login appears missing; open chatgpt.com in Chrome to refresh the session or provide inline cookies (--browser-inline-cookies[(-file)] / ORACLE_BROWSER_COOKIES_JSON).';
36
56
  throw new Error(`ChatGPT session not detected.${domLabel} ${cookieHint}`);
37
57
  }
58
/**
 * Tries to get past the "Welcome back" account picker by clicking an account chip.
 *
 * Evaluates a script in the page that searches buttons/links for a label that
 * contains "@" and is not a log-in / sign-up / "another account" label, and clicks
 * the first match. If nothing matches right away, the script watches DOM mutations
 * and retries until a match is clicked or 30 seconds elapse.
 *
 * @param {object} Runtime - CDP Runtime domain; only `evaluate` is used.
 * @param {(message: string) => void} logger - Receives one diagnostic line per outcome.
 * @returns {Promise<boolean>} true only when the page script reports a successful click.
 */
async function attemptWelcomeBackLogin(Runtime, logger) {
    const outcome = await Runtime.evaluate({
        expression: `(() => {
      // Learned: "Welcome back" shows as a modal with account chips; click the email chip.
      const TIMEOUT_MS = 30000;
      const getLabel = (node) =>
        (node?.textContent || node?.getAttribute?.('aria-label') || '').trim();
      const isAccount = (label) =>
        Boolean(label) &&
        label.includes('@') &&
        !/log in|sign up|create account|another account/i.test(label);
      const findAccount = () => {
        const candidates = Array.from(document.querySelectorAll('[role="button"],button,a'));
        return candidates.find((node) => isAccount(getLabel(node))) || null;
      };
      const clickAccount = () => {
        const account = findAccount();
        if (!account) return null;
        try {
          (account).click();
        } catch (_error) {
          return { clicked: false, reason: 'click-failed' };
        }
        return { clicked: true, label: getLabel(account) };
      };
      const immediate = clickAccount();
      if (immediate) {
        return immediate;
      }
      const root = document.documentElement || document.body;
      if (!root) {
        return { clicked: false, reason: 'no-root' };
      }
      return new Promise((resolve) => {
        const timer = setTimeout(() => {
          observer.disconnect();
          resolve({ clicked: false, reason: 'timeout' });
        }, TIMEOUT_MS);
        const observer = new MutationObserver(() => {
          const result = clickAccount();
          if (result) {
            clearTimeout(timer);
            observer.disconnect();
            resolve(result);
          }
        });
        observer.observe(root, {
          subtree: true,
          childList: true,
          characterData: true,
        });
      });
    })()`,
        awaitPromise: true,
        returnByValue: true,
    });
    if (outcome.exceptionDetails) {
        const details = outcome.exceptionDetails;
        const description = (details.exception && typeof details.exception.description === 'string' && details.exception.description) ||
            details.text ||
            'unknown error';
        logger(`Welcome back auto-select probe failed: ${description}`);
        // The exception is the whole story; stop here so we do not also emit the
        // misleading "returned no result" line for the same failure.
        return false;
    }
    const result = outcome.result?.value;
    if (!result) {
        logger('Welcome back auto-select probe returned no result.');
        return false;
    }
    if (result.clicked) {
        logger(`Welcome back modal detected; selected account ${result.label ?? '(unknown)'}`);
        return true;
    }
    if (result.reason === 'timeout') {
        // No account chip ever appeared, so the picker was most likely not shown.
        logger('Welcome back modal not detected after login probe failure.');
    }
    else if (result.reason) {
        logger(`Welcome back modal present but auto-select failed (${result.reason}).`);
    }
    return false;
}
38
138
  export async function ensurePromptReady(Runtime, timeoutMs, logger) {
39
139
  const ready = await waitForPrompt(Runtime, timeoutMs);
40
140
  if (!ready) {
141
+ const authUrl = await currentUrl(Runtime);
142
+ if (authUrl && isAuthLoginUrl(authUrl)) {
143
+ // Learned: auth.openai.com/login can appear after cookies are copied; allow manual login window.
144
+ logger('Auth login page detected; waiting for manual login to complete...');
145
+ const extended = Math.min(Math.max(timeoutMs, 60_000), 20 * 60_000);
146
+ const loggedIn = await waitForPrompt(Runtime, extended);
147
+ if (loggedIn) {
148
+ return;
149
+ }
150
+ }
41
151
  await logDomFailure(Runtime, logger, 'prompt-textarea');
42
152
  throw new Error('Prompt textarea did not appear before timeout');
43
153
  }
@@ -56,6 +166,25 @@ async function waitForDocumentReady(Runtime, timeoutMs) {
56
166
  }
57
167
  throw new Error('Page did not reach ready state in time');
58
168
  }
169
/**
 * Best-effort read of the page's current URL through CDP.
 *
 * @param {object} Runtime - CDP Runtime domain; only `evaluate` is used.
 * @returns {Promise<string|null>} `location.href` when the page reports a string,
 *   otherwise null. A failed evaluation also yields null: this probe only adds
 *   context on a failure path, so it must not replace the caller's own error
 *   with a transport error.
 */
async function currentUrl(Runtime) {
    try {
        const { result } = await Runtime.evaluate({
            expression: 'typeof location === "object" && location.href ? location.href : null',
            returnByValue: true,
        });
        return typeof result?.value === 'string' ? result.value : null;
    }
    catch {
        // Treat an unreachable page the same as an unknown URL.
        return null;
    }
}
176
/**
 * Reports whether a URL looks like an OpenAI auth / login page.
 *
 * A URL qualifies when its host is auth.openai.com (or a subdomain of it), or
 * when its path starts with "/login" or "/log-in" (case-insensitive) on any host.
 *
 * @param {string} url - Absolute URL to classify.
 * @returns {boolean} true for auth/login URLs; false otherwise, including when
 *   `url` cannot be parsed.
 */
function isAuthLoginUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return false;
    }
    const host = parsed.hostname;
    // Match the host exactly or as a parent domain. A substring test would also
    // accept look-alikes such as "auth.openai.com.evil.example" or "notauth.openai.com".
    if (host === 'auth.openai.com' || host.endsWith('.auth.openai.com')) {
        return true;
    }
    return /^\/log-?in/i.test(parsed.pathname);
}
59
188
  async function waitForPrompt(Runtime, timeoutMs) {
60
189
  const deadline = Date.now() + timeoutMs;
61
190
  while (Date.now() < deadline) {
@@ -93,7 +222,9 @@ async function isCloudflareInterstitial(Runtime) {
93
222
  return Boolean(result.value);
94
223
  }
95
224
  function buildLoginProbeExpression(timeoutMs) {
96
- return `(() => {
225
+ return `(async () => {
226
+ // Learned: /backend-api/me is the most reliable "am I logged in" signal.
227
+ // Some UIs render without a session; use DOM + network for a robust answer.
97
228
  const timer = setTimeout(() => {}, ${timeoutMs});
98
229
  const pageUrl = typeof location === 'object' && location?.href ? location.href : null;
99
230
  const onAuthPage =
@@ -138,16 +269,40 @@ function buildLoginProbeExpression(timeoutMs) {
138
269
  return false;
139
270
  };
140
271
 
272
+ let status = 0;
273
+ let error = null;
274
+ try {
275
+ if (typeof fetch === 'function') {
276
+ const controller = new AbortController();
277
+ const timeout = setTimeout(() => controller.abort(), ${timeoutMs});
278
+ try {
279
+ // Credentials included so we see a 200 only when cookies are valid.
280
+ const response = await fetch('/backend-api/me', {
281
+ cache: 'no-store',
282
+ credentials: 'include',
283
+ signal: controller.signal,
284
+ });
285
+ status = response.status || 0;
286
+ } finally {
287
+ clearTimeout(timeout);
288
+ }
289
+ }
290
+ } catch (err) {
291
+ error = err ? String(err) : 'unknown';
292
+ }
293
+
141
294
  const domLoginCta = hasLoginCta();
295
+ const loginSignals = domLoginCta || onAuthPage;
142
296
  clearTimeout(timer);
143
297
  return {
144
- ok: !domLoginCta && !onAuthPage,
145
- status: 0,
298
+ ok: !loginSignals && (status === 0 || status === 200),
299
+ status,
146
300
  redirected: false,
147
301
  url: pageUrl,
148
302
  pageUrl,
149
303
  domLoginCta,
150
304
  onAuthPage,
305
+ error,
151
306
  };
152
307
  })()`;
153
308
  }
@@ -1,4 +1,4 @@
1
- import { INPUT_SELECTORS, PROMPT_PRIMARY_SELECTOR, PROMPT_FALLBACK_SELECTOR, SEND_BUTTON_SELECTORS, CONVERSATION_TURN_SELECTOR, } from '../constants.js';
1
+ import { INPUT_SELECTORS, PROMPT_PRIMARY_SELECTOR, PROMPT_FALLBACK_SELECTOR, SEND_BUTTON_SELECTORS, CONVERSATION_TURN_SELECTOR, STOP_BUTTON_SELECTOR, ASSISTANT_ROLE_SELECTOR, } from '../constants.js';
2
2
  import { delay } from '../utils.js';
3
3
  import { logDomFailure } from '../domDebug.js';
4
4
  import { buildClickDispatcher } from './domEvents.js';
@@ -12,7 +12,7 @@ const ENTER_KEY_EVENT = {
12
12
  const ENTER_KEY_TEXT = '\r';
13
13
  export async function submitPrompt(deps, prompt, logger) {
14
14
  const { runtime, input } = deps;
15
- await waitForDomReady(runtime, logger);
15
+ await waitForDomReady(runtime, logger, deps.inputTimeoutMs ?? undefined);
16
16
  const encodedPrompt = JSON.stringify(prompt);
17
17
  const focusResult = await runtime.evaluate({
18
18
  expression: `(() => {
@@ -22,6 +22,7 @@ export async function submitPrompt(deps, prompt, logger) {
22
22
  if (!node) {
23
23
  return false;
24
24
  }
25
+ // Learned: React/ProseMirror require a real click + focus + selection for inserts to stick.
25
26
  dispatchClickSequence(node);
26
27
  if (typeof node.focus === 'function') {
27
28
  node.focus();
@@ -76,6 +77,7 @@ export async function submitPrompt(deps, prompt, logger) {
76
77
  const editorTextTrimmed = editorTextRaw?.trim?.() ?? '';
77
78
  const fallbackValueTrimmed = fallbackValueRaw?.trim?.() ?? '';
78
79
  if (!editorTextTrimmed && !fallbackValueTrimmed) {
80
+ // Learned: occasionally Input.insertText doesn't land in the editor; force textContent/value + input events.
79
81
  await runtime.evaluate({
80
82
  expression: `(() => {
81
83
  const fallback = document.querySelector(${fallbackSelectorLiteral});
@@ -109,6 +111,7 @@ export async function submitPrompt(deps, prompt, logger) {
109
111
  const observedFallback = postVerification.result?.value?.fallbackValue ?? '';
110
112
  const observedLength = Math.max(observedEditor.length, observedFallback.length);
111
113
  if (promptLength >= 50_000 && observedLength > 0 && observedLength < promptLength - 2_000) {
114
+ // Learned: very large prompts can truncate silently; fail fast so we can fall back to file uploads.
112
115
  await logDomFailure(runtime, logger, 'prompt-too-large');
113
116
  throw new BrowserAutomationError('Prompt appears truncated in the composer (likely too large).', {
114
117
  stage: 'submit-prompt',
@@ -134,7 +137,9 @@ export async function submitPrompt(deps, prompt, logger) {
134
137
  else {
135
138
  logger('Clicked send button');
136
139
  }
137
- await verifyPromptCommitted(runtime, prompt, 60_000, logger);
140
+ const commitTimeoutMs = Math.max(60_000, deps.inputTimeoutMs ?? 0);
141
+ // Learned: the send button can succeed but the turn doesn't appear immediately; verify commit via turns/stop button.
142
+ return await verifyPromptCommitted(runtime, prompt, commitTimeoutMs, logger, deps.baselineTurns ?? undefined);
138
143
  }
139
144
  export async function clearPromptComposer(Runtime, logger) {
140
145
  const primarySelectorLiteral = JSON.stringify(PROMPT_PRIMARY_SELECTOR);
@@ -165,8 +170,8 @@ export async function clearPromptComposer(Runtime, logger) {
165
170
  }
166
171
  await delay(250);
167
172
  }
168
- async function waitForDomReady(Runtime, logger) {
169
- const deadline = Date.now() + 10_000;
173
+ async function waitForDomReady(Runtime, logger, timeoutMs = 10_000) {
174
+ const deadline = Date.now() + timeoutMs;
170
175
  while (Date.now() < deadline) {
171
176
  const { result } = await Runtime.evaluate({
172
177
  expression: `(() => {
@@ -183,7 +188,7 @@ async function waitForDomReady(Runtime, logger) {
183
188
  }
184
189
  await delay(150);
185
190
  }
186
- logger?.('Page did not reach ready/composer state within 10s; continuing cautiously.');
191
+ logger?.(`Page did not reach ready/composer state within ${timeoutMs}ms; continuing cautiously.`);
187
192
  }
188
193
  function buildAttachmentReadyExpression(attachmentNames) {
189
194
  const namesLiteral = JSON.stringify(attachmentNames.map((name) => name.toLowerCase()));
@@ -241,6 +246,7 @@ async function attemptSendButton(Runtime, _logger, attachmentNames) {
241
246
  dataDisabled === 'true' ||
242
247
  style.pointerEvents === 'none' ||
243
248
  style.display === 'none';
249
+ // Learned: some send buttons render but are inert; only click when truly enabled.
244
250
  if (disabled) return 'disabled';
245
251
  // Use unified pointer/mouse sequence to satisfy React handlers.
246
252
  dispatchClickSequence(button);
@@ -270,11 +276,17 @@ async function attemptSendButton(Runtime, _logger, attachmentNames) {
270
276
  }
271
277
  return false;
272
278
  }
273
- async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger) {
279
+ async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger, baselineTurns) {
274
280
  const deadline = Date.now() + timeoutMs;
275
281
  const encodedPrompt = JSON.stringify(prompt.trim());
276
282
  const primarySelectorLiteral = JSON.stringify(PROMPT_PRIMARY_SELECTOR);
277
283
  const fallbackSelectorLiteral = JSON.stringify(PROMPT_FALLBACK_SELECTOR);
284
+ const stopSelectorLiteral = JSON.stringify(STOP_BUTTON_SELECTOR);
285
+ const assistantSelectorLiteral = JSON.stringify(ASSISTANT_ROLE_SELECTOR);
286
+ const baselineLiteral = typeof baselineTurns === 'number' && Number.isFinite(baselineTurns) && baselineTurns >= 0
287
+ ? Math.floor(baselineTurns)
288
+ : -1;
289
+ // Learned: ChatGPT can echo/format text; normalize markdown and use prefix matches to detect the sent prompt.
278
290
  const script = `(() => {
279
291
  const editor = document.querySelector(${primarySelectorLiteral});
280
292
  const fallback = document.querySelector(${fallbackSelectorLiteral});
@@ -297,11 +309,35 @@ async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger) {
297
309
  normalizedPromptPrefix.length > 30 &&
298
310
  normalizedTurns.some((text) => text.includes(normalizedPromptPrefix));
299
311
  const lastTurn = normalizedTurns[normalizedTurns.length - 1] ?? '';
312
+ const lastMatched =
313
+ normalizedPrompt.length > 0 &&
314
+ (lastTurn.includes(normalizedPrompt) ||
315
+ (normalizedPromptPrefix.length > 30 && lastTurn.includes(normalizedPromptPrefix)));
316
+ const baseline = ${baselineLiteral};
317
+ const hasNewTurn = baseline < 0 ? true : normalizedTurns.length > baseline;
318
+ const stopVisible = Boolean(document.querySelector(${stopSelectorLiteral}));
319
+ const assistantVisible = Boolean(
320
+ document.querySelector(${assistantSelectorLiteral}) ||
321
+ document.querySelector('[data-testid*="assistant"]'),
322
+ );
323
+ // Learned: composer clearing + stop button or assistant presence is a reliable fallback signal.
324
+ const editorValue = editor?.innerText ?? '';
325
+ const fallbackValue = fallback?.value ?? '';
326
+ const composerCleared = !(String(editorValue).trim() || String(fallbackValue).trim());
327
+ const href = typeof location === 'object' && location.href ? location.href : '';
328
+ const inConversation = /\\/c\\//.test(href);
300
329
  return {
301
330
  userMatched,
302
331
  prefixMatched,
303
- fallbackValue: fallback?.value ?? '',
304
- editorValue: editor?.innerText ?? '',
332
+ lastMatched,
333
+ hasNewTurn,
334
+ stopVisible,
335
+ assistantVisible,
336
+ composerCleared,
337
+ inConversation,
338
+ href,
339
+ fallbackValue,
340
+ editorValue,
305
341
  lastTurn,
306
342
  turnsCount: normalizedTurns.length,
307
343
  };
@@ -309,8 +345,15 @@ async function verifyPromptCommitted(Runtime, prompt, timeoutMs, logger) {
309
345
  while (Date.now() < deadline) {
310
346
  const { result } = await Runtime.evaluate({ expression: script, returnByValue: true });
311
347
  const info = result.value;
312
- if (info?.userMatched || info?.prefixMatched) {
313
- return;
348
+ const turnsCount = result.value?.turnsCount;
349
+ if (info?.hasNewTurn && (info?.lastMatched || info?.userMatched || info?.prefixMatched)) {
350
+ return typeof turnsCount === 'number' && Number.isFinite(turnsCount) ? turnsCount : null;
351
+ }
352
+ const fallbackCommit = info?.composerCleared &&
353
+ ((info?.stopVisible ?? false) ||
354
+ (info?.hasNewTurn && (info?.assistantVisible || info?.inConversation)));
355
+ if (fallbackCommit) {
356
+ return typeof turnsCount === 'number' && Number.isFinite(turnsCount) ? turnsCount : null;
314
357
  }
315
358
  await delay(100);
316
359
  }
@@ -1,9 +1,9 @@
1
- import { readFile } from 'node:fs/promises';
2
1
  import path from 'node:path';
3
2
  import { FILE_INPUT_SELECTORS } from '../constants.js';
4
3
  import { waitForAttachmentVisible } from './attachments.js';
5
4
  import { delay } from '../utils.js';
6
5
  import { logDomFailure } from '../domDebug.js';
6
+ import { transferAttachmentViaDataTransfer } from './attachmentDataTransfer.js';
7
7
  /**
8
8
  * Upload file to remote Chrome by transferring content via CDP
9
9
  * Used when browser is on a different machine than CLI
@@ -13,17 +13,7 @@ export async function uploadAttachmentViaDataTransfer(deps, attachment, logger)
13
13
  if (!dom) {
14
14
  throw new Error('DOM domain unavailable while uploading attachments.');
15
15
  }
16
- // Read file content from local filesystem
17
- const fileContent = await readFile(attachment.path);
18
- // Enforce file size limit to avoid CDP protocol issues
19
- const MAX_BYTES = 20 * 1024 * 1024; // 20MB limit for CDP transfer
20
- if (fileContent.length > MAX_BYTES) {
21
- throw new Error(`Attachment ${path.basename(attachment.path)} is too large for remote upload (${fileContent.length} bytes). Maximum size is ${MAX_BYTES} bytes.`);
22
- }
23
- const base64Content = fileContent.toString('base64');
24
- const fileName = path.basename(attachment.path);
25
- const mimeType = guessMimeType(fileName);
26
- logger(`Transferring ${fileName} (${fileContent.length} bytes) to remote browser...`);
16
+ logger(`Transferring ${path.basename(attachment.path)} to remote browser...`);
27
17
  // Find file input element
28
18
  const documentNode = await dom.getDocument();
29
19
  let fileInputSelector;
@@ -38,151 +28,10 @@ export async function uploadAttachmentViaDataTransfer(deps, attachment, logger)
38
28
  await logDomFailure(runtime, logger, 'file-input');
39
29
  throw new Error('Unable to locate ChatGPT file attachment input.');
40
30
  }
41
- // Inject file via JavaScript DataTransfer API
42
- const expression = `
43
- (function() {
44
- // Check for required file APIs
45
- if (!('File' in window) || !('Blob' in window) || !('DataTransfer' in window) || typeof atob !== 'function') {
46
- return { success: false, error: 'Required file APIs are not available in this browser' };
47
- }
48
-
49
- const fileInput = document.querySelector(${JSON.stringify(fileInputSelector)});
50
- if (!fileInput) {
51
- return { success: false, error: 'File input not found' };
52
- }
53
-
54
- // Validate that the element is actually a file input
55
- if (!(fileInput instanceof HTMLInputElement) || fileInput.type !== 'file') {
56
- return { success: false, error: 'Found element is not a file input' };
57
- }
58
-
59
- // Convert base64 to Blob
60
- const base64Data = ${JSON.stringify(base64Content)};
61
- const binaryString = atob(base64Data);
62
- const bytes = new Uint8Array(binaryString.length);
63
- for (let i = 0; i < binaryString.length; i++) {
64
- bytes[i] = binaryString.charCodeAt(i);
65
- }
66
- const blob = new Blob([bytes], { type: ${JSON.stringify(mimeType)} });
67
-
68
- // Create File object
69
- const file = new File([blob], ${JSON.stringify(fileName)}, {
70
- type: ${JSON.stringify(mimeType)},
71
- lastModified: Date.now()
72
- });
73
-
74
- // Create DataTransfer and assign to input
75
- const dataTransfer = new DataTransfer();
76
- dataTransfer.items.add(file);
77
- let assigned = false;
78
-
79
- const proto = Object.getPrototypeOf(fileInput);
80
- const descriptor = proto ? Object.getOwnPropertyDescriptor(proto, 'files') : null;
81
- if (descriptor?.set) {
82
- try {
83
- descriptor.set.call(fileInput, dataTransfer.files);
84
- assigned = true;
85
- } catch {
86
- assigned = false;
87
- }
88
- }
89
- if (!assigned) {
90
- try {
91
- Object.defineProperty(fileInput, 'files', {
92
- configurable: true,
93
- get: () => dataTransfer.files,
94
- });
95
- assigned = true;
96
- } catch {
97
- assigned = false;
98
- }
99
- }
100
- if (!assigned) {
101
- try {
102
- fileInput.files = dataTransfer.files;
103
- assigned = true;
104
- } catch {
105
- assigned = false;
106
- }
107
- }
108
- if (!assigned) {
109
- return { success: false, error: 'Unable to assign FileList to input' };
110
- }
111
-
112
- // Trigger both input and change events for better compatibility
113
- fileInput.dispatchEvent(new Event('input', { bubbles: true }));
114
- fileInput.dispatchEvent(new Event('change', { bubbles: true }));
115
-
116
- return { success: true, fileName: file.name, size: file.size };
117
- })()
118
- `;
119
- const evalResult = await runtime.evaluate({ expression, returnByValue: true });
120
- // Check for JavaScript exceptions during evaluation
121
- if (evalResult.exceptionDetails) {
122
- const description = evalResult.exceptionDetails.text ?? 'JS evaluation failed';
123
- throw new Error(`Failed to transfer file to remote browser: ${description}`);
124
- }
125
- // Validate result structure before accessing
126
- if (!evalResult.result || typeof evalResult.result.value !== 'object' || evalResult.result.value == null) {
127
- throw new Error('Failed to transfer file to remote browser: unexpected evaluation result');
128
- }
129
- const uploadResult = evalResult.result.value;
130
- if (!uploadResult.success) {
131
- throw new Error(`Failed to transfer file to remote browser: ${uploadResult.error || 'Unknown error'}`);
132
- }
133
- logger(`File transferred: ${uploadResult.fileName} (${uploadResult.size} bytes)`);
31
+ const transferResult = await transferAttachmentViaDataTransfer(runtime, attachment, fileInputSelector);
32
+ logger(`File transferred: ${transferResult.fileName} (${transferResult.size} bytes)`);
134
33
  // Give ChatGPT a moment to process the file
135
34
  await delay(500);
136
- await waitForAttachmentVisible(runtime, fileName, 10_000, logger);
35
+ await waitForAttachmentVisible(runtime, transferResult.fileName, 10_000, logger);
137
36
  logger('Attachment queued');
138
37
  }
139
- function guessMimeType(fileName) {
140
- const ext = path.extname(fileName).toLowerCase();
141
- const mimeTypes = {
142
- // Text files
143
- '.txt': 'text/plain',
144
- '.md': 'text/markdown',
145
- '.csv': 'text/csv',
146
- // Code files
147
- '.json': 'application/json',
148
- '.js': 'text/javascript',
149
- '.ts': 'text/typescript',
150
- '.jsx': 'text/javascript',
151
- '.tsx': 'text/typescript',
152
- '.py': 'text/x-python',
153
- '.java': 'text/x-java',
154
- '.c': 'text/x-c',
155
- '.cpp': 'text/x-c++',
156
- '.h': 'text/x-c',
157
- '.hpp': 'text/x-c++',
158
- '.sh': 'text/x-sh',
159
- '.bash': 'text/x-sh',
160
- // Web files
161
- '.html': 'text/html',
162
- '.css': 'text/css',
163
- '.xml': 'text/xml',
164
- '.yaml': 'text/yaml',
165
- '.yml': 'text/yaml',
166
- // Documents
167
- '.pdf': 'application/pdf',
168
- '.doc': 'application/msword',
169
- '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
170
- '.xls': 'application/vnd.ms-excel',
171
- '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
172
- '.ppt': 'application/vnd.ms-powerpoint',
173
- '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
174
- // Images
175
- '.png': 'image/png',
176
- '.jpg': 'image/jpeg',
177
- '.jpeg': 'image/jpeg',
178
- '.gif': 'image/gif',
179
- '.svg': 'image/svg+xml',
180
- '.webp': 'image/webp',
181
- // Archives
182
- '.zip': 'application/zip',
183
- '.tar': 'application/x-tar',
184
- '.gz': 'application/gzip',
185
- '.7z': 'application/x-7z-compressed',
186
- };
187
- return mimeTypes[ext] || 'application/octet-stream';
188
- }
@@ -1,4 +1,5 @@
1
- import { CHATGPT_URL, DEFAULT_MODEL_TARGET } from './constants.js';
1
+ import { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from './constants.js';
2
+ import { normalizeBrowserModelStrategy } from './modelStrategy.js';
2
3
  import { isTemporaryChatUrl, normalizeChatgptUrl } from './utils.js';
3
4
  import os from 'node:os';
4
5
  import path from 'node:path';
@@ -10,7 +11,7 @@ export const DEFAULT_BROWSER_CONFIG = {
10
11
  chatgptUrl: CHATGPT_URL,
11
12
  timeoutMs: 1_200_000,
12
13
  debugPort: null,
13
- inputTimeoutMs: 30_000,
14
+ inputTimeoutMs: 60_000,
14
15
  cookieSync: true,
15
16
  cookieNames: null,
16
17
  inlineCookies: null,
@@ -19,6 +20,7 @@ export const DEFAULT_BROWSER_CONFIG = {
19
20
  keepBrowser: false,
20
21
  hideWindow: false,
21
22
  desiredModel: DEFAULT_MODEL_TARGET,
23
+ modelStrategy: DEFAULT_MODEL_STRATEGY,
22
24
  debug: false,
23
25
  allowCookieErrors: false,
24
26
  remoteChrome: null,
@@ -32,7 +34,10 @@ export function resolveBrowserConfig(config) {
32
34
  const rawUrl = config?.chatgptUrl ?? config?.url ?? DEFAULT_BROWSER_CONFIG.url;
33
35
  const normalizedUrl = normalizeChatgptUrl(rawUrl ?? DEFAULT_BROWSER_CONFIG.url, DEFAULT_BROWSER_CONFIG.url);
34
36
  const desiredModel = config?.desiredModel ?? DEFAULT_BROWSER_CONFIG.desiredModel ?? DEFAULT_MODEL_TARGET;
35
- if (isTemporaryChatUrl(normalizedUrl) && /\bpro\b/i.test(desiredModel)) {
37
+ const modelStrategy = normalizeBrowserModelStrategy(config?.modelStrategy) ??
38
+ DEFAULT_BROWSER_CONFIG.modelStrategy ??
39
+ DEFAULT_MODEL_STRATEGY;
40
+ if (modelStrategy === 'select' && isTemporaryChatUrl(normalizedUrl) && /\bpro\b/i.test(desiredModel)) {
36
41
  throw new Error('Temporary Chat mode does not expose Pro models in the ChatGPT model picker. ' +
37
42
  'Remove "temporary-chat=true" from your browser URL, or use a non-Pro model label (e.g. "GPT-5.2").');
38
43
  }
@@ -58,6 +63,7 @@ export function resolveBrowserConfig(config) {
58
63
  keepBrowser: config?.keepBrowser ?? DEFAULT_BROWSER_CONFIG.keepBrowser,
59
64
  hideWindow: config?.hideWindow ?? DEFAULT_BROWSER_CONFIG.hideWindow,
60
65
  desiredModel,
66
+ modelStrategy,
61
67
  chromeProfile: config?.chromeProfile ?? DEFAULT_BROWSER_CONFIG.chromeProfile,
62
68
  chromePath: config?.chromePath ?? DEFAULT_BROWSER_CONFIG.chromePath,
63
69
  chromeCookiePath: config?.chromeCookiePath ?? DEFAULT_BROWSER_CONFIG.chromeCookiePath,
@@ -1,5 +1,6 @@
1
1
  export const CHATGPT_URL = 'https://chatgpt.com/';
2
2
  export const DEFAULT_MODEL_TARGET = 'GPT-5.2 Pro';
3
+ export const DEFAULT_MODEL_STRATEGY = 'select';
3
4
  export const COOKIE_URLS = ['https://chatgpt.com', 'https://chat.openai.com', 'https://atlas.openai.com'];
4
5
  export const INPUT_SELECTORS = [
5
6
  'textarea[data-id="prompt-textarea"]',
@@ -22,7 +23,9 @@ export const ANSWER_SELECTORS = [
22
23
  '[data-message-author-role="assistant"]',
23
24
  '[data-turn="assistant"]',
24
25
  ];
25
- export const CONVERSATION_TURN_SELECTOR = 'article[data-testid^="conversation-turn"]';
26
+ export const CONVERSATION_TURN_SELECTOR = 'article[data-testid^="conversation-turn"], div[data-testid^="conversation-turn"], section[data-testid^="conversation-turn"], ' +
27
+ 'article[data-message-author-role], div[data-message-author-role], section[data-message-author-role], ' +
28
+ 'article[data-turn], div[data-turn], section[data-turn]';
26
29
  export const ASSISTANT_ROLE_SELECTOR = '[data-message-author-role="assistant"], [data-turn="assistant"]';
27
30
  export const CLOUDFLARE_SCRIPT_SELECTOR = 'script[src*="/challenge-platform/"]';
28
31
  export const CLOUDFLARE_TITLE = 'just a moment';