@steipete/oracle 0.12.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -272,55 +272,55 @@ Browser automation can open or control Chrome, so dry-runs and live runs print a
272
272
 
273
273
  ## Flags you’ll actually use
274
274
 
275
- | Flag | Purpose |
276
- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
277
- | `-p, --prompt <text>` | Required prompt. |
278
- | `-f, --file <paths...>` | Attach files/dirs (globs + `!` excludes). |
279
- | `-e, --engine <api\|browser>` | Choose API or browser (browser is experimental). |
280
- | `-m, --model <name>` | Built-ins (`gpt-5.5-pro` default, `gpt-5.5`, `gpt-5.4-pro`, `gpt-5.4`, `gpt-5.1-pro`, `gpt-5-pro`, `gpt-5.1`, `gpt-5.1-codex`, `gpt-5.2`, `gpt-5.2-instant`, `gpt-5.2-pro`, `gemini-3.1-pro` API-only, `gemini-3-pro`, `claude-4.6-sonnet`, `claude-4.1-opus`) plus any OpenRouter id (e.g., `minimax/minimax-m2`, `openai/gpt-4o-mini`). |
281
- | `--models <list>` | Comma-separated API models (mix built-ins and OpenRouter ids) for multi-model runs. |
282
- | `--followup <sessionId\|responseId>` | Continue an OpenAI/Azure Responses API run from a stored oracle session or `resp_...` response id. |
283
- | `--followup-model <model>` | For multi-model OpenAI/Azure parent sessions, choose which model response to continue from. |
284
- | `--base-url <url>` | Point API runs at LiteLLM/Azure/OpenRouter/etc. |
285
- | `--chatgpt-url <url>` | Target a ChatGPT workspace/folder or Temporary Chat URL (browser). |
286
- | `--browser-model-strategy <select\|current\|ignore>` | Control ChatGPT model selection in browser mode (current keeps the active model; ignore skips the picker). |
287
- | `--browser-manual-login` | Skip cookie copy; reuse a persistent automation profile and wait for manual ChatGPT login. |
288
- | `--browser-attach-running` | Reuse your current local browser session through local `DevToolsActivePort` discovery; Oracle opens a dedicated tab instead of launching Chrome (defaults to `127.0.0.1:9222`, or combine with `--remote-chrome <host:port>` to hint a different local endpoint). |
289
- | `--browser-tab <ref>` | Reuse an existing ChatGPT tab by `current`, target id, URL, or title substring instead of opening a new tab. |
290
- | `--browser-thinking-time <light\|standard\|extended\|heavy>` | Set ChatGPT thinking-time intensity (browser; Thinking/Pro models only). |
291
- | `--browser-research deep` | Activate ChatGPT Deep Research for broad web research and cited reports (browser only). |
292
- | `--browser-follow-up <prompt>` | Browser-only multi-turn consult: submit an additional prompt in the same ChatGPT conversation after the initial answer. Repeat for challenge/revision/final-decision passes. Not supported with Deep Research mode. |
293
- | `--browser-archive <auto\|always\|never>` | Archive completed ChatGPT browser conversations after local artifacts are saved. `auto` archives successful one-shot chats only, and skips project, Deep Research, multi-turn, failed, and incomplete sessions. |
294
- | `--browser-attachments <auto\|never\|always>` | Control browser file delivery: `auto` pastes small text files inline and uploads larger bundles, `never` always pastes inline, and `always` uploads files as ChatGPT attachments. |
295
- | `--browser-bundle-files`, `--browser-bundle-format <text\|zip>` | Bundle browser uploads into one attachment. `text` keeps the existing single Markdown-style text bundle; `zip` preserves individual file names inside one ZIP upload. |
296
- | `--browser-port <port>` | Pin the Chrome DevTools port (WSL/Windows firewall helper). |
297
- | `--browser-inline-cookies[(-file)] <payload \| path>` | Supply cookies without Chrome/Keychain (browser). |
298
- | `--browser-timeout`, `--browser-input-timeout` | Control overall/browser input timeouts (supports h/m/s/ms). |
299
- | `--browser-recheck-delay`, `--browser-recheck-timeout` | Delayed recheck for long Pro runs: wait then retry capture after timeout (supports h/m/s/ms). |
300
- | `--heartbeat <seconds>` | Emit API and browser progress heartbeats. Browser mode reports ChatGPT Thinking/Reasoning sidecar liveness metadata when available, without logging reasoning text. |
301
- | `--browser-reuse-wait` | Wait for a shared Chrome profile before launching (parallel browser runs). |
302
- | `--browser-profile-lock-timeout` | Wait for the shared manual-login profile lock before sending (serializes parallel runs). |
303
- | `--browser-max-concurrent-tabs` | Soft limit for simultaneous ChatGPT tabs sharing one manual-login profile (default 3). |
304
- | `--render`, `--copy` | Print and/or copy the assembled markdown bundle. |
305
- | `--wait` | Block for background API runs (e.g., GPT‑5.1 Pro) instead of detaching. |
306
- | `--timeout <seconds\|duration\|auto>` | Overall API deadline (auto = 60m for pro, 120s otherwise; durations like `10m` derive HTTP/stale-session timeouts unless overridden). |
307
- | `--background`, `--no-background` | Force Responses API background mode (create + retrieve) for API runs. |
308
- | `--http-timeout <ms\|s\|m\|h>` | Override the HTTP client timeout; if omitted, explicit `--timeout` values are reused for transport. |
309
- | `--zombie-timeout <ms\|s\|m\|h>` | Override stale-session cutoff used by `oracle status`. |
310
- | `--zombie-last-activity` | Use last log activity to detect stale sessions. |
311
- | `--write-output <path>` | Save only the final answer (multi-model adds `.<model>` and writes `<stem>.oracle.json`). Browser sessions also save transcripts and generated artifacts under `~/.oracle/sessions/<id>/artifacts/`. |
312
- | `--allow-partial`, `--partial <fail\|ok>` | Multi-model failure policy. Default `fail` exits 1 after printing a structured partial summary; `ok` exits 0 when at least one model succeeds. |
313
- | `--preflight` | Check redacted provider readiness for requested API model(s), then exit without creating a session. |
314
- | `--perf-trace`, `--perf-trace-path <path>` | Write startup/first-output timing trace JSON; also accepts `--perf-trace=/tmp/oracle.json`, `ORACLE_PERF_TRACE=1`, or `ORACLE_PERF_TRACE=/tmp/oracle.json`. |
315
- | `--files-report` | Print per-file token usage. |
316
- | `--dry-run [summary\|json\|full]` | Preview without sending. |
317
- | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
318
- | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser), or when combined with `--browser-attach-running` use this host:port as the local attach hint. |
319
- | `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
320
- | `--generate-image <file>` | Generate image and save to file (Gemini browser mode; ChatGPT browser mode saves downloadable image artifacts when present). Extra ChatGPT images save as numbered siblings. |
321
- | `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). For ChatGPT browser mode, attach source images with `--file` and use `--generate-image` for the output path. |
322
- | `--provider openai\|azure\|auto`, `--no-azure`, `--route` | Choose or inspect API provider routing; `openai` / `--no-azure` ignores Azure env/config for the run. |
323
- | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
275
+ | Flag | Purpose |
276
+ | ------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
277
+ | `-p, --prompt <text>` | Required prompt. |
278
+ | `-f, --file <paths...>` | Attach files/dirs (globs + `!` excludes). |
279
+ | `-e, --engine <api\|browser>` | Choose API or browser (browser is experimental). |
280
+ | `-m, --model <name>` | Built-ins (`gpt-5.5-pro` default, `gpt-5.5`, `gpt-5.4-pro`, `gpt-5.4`, `gpt-5.1-pro`, `gpt-5-pro`, `gpt-5.1`, `gpt-5.1-codex`, `gpt-5.2`, `gpt-5.2-instant`, `gpt-5.2-pro`, `gemini-3.1-pro` API-only, `gemini-3-pro`, `claude-4.6-sonnet`, `claude-4.1-opus`) plus any OpenRouter id (e.g., `minimax/minimax-m2`, `openai/gpt-4o-mini`). |
281
+ | `--models <list>` | Comma-separated API models (mix built-ins and OpenRouter ids) for multi-model runs. |
282
+ | `--followup <sessionId\|responseId>` | Continue an OpenAI/Azure Responses API run from a stored oracle session or `resp_...` response id. |
283
+ | `--followup-model <model>` | For multi-model OpenAI/Azure parent sessions, choose which model response to continue from. |
284
+ | `--base-url <url>` | Point API runs at LiteLLM/Azure/OpenRouter/etc. |
285
+ | `--chatgpt-url <url>` | Target a ChatGPT workspace/folder or Temporary Chat URL (browser). |
286
+ | `--browser-model-strategy <select\|current\|ignore>` | Control ChatGPT model selection in browser mode (current keeps the active model; ignore skips the picker). |
287
+ | `--browser-manual-login` | Skip cookie copy; reuse a persistent automation profile and wait for manual ChatGPT login. |
288
+ | `--browser-attach-running` | Reuse your current local browser session through local `DevToolsActivePort` discovery; Oracle opens a dedicated tab instead of launching Chrome (defaults to `127.0.0.1:9222`, or combine with `--remote-chrome <host:port>` to hint a different local endpoint). |
289
+ | `--browser-tab <ref>` | Reuse an existing ChatGPT tab by `current`, target id, URL, or title substring instead of opening a new tab. |
290
+ | `--browser-thinking-time <light\|standard\|extended\|heavy>` | Set ChatGPT thinking-time intensity (browser; Thinking/Pro models only). |
291
+ | `--browser-research deep` | Activate ChatGPT Deep Research for broad web research and cited reports (browser only). |
292
+ | `--browser-follow-up <prompt>` | Browser-only multi-turn consult: submit an additional prompt in the same ChatGPT conversation after the initial answer. Repeat for challenge/revision/final-decision passes. Not supported with Deep Research mode. |
293
+ | `--browser-archive <auto\|always\|never>` | Archive completed ChatGPT browser conversations after local artifacts are saved. `auto` archives successful one-shot chats only, and skips project, Deep Research, multi-turn, failed, and incomplete sessions. |
294
+ | `--browser-attachments <auto\|never\|always>` | Control browser file delivery: `auto` pastes small text files inline and uploads larger bundles, `never` always pastes inline, and `always` uploads files as ChatGPT attachments. |
295
+ | `--browser-bundle-files`, `--browser-bundle-format <text\|zip>` | Bundle browser uploads into one attachment. `text` keeps the existing single Markdown-style text bundle; `zip` preserves individual file names inside one ZIP upload. |
296
+ | `--browser-port <port>` | Pin the Chrome DevTools port (WSL/Windows firewall helper). |
297
+ | `--browser-inline-cookies[(-file)] <payload \| path>` | Supply cookies without Chrome/Keychain (browser). |
298
+ | `--browser-timeout`, `--browser-input-timeout`, `--browser-attachment-timeout` | Control overall/browser input/attachment readiness timeouts (supports h/m/s/ms). |
299
+ | `--browser-recheck-delay`, `--browser-recheck-timeout` | Delayed recheck for long Pro runs: wait then retry capture after timeout (supports h/m/s/ms). |
300
+ | `--heartbeat <seconds>` | Emit API and browser progress heartbeats. Browser mode reports ChatGPT Thinking/Reasoning sidecar liveness metadata when available, without logging reasoning text. |
301
+ | `--browser-reuse-wait` | Wait for a shared Chrome profile before launching (parallel browser runs). |
302
+ | `--browser-profile-lock-timeout` | Wait for the shared manual-login profile lock before sending (serializes parallel runs). |
303
+ | `--browser-max-concurrent-tabs` | Soft limit for simultaneous ChatGPT tabs sharing one manual-login profile (default 3). |
304
+ | `--render`, `--copy` | Print and/or copy the assembled markdown bundle. |
305
+ | `--wait` | Block for background API runs (e.g., GPT‑5.1 Pro) instead of detaching. |
306
+ | `--timeout <seconds\|duration\|auto>` | Overall API deadline (auto = 60m for pro, 120s otherwise; durations like `10m` derive HTTP/stale-session timeouts unless overridden). |
307
+ | `--background`, `--no-background` | Force Responses API background mode (create + retrieve) for API runs. |
308
+ | `--http-timeout <ms\|s\|m\|h>` | Override the HTTP client timeout; if omitted, explicit `--timeout` values are reused for transport. |
309
+ | `--zombie-timeout <ms\|s\|m\|h>` | Override stale-session cutoff used by `oracle status`. |
310
+ | `--zombie-last-activity` | Use last log activity to detect stale sessions. |
311
+ | `--write-output <path>` | Save only the final answer (multi-model adds `.<model>` and writes `<stem>.oracle.json`). Browser sessions also save transcripts and generated artifacts under `~/.oracle/sessions/<id>/artifacts/`. |
312
+ | `--allow-partial`, `--partial <fail\|ok>` | Multi-model failure policy. Default `fail` exits 1 after printing a structured partial summary; `ok` exits 0 when at least one model succeeds. |
313
+ | `--preflight` | Check redacted provider readiness for requested API model(s), then exit without creating a session. |
314
+ | `--perf-trace`, `--perf-trace-path <path>` | Write startup/first-output timing trace JSON; also accepts `--perf-trace=/tmp/oracle.json`, `ORACLE_PERF_TRACE=1`, or `ORACLE_PERF_TRACE=/tmp/oracle.json`. |
315
+ | `--files-report` | Print per-file token usage. |
316
+ | `--dry-run [summary\|json\|full]` | Preview without sending. |
317
+ | `--remote-host`, `--remote-token` | Use a remote `oracle serve` host (browser). |
318
+ | `--remote-chrome <host:port>` | Attach to an existing remote Chrome session (browser), or when combined with `--browser-attach-running` use this host:port as the local attach hint. |
319
+ | `--youtube <url>` | YouTube video URL to analyze (Gemini browser mode). |
320
+ | `--generate-image <file>` | Generate image and save to file (Gemini browser mode; ChatGPT browser mode saves downloadable image artifacts when present). Extra ChatGPT images save as numbered siblings. |
321
+ | `--edit-image <file>` | Edit existing image with `--output` (Gemini browser mode). For ChatGPT browser mode, attach source images with `--file` and use `--generate-image` for the output path. |
322
+ | `--provider openai\|azure\|auto`, `--no-azure`, `--route` | Choose or inspect API provider routing; `openai` / `--no-azure` ignores Azure env/config for the run. |
323
+ | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version` | Target Azure OpenAI endpoints (picks Azure client automatically). |
324
324
 
325
325
  ## Configuration
326
326
 
@@ -345,11 +345,11 @@ When several agents share one manual-login ChatGPT profile, Oracle coordinates b
345
345
 
346
346
  Advanced flags
347
347
 
348
- | Area | Flags |
349
- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
350
- | Browser | `--browser-manual-login`, `--browser-attach-running`, `--browser-thinking-time`, `--browser-research`, `--browser-follow-up`, `--browser-archive`, `--browser-timeout`, `--browser-input-timeout`, `--browser-recheck-delay`, `--browser-recheck-timeout`, `--browser-reuse-wait`, `--browser-profile-lock-timeout`, `--browser-max-concurrent-tabs`, `--browser-auto-reattach-delay`, `--browser-auto-reattach-interval`, `--browser-auto-reattach-timeout`, `--browser-cookie-wait`, `--browser-inline-cookies[(-file)]`, `--browser-attachments`, `--browser-inline-files`, `--browser-bundle-files`, `--browser-bundle-format`, `--browser-keep-browser`, `--browser-headless`, `--browser-hide-window`, `--browser-no-cookie-sync`, `--browser-allow-cookie-errors`, `--browser-chrome-path`, `--browser-cookie-path`, `--chatgpt-url` |
351
- | Run control | `--background`, `--no-background`, `--http-timeout`, `--zombie-timeout`, `--zombie-last-activity` |
352
- | Azure/OpenAI | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version`, `--base-url` |
348
+ | Area | Flags |
349
+ | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
350
+ | Browser | `--browser-manual-login`, `--browser-attach-running`, `--browser-thinking-time`, `--browser-research`, `--browser-follow-up`, `--browser-archive`, `--browser-timeout`, `--browser-input-timeout`, `--browser-attachment-timeout`, `--browser-recheck-delay`, `--browser-recheck-timeout`, `--browser-reuse-wait`, `--browser-profile-lock-timeout`, `--browser-max-concurrent-tabs`, `--browser-auto-reattach-delay`, `--browser-auto-reattach-interval`, `--browser-auto-reattach-timeout`, `--browser-cookie-wait`, `--browser-inline-cookies[(-file)]`, `--browser-attachments`, `--browser-inline-files`, `--browser-bundle-files`, `--browser-bundle-format`, `--browser-keep-browser`, `--browser-headless`, `--browser-hide-window`, `--browser-no-cookie-sync`, `--browser-allow-cookie-errors`, `--browser-chrome-path`, `--browser-cookie-path`, `--chatgpt-url` |
351
+ | Run control | `--background`, `--no-background`, `--http-timeout`, `--zombie-timeout`, `--zombie-last-activity` |
352
+ | Azure/OpenAI | `--azure-endpoint`, `--azure-deployment`, `--azure-api-version`, `--base-url` |
353
353
 
354
354
  Remote browser example
355
355
 
@@ -304,6 +304,7 @@ program
304
304
  .addOption(new Option("--browser-url <url>", `Alias for --chatgpt-url (default ${CHATGPT_URL}).`).hideHelp())
305
305
  .addOption(new Option("--browser-timeout <ms|s|m>", "Maximum time to wait for an answer (default 1200s / 20m).").hideHelp())
306
306
  .addOption(new Option("--browser-input-timeout <ms|s|m>", "Maximum time to wait for the prompt textarea (default 60s).").hideHelp())
307
+ .addOption(new Option("--browser-attachment-timeout <ms|s|m>", "Maximum time to wait for attachment upload/readiness before clicking send (default 45s).").hideHelp())
307
308
  .addOption(new Option("--browser-recheck-delay <ms|s|m|h>", "After an assistant timeout, wait this long then revisit the conversation to retry capture.").hideHelp())
308
309
  .addOption(new Option("--browser-recheck-timeout <ms|s|m|h>", "Time budget for the delayed recheck attempt (default 120s).").hideHelp())
309
310
  .addOption(new Option("--browser-reuse-wait <ms|s|m|h>", "Wait for a shared Chrome profile to appear before launching a new one (helps parallel runs).").hideHelp())
@@ -1096,19 +1097,21 @@ async function runRootCommand(options) {
1096
1097
  Boolean(options.azureEndpoint?.trim()) &&
1097
1098
  engineModels.some((model) => isAzureOpenAICandidateModel(model));
1098
1099
  const explicitApiProviderRequested = providerMode !== "auto" || hasExplicitAzureOption(optionUsesDefault);
1099
- const preferredEngine = options.engine ?? (explicitApiProviderRequested ? undefined : userConfig.engine);
1100
+ const envEnginePreference = (process.env.ORACLE_ENGINE ?? "").trim().toLowerCase();
1101
+ const explicitApiEngineRequested = options.engine === "api" || (!options.engine && envEnginePreference === "api");
1102
+ const configBrowserEngineRequested = userConfig.engine === "browser" && !explicitApiEngineRequested && !explicitApiProviderRequested;
1100
1103
  let engine = resolveEngine({
1101
- engine: preferredEngine,
1104
+ engine: options.engine,
1105
+ configEngine: userConfig.engine,
1102
1106
  browserFlag: options.browser,
1103
1107
  apiProviderRequested: explicitApiProviderRequested,
1104
1108
  env: process.env,
1105
1109
  });
1106
- const envEnginePreference = (process.env.ORACLE_ENGINE ?? "").trim().toLowerCase();
1107
1110
  const browserEngineRequested = options.browser ||
1108
1111
  options.engine === "browser" ||
1109
1112
  Boolean(remoteHost) ||
1110
- (!explicitApiProviderRequested &&
1111
- (userConfig.engine === "browser" || envEnginePreference === "browser"));
1113
+ configBrowserEngineRequested ||
1114
+ (!options.engine && !explicitApiProviderRequested && envEnginePreference === "browser");
1112
1115
  if (azureAutoApiRequested && engine === "browser" && !browserEngineRequested) {
1113
1116
  engine = "api";
1114
1117
  }
@@ -1165,7 +1168,7 @@ async function runRootCommand(options) {
1165
1168
  }
1166
1169
  const resolvedModel = normalizedMultiModels[0] ?? (isGemini ? resolveApiModel(cliModelArg) : resolvedModelCandidate);
1167
1170
  const includesGeminiApiOnly = (normalizedMultiModels.length > 0 ? normalizedMultiModels : [resolvedModel]).some((model) => model === "gemini-3.1-pro");
1168
- if ((userForcedBrowser || userConfig.engine === "browser") && includesGeminiApiOnly) {
1171
+ if (browserExplicitlyRequested && includesGeminiApiOnly) {
1169
1172
  throw new Error("gemini-3.1-pro is API-only today. Use --engine api or switch to gemini-3-pro for Gemini web.");
1170
1173
  }
1171
1174
  if (engine === "browser" && includesGeminiApiOnly) {
@@ -1271,6 +1274,12 @@ async function runRootCommand(options) {
1271
1274
  const getSource = (key) => program.getOptionValueSource?.(key) ?? undefined;
1272
1275
  const { applyBrowserDefaultsFromConfig } = await import("../src/cli/browserDefaults.js");
1273
1276
  applyBrowserDefaultsFromConfig(options, userConfig, getSource);
1277
+ const attachmentTimeoutEnv = process.env.ORACLE_BROWSER_ATTACHMENT_TIMEOUT?.trim();
1278
+ if (attachmentTimeoutEnv &&
1279
+ (getSource("browserAttachmentTimeout") === undefined ||
1280
+ getSource("browserAttachmentTimeout") === "default")) {
1281
+ options.browserAttachmentTimeout = attachmentTimeoutEnv;
1282
+ }
1274
1283
  const sessionMode = engine === "browser" ? "browser" : "api";
1275
1284
  const browserConfig = await (async () => {
1276
1285
  if (sessionMode !== "browser")
File without changes
@@ -182,7 +182,30 @@ function buildModelSelectionExpression(targetModel, strategy) {
182
182
  })
183
183
  );
184
184
 
185
- const button = document.querySelector(BUTTON_SELECTOR);
185
+ const isVisibleElement = (node) => {
186
+ if (!(node instanceof HTMLElement)) return false;
187
+ const rect = node.getBoundingClientRect();
188
+ const style = window.getComputedStyle(node);
189
+ return rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden';
190
+ };
191
+ const looksLikeModelPill = (node) => {
192
+ if (!(node instanceof HTMLElement) || !node.matches('button.__composer-pill')) return false;
193
+ if (!isVisibleElement(node)) return false;
194
+ const label = normalizeText(
195
+ (node.textContent ?? '') + ' ' + (node.getAttribute('aria-label') ?? '') + ' ' + (node.getAttribute('title') ?? '')
196
+ );
197
+ if (!label) return false;
198
+ if (label.includes('click to remove')) return false;
199
+ const modelTokens = ['chatgpt', 'gpt', 'instant', 'thinking', 'pro', 'extended', 'standard', 'heavy', 'light'];
200
+ return modelTokens.some((token) => hasToken(label, token));
201
+ };
202
+ const findModelButton = () => {
203
+ const explicit = document.querySelector(BUTTON_SELECTOR);
204
+ if (explicit) return explicit;
205
+ return Array.from(document.querySelectorAll('button.__composer-pill')).find(looksLikeModelPill) ?? null;
206
+ };
207
+
208
+ const button = findModelButton();
186
209
  if (!button) {
187
210
  return { status: 'button-missing' };
188
211
  }
@@ -222,6 +245,8 @@ function buildModelSelectionExpression(targetModel, strategy) {
222
245
  };
223
246
  const getResolvedLabel = (fallback) =>
224
247
  withProPillSignal(getComposerModelLabel() || getButtonLabel() || fallback);
248
+ const isThinkingEffortLabel = (label) =>
249
+ label === 'extended' || label === 'standard' || label === 'heavy' || label === 'light';
225
250
  if (MODEL_STRATEGY === 'current') {
226
251
  const currentLabel = getResolvedLabel(PRIMARY_LABEL);
227
252
  return {
@@ -233,6 +258,15 @@ function buildModelSelectionExpression(targetModel, strategy) {
233
258
  const normalizedLabel = normalizeText(getButtonLabel());
234
259
  if (!normalizedLabel) return false;
235
260
  if (isTargetGpt55VisibleAlias(normalizedLabel)) return true;
261
+ if (
262
+ wantsThinking &&
263
+ desiredVersion === '5-5' &&
264
+ !hasProComposerPill() &&
265
+ isThinkingEffortLabel(normalizedLabel) &&
266
+ isTargetGpt55VisibleAlias(readComposerModelSignal())
267
+ ) {
268
+ return true;
269
+ }
236
270
  if (
237
271
  wantsPro &&
238
272
  hasProComposerPill() &&
@@ -254,6 +288,14 @@ function buildModelSelectionExpression(targetModel, strategy) {
254
288
  if (wantsPro && labelHasLegacyProVersion(normalizedLabel)) return false;
255
289
  if (wantsPro && !labelHasProWord(normalizedLabel)) return false;
256
290
  if (wantsInstant && !normalizedLabel.includes('instant')) return false;
291
+ if (
292
+ wantsThinking &&
293
+ desiredVersion === '5-4' &&
294
+ !normalizedLabel.includes('pro') &&
295
+ !normalizedLabel.includes('instant')
296
+ ) {
297
+ return true;
298
+ }
257
299
  if (wantsThinking && !normalizedLabel.includes('thinking')) return false;
258
300
  // Also reject if button has variants we DON'T want
259
301
  if (!wantsPro && normalizedLabel.includes(' pro')) return false;
@@ -335,9 +377,6 @@ function buildModelSelectionExpression(targetModel, strategy) {
335
377
  if (dataSelected === 'true' || selectedStates.includes(dataState)) {
336
378
  return true;
337
379
  }
338
- if (node.querySelector('[data-testid*="check"], [role="img"][data-icon="check"], svg[data-icon="check"], .trailing svg')) {
339
- return true;
340
- }
341
380
  return false;
342
381
  };
343
382
 
@@ -348,6 +387,7 @@ function buildModelSelectionExpression(targetModel, strategy) {
348
387
  }
349
388
  let score = 0;
350
389
  const normalizedTestId = (testid ?? '').toLowerCase();
390
+ let exactTestIdMatch = false;
351
391
  if (normalizedTestId) {
352
392
  if (desiredVersion) {
353
393
  // data-testid strings have been observed with both dotted and dashed versions (e.g. gpt-5.2-pro vs gpt-5-2-pro).
@@ -394,6 +434,7 @@ function buildModelSelectionExpression(targetModel, strategy) {
394
434
  // Exact testid matches take priority over substring matches
395
435
  const exactMatch = TEST_IDS.find((id) => id && normalizedTestId === id);
396
436
  if (exactMatch) {
437
+ exactTestIdMatch = true;
397
438
  score += 1500;
398
439
  if (exactMatch.startsWith('model-switcher-')) score += 200;
399
440
  } else {
@@ -410,17 +451,22 @@ function buildModelSelectionExpression(targetModel, strategy) {
410
451
  }
411
452
  const candidateGpt55VisibleAlias = isTargetGpt55VisibleAlias(normalizedText);
412
453
  const candidateHasThinking =
413
- normalizedText.includes('thinking') || normalizedTestId.includes('thinking');
454
+ normalizedText.includes('thinking') ||
455
+ normalizedTestId.includes('thinking') ||
456
+ (wantsThinking && desiredVersion === '5-4' && exactTestIdMatch);
414
457
  const candidateHasLegacyProVersion = labelHasLegacyProVersion(normalizedText);
415
458
  const candidateHasPro =
416
- candidateGpt55VisibleAlias ||
417
459
  labelHasProWord(normalizedText) ||
418
460
  normalizedText.includes('proresearch') ||
419
461
  normalizedTestId.includes('pro');
462
+ const candidateHasInstant =
463
+ normalizedText.includes('instant') || normalizedTestId.includes('instant');
420
464
  if (wantsPro && candidateHasThinking) return 0;
421
465
  if (wantsPro && candidateHasLegacyProVersion) return 0;
422
466
  if (wantsPro && !candidateHasPro) return 0;
467
+ if (wantsInstant && !candidateHasInstant) return 0;
423
468
  if (wantsThinking && candidateHasPro) return 0;
469
+ if (wantsThinking && !candidateHasThinking) return 0;
424
470
  if (desiredVersion === '5-5' && normalizedText && !candidateGpt55VisibleAlias) {
425
471
  const candidateHasVersion =
426
472
  normalizedText.includes('5 5') ||
@@ -486,10 +532,35 @@ function buildModelSelectionExpression(targetModel, strategy) {
486
532
  return Math.max(score, 0);
487
533
  };
488
534
 
535
+ const hasModelSwitcherItem = (node) =>
536
+ Boolean(node?.querySelector?.('[data-testid^="model-switcher-"]'));
537
+ const hasModelLikeMenuText = (node) => {
538
+ const text = normalizeText(node?.textContent ?? '');
539
+ return (
540
+ text.includes('instant') ||
541
+ text.includes('thinking') ||
542
+ labelHasProWord(text) ||
543
+ text.includes('5 5') ||
544
+ text.includes('5 4') ||
545
+ text.includes('5 2') ||
546
+ text.includes('gpt 5') ||
547
+ text.includes('gpt5')
548
+ );
549
+ };
550
+ const queryPickerMenus = () => {
551
+ const menus = Array.from(document.querySelectorAll(${menuContainerLiteral}));
552
+ const pickerMenus = menus.filter(hasModelSwitcherItem);
553
+ if (pickerMenus.length === 0) return menus;
554
+ const textFallbackMenus = menus.filter(
555
+ (menu) => !pickerMenus.includes(menu) && hasModelLikeMenuText(menu),
556
+ );
557
+ return pickerMenus.concat(textFallbackMenus);
558
+ };
559
+
489
560
  const findBestOption = () => {
490
561
  // Walk through every menu item and keep whichever earns the highest score.
491
562
  let bestMatch = null;
492
- const menus = Array.from(document.querySelectorAll(${menuContainerLiteral}));
563
+ const menus = queryPickerMenus();
493
564
  for (const menu of menus) {
494
565
  const buttons = Array.from(menu.querySelectorAll(${menuItemLiteral}));
495
566
  for (const option of buttons) {
@@ -518,6 +589,16 @@ function buildModelSelectionExpression(targetModel, strategy) {
518
589
  resolve('target');
519
590
  return;
520
591
  }
592
+ const currentButtonLabel = normalizeText(getButtonLabel());
593
+ if (
594
+ wantsInstant &&
595
+ desiredVersion === '5-5' &&
596
+ currentButtonLabel === 'instant' &&
597
+ currentButtonLabel !== previousButtonLabel
598
+ ) {
599
+ resolve('target');
600
+ return;
601
+ }
521
602
  if (selectionStateChanged(previousButtonLabel, previousComposerSignal)) {
522
603
  resolve('changed');
523
604
  return;
@@ -545,10 +626,8 @@ function buildModelSelectionExpression(targetModel, strategy) {
545
626
  return body.includes('temporary chat');
546
627
  };
547
628
  const collectAvailableOptions = () => {
548
- const menuRoots = Array.from(document.querySelectorAll(${menuContainerLiteral}));
549
- const nodes = menuRoots.length > 0
550
- ? menuRoots.flatMap((root) => Array.from(root.querySelectorAll(${menuItemLiteral})))
551
- : Array.from(document.querySelectorAll(${menuItemLiteral}));
629
+ const menuRoots = queryPickerMenus();
630
+ const nodes = menuRoots.flatMap((root) => Array.from(root.querySelectorAll(${menuItemLiteral})));
552
631
  const labels = nodes
553
632
  .map((node) => (node?.textContent ?? '').trim())
554
633
  .filter(Boolean)
@@ -556,7 +635,7 @@ function buildModelSelectionExpression(targetModel, strategy) {
556
635
  return labels.slice(0, 12);
557
636
  };
558
637
  const ensureMenuOpen = () => {
559
- const menuOpen = document.querySelector('[role="menu"], [data-radix-collection-root]');
638
+ const menuOpen = queryPickerMenus().length > 0;
560
639
  if (!menuOpen && performance.now() - lastPointerClick > REOPEN_INTERVAL_MS) {
561
640
  pointerClick();
562
641
  }
@@ -574,7 +653,7 @@ function buildModelSelectionExpression(targetModel, strategy) {
574
653
  ensureMenuOpen();
575
654
  const match = findBestOption();
576
655
  if (match) {
577
- if (activeSelectionMatchesTarget()) {
656
+ if (optionIsSelected(match.node) || activeSelectionMatchesTarget()) {
578
657
  closeMenu();
579
658
  resolve({ status: 'already-selected', label: getResolvedLabel(match.label) });
580
659
  return;
@@ -630,7 +709,7 @@ function buildComposerSignalMatchers(targetModel) {
630
709
  return { includesAny: ["thinking"], excludesAny: ["pro"], allowBlank: false };
631
710
  }
632
711
  if (normalized.includes("instant")) {
633
- return { includesAny: [], excludesAny: ["thinking", "pro"], allowBlank: true };
712
+ return { includesAny: ["instant"], excludesAny: ["thinking", "pro"], allowBlank: false };
634
713
  }
635
714
  return { includesAny: [], excludesAny: ["thinking", "pro"], allowBlank: true };
636
715
  }
@@ -673,7 +752,13 @@ function buildModelMatchersLiteral(targetModel) {
673
752
  testIdTokens.add("gpt-5-5-thinking");
674
753
  testIdTokens.add("gpt-5.5-thinking");
675
754
  }
676
- if (!base.includes("pro") && !base.includes("thinking")) {
755
+ if (base.includes("instant")) {
756
+ push("instant", labelTokens);
757
+ testIdTokens.add("model-switcher-gpt-5-5-instant");
758
+ testIdTokens.add("gpt-5-5-instant");
759
+ testIdTokens.add("gpt-5.5-instant");
760
+ }
761
+ if (!base.includes("pro") && !base.includes("thinking") && !base.includes("instant")) {
677
762
  testIdTokens.add("model-switcher-gpt-5-5");
678
763
  }
679
764
  testIdTokens.add("gpt-5-5");