pi-web-access 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,36 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.7.3] - 2026-02-05
6
+
7
+ ### Added
8
+ - Jina Reader fallback for JS-rendered pages. When Readability returns insufficient content (cookie notices, consent walls, SPA shells), the extraction chain now tries Jina Reader (`r.jina.ai`) before falling back to Gemini. Jina handles JavaScript rendering server-side and returns clean markdown. No API key required.
9
+ - JS-render detection heuristic (`isLikelyJSRendered`) produces more specific error messages when pages appear to load content dynamically.
10
+ - Actionable guidance when all extraction methods fail, listing steps to configure Gemini API or use `web_search` instead.
11
+
12
+ ### Changed
13
+ - HTTP fetch headers now mimic Chrome (realistic `User-Agent`, `Sec-Fetch-*`, `Accept-Language`) instead of the default Node.js user agent. Reduces blocks from bot-detection systems.
14
+ - Short Readability output (< 500 chars) is now treated as a content failure, triggering the fallback chain. Previously, a 266-char cookie notice was returned as "successful" content.
15
+ - Extraction fallback order is now: HTTP+Readability → RSC → Jina Reader → Gemini URL Context → Gemini Web → error with guidance.
16
+
17
+ ### Fixed
18
+ - `parseTimestamp` now rejects negative values in colon-separated format (`-1:30`, `1:-30`). Previously only the numeric path (`-90`) rejected negatives, while the colon path computed and returned negative seconds.
19
+
20
+ ## [0.7.2] - 2026-02-03
21
+
22
+ ### Added
23
+ - `model` parameter on `fetch_content` to override the Gemini model per-request (e.g. `model: "gemini-2.5-flash"`)
24
+ - Collapsed TUI results now show a 200-char text preview instead of just the status line
25
+ - LICENSE file (MIT)
26
+
27
+ ### Changed
28
+ - Default Gemini model updated from `gemini-2.5-flash` to `gemini-3-flash-preview` across all API, search, URL context, YouTube, and video paths. Gemini Web gracefully falls back to `gemini-2.5-flash` when the model header isn't available.
29
+ - README rewritten: added tagline, badges, "Why" section, Quick Start, corrected "How It Works" routing order, fixed inaccurate env var precedence claim, added missing `/v/` YouTube format, restored `/search` command docs, collapsible Files table
30
+
31
+ ### Fixed
32
+ - `PERPLEXITY_API_KEY` env var now takes precedence over config file value, matching `GEMINI_API_KEY` behavior and README documentation (was reversed)
33
+ - `package.json` now includes `repository`, `homepage`, `bugs`, and `description` fields (repo link was missing from pi packages site)
34
+
5
35
  ## [0.7.0] - 2026-02-03
6
36
 
7
37
  ### Added
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nico Bailon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -4,14 +4,23 @@
4
4
 
5
5
  # Pi Web Access
6
6
 
7
- An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI or Gemini, fetch and extract content from URLs, clone GitHub repos for local exploration, read PDFs, understand YouTube videos, and analyze local video files.
7
+ **Web search, content extraction, and video understanding for Pi agent. Zero config with Chrome, or bring your own API keys.**
8
+
9
+ [![npm version](https://img.shields.io/npm/v/pi-web-access?style=for-the-badge)](https://www.npmjs.com/package/pi-web-access)
10
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
11
+ [![Platform](https://img.shields.io/badge/Platform-macOS%20%7C%20Linux%20%7C%20Windows*-blue?style=for-the-badge)]()
8
12
 
9
13
  https://github.com/user-attachments/assets/cac6a17a-1eeb-4dde-9818-cdf85d8ea98f
10
14
 
11
- ```typescript
12
- web_search({ query: "TypeScript best practices 2025" })
13
- fetch_content({ url: "https://docs.example.com/guide" })
14
- ```
15
+ ## Why Pi Web Access
16
+
17
+ **Zero Config** — Signed into Google in Chrome? That's it. The extension reads your Chrome session cookies to access Gemini directly. No API keys, no setup, no subscriptions.
18
+
19
+ **Video Understanding** — Point it at a YouTube video or local screen recording and ask questions about what's on screen. Full transcripts, visual descriptions, and frame extraction at exact timestamps.
20
+
21
+ **Smart Fallbacks** — Every capability has a fallback chain. Search tries Perplexity, then Gemini API, then Gemini Web. YouTube tries Gemini Web, then API, then Perplexity. Blocked pages retry through Jina Reader and Gemini extraction. Something always works.
22
+
23
+ **GitHub Cloning** — GitHub URLs are cloned locally instead of scraped. The agent gets real file contents and a local path to explore, not rendered HTML.
15
24
 
16
25
  ## Install
17
26
 
@@ -19,281 +28,183 @@ fetch_content({ url: "https://docs.example.com/guide" })
19
28
  pi install npm:pi-web-access
20
29
  ```
21
30
 
22
- **Zero config if you're signed into Google in Chrome.** The extension reads your Chrome session cookies to access Gemini — no API keys needed. This gives you web search, YouTube video understanding, page extraction fallbacks, and local video analysis for free.
23
-
24
- If you're not signed into Chrome, or want to use a different provider, add API keys to `~/.pi/web-search.json`:
25
-
26
- ```json
27
- { "geminiApiKey": "AIza..." }
28
- ```
31
+ If you're not signed into Chrome, or prefer a different provider, add API keys to `~/.pi/web-search.json`:
29
32
 
30
33
  ```json
31
- { "perplexityApiKey": "pplx-..." }
34
+ {
35
+ "perplexityApiKey": "pplx-...",
36
+ "geminiApiKey": "AIza..."
37
+ }
32
38
  ```
33
39
 
34
- You can configure both. In `auto` mode (default), the extension tries Perplexity first (if configured), then Gemini API, then Gemini Web via Chrome cookies.
35
-
36
- **Requires:** Pi v0.37.3+
40
+ You can configure one or both. In `auto` mode (default), `web_search` tries Perplexity first, then Gemini API, then Gemini Web.
37
41
 
38
- **Optional dependencies** for video frame extraction:
42
+ Optional dependencies for video frame extraction:
39
43
 
40
44
  ```bash
41
45
  brew install ffmpeg # frame extraction, video thumbnails, local video duration
42
- brew install yt-dlp # YouTube frame extraction (stream URL + duration lookup)
46
+ brew install yt-dlp # YouTube stream URLs for frame extraction
43
47
  ```
44
48
 
45
- Without these, video content analysis (transcripts via Gemini) still works. The binaries are only needed for extracting visual frames from videos. `ffprobe` (bundled with ffmpeg) is used for local video duration lookup when sampling frames across an entire video.
46
-
47
- ## Tools
49
+ Without these, video content analysis (transcripts, visual descriptions via Gemini) still works. The binaries are only needed for extracting individual frames as images.
48
50
 
49
- ### web_search
51
+ Requires Pi v0.37.3+.
50
52
 
51
- Search the web via Perplexity AI or Gemini. Returns synthesized answer with source citations.
53
+ ## Quick Start
52
54
 
53
55
  ```typescript
54
- // Single query
55
- web_search({ query: "rust async programming" })
56
+ // Search the web
57
+ web_search({ query: "TypeScript best practices 2025" })
56
58
 
57
- // Multiple queries (batch)
58
- web_search({ queries: ["query 1", "query 2"] })
59
+ // Fetch a page
60
+ fetch_content({ url: "https://docs.example.com/guide" })
59
61
 
60
- // With options
61
- web_search({
62
- query: "latest news",
63
- numResults: 10, // Default: 5, max: 20
64
- recencyFilter: "week", // day, week, month, year
65
- domainFilter: ["github.com"] // Prefix with - to exclude
66
- })
62
+ // Clone a GitHub repo
63
+ fetch_content({ url: "https://github.com/owner/repo" })
67
64
 
68
- // Explicit provider
69
- web_search({ query: "...", provider: "gemini" }) // auto, perplexity, gemini
65
+ // Understand a YouTube video
66
+ fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are shown?" })
70
67
 
71
- // Fetch full page content (async)
72
- web_search({ query: "...", includeContent: true })
68
+ // Analyze a screen recording
69
+ fetch_content({ url: "/path/to/recording.mp4", prompt: "What error appears on screen?" })
73
70
  ```
74
71
 
75
- When `includeContent: true`, sources are fetched in the background. Agent receives notification when ready.
76
-
77
- Provider selection in `auto` mode: Perplexity (if key configured) → Gemini API (if key configured, uses Google Search grounding) → Gemini Web (if signed into Chrome). Gemini API returns structured citations with source mappings. Gemini Web returns markdown with embedded links.
72
+ ## Tools
78
73
 
79
- ### fetch_content
74
+ ### web_search
80
75
 
81
- Fetch URL(s) and extract readable content as markdown.
76
+ Search the web via Perplexity AI or Gemini. Returns a synthesized answer with source citations.
82
77
 
83
78
  ```typescript
84
- // Single URL - returns content directly (also stored for retrieval)
85
- fetch_content({ url: "https://example.com/article" })
86
-
87
- // Multiple URLs - returns summary (content stored for retrieval)
88
- fetch_content({ urls: ["url1", "url2", "url3"] })
89
-
90
- // PDFs - extracted and saved to ~/Downloads/
91
- fetch_content({ url: "https://arxiv.org/pdf/1706.03762" })
92
- // → "PDF extracted and saved to: ~/Downloads/arxiv-170603762.md"
79
+ web_search({ query: "rust async programming" })
80
+ web_search({ queries: ["query 1", "query 2"] })
81
+ web_search({ query: "latest news", numResults: 10, recencyFilter: "week" })
82
+ web_search({ query: "...", domainFilter: ["github.com"] })
83
+ web_search({ query: "...", provider: "gemini" })
84
+ web_search({ query: "...", includeContent: true })
93
85
  ```
94
86
 
95
- **GitHub repos:** GitHub code URLs are automatically detected and cloned locally instead of scraping HTML. The agent gets actual file contents and a local path to explore with `read` and `bash`.
96
-
97
- ```typescript
98
- // Clone a repo - returns structure + README
99
- fetch_content({ url: "https://github.com/owner/repo" })
100
- // "Repository cloned to: /tmp/pi-github-repos/owner/repo"
101
-
102
- // Specific file - returns file contents
103
- fetch_content({ url: "https://github.com/owner/repo/blob/main/src/index.ts" })
87
+ | Parameter | Description |
88
+ |-----------|-------------|
89
+ | `query` / `queries` | Single query or batch of queries |
90
+ | `numResults` | Results per query (default: 5, max: 20) |
91
+ | `recencyFilter` | `day`, `week`, `month`, or `year` |
92
+ | `domainFilter` | Limit to domains (prefix with `-` to exclude) |
93
+ | `provider` | `auto` (default), `perplexity`, or `gemini` |
94
+ | `includeContent` | Fetch full page content from sources in background |
104
95
 
105
- // Directory - returns listing
106
- fetch_content({ url: "https://github.com/owner/repo/tree/main/src" })
107
-
108
- // Force-clone a large repo that exceeds the size threshold
109
- fetch_content({ url: "https://github.com/big/repo", forceClone: true })
110
- ```
111
-
112
- Repos over 350MB get a lightweight API-based view instead of a full clone. Commit SHA URLs are also handled via the API. Clones are cached for the session -- multiple files from the same repo share one clone, but clones are wiped on session change/shutdown and re-cloned as needed.
96
+ ### fetch_content
113
97
 
114
- **YouTube videos:** YouTube URLs are automatically detected and processed via Gemini for full video understanding (visual + audio + transcript). Three-tier fallback:
98
+ Fetch URL(s) and extract readable content as markdown. Automatically detects and handles GitHub repos, YouTube videos, PDFs, local video files, and regular web pages.
115
99
 
116
100
  ```typescript
117
- // Returns transcript with timestamps, visual descriptions, chapter markers
118
- fetch_content({ url: "https://youtube.com/watch?v=dQw4w9WgXcQ" })
119
-
120
- // Ask a specific question about the video
121
- fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported?" })
101
+ fetch_content({ url: "https://example.com/article" })
102
+ fetch_content({ urls: ["url1", "url2", "url3"] })
103
+ fetch_content({ url: "https://github.com/owner/repo" })
104
+ fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are shown?" })
105
+ fetch_content({ url: "/path/to/recording.mp4", prompt: "What error appears on screen?" })
106
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 4 })
122
107
  ```
123
108
 
124
- 1. **Gemini Web** (primary) -- reads your Chrome session cookies. Zero config if you're signed into Google.
125
- 2. **Gemini API** (secondary) -- uses `GEMINI_API_KEY` env var or `geminiApiKey` in config.
126
- 3. **Perplexity** (fallback) -- topic summary when neither Gemini path is available.
127
-
128
- YouTube results include the video thumbnail as an image content part, so the agent receives visual context alongside the transcript.
109
+ | Parameter | Description |
110
+ |-----------|-------------|
111
+ | `url` / `urls` | Single URL/path or multiple URLs |
112
+ | `prompt` | Question to ask about a YouTube video or local video file |
113
+ | `timestamp` | Extract frame(s): single (`"23:41"`), range (`"23:41-25:00"`), or seconds (`"85"`) |
114
+ | `frames` | Number of frames to extract (max 12) |
115
+ | `forceClone` | Clone GitHub repos that exceed the 350MB size threshold |
129
116
 
130
- Handles all YouTube URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`. Playlist-only URLs fall through to normal extraction.
117
+ ### get_search_content
131
118
 
132
- **Local video files:** Pass a file path to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats. Max 50MB (configurable).
119
+ Retrieve stored content from previous searches or fetches. Content over 30,000 chars is truncated in tool responses but stored in full for retrieval here.
133
120
 
134
121
  ```typescript
135
- // Analyze a screen recording
136
- fetch_content({ url: "/path/to/recording.mp4" })
137
-
138
- // Ask about specific content in the video
139
- fetch_content({ url: "./demo.mov", prompt: "What error message appears on screen?" })
122
+ get_search_content({ responseId: "abc123", urlIndex: 0 })
123
+ get_search_content({ responseId: "abc123", url: "https://..." })
124
+ get_search_content({ responseId: "abc123", query: "original query" })
140
125
  ```
141
126
 
142
- Two-tier fallback: Gemini API (needs key, proper Files API with MIME types) → Gemini Web (free, needs Chrome login). File paths are detected by prefix (`/`, `./`, `../`, `file://`). If ffmpeg is installed, a frame from the video is included as a thumbnail image alongside the analysis.
143
-
144
- **Video frame extraction (YouTube + local):** Use `timestamp` and/or `frames` to pull visuals for scanning.
145
-
146
- ```typescript
147
- // Single frame at an exact time
148
- fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })
127
+ ## Capabilities
149
128
 
150
- // Range scan (default 6 frames)
151
- fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })
129
+ ### GitHub repos
152
130
 
153
- // Custom density across a range
154
- fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })
131
+ GitHub URLs are cloned locally instead of scraped. The agent gets real file contents and a local path to explore with `read` and `bash`. Root URLs return the repo tree + README, `/tree/` paths return directory listings, `/blob/` paths return file contents.
155
132
 
156
- // N frames at 5s intervals starting from a single timestamp
157
- fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41", frames: 5 })
133
+ Repos over 350MB get a lightweight API-based view instead of a full clone (override with `forceClone: true`). Commit SHA URLs are handled via the API. Clones are cached for the session and wiped on session change. Private repos require the `gh` CLI.
158
134
 
159
- // Whole-video sampling (no timestamp)
160
- fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })
161
- ```
135
+ ### YouTube videos
162
136
 
163
- The same `timestamp`/`frames` syntax works with local file paths (e.g. `/path/to/video.mp4`).
137
+ YouTube URLs are processed via Gemini for full video understanding — visual descriptions, transcripts with timestamps, and chapter markers. Pass a `prompt` to ask specific questions about the video. Results include the video thumbnail so the agent gets visual context alongside the transcript.
164
138
 
165
- Requirements: YouTube frame extraction needs `yt-dlp` + `ffmpeg`. Local video frames need `ffmpeg` (and `ffprobe`, bundled with ffmpeg, for whole-video sampling).
139
+ Fallback: Gemini Web → Gemini API → Perplexity (text summary only). Handles all URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`.
166
140
 
167
- Common errors include missing binaries, private/age-restricted videos, region blocks, live streams, expired stream URLs (403), and timestamps beyond the video duration.
141
+ ### Local video files
168
142
 
169
- **Gemini extraction fallback:** When Readability fails or a site blocks bot traffic (403, 429), the extension automatically retries via Gemini URL Context (API) or Gemini Web. This handles SPAs, JS-heavy pages, and anti-bot protections that the HTTP pipeline can't.
143
+ Pass a file path (`/`, `./`, `../`, or `file://` prefix) to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats up to 50MB. Pass a `prompt` to ask about specific content. If ffmpeg is installed, a thumbnail frame is included alongside the analysis.
170
144
 
171
- **PDF handling:** When fetching a PDF URL, the extension extracts text and saves it as a markdown file in `~/Downloads/`. The agent can then use `read` to access specific sections without loading 200K+ chars into context.
145
+ Fallback: Gemini API (Files API upload) → Gemini Web.
172
146
 
173
- ### get_search_content
147
+ ### Video frame extraction
174
148
 
175
- Retrieve stored content from previous searches or fetches.
149
+ Use `timestamp` and/or `frames` on any YouTube URL or local video file to extract visual frames as images.
176
150
 
177
151
  ```typescript
178
- // By response ID (from web_search or fetch_content)
179
- get_search_content({ responseId: "abc123", urlIndex: 0 })
180
-
181
- // By URL
182
- get_search_content({ responseId: "abc123", url: "https://..." })
183
-
184
- // By query (for search results)
185
- get_search_content({ responseId: "abc123", query: "original query" })
152
+ fetch_content({ url: "...", timestamp: "23:41" }) // single frame
153
+ fetch_content({ url: "...", timestamp: "23:41-25:00" }) // range, 6 frames
154
+ fetch_content({ url: "...", timestamp: "23:41-25:00", frames: 3 }) // range, custom count
155
+ fetch_content({ url: "...", timestamp: "23:41", frames: 5 }) // 5 frames at 5s intervals
156
+ fetch_content({ url: "...", frames: 6 }) // sample whole video
186
157
  ```
187
158
 
188
- ## Features
159
+ Requires `ffmpeg` (and `yt-dlp` for YouTube). Timestamps accept `H:MM:SS`, `MM:SS`, or bare seconds.
189
160
 
190
- ### Activity Monitor (Ctrl+Shift+W)
161
+ ### PDFs
191
162
 
192
- Toggle live request/response activity:
163
+ PDF URLs are extracted as text and saved to `~/Downloads/` as markdown. The agent can then `read` specific sections without loading the full document into context. Text-based extraction only — no OCR.
193
164
 
194
- ```
195
- ─── Web Search Activity ────────────────────────────────────
196
- API "typescript best practices" 200 2.1s ✓
197
- GET docs.example.com/article 200 0.8s ✓
198
- GET blog.example.com/post 404 0.3s ✗
199
- GET news.example.com/latest ... 1.2s ⋯
200
- ────────────────────────────────────────────────────────────
201
- Rate: 3/10 (resets in 42s)
202
- ```
203
-
204
- ### RSC Content Extraction
165
+ ### Blocked pages
205
166
 
206
- Next.js App Router pages embed content as RSC (React Server Components) flight data in script tags. When Readability fails, the extension parses these JSON payloads directly, reconstructing markdown with headings, tables, code blocks, and links.
167
+ When Readability fails or returns only a cookie notice, the extension retries via Jina Reader (handles JS rendering server-side, no API key needed), then Gemini URL Context API, then Gemini Web extraction. Handles SPAs, JS-heavy pages, and anti-bot protections transparently. Also parses Next.js RSC flight data when present.
207
168
 
208
- ### TUI Rendering
209
-
210
- Tool calls render with real-time progress:
169
+ ## How It Works
211
170
 
212
171
  ```
213
- ┌─ search "TypeScript best practices 2025" ─────────────────────────┐
214
- [████████░░] searching │
215
- └───────────────────────────────────────────────────────────────────┘
172
+ fetch_content(url)
173
+ → Video file? Gemini API (Files API) → Gemini Web
174
+ → GitHub URL? Clone repo, return file contents + local path
175
+ → YouTube URL? Gemini Web → Gemini API → Perplexity
176
+ → HTTP fetch → PDF? Extract text, save to ~/Downloads/
177
+ → HTML? Readability → RSC parser → Jina Reader → Gemini fallback
178
+ → Text/JSON/Markdown? Return directly
216
179
  ```
217
180
 
218
181
  ## Skills
219
182
 
220
- Skills are bundled with the extension and available automatically after install -- no extra setup needed.
221
-
222
183
  ### librarian
223
184
 
224
- Structured research workflow for open-source libraries with evidence-backed answers and GitHub permalinks. Loaded automatically when the task involves understanding library internals, finding implementation details, or tracing code history.
225
-
226
- Combines `fetch_content` (GitHub cloning), `web_search` (recent info), and git operations (blame, log, show). Pi auto-detects when to load it based on your prompt. If you have [pi-skill-palette](https://github.com/nicobailon/pi-skill-palette) installed, you can also load it explicitly via `/skill:librarian`.
185
+ Bundled research workflow for investigating open-source libraries. Combines GitHub cloning, web search, and git operations (blame, log, show) to produce evidence-backed answers with permalinks. Pi loads it automatically based on your prompt. Also available via `/skill:librarian` with [pi-skill-palette](https://github.com/nicobailon/pi-skill-palette).
227
186
 
228
187
  ## Commands
229
188
 
230
189
  ### /search
231
190
 
232
- Browse stored search results interactively.
233
-
234
- ## How It Works
235
-
236
- ### fetch_content routing
191
+ Browse stored search results interactively. Lists all results from the current session with their response IDs for easy retrieval.
237
192
 
238
- ```
239
- fetch_content(url_or_path, prompt?)
240
-
241
- ├── Local video file? ──→ Gemini API → Gemini Web
242
- │ ↓
243
- │ Video analysis (prompt forwarded)
244
-
245
- ├── github.com code URL? ──→ Clone repo (gh/git --depth 1)
246
- │ │
247
- │ ┌───────┼───────┐
248
- │ ↓ ↓ ↓
249
- │ root tree blob
250
- │ ↓ ↓ ↓
251
- │ tree + dir file
252
- │ README listing contents
253
- │ │ │ │
254
- │ └───────┼───────┘
255
- │ ↓
256
- │ Return content + local
257
- │ path for read/bash
258
-
259
- ├── YouTube URL? ──→ Gemini Web → Gemini API → Perplexity
260
- │ ↓ (prompt forwarded)
261
- │ Transcript + visual descriptions
262
-
263
- ├── PDF? ──→ unpdf → Save to ~/Downloads/
264
-
265
- ├── Plain text/markdown/JSON? ──→ Return directly
266
-
267
- └── HTML ──→ Readability → Markdown
268
-
269
- [if fails]
270
-
271
- RSC Parser → Markdown
272
-
273
- [if all fail]
274
-
275
- Gemini URL Context → Gemini Web extraction
276
- ```
193
+ ## Activity Monitor
277
194
 
278
- ### web_search routing
195
+ Toggle with **Ctrl+Shift+W** to see live request/response activity:
279
196
 
280
197
  ```
281
- web_search(query, provider?)
282
-
283
- ├── provider = "perplexity" ──→ Perplexity API
284
- ├── provider = "gemini" ──→ Gemini API → Gemini Web
285
- └── provider = "auto"
286
- ├── Perplexity key? ──→ Perplexity API
287
- ├── Gemini API key? ──→ Gemini API (grounded search)
288
- ├── Chrome cookies? ──→ Gemini Web (grounded search)
289
- └── Error
198
+ ─── Web Search Activity ────────────────────────────────────
199
+ API "typescript best practices" 200 2.1s ✓
200
+ GET docs.example.com/article 200 0.8s ✓
201
+ GET blog.example.com/post 404 0.3s ✗
202
+ ────────────────────────────────────────────────────────────
290
203
  ```
291
204
 
292
- When `includeContent: true`, sources are fetched in the background using the fetch_content routing above, and the agent receives a notification when ready.
293
-
294
205
  ## Configuration
295
206
 
296
- All config lives in `~/.pi/web-search.json`:
207
+ All config lives in `~/.pi/web-search.json`. Every field is optional.
297
208
 
298
209
  ```json
299
210
  {
@@ -308,61 +219,51 @@ All config lives in `~/.pi/web-search.json`:
308
219
  },
309
220
  "youtube": {
310
221
  "enabled": true,
311
- "preferredModel": "gemini-2.5-flash"
222
+ "preferredModel": "gemini-3-flash-preview"
312
223
  },
313
224
  "video": {
314
225
  "enabled": true,
315
- "preferredModel": "gemini-2.5-flash",
226
+ "preferredModel": "gemini-3-flash-preview",
316
227
  "maxSizeMB": 50
317
228
  }
318
229
  }
319
230
  ```
320
231
 
321
- All fields are optional. `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. Set `"enabled": false` under `githubClone`, `youtube`, or `video` to disable those features.
232
+ `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. `searchProvider` sets the `web_search` default: `"auto"`, `"perplexity"`, or `"gemini"`. Set `"enabled": false` under any feature to disable it. Config changes require a Pi restart.
322
233
 
323
- `searchProvider` controls `web_search` default: `"auto"` (Perplexity → Gemini API → Gemini Web), `"perplexity"`, or `"gemini"` (API → Web).
234
+ Rate limits: Perplexity is capped at 10 requests/minute (enforced client-side). Content fetches run up to 3 concurrent requests, with a 30s timeout per URL.
324
235
 
325
- ## Rate Limits
236
+ ## Limitations
326
237
 
327
- - **Perplexity API**: 10 requests/minute (enforced client-side)
328
- - **Content Fetch**: 3 concurrent requests, 30s timeout per URL
329
- - **Cache TTL**: 1 hour
238
+ - Chrome cookie extraction is macOS-only — other platforms fall through to API keys. First-time access may trigger a Keychain dialog.
239
+ - YouTube private/age-restricted videos may fail on all extraction paths.
240
+ - Gemini can process videos up to ~1 hour; longer videos may be truncated.
241
+ - PDFs are text-extracted only (no OCR for scanned documents).
242
+ - GitHub branch names with slashes may misresolve file paths; the clone still works and the agent can navigate manually.
243
+ - Non-code GitHub URLs (issues, PRs, wiki) fall through to normal web extraction.
330
244
 
331
- ## Files
245
+ <details>
246
+ <summary>Files</summary>
332
247
 
333
248
  | File | Purpose |
334
249
  |------|---------|
335
250
  | `index.ts` | Extension entry, tool definitions, commands, widget |
336
- | `perplexity.ts` | Perplexity API client, rate limiting |
337
- | `gemini-search.ts` | Gemini search providers (Web + API with grounding), search routing |
338
- | `extract.ts` | URL/file path routing, HTTP extraction, Gemini fallback orchestration |
251
+ | `extract.ts` | URL/file path routing, HTTP extraction, fallback orchestration |
252
+ | `gemini-search.ts` | Search routing across Perplexity, Gemini API, Gemini Web |
339
253
  | `gemini-url-context.ts` | Gemini URL Context + Web extraction fallbacks |
340
- | `video-extract.ts` | Local video file detection, upload, Gemini Web/API analysis |
341
- | `youtube-extract.ts` | YouTube URL detection, three-tier extraction orchestrator |
342
- | `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
343
254
  | `gemini-web.ts` | Gemini Web client (cookie auth, StreamGenerate) |
344
- | `gemini-api.ts` | Gemini REST API client (generateContent, file upload) |
345
- | `utils.ts` | Shared formatting (`formatSeconds`) and error helpers for frame extraction |
346
- | `github-extract.ts` | GitHub URL parser, clone cache, content generation |
347
- | `github-api.ts` | GitHub API fallback for oversized repos and commit SHAs |
255
+ | `gemini-api.ts` | Gemini REST API client (generateContent) |
256
+ | `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
257
+ | `youtube-extract.ts` | YouTube detection, three-tier extraction, frame extraction |
258
+ | `video-extract.ts` | Local video detection, Files API upload, Gemini analysis |
259
+ | `github-extract.ts` | GitHub URL parsing, clone cache, content generation |
260
+ | `github-api.ts` | GitHub API fallback for large repos and commit SHAs |
261
+ | `perplexity.ts` | Perplexity API client with rate limiting |
348
262
  | `pdf-extract.ts` | PDF text extraction, saves to markdown |
349
263
  | `rsc-extract.ts` | RSC flight data parser for Next.js pages |
264
+ | `utils.ts` | Shared formatting and error helpers |
350
265
  | `storage.ts` | Session-aware result storage |
351
- | `activity.ts` | Activity tracking for observability widget |
352
- | `skills/librarian/` | Bundled skill for library research with permalinks |
353
-
354
- ## Limitations
266
+ | `activity.ts` | Activity tracking for the observability widget |
267
+ | `skills/librarian/` | Bundled skill for library research |
355
268
 
356
- - Content extraction works best on article-style pages; JS-heavy sites fall back to Gemini extraction when available
357
- - Gemini extraction fallback requires either a Gemini API key or Chrome login to Google
358
- - PDFs are extracted as text (no OCR for scanned documents)
359
- - Max response size: 20MB for PDFs, 5MB for HTML
360
- - Max inline content: 30,000 chars per URL (larger content stored for retrieval via get_search_content)
361
- - GitHub cloning requires `gh` CLI for private repos (public repos fall back to `git clone`)
362
- - GitHub branch names with slashes (e.g. `feature/foo`) may resolve the wrong file path; the clone still succeeds and the agent can navigate manually
363
- - Non-code GitHub URLs (issues, PRs, wiki, etc.) fall through to normal Readability extraction
364
- - YouTube extraction via Gemini Web requires macOS (Chrome cookie decryption is OS-specific); other platforms fall through to Gemini API or Perplexity
365
- - YouTube private/age-restricted videos may fail on all paths
366
- - Gemini can process videos up to ~1 hour at default resolution; longer videos may be truncated
367
- - First-time Chrome cookie access may trigger a macOS Keychain permission dialog
368
- - Requires Pi restart after config file changes
269
+ </details>
package/extract.ts CHANGED
@@ -15,6 +15,7 @@ const DEFAULT_TIMEOUT_MS = 30000;
15
15
  const CONCURRENT_LIMIT = 3;
16
16
 
17
17
  const NON_RECOVERABLE_ERRORS = ["Unsupported content type", "Response too large"];
18
+ const MIN_USEFUL_CONTENT = 500;
18
19
 
19
20
  const turndown = new TurndownService({
20
21
  headingStyle: "atx",
@@ -48,13 +49,72 @@ export interface ExtractOptions {
48
49
  prompt?: string;
49
50
  timestamp?: string;
50
51
  frames?: number;
52
+ model?: string;
53
+ }
54
+
55
+ const JINA_READER_BASE = "https://r.jina.ai/";
56
+ const JINA_TIMEOUT_MS = 30000;
57
+
58
/**
 * Fetches `url` through Jina Reader (`r.jina.ai`) and returns the markdown
 * it produces. Used as a fallback when plain HTTP + Readability extraction
 * yields nothing useful (for example JS-rendered pages).
 *
 * @param url - Page to extract; appended verbatim to `JINA_READER_BASE`.
 * @param signal - Optional caller signal. It is combined with an internal
 *   `JINA_TIMEOUT_MS` timeout, so either one cancels the request.
 * @returns The extracted content, or `null` when the request fails, is
 *   cancelled, the response lacks the "Markdown Content:" marker, or the
 *   markdown looks like a failed render. This function never throws.
 */
async function extractWithJinaReader(
  url: string,
  signal?: AbortSignal,
): Promise<ExtractedContent | null> {
  // The markdown body follows this marker in the reader's text response.
  const CONTENT_MARKER = "Markdown Content:";
  // Anything shorter than this is treated as an empty or failed render.
  const MIN_MARKDOWN_CHARS = 100;
  // Placeholder text that indicates the page's JavaScript never rendered.
  const FAILED_RENDER_PREFIXES = ["Loading...", "Please enable JavaScript"];

  const jinaUrl = JINA_READER_BASE + url;
  const activityId = activityMonitor.logStart({ type: "api", query: `jina: ${url}` });

  try {
    const res = await fetch(jinaUrl, {
      headers: {
        "Accept": "text/markdown",
        "X-No-Cache": "true",
      },
      signal: AbortSignal.any([
        AbortSignal.timeout(JINA_TIMEOUT_MS),
        ...(signal ? [signal] : []),
      ]),
    });

    if (!res.ok) {
      activityMonitor.logComplete(activityId, res.status);
      return null;
    }

    const content = await res.text();
    activityMonitor.logComplete(activityId, res.status);

    const markerIndex = content.indexOf(CONTENT_MARKER);
    if (markerIndex < 0) {
      return null;
    }

    // Derive the offset from the marker itself so the two cannot drift apart.
    const markdownPart = content.slice(markerIndex + CONTENT_MARKER.length).trim();

    const looksUnrendered =
      markdownPart.length < MIN_MARKDOWN_CHARS ||
      FAILED_RENDER_PREFIXES.some((prefix) => markdownPart.startsWith(prefix));
    if (looksUnrendered) {
      return null;
    }

    // Prefer a markdown heading; otherwise fall back to the last path segment,
    // and to the full URL when that segment is empty (e.g. a trailing slash).
    const title =
      extractHeadingTitle(markdownPart) ?? (new URL(url).pathname.split("/").pop() || url);
    return { url, title, content: markdownPart, error: null };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // A caller may abort with a custom reason whose message never mentions
    // "abort", so consult the signal as well as the message text.
    const cancelled = Boolean(signal?.aborted) || message.toLowerCase().includes("abort");
    if (cancelled) {
      activityMonitor.logComplete(activityId, 0);
    } else {
      activityMonitor.logError(activityId, message);
    }
    return null;
  }
}
52
112
 
53
113
  function parseTimestamp(ts: string): number | null {
54
114
  const num = Number(ts);
55
115
  if (!isNaN(num) && num >= 0) return Math.floor(num);
56
116
  const parts = ts.split(":").map(Number);
57
- if (parts.some(isNaN)) return null;
117
+ if (parts.some(p => isNaN(p) || p < 0)) return null;
58
118
  if (parts.length === 3) return Math.floor(parts[0] * 3600 + parts[1] * 60 + parts[2]);
59
119
  if (parts.length === 2) return Math.floor(parts[0] * 60 + parts[1]);
60
120
  return null;
@@ -269,7 +329,7 @@ export async function extractContent(
269
329
  const ytInfo = isYouTubeURL(url);
270
330
  if (ytInfo.isYouTube && isYouTubeEnabled()) {
271
331
  try {
272
- const ytResult = await extractYouTube(url, signal, options?.prompt);
332
+ const ytResult = await extractYouTube(url, signal, options?.prompt, options?.model);
273
333
  if (ytResult) return ytResult;
274
334
  } catch {}
275
335
  return {
@@ -285,10 +345,45 @@ export async function extractContent(
285
345
  if (!httpResult.error || signal?.aborted) return httpResult;
286
346
  if (NON_RECOVERABLE_ERRORS.some(prefix => httpResult.error!.startsWith(prefix))) return httpResult;
287
347
 
348
+ const jinaResult = await extractWithJinaReader(url, signal);
349
+ if (jinaResult) return jinaResult;
350
+
288
351
  const geminiResult = await extractWithUrlContext(url, signal)
289
352
  ?? await extractWithGeminiWeb(url, signal);
290
353
 
291
- return geminiResult ?? httpResult;
354
+ if (geminiResult) return geminiResult;
355
+
356
+ const guidance = [
357
+ httpResult.error,
358
+ "",
359
+ "Fallback options:",
360
+ " \u2022 Set GEMINI_API_KEY in ~/.pi/web-search.json",
361
+ " \u2022 Sign into gemini.google.com in Chrome",
362
+ " \u2022 Use web_search to find content about this topic",
363
+ ].join("\n");
364
+ return { ...httpResult, error: guidance };
365
+ }
366
+
367
/**
 * Heuristic check for pages whose real content is injected by JavaScript:
 * the `<body>` carries very little visible text while the document pulls in
 * several scripts.
 *
 * @param html - Raw HTML of the fetched page.
 * @returns `true` when the body text is under 500 characters and the
 *   document contains more than 3 `<script>` tags; `false` otherwise,
 *   including when no `<body>` opening tag is present.
 */
function isLikelyJSRendered(html: string): boolean {
  const MAX_TEXT_CHARS = 500;
  const MIN_SCRIPT_TAGS = 3;

  // The `</body>` end tag may legally be omitted and is missing from
  // truncated responses, so fall back to the end of the document.
  const bodyMatch = html.match(/<body(?:\s[^>]*)?>([\s\S]*?)(?:<\/body>|$)/i);
  if (!bodyMatch) return false;

  const bodyHtml = bodyMatch[1];

  // Drop script/style payloads first so their source is not counted as
  // visible text, then strip remaining tags and collapse whitespace.
  const textContent = bodyHtml
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, "")
    .replace(/\s+/g, " ")
    .trim();

  // Scripts are counted across the whole document (head included); `\b`
  // keeps look-alike tag names such as `<scripted>` out of the count.
  const scriptCount = (html.match(/<script\b/gi) ?? []).length;

  return textContent.length < MAX_TEXT_CHARS && scriptCount > MIN_SCRIPT_TAGS;
}
293
388
 
294
389
  async function extractViaHttp(
@@ -309,8 +404,15 @@ async function extractViaHttp(
309
404
  const response = await fetch(url, {
310
405
  signal: controller.signal,
311
406
  headers: {
312
- "User-Agent": "Mozilla/5.0 (compatible; pi-agent/1.0)",
313
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
407
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
408
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
409
+ "Accept-Language": "en-US,en;q=0.9",
410
+ "Cache-Control": "no-cache",
411
+ "Sec-Fetch-Dest": "document",
412
+ "Sec-Fetch-Mode": "navigate",
413
+ "Sec-Fetch-Site": "none",
414
+ "Sec-Fetch-User": "?1",
415
+ "Upgrade-Insecure-Requests": "1",
314
416
  },
315
417
  });
316
418
 
@@ -394,16 +496,35 @@ async function extractViaHttp(
394
496
  }
395
497
 
396
498
  activityMonitor.logComplete(activityId, response.status);
499
+
500
+ // Provide more specific error message
501
+ const jsRendered = isLikelyJSRendered(text);
502
+ const errorMsg = jsRendered
503
+ ? "Page appears to be JavaScript-rendered (content loads dynamically)"
504
+ : "Could not extract readable content from HTML structure";
505
+
397
506
  return {
398
507
  url,
399
508
  title: "",
400
509
  content: "",
401
- error: "Could not extract readable content",
510
+ error: errorMsg,
402
511
  };
403
512
  }
404
513
 
405
514
  const markdown = turndown.turndown(article.content);
406
515
  activityMonitor.logComplete(activityId, response.status);
516
+
517
+ if (markdown.length < MIN_USEFUL_CONTENT) {
518
+ return {
519
+ url,
520
+ title: article.title || "",
521
+ content: markdown,
522
+ error: isLikelyJSRendered(text)
523
+ ? "Page appears to be JavaScript-rendered (content loads dynamically)"
524
+ : "Extracted content appears incomplete",
525
+ };
526
+ }
527
+
407
528
  return { url, title: article.title || "", content: markdown, error: null };
408
529
  } catch (err) {
409
530
  const message = err instanceof Error ? err.message : String(err);
package/gemini-api.ts CHANGED
@@ -4,7 +4,7 @@ import { join } from "node:path";
4
4
 
5
5
  export const API_BASE = "https://generativelanguage.googleapis.com/v1beta";
6
6
  const CONFIG_PATH = join(homedir(), ".pi", "web-search.json");
7
- export const DEFAULT_MODEL = "gemini-2.5-flash";
7
+ export const DEFAULT_MODEL = "gemini-3-flash-preview";
8
8
 
9
9
  interface GeminiApiConfig {
10
10
  geminiApiKey?: string;
package/gemini-search.ts CHANGED
@@ -123,7 +123,7 @@ async function searchWithGeminiWeb(query: string, options: SearchOptions = {}):
123
123
 
124
124
  try {
125
125
  const text = await queryWithCookies(prompt, cookies, {
126
- model: "gemini-2.5-flash",
126
+ model: "gemini-3-flash-preview",
127
127
  signal: options.signal,
128
128
  timeoutMs: 60000,
129
129
  });
@@ -80,7 +80,7 @@ export async function extractWithGeminiWeb(
80
80
 
81
81
  try {
82
82
  const text = await queryWithCookies(EXTRACTION_PROMPT + url, cookies, {
83
- model: "gemini-2.5-flash",
83
+ model: "gemini-3-flash-preview",
84
84
  signal,
85
85
  timeoutMs: 60000,
86
86
  });
package/index.ts CHANGED
@@ -420,6 +420,9 @@ export default function (pi: ExtensionAPI) {
420
420
  maximum: 12,
421
421
  description: "Number of frames to extract. Use with timestamp range for custom density, with single timestamp to get N frames at 5s intervals, or alone to sample across the entire video. Requires yt-dlp + ffmpeg for YouTube, ffmpeg for local video.",
422
422
  })),
423
+ model: Type.Optional(Type.String({
424
+ description: "Override the Gemini model for video/YouTube analysis (e.g. 'gemini-2.5-flash', 'gemini-3-flash-preview'). Defaults to config or gemini-3-flash-preview.",
425
+ })),
423
426
  }),
424
427
 
425
428
  async execute(_toolCallId, params, signal, onUpdate, _ctx) {
@@ -441,6 +444,7 @@ export default function (pi: ExtensionAPI) {
441
444
  prompt: params.prompt,
442
445
  timestamp: params.timestamp,
443
446
  frames: params.frames,
447
+ model: params.model,
444
448
  });
445
449
  const successful = fetchResults.filter((r) => !r.error).length;
446
450
  const totalChars = fetchResults.reduce((sum, r) => sum + r.content.length, 0);
@@ -527,7 +531,7 @@ export default function (pi: ExtensionAPI) {
527
531
  },
528
532
 
529
533
  renderCall(args, theme) {
530
- const { url, urls, prompt, timestamp, frames } = args as { url?: string; urls?: string[]; prompt?: string; timestamp?: string; frames?: number };
534
+ const { url, urls, prompt, timestamp, frames, model } = args as { url?: string; urls?: string[]; prompt?: string; timestamp?: string; frames?: number; model?: string };
531
535
  const urlList = urls ?? (url ? [url] : []);
532
536
  if (urlList.length === 0) {
533
537
  return new Text(theme.fg("toolTitle", theme.bold("fetch ")) + theme.fg("error", "(no URL)"), 0, 0);
@@ -556,6 +560,9 @@ export default function (pi: ExtensionAPI) {
556
560
  const display = prompt.length > 250 ? prompt.slice(0, 247) + "..." : prompt;
557
561
  lines.push(theme.fg("dim", " prompt: ") + theme.fg("muted", `"${display}"`));
558
562
  }
563
+ if (model) {
564
+ lines.push(theme.fg("dim", " model: ") + theme.fg("warning", model));
565
+ }
559
566
  return new Text(lines.join("\n"), 0, 0);
560
567
  },
561
568
 
@@ -603,8 +610,10 @@ export default function (pi: ExtensionAPI) {
603
610
  if (typeof details?.duration === "number") {
604
611
  statusLine += theme.fg("muted", ` | ${formatSeconds(Math.floor(details.duration))} total`);
605
612
  }
613
+ const textContent = result.content.find((c) => c.type === "text")?.text || "";
606
614
  if (!expanded) {
607
- return new Text(statusLine, 0, 0);
615
+ const brief = textContent.length > 200 ? textContent.slice(0, 200) + "..." : textContent;
616
+ return new Text(statusLine + "\n" + theme.fg("dim", brief), 0, 0);
608
617
  }
609
618
  const lines = [statusLine];
610
619
  if (details?.prompt) {
@@ -617,7 +626,6 @@ export default function (pi: ExtensionAPI) {
617
626
  if (typeof details?.frames === "number") {
618
627
  lines.push(theme.fg("dim", ` frames: ${details.frames}`));
619
628
  }
620
- const textContent = result.content.find((c) => c.type === "text")?.text || "";
621
629
  const preview = textContent.length > 500 ? textContent.slice(0, 500) + "..." : textContent;
622
630
  lines.push(theme.fg("dim", preview));
623
631
  return new Text(lines.join("\n"), 0, 0);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-web-access",
3
- "version": "0.7.1",
3
+ "version": "0.7.3",
4
4
  "description": "Web search, URL fetching, GitHub repo cloning, PDF extraction, YouTube video understanding, and local video analysis for Pi coding agent",
5
5
  "type": "module",
6
6
  "keywords": [
package/perplexity.ts CHANGED
@@ -56,7 +56,7 @@ function loadConfig(): WebSearchConfig {
56
56
 
57
57
  function getApiKey(): string {
58
58
  const config = loadConfig();
59
- const key = config.perplexityApiKey || process.env.PERPLEXITY_API_KEY;
59
+ const key = process.env.PERPLEXITY_API_KEY || config.perplexityApiKey;
60
60
  if (!key) {
61
61
  throw new Error(
62
62
  "Perplexity API key not found. Either:\n" +
@@ -93,7 +93,7 @@ function validateDomainFilter(domains: string[]): string[] {
93
93
 
94
94
  export function isPerplexityAvailable(): boolean {
95
95
  const config = loadConfig();
96
- return Boolean(config.perplexityApiKey || process.env.PERPLEXITY_API_KEY);
96
+ return Boolean(process.env.PERPLEXITY_API_KEY || config.perplexityApiKey);
97
97
  }
98
98
 
99
99
  export async function searchWithPerplexity(query: string, options: SearchOptions = {}): Promise<SearchResponse> {
package/video-extract.ts CHANGED
@@ -46,7 +46,7 @@ interface VideoConfig {
46
46
 
47
47
  const VIDEO_CONFIG_DEFAULTS: VideoConfig = {
48
48
  enabled: true,
49
- preferredModel: "gemini-2.5-flash",
49
+ preferredModel: "gemini-3-flash-preview",
50
50
  maxSizeMB: 50,
51
51
  };
52
52
 
@@ -123,11 +123,12 @@ export async function extractVideo(
123
123
  ): Promise<ExtractedContent | null> {
124
124
  const config = loadVideoConfig();
125
125
  const effectivePrompt = options?.prompt ?? DEFAULT_VIDEO_PROMPT;
126
+ const effectiveModel = options?.model ?? config.preferredModel;
126
127
  const displayName = basename(info.absolutePath);
127
128
  const activityId = activityMonitor.logStart({ type: "fetch", url: `video:${displayName}` });
128
129
 
129
- const result = await tryVideoGeminiApi(info, effectivePrompt, config, signal)
130
- ?? await tryVideoGeminiWeb(info, effectivePrompt, config, signal);
130
+ const result = await tryVideoGeminiApi(info, effectivePrompt, effectiveModel, signal)
131
+ ?? await tryVideoGeminiWeb(info, effectivePrompt, effectiveModel, signal);
131
132
 
132
133
  if (result) {
133
134
  const thumbnail = await extractVideoFrame(info.absolutePath);
@@ -183,7 +184,7 @@ export async function getLocalVideoDuration(filePath: string): Promise<number |
183
184
  async function tryVideoGeminiWeb(
184
185
  info: VideoFileInfo,
185
186
  prompt: string,
186
- config: VideoConfig,
187
+ model: string,
187
188
  signal?: AbortSignal,
188
189
  ): Promise<ExtractedContent | null> {
189
190
  try {
@@ -193,7 +194,7 @@ async function tryVideoGeminiWeb(
193
194
 
194
195
  const text = await queryWithCookies(prompt, cookies, {
195
196
  files: [info.absolutePath],
196
- model: config.preferredModel,
197
+ model,
197
198
  signal,
198
199
  timeoutMs: 180000,
199
200
  });
@@ -212,7 +213,7 @@ async function tryVideoGeminiWeb(
212
213
  async function tryVideoGeminiApi(
213
214
  info: VideoFileInfo,
214
215
  prompt: string,
215
- config: VideoConfig,
216
+ model: string,
216
217
  signal?: AbortSignal,
217
218
  ): Promise<ExtractedContent | null> {
218
219
  const apiKey = getApiKey();
@@ -227,7 +228,7 @@ async function tryVideoGeminiApi(
227
228
  await pollFileState(fileName, apiKey, signal, 120000);
228
229
 
229
230
  const text = await queryGeminiApiWithVideo(prompt, uploaded.uri, {
230
- model: config.preferredModel,
231
+ model,
231
232
  mimeType: info.mimeType,
232
233
  signal,
233
234
  timeoutMs: 120000,
@@ -26,7 +26,7 @@ interface YouTubeConfig {
26
26
  preferredModel: string;
27
27
  }
28
28
 
29
- const defaults: YouTubeConfig = { enabled: true, preferredModel: "gemini-2.5-flash" };
29
+ const defaults: YouTubeConfig = { enabled: true, preferredModel: "gemini-3-flash-preview" };
30
30
  let cachedConfig: YouTubeConfig | null = null;
31
31
 
32
32
  function loadYouTubeConfig(): YouTubeConfig {
@@ -69,6 +69,7 @@ export async function extractYouTube(
69
69
  url: string,
70
70
  signal?: AbortSignal,
71
71
  prompt?: string,
72
+ model?: string,
72
73
  ): Promise<ExtractedContent | null> {
73
74
  const config = loadYouTubeConfig();
74
75
  const { videoId } = isYouTubeURL(url);
@@ -76,11 +77,12 @@ export async function extractYouTube(
76
77
  ? `https://www.youtube.com/watch?v=${videoId}`
77
78
  : url;
78
79
  const effectivePrompt = prompt ?? YOUTUBE_PROMPT;
80
+ const effectiveModel = model ?? config.preferredModel;
79
81
 
80
82
  const activityId = activityMonitor.logStart({ type: "fetch", url: `youtube.com/${videoId ?? "video"}` });
81
83
 
82
- const result = await tryGeminiWeb(canonicalUrl, effectivePrompt, config, signal)
83
- ?? await tryGeminiApi(canonicalUrl, effectivePrompt, config, signal)
84
+ const result = await tryGeminiWeb(canonicalUrl, effectivePrompt, effectiveModel, signal)
85
+ ?? await tryGeminiApi(canonicalUrl, effectivePrompt, effectiveModel, signal)
84
86
  ?? await tryPerplexity(url, effectivePrompt, signal);
85
87
 
86
88
  if (result) {
@@ -190,7 +192,7 @@ export async function fetchYouTubeThumbnail(videoId: string): Promise<{ data: st
190
192
  async function tryGeminiWeb(
191
193
  url: string,
192
194
  prompt: string,
193
- config: YouTubeConfig,
195
+ model: string,
194
196
  signal?: AbortSignal,
195
197
  ): Promise<ExtractedContent | null> {
196
198
  try {
@@ -201,7 +203,7 @@ async function tryGeminiWeb(
201
203
 
202
204
  const text = await queryWithCookies(prompt, cookies, {
203
205
  youtubeUrl: url,
204
- model: config.preferredModel,
206
+ model,
205
207
  signal,
206
208
  timeoutMs: 120000,
207
209
  });
@@ -220,7 +222,7 @@ async function tryGeminiWeb(
220
222
  async function tryGeminiApi(
221
223
  url: string,
222
224
  prompt: string,
223
- config: YouTubeConfig,
225
+ model: string,
224
226
  signal?: AbortSignal,
225
227
  ): Promise<ExtractedContent | null> {
226
228
  try {
@@ -229,7 +231,7 @@ async function tryGeminiApi(
229
231
  if (signal?.aborted) return null;
230
232
 
231
233
  const text = await queryGeminiApiWithVideo(prompt, url, {
232
- model: config.preferredModel,
234
+ model,
233
235
  signal,
234
236
  timeoutMs: 120000,
235
237
  });