pi-web-access 0.5.1 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -1
- package/README.md +140 -25
- package/chrome-cookies.ts +240 -0
- package/extract.ts +266 -27
- package/gemini-api.ts +103 -0
- package/gemini-search.ts +236 -0
- package/gemini-url-context.ts +119 -0
- package/gemini-web.ts +296 -0
- package/index.ts +112 -22
- package/package.json +29 -4
- package/perplexity.ts +7 -2
- package/pi-web-fetch-demo.mp4 +0 -0
- package/rsc-extract.ts +1 -1
- package/skills/librarian/SKILL.md +40 -0
- package/utils.ts +44 -0
- package/video-extract.ts +329 -0
- package/youtube-extract.ts +280 -0

package/CHANGELOG.md CHANGED

@@ -2,7 +2,65 @@
 
 All notable changes to this project will be documented in this file.
 
-## [
+## [0.7.0] - 2026-02-03
+
+### Added
+
+- **Multi-provider web search**: `web_search` now supports Perplexity, Gemini API (with Google Search grounding), and Gemini Web (cookie auth) as search providers. New `provider` parameter (`auto`, `perplexity`, `gemini`) controls selection. In `auto` mode (default): Perplexity → Gemini API → Gemini Web. Backwards-compatible — existing Perplexity users see no change.
+- **Gemini API grounded search**: Structured citations via `groundingMetadata` with source URIs and text-to-source mappings. Google proxy URLs are resolved via HEAD redirects. Configured via `GEMINI_API_KEY` or `geminiApiKey` in config.
+- **Gemini Web search**: Zero-config web search for users signed into Google in Chrome. Prompt instructs Gemini to cite sources; URLs extracted from markdown response.
+- **Gemini extraction fallback**: When `fetch_content` fails (HTTP 403/429, Readability fails, network errors), automatically retries via Gemini URL Context API then Gemini Web extraction. Each has an independent 60s timeout. Handles SPAs, JS-heavy pages, and anti-bot protections.
+- **Local video file analysis**: `fetch_content` accepts file paths to video files (MP4, MOV, WebM, AVI, etc.). Detected by path prefix (`/`, `./`, `../`, `file://`), validated by extension and 50MB limit. Two-tier fallback: Gemini API (resumable upload via Files API with proper MIME types, poll-until-active and cleanup) → Gemini Web (free, cookie auth).
+- **Video prompt parameter**: `fetch_content` gains optional `prompt` parameter for asking specific questions about video content. Threads through YouTube and local video extraction. Without prompt, uses default extraction (transcript + visual descriptions).
+- **Video thumbnails**: YouTube results include the video thumbnail (fetched from `img.youtube.com`). Local video results include a frame extracted via ffmpeg (at ~1 second). Returned as image content parts alongside text — the agent sees the thumbnail as vision context.
+- **Configurable frame extraction**: `frames` parameter (1-12) on `fetch_content` for pulling visual frames from YouTube or local video. Works in five modes: frames alone (sample across entire video), single timestamp (one frame), single+frames (N frames at 5s intervals), range (default 6 frames), range+frames (N frames across the range). Endpoint-inclusive distribution with 5-second minimum spacing.
+- **Video duration in responses**: Frame extraction results include the video duration for context.
+- `searchProvider` config option in `~/.pi/web-search.json` for global provider default
+- `video` config section: `enabled`, `preferredModel`, `maxSizeMB`
+
+### Changed
+
+- `PerplexityResponse` renamed to `SearchResponse` (shared interface for all search providers)
+- Extracted HTTP pipeline from `extractContent` into `extractViaHttp` for cleaner Gemini fallback orchestration
+- `getApiKey()`, `API_BASE`, `DEFAULT_MODEL` exported from `gemini-api.ts` for use by search and URL Context modules
+- `isPerplexityAvailable()` added to `perplexity.ts` as non-throwing API key check
+- Content-type routing in `extract.ts`: only `text/html` and `application/xhtml+xml` go through Readability; all other text types (`text/markdown`, `application/json`, `text/csv`, etc.) returned directly. Fixes the OpenAI cookbook `.md` URL that returned "Untitled (30 chars)".
+- Title extraction for non-HTML content: `extractTextTitle()` pulls from markdown `#`/`##` headings, falls back to URL filename
+- Combined `yt-dlp --print duration -g` call fetches stream URL and duration in a single invocation, reused across all frame extraction paths via `streamInfo` passthrough
+- Shared helpers in `utils.ts` (`formatSeconds`, error mapping) eliminate circular imports and duplication across youtube-extract.ts and video-extract.ts
+
+### Fixed
+
+- `fetch_content` TUI rendered `undefined/undefined URLs` during progress updates (renderResult didn't handle `isPartial`, now shows a progress bar like `web_search` does)
+- RSC extractor produced malformed markdown for `<pre><code>` blocks (backticks inside fenced code blocks) -- extremely common on Next.js documentation pages
+- Multi-URL fetch failures rendered in green "success" color even when 0 URLs succeeded (now red)
+- `web_search` queries parameter described as "parallel" in schema but execution is sequential (changed to "batch"; `urls` correctly remains "parallel")
+- Proper error propagation for frame extraction: missing binaries (yt-dlp, ffmpeg, ffprobe), private/age-restricted/region-blocked videos, expired stream URLs (403), timestamp-exceeds-duration, and timeouts all produce specific user-facing messages instead of silent nulls
+- `isTimeoutError` now detects `execFileSync` timeouts via the `killed` flag (SIGTERM from timeout was previously unrecognized)
+- Float video durations (e.g. 15913.7s from yt-dlp) no longer produce out-of-range timestamps — durations are floored before computing frame positions
+- `parseTimestamp` consistently floors results across both bare-number ("90.5" → 90) and colon ("1:30.5" → 90) paths — previously the colon path returned floats
+- YouTube thumbnail assignment no longer sets `null` on the optional `thumbnail` field when fetch fails (was a type mismatch; now only assigned on success)
+
+### New files
+
+- `gemini-search.ts` -- search routing + Gemini Web/API search providers with grounding
+- `gemini-url-context.ts` -- URL Context API extraction + Gemini Web extraction fallback
+- `video-extract.ts` -- local video file detection, Gemini Web/API analysis with Files API upload
+- `utils.ts` -- shared formatting and error helpers for frame extraction
+
+## [0.6.0] - 2026-02-02
+
+### Added
+
+- YouTube video understanding in `fetch_content` via three-tier fallback chain:
+  - **Gemini Web** (primary): reads Chrome session cookies from macOS Keychain + SQLite, authenticates to gemini.google.com, sends YouTube URL via StreamGenerate endpoint. Full visual + audio understanding with timestamps. Zero config needed if signed into Google in Chrome.
+  - **Gemini API** (secondary): direct REST calls with `GEMINI_API_KEY`. YouTube URLs passed as `file_data.file_uri`. Configure via `GEMINI_API_KEY` env var or `geminiApiKey` in `~/.pi/web-search.json`.
+  - **Perplexity** (fallback): uses existing `searchWithPerplexity` for a topic summary when neither Gemini path is available. Output labeled as "Summary (via Perplexity)" so the agent knows it's not a full transcript.
+- YouTube URL detection for all common formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`
+- Configurable via `~/.pi/web-search.json` under `youtube` key (`enabled`, `preferredModel`)
+- Actionable error messages when extraction fails (directs user to sign into Chrome or set API key)
+- YouTube URLs no longer fall through to HTTP/Readability (which returns garbage); returns error instead
+
+### New files
+
+- `chrome-cookies.ts` -- macOS Chrome cookie extraction using Node builtins (`node:crypto`, `node:sqlite`, `child_process`)
+- `gemini-web.ts` -- Gemini Web client ported from surf's gemini-client.cjs (cookie auth, StreamGenerate, model fallback)
+- `gemini-api.ts` -- Gemini REST API client (generateContent, file upload/processing/cleanup for Phase 2)
+- `youtube-extract.ts` -- YouTube extraction orchestrator with three-tier fallback and activity logging
 
 ## [0.5.1] - 2026-02-02
 
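The `auto` provider order described in the 0.7.0 entry above (Perplexity → Gemini API → Gemini Web) amounts to a short selection routine. The following is a minimal sketch, not the package's actual routing in `gemini-search.ts`; the injected check functions are hypothetical stand-ins for `isPerplexityAvailable()`, `getApiKey()`, and the Chrome cookie lookup named in the changelog.

```typescript
// Sketch of the auto-mode provider fallback order; the checks are placeholders,
// not the package's real availability tests.
type Provider = "perplexity" | "gemini-api" | "gemini-web";

interface ProviderChecks {
  hasPerplexityKey: () => boolean;          // e.g. isPerplexityAvailable()
  hasGeminiApiKey: () => boolean;           // e.g. a key from GEMINI_API_KEY or geminiApiKey
  hasChromeCookies: () => Promise<boolean>; // e.g. getGoogleCookies() found session cookies
}

async function pickSearchProvider(checks: ProviderChecks): Promise<Provider> {
  if (checks.hasPerplexityKey()) return "perplexity";       // 1. Perplexity key configured
  if (checks.hasGeminiApiKey()) return "gemini-api";        // 2. Gemini API key configured
  if (await checks.hasChromeCookies()) return "gemini-web"; // 3. signed into Google in Chrome
  throw new Error("No search provider available");
}
```

Per the README below, the `searchProvider` field in `~/.pi/web-search.json` overrides this default order.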
package/README.md CHANGED

@@ -4,7 +4,9 @@
 
 # Pi Web Access
 
-An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI, fetch and extract content from URLs, clone GitHub repos for local exploration, and
+An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI or Gemini, fetch and extract content from URLs, clone GitHub repos for local exploration, read PDFs, understand YouTube videos, and analyze local video files.
+
+https://github.com/user-attachments/assets/cac6a17a-1eeb-4dde-9818-cdf85d8ea98f
 
 ```typescript
 web_search({ query: "TypeScript best practices 2025" })

@@ -17,31 +19,42 @@ fetch_content({ url: "https://docs.example.com/guide" })
 pi install npm:pi-web-access
 ```
 
-
+**Zero config if you're signed into Google in Chrome.** The extension reads your Chrome session cookies to access Gemini — no API keys needed. This gives you web search, YouTube video understanding, page extraction fallbacks, and local video analysis for free.
 
-
-
-
+If you're not signed into Chrome, or want to use a different provider, add API keys to `~/.pi/web-search.json`:
+
+```json
+{ "geminiApiKey": "AIza..." }
+```
 
-
-
+```json
+{ "perplexityApiKey": "pplx-..." }
 ```
 
-
+You can configure both. In `auto` mode (default), the extension tries Perplexity first (if configured), then Gemini API, then Gemini Web via Chrome cookies.
 
 **Requires:** Pi v0.37.3+
 
+**Optional dependencies** for video frame extraction:
+
+```bash
+brew install ffmpeg   # frame extraction, video thumbnails, local video duration
+brew install yt-dlp   # YouTube frame extraction (stream URL + duration lookup)
+```
+
+Without these, video content analysis (transcripts via Gemini) still works. The binaries are only needed for extracting visual frames from videos. `ffprobe` (bundled with ffmpeg) is used for local video duration lookup when sampling frames across an entire video.
+
 ## Tools
 
 ### web_search
 
-Search the web via Perplexity AI. Returns synthesized answer with source citations.
+Search the web via Perplexity AI or Gemini. Returns synthesized answer with source citations.
 
 ```typescript
 // Single query
 web_search({ query: "rust async programming" })
 
-// Multiple queries (
+// Multiple queries (batch)
 web_search({ queries: ["query 1", "query 2"] })
 
 // With options

@@ -52,12 +65,17 @@ web_search({
   domainFilter: ["github.com"] // Prefix with - to exclude
 })
 
+// Explicit provider
+web_search({ query: "...", provider: "gemini" }) // auto, perplexity, gemini
+
 // Fetch full page content (async)
 web_search({ query: "...", includeContent: true })
 ```
 
 When `includeContent: true`, sources are fetched in the background. Agent receives notification when ready.
 
+Provider selection in `auto` mode: Perplexity (if key configured) → Gemini API (if key configured, uses Google Search grounding) → Gemini Web (if signed into Chrome). Gemini API returns structured citations with source mappings. Gemini Web returns markdown with embedded links.
+
 ### fetch_content
 
 Fetch URL(s) and extract readable content as markdown.

@@ -93,6 +111,63 @@ fetch_content({ url: "https://github.com/big/repo", forceClone: true })
 
 Repos over 350MB get a lightweight API-based view instead of a full clone. Commit SHA URLs are also handled via the API. Clones are cached for the session -- multiple files from the same repo share one clone, but clones are wiped on session change/shutdown and re-cloned as needed.
 
+**YouTube videos:** YouTube URLs are automatically detected and processed via Gemini for full video understanding (visual + audio + transcript). Three-tier fallback:
+
+```typescript
+// Returns transcript with timestamps, visual descriptions, chapter markers
+fetch_content({ url: "https://youtube.com/watch?v=dQw4w9WgXcQ" })
+
+// Ask a specific question about the video
+fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported?" })
+```
+
+1. **Gemini Web** (primary) -- reads your Chrome session cookies. Zero config if you're signed into Google.
+2. **Gemini API** (secondary) -- uses `GEMINI_API_KEY` env var or `geminiApiKey` in config.
+3. **Perplexity** (fallback) -- topic summary when neither Gemini path is available.
+
+YouTube results include the video thumbnail as an image content part, so the agent receives visual context alongside the transcript.
+
+Handles all YouTube URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`. Playlist-only URLs fall through to normal extraction.
+
+**Local video files:** Pass a file path to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats. Max 50MB (configurable).
+
+```typescript
+// Analyze a screen recording
+fetch_content({ url: "/path/to/recording.mp4" })
+
+// Ask about specific content in the video
+fetch_content({ url: "./demo.mov", prompt: "What error message appears on screen?" })
+```
+
+Two-tier fallback: Gemini API (needs key, proper Files API with MIME types) → Gemini Web (free, needs Chrome login). File paths are detected by prefix (`/`, `./`, `../`, `file://`). If ffmpeg is installed, a frame from the video is included as a thumbnail image alongside the analysis.
+
+**Video frame extraction (YouTube + local):** Use `timestamp` and/or `frames` to pull visuals for scanning.
+
+```typescript
+// Single frame at an exact time
+fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })
+
+// Range scan (default 6 frames)
+fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })
+
+// Custom density across a range
+fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })
+
+// N frames at 5s intervals starting from a single timestamp
+fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41", frames: 5 })
+
+// Whole-video sampling (no timestamp)
+fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })
+```
+
+The same `timestamp`/`frames` syntax works with local file paths (e.g. `/path/to/video.mp4`).
+
+Requirements: YouTube frame extraction needs `yt-dlp` + `ffmpeg`. Local video frames need `ffmpeg` (and `ffprobe`, bundled with ffmpeg, for whole-video sampling).
+
+Common errors include missing binaries, private/age-restricted videos, region blocks, live streams, expired stream URLs (403), and timestamps beyond the video duration.
+
+**Gemini extraction fallback:** When Readability fails or a site blocks bot traffic (403, 429), the extension automatically retries via Gemini URL Context (API) or Gemini Web. This handles SPAs, JS-heavy pages, and anti-bot protections that the HTTP pipeline can't.
+
 **PDF handling:** When fetching a PDF URL, the extension extracts text and saves it as a markdown file in `~/Downloads/`. The agent can then use `read` to access specific sections without loading 200K+ chars into context.
 
 ### get_search_content

@@ -161,7 +236,11 @@ Browse stored search results interactively.
 ### fetch_content routing
 
 ```
-fetch_content(
+fetch_content(url_or_path, prompt?)
+│
+├── Local video file? ──→ Gemini API → Gemini Web
+│                              ↓
+│                    Video analysis (prompt forwarded)
 │
 ├── github.com code URL? ──→ Clone repo (gh/git --depth 1)
 │                              │

@@ -177,30 +256,41 @@ fetch_content(url)
 │                    Return content + local
 │                    path for read/bash
 │
+├── YouTube URL? ──→ Gemini Web → Gemini API → Perplexity
+│                              ↓ (prompt forwarded)
+│                    Transcript + visual descriptions
+│
 ├── PDF? ──→ unpdf → Save to ~/Downloads/
 │
-├── Plain text? ──→ Return directly
+├── Plain text/markdown/JSON? ──→ Return directly
 │
 └── HTML ──→ Readability → Markdown
                  │
             [if fails]
                  ↓
         RSC Parser → Markdown
+                 │
+           [if all fail]
+                 ↓
+   Gemini URL Context → Gemini Web extraction
 ```
 
-### web_search
+### web_search routing
 
 ```
-
-
-
-
-
-
-
-
+web_search(query, provider?)
+│
+├── provider = "perplexity" ──→ Perplexity API
+├── provider = "gemini" ──→ Gemini API → Gemini Web
+└── provider = "auto"
+      ├── Perplexity key? ──→ Perplexity API
+      ├── Gemini API key? ──→ Gemini API (grounded search)
+      ├── Chrome cookies? ──→ Gemini Web (grounded search)
+      └── Error
 ```
 
+When `includeContent: true`, sources are fetched in the background using the fetch_content routing above, and the agent receives a notification when ready.
+
 ## Configuration
 
 All config lives in `~/.pi/web-search.json`:

@@ -208,16 +298,29 @@ All config lives in `~/.pi/web-search.json`:
 ```json
 {
   "perplexityApiKey": "pplx-...",
+  "geminiApiKey": "AIza...",
+  "searchProvider": "auto",
   "githubClone": {
     "enabled": true,
     "maxRepoSizeMB": 350,
     "cloneTimeoutSeconds": 30,
     "clonePath": "/tmp/pi-github-repos"
+  },
+  "youtube": {
+    "enabled": true,
+    "preferredModel": "gemini-2.5-flash"
+  },
+  "video": {
+    "enabled": true,
+    "preferredModel": "gemini-2.5-flash",
+    "maxSizeMB": 50
   }
 }
 ```
 
-All
+All fields are optional. `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. Set `"enabled": false` under `githubClone`, `youtube`, or `video` to disable those features.
+
+`searchProvider` controls `web_search` default: `"auto"` (Perplexity → Gemini API → Gemini Web), `"perplexity"`, or `"gemini"` (API → Web).
 
 ## Rate Limits

@@ -231,7 +334,15 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
 |------|---------|
 | `index.ts` | Extension entry, tool definitions, commands, widget |
 | `perplexity.ts` | Perplexity API client, rate limiting |
-| `
+| `gemini-search.ts` | Gemini search providers (Web + API with grounding), search routing |
+| `extract.ts` | URL/file path routing, HTTP extraction, Gemini fallback orchestration |
+| `gemini-url-context.ts` | Gemini URL Context + Web extraction fallbacks |
+| `video-extract.ts` | Local video file detection, upload, Gemini Web/API analysis |
+| `youtube-extract.ts` | YouTube URL detection, three-tier extraction orchestrator |
+| `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
+| `gemini-web.ts` | Gemini Web client (cookie auth, StreamGenerate) |
+| `gemini-api.ts` | Gemini REST API client (generateContent, file upload) |
+| `utils.ts` | Shared formatting (`formatSeconds`) and error helpers for frame extraction |
 | `github-extract.ts` | GitHub URL parser, clone cache, content generation |
 | `github-api.ts` | GitHub API fallback for oversized repos and commit SHAs |
 | `pdf-extract.ts` | PDF text extraction, saves to markdown |

@@ -242,12 +353,16 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
 
 ## Limitations
 
-- Content extraction works best on article-style pages
-
+- Content extraction works best on article-style pages; JS-heavy sites fall back to Gemini extraction when available
+- Gemini extraction fallback requires either a Gemini API key or Chrome login to Google
 - PDFs are extracted as text (no OCR for scanned documents)
 - Max response size: 20MB for PDFs, 5MB for HTML
 - Max inline content: 30,000 chars per URL (larger content stored for retrieval via get_search_content)
 - GitHub cloning requires `gh` CLI for private repos (public repos fall back to `git clone`)
 - GitHub branch names with slashes (e.g. `feature/foo`) may resolve the wrong file path; the clone still succeeds and the agent can navigate manually
 - Non-code GitHub URLs (issues, PRs, wiki, etc.) fall through to normal Readability extraction
+- YouTube extraction via Gemini Web requires macOS (Chrome cookie decryption is OS-specific); other platforms fall through to Gemini API or Perplexity
+- YouTube private/age-restricted videos may fail on all paths
+- Gemini can process videos up to ~1 hour at default resolution; longer videos may be truncated
+- First-time Chrome cookie access may trigger a macOS Keychain permission dialog
 - Requires Pi restart after config file changes
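The fetch_content fallback documented above (HTTP/Readability first, then Gemini URL Context, then Gemini Web, with each Gemini step on its own 60s timeout) reduces to a small orchestration routine. The following is a minimal sketch, not the package's actual `extract.ts` code; the three extractor functions are injected placeholders, though the changelog does name the HTTP step `extractViaHttp`.

```typescript
// Sketch of the extraction fallback chain; the extractors are placeholders.
type Extractor = (url: string) => Promise<string>;

// Reject a promise if it does not settle within ms milliseconds.
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms);
  });
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

async function extractWithFallback(
  url: string,
  extractViaHttp: Extractor,       // HTTP + Readability pipeline
  extractViaUrlContext: Extractor, // Gemini URL Context API (needs an API key)
  extractViaGeminiWeb: Extractor,  // Gemini Web (Chrome session cookies)
): Promise<string> {
  try {
    // Normal path; throws on HTTP 403/429, Readability failure, or network errors.
    return await extractViaHttp(url);
  } catch {
    try {
      return await withTimeout(extractViaUrlContext(url), 60_000);
    } catch {
      return await withTimeout(extractViaGeminiWeb(url), 60_000);
    }
  }
}
```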
package/chrome-cookies.ts ADDED

@@ -0,0 +1,240 @@
+import { execFile } from "node:child_process";
+import { pbkdf2Sync, createDecipheriv } from "node:crypto";
+import { copyFileSync, existsSync, mkdtempSync, rmSync } from "node:fs";
+import { tmpdir, homedir, platform } from "node:os";
+import { join } from "node:path";
+
+export type CookieMap = Record<string, string>;
+
+const GOOGLE_ORIGINS = [
+  "https://gemini.google.com",
+  "https://accounts.google.com",
+  "https://www.google.com",
+];
+
+const ALL_COOKIE_NAMES = new Set([
+  "__Secure-1PSID",
+  "__Secure-1PSIDTS",
+  "__Secure-1PSIDCC",
+  "__Secure-1PAPISID",
+  "NID",
+  "AEC",
+  "SOCS",
+  "__Secure-BUCKET",
+  "__Secure-ENID",
+  "SID",
+  "HSID",
+  "SSID",
+  "APISID",
+  "SAPISID",
+  "__Secure-3PSID",
+  "__Secure-3PSIDTS",
+  "__Secure-3PAPISID",
+  "SIDCC",
+]);
+
+const CHROME_COOKIES_PATH = join(
+  homedir(),
+  "Library/Application Support/Google/Chrome/Default/Cookies",
+);
+
+export async function getGoogleCookies(): Promise<{ cookies: CookieMap; warnings: string[] } | null> {
+  if (platform() !== "darwin") return null;
+  if (!existsSync(CHROME_COOKIES_PATH)) return null;
+
+  const warnings: string[] = [];
+
+  const password = await readKeychainPassword();
+  if (!password) {
+    warnings.push("Could not read Chrome Safe Storage password from Keychain");
+    return { cookies: {}, warnings };
+  }
+
+  const key = pbkdf2Sync(password, "saltysalt", 1003, 16, "sha1");
+  const tempDir = mkdtempSync(join(tmpdir(), "pi-chrome-cookies-"));
+
+  try {
+    const tempDb = join(tempDir, "Cookies");
+    copyFileSync(CHROME_COOKIES_PATH, tempDb);
+    copySidecar(CHROME_COOKIES_PATH, tempDb, "-wal");
+    copySidecar(CHROME_COOKIES_PATH, tempDb, "-shm");
+
+    const metaVersion = await readMetaVersion(tempDb);
+    const stripHash = metaVersion >= 24;
+
+    const hosts = GOOGLE_ORIGINS.map((o) => new URL(o).hostname);
+    const rows = await queryCookieRows(tempDb, hosts);
+    if (!rows) {
+      warnings.push("Failed to query Chrome cookie database");
+      return { cookies: {}, warnings };
+    }
+
+    const cookies: CookieMap = {};
+    for (const row of rows) {
+      const name = row.name as string;
+      if (!ALL_COOKIE_NAMES.has(name)) continue;
+      if (cookies[name]) continue;
+
+      let value = typeof row.value === "string" && row.value.length > 0 ? row.value : null;
+      if (!value) {
+        const encrypted = row.encrypted_value;
+        if (encrypted instanceof Uint8Array) {
+          value = decryptCookieValue(encrypted, key, stripHash);
+        }
+      }
+      if (value) cookies[name] = value;
+    }
+
+    return { cookies, warnings };
+  } finally {
+    rmSync(tempDir, { recursive: true, force: true });
+  }
+}
+
+function decryptCookieValue(encrypted: Uint8Array, key: Buffer, stripHash: boolean): string | null {
+  const buf = Buffer.from(encrypted);
+  if (buf.length < 3) return null;
+
+  const prefix = buf.subarray(0, 3).toString("utf8");
+  if (!/^v\d\d$/.test(prefix)) return null;
+
+  const ciphertext = buf.subarray(3);
+  if (!ciphertext.length) return "";
+
+  try {
+    const iv = Buffer.alloc(16, 0x20);
+    const decipher = createDecipheriv("aes-128-cbc", key, iv);
+    decipher.setAutoPadding(false);
+    const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
+    const unpadded = removePkcs7Padding(plaintext);
+    const bytes = stripHash && unpadded.length >= 32 ? unpadded.subarray(32) : unpadded;
+    const decoded = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
+    let i = 0;
+    while (i < decoded.length && decoded.charCodeAt(i) < 0x20) i++;
+    return decoded.slice(i);
+  } catch {
+    return null;
+  }
+}
+
+function removePkcs7Padding(buf: Buffer): Buffer {
+  if (!buf.length) return buf;
+  const padding = buf[buf.length - 1];
+  if (!padding || padding > 16) return buf;
+  return buf.subarray(0, buf.length - padding);
+}
+
+function readKeychainPassword(): Promise<string | null> {
+  return new Promise((resolve) => {
+    execFile(
+      "security",
+      ["find-generic-password", "-w", "-a", "Chrome", "-s", "Chrome Safe Storage"],
+      { timeout: 5000 },
+      (err, stdout) => {
+        if (err) { resolve(null); return; }
+        resolve(stdout.trim() || null);
+      },
+    );
+  });
+}
+
+let sqliteModule: typeof import("node:sqlite") | null = null;
+
+async function importSqlite(): Promise<typeof import("node:sqlite") | null> {
+  if (sqliteModule) return sqliteModule;
+  const orig = process.emitWarning.bind(process);
+  process.emitWarning = ((warning: string | Error, ...args: unknown[]) => {
+    const msg = typeof warning === "string" ? warning : warning?.message ?? "";
+    if (msg.includes("SQLite is an experimental feature")) return;
+    return (orig as Function)(warning, ...args);
+  }) as typeof process.emitWarning;
+  try {
+    sqliteModule = await import("node:sqlite");
+    return sqliteModule;
+  } catch {
+    return null;
+  } finally {
+    process.emitWarning = orig;
+  }
+}
+
+function supportsReadBigInts(): boolean {
+  const [major, minor] = process.versions.node.split(".").map(Number);
+  if (major > 24) return true;
+  if (major < 24) return false;
+  return minor >= 4;
+}
+
+async function readMetaVersion(dbPath: string): Promise<number> {
+  const sqlite = await importSqlite();
+  if (!sqlite) return 0;
+  const opts: Record<string, unknown> = { readOnly: true };
+  if (supportsReadBigInts()) opts.readBigInts = true;
+  const db = new sqlite.DatabaseSync(dbPath, opts);
+  try {
+    const rows = db.prepare("SELECT value FROM meta WHERE key = 'version'").all() as Array<Record<string, unknown>>;
+    const val = rows[0]?.value;
+    if (typeof val === "number") return Math.floor(val);
+    if (typeof val === "bigint") return Number(val);
+    if (typeof val === "string") return parseInt(val, 10) || 0;
+    return 0;
+  } catch {
+    return 0;
+  } finally {
+    db.close();
+  }
+}
+
+async function queryCookieRows(
+  dbPath: string,
+  hosts: string[],
+): Promise<Array<Record<string, unknown>> | null> {
+  const sqlite = await importSqlite();
+  if (!sqlite) return null;
+
+  const clauses: string[] = [];
+  for (const host of hosts) {
+    for (const candidate of expandHosts(host)) {
+      const esc = candidate.replaceAll("'", "''");
+      clauses.push(`host_key = '${esc}'`);
+      clauses.push(`host_key = '.${esc}'`);
+      clauses.push(`host_key LIKE '%.${esc}'`);
+    }
+  }
+  const where = clauses.join(" OR ");
+
+  const opts: Record<string, unknown> = { readOnly: true };
+  if (supportsReadBigInts()) opts.readBigInts = true;
+  const db = new sqlite.DatabaseSync(dbPath, opts);
+  try {
+    return db
+      .prepare(
+        `SELECT name, value, host_key, encrypted_value FROM cookies WHERE (${where}) ORDER BY expires_utc DESC`,
+      )
+      .all() as Array<Record<string, unknown>>;
+  } catch {
+    return null;
+  } finally {
+    db.close();
+  }
+}
+
+function expandHosts(host: string): string[] {
+  const parts = host.split(".").filter(Boolean);
+  if (parts.length <= 1) return [host];
+  const candidates = new Set<string>();
+  candidates.add(host);
+  for (let i = 1; i <= parts.length - 2; i++) {
+    const c = parts.slice(i).join(".");
+    if (c) candidates.add(c);
+  }
+  return Array.from(candidates);
+}
+
+function copySidecar(srcDb: string, targetDb: string, suffix: string): void {
+  const sidecar = `${srcDb}${suffix}`;
+  if (!existsSync(sidecar)) return;
+  try {
+    copyFileSync(sidecar, `${targetDb}${suffix}`);
+  } catch {}
+}