pi-web-access 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +69 -1
- package/README.md +147 -23
- package/chrome-cookies.ts +240 -0
- package/extract.ts +266 -27
- package/gemini-api.ts +103 -0
- package/gemini-search.ts +236 -0
- package/gemini-url-context.ts +119 -0
- package/gemini-web.ts +296 -0
- package/github-api.ts +3 -2
- package/index.ts +113 -23
- package/package.json +4 -2
- package/perplexity.ts +7 -2
- package/pi-web-fetch-demo.mp4 +0 -0
- package/rsc-extract.ts +1 -1
- package/skills/librarian/SKILL.md +195 -0
- package/utils.ts +44 -0
- package/video-extract.ts +329 -0
- package/youtube-extract.ts +280 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,75 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
## [
|
|
5
|
+
## [0.7.0] - 2026-02-03
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Multi-provider web search**: `web_search` now supports Perplexity, Gemini API (with Google Search grounding), and Gemini Web (cookie auth) as search providers. New `provider` parameter (`auto`, `perplexity`, `gemini`) controls selection. In `auto` mode (default): Perplexity → Gemini API → Gemini Web. Backwards-compatible — existing Perplexity users see no change.
|
|
9
|
+
- **Gemini API grounded search**: Structured citations via `groundingMetadata` with source URIs and text-to-source mappings. Google proxy URLs are resolved via HEAD redirects. Configured via `GEMINI_API_KEY` or `geminiApiKey` in config.
|
|
10
|
+
- **Gemini Web search**: Zero-config web search for users signed into Google in Chrome. Prompt instructs Gemini to cite sources; URLs extracted from markdown response.
|
|
11
|
+
- **Gemini extraction fallback**: When `fetch_content` fails (HTTP 403/429, Readability fails, network errors), automatically retries via Gemini URL Context API then Gemini Web extraction. Each has an independent 60s timeout. Handles SPAs, JS-heavy pages, and anti-bot protections.
|
|
12
|
+
- **Local video file analysis**: `fetch_content` accepts file paths to video files (MP4, MOV, WebM, AVI, etc.). Detected by path prefix (`/`, `./`, `../`, `file://`), validated by extension and 50MB limit. Two-tier fallback: Gemini API (resumable upload via Files API with proper MIME types, poll-until-active and cleanup) → Gemini Web (free, cookie auth).
|
|
13
|
+
- **Video prompt parameter**: `fetch_content` gains optional `prompt` parameter for asking specific questions about video content. Threads through YouTube and local video extraction. Without prompt, uses default extraction (transcript + visual descriptions).
|
|
14
|
+
- **Video thumbnails**: YouTube results include the video thumbnail (fetched from `img.youtube.com`). Local video results include a frame extracted via ffmpeg (at ~1 second). Returned as image content parts alongside text — the agent sees the thumbnail as vision context.
|
|
15
|
+
- **Configurable frame extraction**: `frames` parameter (1-12) on `fetch_content` for pulling visual frames from YouTube or local video. Works in five modes: frames alone (sample across entire video), single timestamp (one frame), single+frames (N frames at 5s intervals), range (default 6 frames), range+frames (N frames across the range). Endpoint-inclusive distribution with 5-second minimum spacing.
|
|
16
|
+
- **Video duration in responses**: Frame extraction results include the video duration for context.
|
|
17
|
+
- `searchProvider` config option in `~/.pi/web-search.json` for global provider default
|
|
18
|
+
- `video` config section: `enabled`, `preferredModel`, `maxSizeMB`
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
- `PerplexityResponse` renamed to `SearchResponse` (shared interface for all search providers)
|
|
22
|
+
- Extracted HTTP pipeline from `extractContent` into `extractViaHttp` for cleaner Gemini fallback orchestration
|
|
23
|
+
- `getApiKey()`, `API_BASE`, `DEFAULT_MODEL` exported from `gemini-api.ts` for use by search and URL Context modules
|
|
24
|
+
- `isPerplexityAvailable()` added to `perplexity.ts` as non-throwing API key check
|
|
25
|
+
- Content-type routing in `extract.ts`: only `text/html` and `application/xhtml+xml` go through Readability; all other text types (`text/markdown`, `application/json`, `text/csv`, etc.) returned directly. Fixes the OpenAI cookbook `.md` URL that returned "Untitled (30 chars)".
|
|
26
|
+
- Title extraction for non-HTML content: `extractTextTitle()` pulls from markdown `#`/`##` headings, falls back to URL filename
|
|
27
|
+
- Combined `yt-dlp --print duration -g` call fetches stream URL and duration in a single invocation, reused across all frame extraction paths via `streamInfo` passthrough
|
|
28
|
+
- Shared helpers in `utils.ts` (`formatSeconds`, error mapping) eliminate circular imports and duplication across youtube-extract.ts and video-extract.ts
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
- `fetch_content` TUI rendered `undefined/undefined URLs` during progress updates (renderResult didn't handle `isPartial`, now shows a progress bar like `web_search` does)
|
|
32
|
+
- RSC extractor produced malformed markdown for `<pre><code>` blocks (backticks inside fenced code blocks) -- extremely common on Next.js documentation pages
|
|
33
|
+
- Multi-URL fetch failures rendered in green "success" color even when 0 URLs succeeded (now red)
|
|
34
|
+
- `web_search` queries parameter described as "parallel" in schema but execution is sequential (changed to "batch"; `urls` correctly remains "parallel")
|
|
35
|
+
- Proper error propagation for frame extraction: missing binaries (yt-dlp, ffmpeg, ffprobe), private/age-restricted/region-blocked videos, expired stream URLs (403), timestamp-exceeds-duration, and timeouts all produce specific user-facing messages instead of silent nulls
|
|
36
|
+
- `isTimeoutError` now detects `execFileSync` timeouts via the `killed` flag (SIGTERM from timeout was previously unrecognized)
|
|
37
|
+
- Float video durations (e.g. 15913.7s from yt-dlp) no longer produce out-of-range timestamps — durations are floored before computing frame positions
|
|
38
|
+
- `parseTimestamp` consistently floors results across both bare-number ("90.5" → 90) and colon ("1:30.5" → 90) paths — previously the colon path returned floats
|
|
39
|
+
- YouTube thumbnail assignment no longer sets `null` on the optional `thumbnail` field when fetch fails (was a type mismatch; now only assigned on success)
|
|
40
|
+
|
|
41
|
+
### New files
|
|
42
|
+
- `gemini-search.ts` -- search routing + Gemini Web/API search providers with grounding
|
|
43
|
+
- `gemini-url-context.ts` -- URL Context API extraction + Gemini Web extraction fallback
|
|
44
|
+
- `video-extract.ts` -- local video file detection, Gemini Web/API analysis with Files API upload
|
|
45
|
+
- `utils.ts` -- shared formatting and error helpers for frame extraction
|
|
46
|
+
|
|
47
|
+
## [0.6.0] - 2026-02-02
|
|
48
|
+
|
|
49
|
+
### Added
|
|
50
|
+
- YouTube video understanding in `fetch_content` via three-tier fallback chain:
|
|
51
|
+
- **Gemini Web** (primary): reads Chrome session cookies from macOS Keychain + SQLite, authenticates to gemini.google.com, sends YouTube URL via StreamGenerate endpoint. Full visual + audio understanding with timestamps. Zero config needed if signed into Google in Chrome.
|
|
52
|
+
- **Gemini API** (secondary): direct REST calls with `GEMINI_API_KEY`. YouTube URLs passed as `file_data.file_uri`. Configure via `GEMINI_API_KEY` env var or `geminiApiKey` in `~/.pi/web-search.json`.
|
|
53
|
+
- **Perplexity** (fallback): uses existing `searchWithPerplexity` for a topic summary when neither Gemini path is available. Output labeled as "Summary (via Perplexity)" so the agent knows it's not a full transcript.
|
|
54
|
+
- YouTube URL detection for all common formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`
|
|
55
|
+
- Configurable via `~/.pi/web-search.json` under `youtube` key (`enabled`, `preferredModel`)
|
|
56
|
+
- Actionable error messages when extraction fails (directs user to sign into Chrome or set API key)
|
|
57
|
+
- YouTube URLs no longer fall through to HTTP/Readability (which returns garbage); returns error instead
|
|
58
|
+
|
|
59
|
+
### New files
|
|
60
|
+
- `chrome-cookies.ts` -- macOS Chrome cookie extraction using Node builtins (`node:crypto`, `node:sqlite`, `child_process`)
|
|
61
|
+
- `gemini-web.ts` -- Gemini Web client ported from surf's gemini-client.cjs (cookie auth, StreamGenerate, model fallback)
|
|
62
|
+
- `gemini-api.ts` -- Gemini REST API client (generateContent, file upload/processing/cleanup for Phase 2)
|
|
63
|
+
- `youtube-extract.ts` -- YouTube extraction orchestrator with three-tier fallback and activity logging
|
|
64
|
+
|
|
65
|
+
## [0.5.1] - 2026-02-02
|
|
66
|
+
|
|
67
|
+
### Added
|
|
68
|
+
- Bundled `librarian` skill -- structured research workflow for open-source libraries with GitHub permalinks, combining fetch_content (cloning), web_search (recent info), and git operations (blame, log, show)
|
|
69
|
+
|
|
70
|
+
### Fixed
|
|
71
|
+
- Session fork event handler was registered as `session_branch` (non-existent event) instead of `session_fork`, meaning forks never triggered cleanup (abort pending fetches, clear clone cache, restore session data)
|
|
72
|
+
- API fallback title for tree URLs with a path (e.g. `/tree/main/src`) now includes the path (`owner/repo - src`), consistent with clone-based results
|
|
73
|
+
- Removed unnecessary export on `getDefaultBranch` (only used internally by `fetchViaApi`)
|
|
6
74
|
|
|
7
75
|
## [0.5.0] - 2026-02-01
|
|
8
76
|
|
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
# Pi Web Access
|
|
6
6
|
|
|
7
|
-
An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI, fetch and extract content from URLs, clone GitHub repos for local exploration, and
|
|
7
|
+
An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI or Gemini, fetch and extract content from URLs, clone GitHub repos for local exploration, read PDFs, understand YouTube videos, and analyze local video files.
|
|
8
8
|
|
|
9
9
|
```typescript
|
|
10
10
|
web_search({ query: "TypeScript best practices 2025" })
|
|
@@ -17,31 +17,42 @@ fetch_content({ url: "https://docs.example.com/guide" })
|
|
|
17
17
|
pi install npm:pi-web-access
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
Configure at least one search provider:
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
|
-
# Option 1:
|
|
24
|
-
export PERPLEXITY_API_KEY="pplx-..."
|
|
23
|
+
# Option 1: Sign into gemini.google.com in Chrome (free, zero config)
|
|
25
24
|
|
|
26
|
-
# Option 2:
|
|
25
|
+
# Option 2: Gemini API key
|
|
26
|
+
echo '{"geminiApiKey": "AIza..."}' > ~/.pi/web-search.json
|
|
27
|
+
|
|
28
|
+
# Option 3: Perplexity API key
|
|
27
29
|
echo '{"perplexityApiKey": "pplx-..."}' > ~/.pi/web-search.json
|
|
28
30
|
```
|
|
29
31
|
|
|
30
|
-
|
|
32
|
+
All three work simultaneously. In `auto` mode (default), the extension tries Perplexity first, then Gemini API, then Gemini Web.
|
|
31
33
|
|
|
32
34
|
**Requires:** Pi v0.37.3+
|
|
33
35
|
|
|
36
|
+
**Optional dependencies** for video frame extraction:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
brew install ffmpeg # frame extraction, video thumbnails, local video duration
|
|
40
|
+
brew install yt-dlp # YouTube frame extraction (stream URL + duration lookup)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Without these, video content analysis (transcripts via Gemini) still works. The binaries are only needed for extracting visual frames from videos. `ffprobe` (bundled with ffmpeg) is used for local video duration lookup when sampling frames across an entire video.
|
|
44
|
+
|
|
34
45
|
## Tools
|
|
35
46
|
|
|
36
47
|
### web_search
|
|
37
48
|
|
|
38
|
-
Search the web via Perplexity AI. Returns synthesized answer with source citations.
|
|
49
|
+
Search the web via Perplexity AI or Gemini. Returns synthesized answer with source citations.
|
|
39
50
|
|
|
40
51
|
```typescript
|
|
41
52
|
// Single query
|
|
42
53
|
web_search({ query: "rust async programming" })
|
|
43
54
|
|
|
44
|
-
// Multiple queries (
|
|
55
|
+
// Multiple queries (batch)
|
|
45
56
|
web_search({ queries: ["query 1", "query 2"] })
|
|
46
57
|
|
|
47
58
|
// With options
|
|
@@ -52,12 +63,17 @@ web_search({
|
|
|
52
63
|
domainFilter: ["github.com"] // Prefix with - to exclude
|
|
53
64
|
})
|
|
54
65
|
|
|
66
|
+
// Explicit provider
|
|
67
|
+
web_search({ query: "...", provider: "gemini" }) // auto, perplexity, gemini
|
|
68
|
+
|
|
55
69
|
// Fetch full page content (async)
|
|
56
70
|
web_search({ query: "...", includeContent: true })
|
|
57
71
|
```
|
|
58
72
|
|
|
59
73
|
When `includeContent: true`, sources are fetched in the background. Agent receives notification when ready.
|
|
60
74
|
|
|
75
|
+
Provider selection in `auto` mode: Perplexity (if key configured) → Gemini API (if key configured, uses Google Search grounding) → Gemini Web (if signed into Chrome). Gemini API returns structured citations with source mappings. Gemini Web returns markdown with embedded links.
|
|
76
|
+
|
|
61
77
|
### fetch_content
|
|
62
78
|
|
|
63
79
|
Fetch URL(s) and extract readable content as markdown.
|
|
@@ -93,6 +109,63 @@ fetch_content({ url: "https://github.com/big/repo", forceClone: true })
|
|
|
93
109
|
|
|
94
110
|
Repos over 350MB get a lightweight API-based view instead of a full clone. Commit SHA URLs are also handled via the API. Clones are cached for the session -- multiple files from the same repo share one clone, but clones are wiped on session change/shutdown and re-cloned as needed.
|
|
95
111
|
|
|
112
|
+
**YouTube videos:** YouTube URLs are automatically detected and processed via Gemini for full video understanding (visual + audio + transcript). Three-tier fallback:
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
// Returns transcript with timestamps, visual descriptions, chapter markers
|
|
116
|
+
fetch_content({ url: "https://youtube.com/watch?v=dQw4w9WgXcQ" })
|
|
117
|
+
|
|
118
|
+
// Ask a specific question about the video
|
|
119
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported?" })
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
1. **Gemini Web** (primary) -- reads your Chrome session cookies. Zero config if you're signed into Google.
|
|
123
|
+
2. **Gemini API** (secondary) -- uses `GEMINI_API_KEY` env var or `geminiApiKey` in config.
|
|
124
|
+
3. **Perplexity** (fallback) -- topic summary when neither Gemini path is available.
|
|
125
|
+
|
|
126
|
+
YouTube results include the video thumbnail as an image content part, so the agent receives visual context alongside the transcript.
|
|
127
|
+
|
|
128
|
+
Handles all YouTube URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`. Playlist-only URLs fall through to normal extraction.
|
|
129
|
+
|
|
130
|
+
**Local video files:** Pass a file path to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats. Max 50MB (configurable).
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
// Analyze a screen recording
|
|
134
|
+
fetch_content({ url: "/path/to/recording.mp4" })
|
|
135
|
+
|
|
136
|
+
// Ask about specific content in the video
|
|
137
|
+
fetch_content({ url: "./demo.mov", prompt: "What error message appears on screen?" })
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Two-tier fallback: Gemini API (needs key, proper Files API with MIME types) → Gemini Web (free, needs Chrome login). File paths are detected by prefix (`/`, `./`, `../`, `file://`). If ffmpeg is installed, a frame from the video is included as a thumbnail image alongside the analysis.
|
|
141
|
+
|
|
142
|
+
**Video frame extraction (YouTube + local):** Use `timestamp` and/or `frames` to pull visuals for scanning.
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
// Single frame at an exact time
|
|
146
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })
|
|
147
|
+
|
|
148
|
+
// Range scan (default 6 frames)
|
|
149
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })
|
|
150
|
+
|
|
151
|
+
// Custom density across a range
|
|
152
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })
|
|
153
|
+
|
|
154
|
+
// N frames at 5s intervals starting from a single timestamp
|
|
155
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41", frames: 5 })
|
|
156
|
+
|
|
157
|
+
// Whole-video sampling (no timestamp)
|
|
158
|
+
fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
The same `timestamp`/`frames` syntax works with local file paths (e.g. `/path/to/video.mp4`).
|
|
162
|
+
|
|
163
|
+
Requirements: YouTube frame extraction needs `yt-dlp` + `ffmpeg`. Local video frames need `ffmpeg` (and `ffprobe`, bundled with ffmpeg, for whole-video sampling).
|
|
164
|
+
|
|
165
|
+
Common errors include missing binaries, private/age-restricted videos, region blocks, live streams, expired stream URLs (403), and timestamps beyond the video duration.
|
|
166
|
+
|
|
167
|
+
**Gemini extraction fallback:** When Readability fails or a site blocks bot traffic (403, 429), the extension automatically retries via Gemini URL Context (API) or Gemini Web. This handles SPAs, JS-heavy pages, and anti-bot protections that the HTTP pipeline can't.
|
|
168
|
+
|
|
96
169
|
**PDF handling:** When fetching a PDF URL, the extension extracts text and saves it as a markdown file in `~/Downloads/`. The agent can then use `read` to access specific sections without loading 200K+ chars into context.
|
|
97
170
|
|
|
98
171
|
### get_search_content
|
|
@@ -140,6 +213,16 @@ Tool calls render with real-time progress:
|
|
|
140
213
|
└───────────────────────────────────────────────────────────────────┘
|
|
141
214
|
```
|
|
142
215
|
|
|
216
|
+
## Skills
|
|
217
|
+
|
|
218
|
+
Skills are bundled with the extension and available automatically after install -- no extra setup needed.
|
|
219
|
+
|
|
220
|
+
### librarian
|
|
221
|
+
|
|
222
|
+
Structured research workflow for open-source libraries with evidence-backed answers and GitHub permalinks. Loaded automatically when the task involves understanding library internals, finding implementation details, or tracing code history.
|
|
223
|
+
|
|
224
|
+
Combines `fetch_content` (GitHub cloning), `web_search` (recent info), and git operations (blame, log, show). Pi auto-detects when to load it based on your prompt. If you have [pi-skill-palette](https://github.com/nicobailon/pi-skill-palette) installed, you can also load it explicitly via `/skill:librarian`.
|
|
225
|
+
|
|
143
226
|
## Commands
|
|
144
227
|
|
|
145
228
|
### /search
|
|
@@ -151,7 +234,11 @@ Browse stored search results interactively.
|
|
|
151
234
|
### fetch_content routing
|
|
152
235
|
|
|
153
236
|
```
|
|
154
|
-
fetch_content(
|
|
237
|
+
fetch_content(url_or_path, prompt?)
|
|
238
|
+
│
|
|
239
|
+
├── Local video file? ──→ Gemini API → Gemini Web
|
|
240
|
+
│ ↓
|
|
241
|
+
│ Video analysis (prompt forwarded)
|
|
155
242
|
│
|
|
156
243
|
├── github.com code URL? ──→ Clone repo (gh/git --depth 1)
|
|
157
244
|
│ │
|
|
@@ -167,30 +254,41 @@ fetch_content(url)
|
|
|
167
254
|
│ Return content + local
|
|
168
255
|
│ path for read/bash
|
|
169
256
|
│
|
|
257
|
+
├── YouTube URL? ──→ Gemini Web → Gemini API → Perplexity
|
|
258
|
+
│ ↓ (prompt forwarded)
|
|
259
|
+
│ Transcript + visual descriptions
|
|
260
|
+
│
|
|
170
261
|
├── PDF? ──→ unpdf → Save to ~/Downloads/
|
|
171
262
|
│
|
|
172
|
-
├── Plain text? ──→ Return directly
|
|
263
|
+
├── Plain text/markdown/JSON? ──→ Return directly
|
|
173
264
|
│
|
|
174
265
|
└── HTML ──→ Readability → Markdown
|
|
175
266
|
│
|
|
176
267
|
[if fails]
|
|
177
268
|
↓
|
|
178
269
|
RSC Parser → Markdown
|
|
270
|
+
│
|
|
271
|
+
[if all fail]
|
|
272
|
+
↓
|
|
273
|
+
Gemini URL Context → Gemini Web extraction
|
|
179
274
|
```
|
|
180
275
|
|
|
181
|
-
### web_search
|
|
276
|
+
### web_search routing
|
|
182
277
|
|
|
183
278
|
```
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
279
|
+
web_search(query, provider?)
|
|
280
|
+
│
|
|
281
|
+
├── provider = "perplexity" ──→ Perplexity API
|
|
282
|
+
├── provider = "gemini" ──→ Gemini API → Gemini Web
|
|
283
|
+
└── provider = "auto"
|
|
284
|
+
├── Perplexity key? ──→ Perplexity API
|
|
285
|
+
├── Gemini API key? ──→ Gemini API (grounded search)
|
|
286
|
+
├── Chrome cookies? ──→ Gemini Web (markdown citations)
|
|
287
|
+
└── Error
|
|
192
288
|
```
|
|
193
289
|
|
|
290
|
+
When `includeContent: true`, sources are fetched in the background using the fetch_content routing above, and the agent receives a notification when ready.
|
|
291
|
+
|
|
194
292
|
## Configuration
|
|
195
293
|
|
|
196
294
|
All config lives in `~/.pi/web-search.json`:
|
|
@@ -198,16 +296,29 @@ All config lives in `~/.pi/web-search.json`:
|
|
|
198
296
|
```json
|
|
199
297
|
{
|
|
200
298
|
"perplexityApiKey": "pplx-...",
|
|
299
|
+
"geminiApiKey": "AIza...",
|
|
300
|
+
"searchProvider": "auto",
|
|
201
301
|
"githubClone": {
|
|
202
302
|
"enabled": true,
|
|
203
303
|
"maxRepoSizeMB": 350,
|
|
204
304
|
"cloneTimeoutSeconds": 30,
|
|
205
305
|
"clonePath": "/tmp/pi-github-repos"
|
|
306
|
+
},
|
|
307
|
+
"youtube": {
|
|
308
|
+
"enabled": true,
|
|
309
|
+
"preferredModel": "gemini-2.5-flash"
|
|
310
|
+
},
|
|
311
|
+
"video": {
|
|
312
|
+
"enabled": true,
|
|
313
|
+
"preferredModel": "gemini-2.5-flash",
|
|
314
|
+
"maxSizeMB": 50
|
|
206
315
|
}
|
|
207
316
|
}
|
|
208
317
|
```
|
|
209
318
|
|
|
210
|
-
All
|
|
319
|
+
All fields are optional. `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. Set `"enabled": false` under `githubClone`, `youtube`, or `video` to disable those features.
|
|
320
|
+
|
|
321
|
+
`searchProvider` controls `web_search` default: `"auto"` (Perplexity → Gemini API → Gemini Web), `"perplexity"`, or `"gemini"` (API → Web).
|
|
211
322
|
|
|
212
323
|
## Rate Limits
|
|
213
324
|
|
|
@@ -221,22 +332,35 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
|
|
|
221
332
|
|------|---------|
|
|
222
333
|
| `index.ts` | Extension entry, tool definitions, commands, widget |
|
|
223
334
|
| `perplexity.ts` | Perplexity API client, rate limiting |
|
|
224
|
-
| `
|
|
335
|
+
| `gemini-search.ts` | Gemini search providers (Web + API with grounding), search routing |
|
|
336
|
+
| `extract.ts` | URL/file path routing, HTTP extraction, Gemini fallback orchestration |
|
|
337
|
+
| `gemini-url-context.ts` | Gemini URL Context + Web extraction fallbacks |
|
|
338
|
+
| `video-extract.ts` | Local video file detection, upload, Gemini Web/API analysis |
|
|
339
|
+
| `youtube-extract.ts` | YouTube URL detection, three-tier extraction orchestrator |
|
|
340
|
+
| `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
|
|
341
|
+
| `gemini-web.ts` | Gemini Web client (cookie auth, StreamGenerate) |
|
|
342
|
+
| `gemini-api.ts` | Gemini REST API client (generateContent, file upload) |
|
|
343
|
+
| `utils.ts` | Shared formatting (`formatSeconds`) and error helpers for frame extraction |
|
|
225
344
|
| `github-extract.ts` | GitHub URL parser, clone cache, content generation |
|
|
226
345
|
| `github-api.ts` | GitHub API fallback for oversized repos and commit SHAs |
|
|
227
346
|
| `pdf-extract.ts` | PDF text extraction, saves to markdown |
|
|
228
347
|
| `rsc-extract.ts` | RSC flight data parser for Next.js pages |
|
|
229
348
|
| `storage.ts` | Session-aware result storage |
|
|
230
349
|
| `activity.ts` | Activity tracking for observability widget |
|
|
350
|
+
| `skills/librarian/` | Bundled skill for library research with permalinks |
|
|
231
351
|
|
|
232
352
|
## Limitations
|
|
233
353
|
|
|
234
|
-
- Content extraction works best on article-style pages
|
|
235
|
-
-
|
|
354
|
+
- Content extraction works best on article-style pages; JS-heavy sites fall back to Gemini extraction when available
|
|
355
|
+
- Gemini extraction fallback requires either a Gemini API key or Chrome login to Google
|
|
236
356
|
- PDFs are extracted as text (no OCR for scanned documents)
|
|
237
357
|
- Max response size: 20MB for PDFs, 5MB for HTML
|
|
238
358
|
- Max inline content: 30,000 chars per URL (larger content stored for retrieval via get_search_content)
|
|
239
359
|
- GitHub cloning requires `gh` CLI for private repos (public repos fall back to `git clone`)
|
|
240
360
|
- GitHub branch names with slashes (e.g. `feature/foo`) may resolve the wrong file path; the clone still succeeds and the agent can navigate manually
|
|
241
361
|
- Non-code GitHub URLs (issues, PRs, wiki, etc.) fall through to normal Readability extraction
|
|
362
|
+
- YouTube extraction via Gemini Web requires macOS (Chrome cookie decryption is OS-specific); other platforms fall through to Gemini API or Perplexity
|
|
363
|
+
- YouTube private/age-restricted videos may fail on all paths
|
|
364
|
+
- Gemini can process videos up to ~1 hour at default resolution; longer videos may be truncated
|
|
365
|
+
- First-time Chrome cookie access may trigger a macOS Keychain permission dialog
|
|
242
366
|
- Requires Pi restart after config file changes
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { pbkdf2Sync, createDecipheriv } from "node:crypto";
|
|
3
|
+
import { copyFileSync, existsSync, mkdtempSync, rmSync } from "node:fs";
|
|
4
|
+
import { tmpdir, homedir, platform } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
|
|
7
|
+
export type CookieMap = Record<string, string>;
|
|
8
|
+
|
|
9
|
+
const GOOGLE_ORIGINS = [
|
|
10
|
+
"https://gemini.google.com",
|
|
11
|
+
"https://accounts.google.com",
|
|
12
|
+
"https://www.google.com",
|
|
13
|
+
];
|
|
14
|
+
|
|
15
|
+
const ALL_COOKIE_NAMES = new Set([
|
|
16
|
+
"__Secure-1PSID",
|
|
17
|
+
"__Secure-1PSIDTS",
|
|
18
|
+
"__Secure-1PSIDCC",
|
|
19
|
+
"__Secure-1PAPISID",
|
|
20
|
+
"NID",
|
|
21
|
+
"AEC",
|
|
22
|
+
"SOCS",
|
|
23
|
+
"__Secure-BUCKET",
|
|
24
|
+
"__Secure-ENID",
|
|
25
|
+
"SID",
|
|
26
|
+
"HSID",
|
|
27
|
+
"SSID",
|
|
28
|
+
"APISID",
|
|
29
|
+
"SAPISID",
|
|
30
|
+
"__Secure-3PSID",
|
|
31
|
+
"__Secure-3PSIDTS",
|
|
32
|
+
"__Secure-3PAPISID",
|
|
33
|
+
"SIDCC",
|
|
34
|
+
]);
|
|
35
|
+
|
|
36
|
+
const CHROME_COOKIES_PATH = join(
|
|
37
|
+
homedir(),
|
|
38
|
+
"Library/Application Support/Google/Chrome/Default/Cookies",
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
export async function getGoogleCookies(): Promise<{ cookies: CookieMap; warnings: string[] } | null> {
|
|
42
|
+
if (platform() !== "darwin") return null;
|
|
43
|
+
if (!existsSync(CHROME_COOKIES_PATH)) return null;
|
|
44
|
+
|
|
45
|
+
const warnings: string[] = [];
|
|
46
|
+
|
|
47
|
+
const password = await readKeychainPassword();
|
|
48
|
+
if (!password) {
|
|
49
|
+
warnings.push("Could not read Chrome Safe Storage password from Keychain");
|
|
50
|
+
return { cookies: {}, warnings };
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const key = pbkdf2Sync(password, "saltysalt", 1003, 16, "sha1");
|
|
54
|
+
const tempDir = mkdtempSync(join(tmpdir(), "pi-chrome-cookies-"));
|
|
55
|
+
|
|
56
|
+
try {
|
|
57
|
+
const tempDb = join(tempDir, "Cookies");
|
|
58
|
+
copyFileSync(CHROME_COOKIES_PATH, tempDb);
|
|
59
|
+
copySidecar(CHROME_COOKIES_PATH, tempDb, "-wal");
|
|
60
|
+
copySidecar(CHROME_COOKIES_PATH, tempDb, "-shm");
|
|
61
|
+
|
|
62
|
+
const metaVersion = await readMetaVersion(tempDb);
|
|
63
|
+
const stripHash = metaVersion >= 24;
|
|
64
|
+
|
|
65
|
+
const hosts = GOOGLE_ORIGINS.map((o) => new URL(o).hostname);
|
|
66
|
+
const rows = await queryCookieRows(tempDb, hosts);
|
|
67
|
+
if (!rows) {
|
|
68
|
+
warnings.push("Failed to query Chrome cookie database");
|
|
69
|
+
return { cookies: {}, warnings };
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const cookies: CookieMap = {};
|
|
73
|
+
for (const row of rows) {
|
|
74
|
+
const name = row.name as string;
|
|
75
|
+
if (!ALL_COOKIE_NAMES.has(name)) continue;
|
|
76
|
+
if (cookies[name]) continue;
|
|
77
|
+
|
|
78
|
+
let value = typeof row.value === "string" && row.value.length > 0 ? row.value : null;
|
|
79
|
+
if (!value) {
|
|
80
|
+
const encrypted = row.encrypted_value;
|
|
81
|
+
if (encrypted instanceof Uint8Array) {
|
|
82
|
+
value = decryptCookieValue(encrypted, key, stripHash);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
if (value) cookies[name] = value;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return { cookies, warnings };
|
|
89
|
+
} finally {
|
|
90
|
+
rmSync(tempDir, { recursive: true, force: true });
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
function decryptCookieValue(encrypted: Uint8Array, key: Buffer, stripHash: boolean): string | null {
|
|
95
|
+
const buf = Buffer.from(encrypted);
|
|
96
|
+
if (buf.length < 3) return null;
|
|
97
|
+
|
|
98
|
+
const prefix = buf.subarray(0, 3).toString("utf8");
|
|
99
|
+
if (!/^v\d\d$/.test(prefix)) return null;
|
|
100
|
+
|
|
101
|
+
const ciphertext = buf.subarray(3);
|
|
102
|
+
if (!ciphertext.length) return "";
|
|
103
|
+
|
|
104
|
+
try {
|
|
105
|
+
const iv = Buffer.alloc(16, 0x20);
|
|
106
|
+
const decipher = createDecipheriv("aes-128-cbc", key, iv);
|
|
107
|
+
decipher.setAutoPadding(false);
|
|
108
|
+
const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
|
|
109
|
+
const unpadded = removePkcs7Padding(plaintext);
|
|
110
|
+
const bytes = stripHash && unpadded.length >= 32 ? unpadded.subarray(32) : unpadded;
|
|
111
|
+
const decoded = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
|
|
112
|
+
let i = 0;
|
|
113
|
+
while (i < decoded.length && decoded.charCodeAt(i) < 0x20) i++;
|
|
114
|
+
return decoded.slice(i);
|
|
115
|
+
} catch {
|
|
116
|
+
return null;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
function removePkcs7Padding(buf: Buffer): Buffer {
|
|
121
|
+
if (!buf.length) return buf;
|
|
122
|
+
const padding = buf[buf.length - 1];
|
|
123
|
+
if (!padding || padding > 16) return buf;
|
|
124
|
+
return buf.subarray(0, buf.length - padding);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
function readKeychainPassword(): Promise<string | null> {
|
|
128
|
+
return new Promise((resolve) => {
|
|
129
|
+
execFile(
|
|
130
|
+
"security",
|
|
131
|
+
["find-generic-password", "-w", "-a", "Chrome", "-s", "Chrome Safe Storage"],
|
|
132
|
+
{ timeout: 5000 },
|
|
133
|
+
(err, stdout) => {
|
|
134
|
+
if (err) { resolve(null); return; }
|
|
135
|
+
resolve(stdout.trim() || null);
|
|
136
|
+
},
|
|
137
|
+
);
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
let sqliteModule: typeof import("node:sqlite") | null = null;
|
|
142
|
+
|
|
143
|
+
async function importSqlite(): Promise<typeof import("node:sqlite") | null> {
|
|
144
|
+
if (sqliteModule) return sqliteModule;
|
|
145
|
+
const orig = process.emitWarning.bind(process);
|
|
146
|
+
process.emitWarning = ((warning: string | Error, ...args: unknown[]) => {
|
|
147
|
+
const msg = typeof warning === "string" ? warning : warning?.message ?? "";
|
|
148
|
+
if (msg.includes("SQLite is an experimental feature")) return;
|
|
149
|
+
return (orig as Function)(warning, ...args);
|
|
150
|
+
}) as typeof process.emitWarning;
|
|
151
|
+
try {
|
|
152
|
+
sqliteModule = await import("node:sqlite");
|
|
153
|
+
return sqliteModule;
|
|
154
|
+
} catch {
|
|
155
|
+
return null;
|
|
156
|
+
} finally {
|
|
157
|
+
process.emitWarning = orig;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
function supportsReadBigInts(): boolean {
|
|
162
|
+
const [major, minor] = process.versions.node.split(".").map(Number);
|
|
163
|
+
if (major > 24) return true;
|
|
164
|
+
if (major < 24) return false;
|
|
165
|
+
return minor >= 4;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
async function readMetaVersion(dbPath: string): Promise<number> {
|
|
169
|
+
const sqlite = await importSqlite();
|
|
170
|
+
if (!sqlite) return 0;
|
|
171
|
+
const opts: Record<string, unknown> = { readOnly: true };
|
|
172
|
+
if (supportsReadBigInts()) opts.readBigInts = true;
|
|
173
|
+
const db = new sqlite.DatabaseSync(dbPath, opts);
|
|
174
|
+
try {
|
|
175
|
+
const rows = db.prepare("SELECT value FROM meta WHERE key = 'version'").all() as Array<Record<string, unknown>>;
|
|
176
|
+
const val = rows[0]?.value;
|
|
177
|
+
if (typeof val === "number") return Math.floor(val);
|
|
178
|
+
if (typeof val === "bigint") return Number(val);
|
|
179
|
+
if (typeof val === "string") return parseInt(val, 10) || 0;
|
|
180
|
+
return 0;
|
|
181
|
+
} catch {
|
|
182
|
+
return 0;
|
|
183
|
+
} finally {
|
|
184
|
+
db.close();
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
async function queryCookieRows(
|
|
189
|
+
dbPath: string,
|
|
190
|
+
hosts: string[],
|
|
191
|
+
): Promise<Array<Record<string, unknown>> | null> {
|
|
192
|
+
const sqlite = await importSqlite();
|
|
193
|
+
if (!sqlite) return null;
|
|
194
|
+
|
|
195
|
+
const clauses: string[] = [];
|
|
196
|
+
for (const host of hosts) {
|
|
197
|
+
for (const candidate of expandHosts(host)) {
|
|
198
|
+
const esc = candidate.replaceAll("'", "''");
|
|
199
|
+
clauses.push(`host_key = '${esc}'`);
|
|
200
|
+
clauses.push(`host_key = '.${esc}'`);
|
|
201
|
+
clauses.push(`host_key LIKE '%.${esc}'`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
const where = clauses.join(" OR ");
|
|
205
|
+
|
|
206
|
+
const opts: Record<string, unknown> = { readOnly: true };
|
|
207
|
+
if (supportsReadBigInts()) opts.readBigInts = true;
|
|
208
|
+
const db = new sqlite.DatabaseSync(dbPath, opts);
|
|
209
|
+
try {
|
|
210
|
+
return db
|
|
211
|
+
.prepare(
|
|
212
|
+
`SELECT name, value, host_key, encrypted_value FROM cookies WHERE (${where}) ORDER BY expires_utc DESC`,
|
|
213
|
+
)
|
|
214
|
+
.all() as Array<Record<string, unknown>>;
|
|
215
|
+
} catch {
|
|
216
|
+
return null;
|
|
217
|
+
} finally {
|
|
218
|
+
db.close();
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
function expandHosts(host: string): string[] {
|
|
223
|
+
const parts = host.split(".").filter(Boolean);
|
|
224
|
+
if (parts.length <= 1) return [host];
|
|
225
|
+
const candidates = new Set<string>();
|
|
226
|
+
candidates.add(host);
|
|
227
|
+
for (let i = 1; i <= parts.length - 2; i++) {
|
|
228
|
+
const c = parts.slice(i).join(".");
|
|
229
|
+
if (c) candidates.add(c);
|
|
230
|
+
}
|
|
231
|
+
return Array.from(candidates);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
function copySidecar(srcDb: string, targetDb: string, suffix: string): void {
|
|
235
|
+
const sidecar = `${srcDb}${suffix}`;
|
|
236
|
+
if (!existsSync(sidecar)) return;
|
|
237
|
+
try {
|
|
238
|
+
copyFileSync(sidecar, `${targetDb}${suffix}`);
|
|
239
|
+
} catch {}
|
|
240
|
+
}
|