pi-web-access 0.5.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,7 +2,65 @@
 
  All notable changes to this project will be documented in this file.
 
- ## [Unreleased]
+ ## [0.7.0] - 2026-02-03
+
+ ### Added
+ - **Multi-provider web search**: `web_search` now supports Perplexity, Gemini API (with Google Search grounding), and Gemini Web (cookie auth) as search providers. New `provider` parameter (`auto`, `perplexity`, `gemini`) controls selection. In `auto` mode (default): Perplexity → Gemini API → Gemini Web. Backwards-compatible — existing Perplexity users see no change.
+ - **Gemini API grounded search**: Structured citations via `groundingMetadata` with source URIs and text-to-source mappings. Google proxy URLs are resolved via HEAD redirects. Configured via `GEMINI_API_KEY` or `geminiApiKey` in config.
+ - **Gemini Web search**: Zero-config web search for users signed into Google in Chrome. Prompt instructs Gemini to cite sources; URLs are extracted from the markdown response.
+ - **Gemini extraction fallback**: When `fetch_content` fails (HTTP 403/429, Readability failure, network errors), automatically retries via the Gemini URL Context API, then Gemini Web extraction. Each has an independent 60s timeout. Handles SPAs, JS-heavy pages, and anti-bot protections.
+ - **Local video file analysis**: `fetch_content` accepts file paths to video files (MP4, MOV, WebM, AVI, etc.). Detected by path prefix (`/`, `./`, `../`, `file://`), validated by extension and a 50MB limit. Two-tier fallback: Gemini API (resumable upload via the Files API with proper MIME types, poll-until-active, and cleanup) → Gemini Web (free, cookie auth).
+ - **Video prompt parameter**: `fetch_content` gains an optional `prompt` parameter for asking specific questions about video content. Threads through YouTube and local video extraction. Without a prompt, uses default extraction (transcript + visual descriptions).
+ - **Video thumbnails**: YouTube results include the video thumbnail (fetched from `img.youtube.com`). Local video results include a frame extracted via ffmpeg (at ~1 second). Returned as image content parts alongside text — the agent sees the thumbnail as vision context.
+ - **Configurable frame extraction**: `frames` parameter (1-12) on `fetch_content` for pulling visual frames from YouTube or local video. Works in five modes: frames alone (sample across the entire video), single timestamp (one frame), single + frames (N frames at 5s intervals), range (default 6 frames), range + frames (N frames across the range). Endpoint-inclusive distribution with 5-second minimum spacing (sketched after this list).
+ - **Video duration in responses**: Frame extraction results include the video duration for context.
+ - `searchProvider` config option in `~/.pi/web-search.json` for a global provider default
+ - `video` config section: `enabled`, `preferredModel`, `maxSizeMB`
+
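To make the distribution rule concrete, here is a minimal sketch of endpoint-inclusive frame placement with the 5-second floor. The helper name and exact rounding are assumptions for illustration, not the extension's actual code:

```typescript
// Hypothetical sketch: spread N frames across [start, end] seconds, endpoints
// included, capping the count so adjacent frames stay at least 5s apart.
function frameTimestamps(start: number, end: number, frames: number): number[] {
  const MIN_SPACING = 5;
  const span = end - start;
  const maxFrames = Math.max(1, Math.floor(span / MIN_SPACING) + 1);
  const n = Math.min(frames, maxFrames);
  if (n === 1) return [Math.floor(start)];
  const step = span / (n - 1); // endpoint-inclusive: first = start, last = end
  return Array.from({ length: n }, (_, i) => Math.floor(start + i * step));
}

frameTimestamps(1421, 1500, 3); // "23:41-25:00" with frames: 3 -> [1421, 1460, 1500]
```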
+ ### Changed
+ - `PerplexityResponse` renamed to `SearchResponse` (shared interface for all search providers)
+ - Extracted the HTTP pipeline from `extractContent` into `extractViaHttp` for cleaner Gemini fallback orchestration
+ - `getApiKey()`, `API_BASE`, `DEFAULT_MODEL` exported from `gemini-api.ts` for use by the search and URL Context modules
+ - `isPerplexityAvailable()` added to `perplexity.ts` as a non-throwing API key check
+ - Content-type routing in `extract.ts`: only `text/html` and `application/xhtml+xml` go through Readability; all other text types (`text/markdown`, `application/json`, `text/csv`, etc.) are returned directly (sketched after this list). Fixes the OpenAI cookbook `.md` URL that returned "Untitled (30 chars)".
+ - Title extraction for non-HTML content: `extractTextTitle()` pulls from markdown `#`/`##` headings, falling back to the URL filename
+ - Combined `yt-dlp --print duration -g` call fetches the stream URL and duration in a single invocation, reused across all frame extraction paths via `streamInfo` passthrough
+ - Shared helpers in `utils.ts` (`formatSeconds`, error mapping) eliminate circular imports and duplication across `youtube-extract.ts` and `video-extract.ts`
+
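A minimal sketch of that routing; only `extractTextTitle`'s heading/filename behavior and the MIME type split come from the changelog, the helper shapes are assumed:

```typescript
// Only HTML goes through Readability; every other text type passes through.
const HTML_TYPES = new Set(["text/html", "application/xhtml+xml"]);

// Title for non-HTML content: first #/## markdown heading, else the URL filename.
function extractTextTitle(body: string, url: string): string {
  const heading = body.match(/^##? +(.+)$/m);
  if (heading) return heading[1].trim();
  return new URL(url).pathname.split("/").pop() || "Untitled";
}

// Returns null for HTML so the caller can run the Readability pipeline instead.
function routeTextContent(contentType: string, body: string, url: string) {
  const mime = contentType.split(";")[0].trim().toLowerCase();
  if (HTML_TYPES.has(mime)) return null;
  return { title: extractTextTitle(body, url), markdown: body };
}
```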
+ ### Fixed
+ - `fetch_content` TUI rendered `undefined/undefined URLs` during progress updates (`renderResult` didn't handle `isPartial`; it now shows a progress bar like `web_search` does)
+ - RSC extractor produced malformed markdown for `<pre><code>` blocks (backticks inside fenced code blocks) -- extremely common on Next.js documentation pages
+ - Multi-URL fetch failures rendered in green "success" color even when 0 URLs succeeded (now red)
+ - `web_search`'s `queries` parameter was described as "parallel" in the schema but execution is sequential (changed to "batch"; `urls` correctly remains "parallel")
+ - Proper error propagation for frame extraction: missing binaries (yt-dlp, ffmpeg, ffprobe), private/age-restricted/region-blocked videos, expired stream URLs (403), timestamp-exceeds-duration, and timeouts all produce specific user-facing messages instead of silent nulls
+ - `isTimeoutError` now detects `execFileSync` timeouts via the `killed` flag (SIGTERM from a timeout was previously unrecognized)
+ - Float video durations (e.g. 15913.7s from yt-dlp) no longer produce out-of-range timestamps — durations are floored before computing frame positions
+ - `parseTimestamp` consistently floors results across both the bare-number ("90.5" → 90) and colon ("1:30.5" → 90) paths — previously the colon path returned floats (sketched after this list)
+ - YouTube thumbnail assignment no longer sets `null` on the optional `thumbnail` field when the fetch fails (was a type mismatch; now only assigned on success)
+
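A sketch of the corrected flooring behavior; only the input/output pairs come from the changelog, the implementation itself is illustrative:

```typescript
// Parse "90.5", "1:30.5", or "h:mm:ss" into whole seconds, flooring on both
// paths so float inputs can't exceed a duration check downstream.
function parseTimestamp(input: string): number | null {
  if (!input.includes(":")) {
    const secs = Number(input);
    return Number.isFinite(secs) && secs >= 0 ? Math.floor(secs) : null;
  }
  const parts = input.split(":").map(Number);
  if (parts.some((p) => !Number.isFinite(p) || p < 0)) return null;
  // Previously this path returned floats ("1:30.5" -> 90.5); now it floors too.
  return Math.floor(parts.reduce((total, part) => total * 60 + part, 0));
}

parseTimestamp("90.5");   // 90
parseTimestamp("1:30.5"); // 90
```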
+ ### New files
+ - `gemini-search.ts` -- search routing + Gemini Web/API search providers with grounding
+ - `gemini-url-context.ts` -- URL Context API extraction + Gemini Web extraction fallback
+ - `video-extract.ts` -- local video file detection, Gemini Web/API analysis with Files API upload
+ - `utils.ts` -- shared formatting and error helpers for frame extraction
+
+ ## [0.6.0] - 2026-02-02
+
+ ### Added
+ - YouTube video understanding in `fetch_content` via a three-tier fallback chain:
+   - **Gemini Web** (primary): reads Chrome session cookies from the macOS Keychain + SQLite, authenticates to gemini.google.com, and sends the YouTube URL via the StreamGenerate endpoint. Full visual + audio understanding with timestamps. Zero config needed if signed into Google in Chrome.
+   - **Gemini API** (secondary): direct REST calls with `GEMINI_API_KEY`. YouTube URLs are passed as `file_data.file_uri`. Configure via the `GEMINI_API_KEY` env var or `geminiApiKey` in `~/.pi/web-search.json`.
+   - **Perplexity** (fallback): uses the existing `searchWithPerplexity` for a topic summary when neither Gemini path is available. Output is labeled "Summary (via Perplexity)" so the agent knows it's not a full transcript.
+ - YouTube URL detection for all common formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`
+ - Configurable via `~/.pi/web-search.json` under the `youtube` key (`enabled`, `preferredModel`)
+ - Actionable error messages when extraction fails (directs the user to sign into Chrome or set an API key)
+ - YouTube URLs no longer fall through to HTTP/Readability (which returns garbage); an error is returned instead
+
+ ### New files
+ - `chrome-cookies.ts` -- macOS Chrome cookie extraction using Node builtins (`node:crypto`, `node:sqlite`, `child_process`)
+ - `gemini-web.ts` -- Gemini Web client ported from surf's gemini-client.cjs (cookie auth, StreamGenerate, model fallback)
+ - `gemini-api.ts` -- Gemini REST API client (generateContent, file upload/processing/cleanup for Phase 2)
+ - `youtube-extract.ts` -- YouTube extraction orchestrator with three-tier fallback and activity logging
 
  ## [0.5.1] - 2026-02-02
 
package/README.md CHANGED
@@ -4,7 +4,9 @@
 
  # Pi Web Access
 
- An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI, fetch and extract content from URLs, clone GitHub repos for local exploration, and read PDFs.
+ An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI or Gemini, fetch and extract content from URLs, clone GitHub repos for local exploration, read PDFs, understand YouTube videos, and analyze local video files.
+
+ https://github.com/user-attachments/assets/cac6a17a-1eeb-4dde-9818-cdf85d8ea98f
 
  ```typescript
  web_search({ query: "TypeScript best practices 2025" })
@@ -17,31 +19,42 @@ fetch_content({ url: "https://docs.example.com/guide" })
  pi install npm:pi-web-access
  ```
 
- Add your Perplexity API key:
+ **Zero config if you're signed into Google in Chrome.** The extension reads your Chrome session cookies to access Gemini — no API keys needed. This gives you web search, YouTube video understanding, page extraction fallbacks, and local video analysis for free.
 
- ```bash
- # Option 1: Environment variable
- export PERPLEXITY_API_KEY="pplx-..."
+ If you're not signed into Chrome, or want to use a different provider, add API keys to `~/.pi/web-search.json`:
+
+ ```json
+ { "geminiApiKey": "AIza..." }
+ ```
 
- # Option 2: Config file
- echo '{"perplexityApiKey": "pplx-..."}' > ~/.pi/web-search.json
+ ```json
+ { "perplexityApiKey": "pplx-..." }
  ```
 
- Get a key at https://perplexity.ai/settings/api
+ You can configure both. In `auto` mode (default), the extension tries Perplexity first (if configured), then Gemini API, then Gemini Web via Chrome cookies.
 
  **Requires:** Pi v0.37.3+
 
+ **Optional dependencies** for video frame extraction:
+
+ ```bash
+ brew install ffmpeg   # frame extraction, video thumbnails, local video duration
+ brew install yt-dlp   # YouTube frame extraction (stream URL + duration lookup)
+ ```
+
+ Without these, video content analysis (transcripts via Gemini) still works. The binaries are only needed for extracting visual frames from videos. `ffprobe` (bundled with ffmpeg) is used for local video duration lookup when sampling frames across an entire video.
+
  ## Tools
 
  ### web_search
 
- Search the web via Perplexity AI. Returns synthesized answer with source citations.
+ Search the web via Perplexity AI or Gemini. Returns synthesized answer with source citations.
 
  ```typescript
  // Single query
  web_search({ query: "rust async programming" })
 
- // Multiple queries (parallel)
+ // Multiple queries (batch)
  web_search({ queries: ["query 1", "query 2"] })
 
  // With options
@@ -52,12 +65,17 @@ web_search({
    domainFilter: ["github.com"]  // Prefix with - to exclude
  })
 
+ // Explicit provider
+ web_search({ query: "...", provider: "gemini" })  // auto, perplexity, gemini
+
  // Fetch full page content (async)
  web_search({ query: "...", includeContent: true })
  ```
 
  When `includeContent: true`, sources are fetched in the background. Agent receives notification when ready.
 
+ Provider selection in `auto` mode: Perplexity (if key configured) → Gemini API (if key configured, uses Google Search grounding) → Gemini Web (if signed into Chrome). Gemini API returns structured citations with source mappings. Gemini Web returns markdown with embedded links.
+
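As an illustration, the `auto` cascade boils down to a first-match check. The function and parameter names below are hypothetical, not the extension's actual code:

```typescript
// Hypothetical sketch of the `auto` cascade described above.
type Provider = "perplexity" | "gemini-api" | "gemini-web";

async function resolveAutoProvider(
  hasPerplexityKey: boolean,
  hasGeminiKey: boolean,
  hasChromeCookies: () => Promise<boolean>, // e.g. a successful cookie-extraction probe
): Promise<Provider> {
  if (hasPerplexityKey) return "perplexity";
  if (hasGeminiKey) return "gemini-api";
  if (await hasChromeCookies()) return "gemini-web";
  throw new Error(
    "No search provider available: set PERPLEXITY_API_KEY or GEMINI_API_KEY, or sign into Google in Chrome",
  );
}
```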
  ### fetch_content
 
  Fetch URL(s) and extract readable content as markdown.
@@ -93,6 +111,63 @@ fetch_content({ url: "https://github.com/big/repo", forceClone: true })
 
  Repos over 350MB get a lightweight API-based view instead of a full clone. Commit SHA URLs are also handled via the API. Clones are cached for the session -- multiple files from the same repo share one clone, but clones are wiped on session change/shutdown and re-cloned as needed.
 
+ **YouTube videos:** YouTube URLs are automatically detected and processed via Gemini for full video understanding (visual + audio + transcript). Three-tier fallback:
+
+ ```typescript
+ // Returns transcript with timestamps, visual descriptions, chapter markers
+ fetch_content({ url: "https://youtube.com/watch?v=dQw4w9WgXcQ" })
+
+ // Ask a specific question about the video
+ fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported?" })
+ ```
+
+ 1. **Gemini Web** (primary) -- reads your Chrome session cookies. Zero config if you're signed into Google.
+ 2. **Gemini API** (secondary) -- uses the `GEMINI_API_KEY` env var or `geminiApiKey` in config.
+ 3. **Perplexity** (fallback) -- topic summary when neither Gemini path is available.
+
+ YouTube results include the video thumbnail as an image content part, so the agent receives visual context alongside the transcript.
+
+ Handles all YouTube URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`. Playlist-only URLs fall through to normal extraction.
+
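A rough sketch of what detection across those formats can look like; the matcher below is illustrative, not the extension's actual code:

```typescript
// Hypothetical sketch: recognize video URLs, let playlist-only URLs fall through.
function isYouTubeVideoUrl(raw: string): boolean {
  let url: URL;
  try { url = new URL(raw); } catch { return false; }
  const host = url.hostname.replace(/^www\./, "");
  if (host === "youtu.be") return url.pathname.length > 1;
  if (host !== "youtube.com" && host !== "m.youtube.com") return false;
  if (url.pathname === "/watch") return url.searchParams.has("v");
  return /^\/(shorts|live|embed|v)\/./.test(url.pathname);
}

isYouTubeVideoUrl("https://youtu.be/dQw4w9WgXcQ");            // true
isYouTubeVideoUrl("https://youtube.com/playlist?list=PLabc"); // false
```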
+ **Local video files:** Pass a file path to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats. Max 50MB (configurable).
+
+ ```typescript
+ // Analyze a screen recording
+ fetch_content({ url: "/path/to/recording.mp4" })
+
+ // Ask about specific content in the video
+ fetch_content({ url: "./demo.mov", prompt: "What error message appears on screen?" })
+ ```
+
+ Two-tier fallback: Gemini API (needs key, proper Files API with MIME types) → Gemini Web (free, needs Chrome login). File paths are detected by prefix (`/`, `./`, `../`, `file://`). If ffmpeg is installed, a frame from the video is included as a thumbnail image alongside the analysis.
+
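A sketch of the detection rules just described (prefix check, extension whitelist, size cap); the helper name and exact extension list are assumptions:

```typescript
import { statSync } from "node:fs";

// Hypothetical sketch: return a usable local path, or null if this isn't a
// local video reference. Throws when the file exceeds the size cap.
const VIDEO_EXTENSIONS = new Set([".mp4", ".mov", ".webm", ".avi", ".mkv"]);

function detectLocalVideo(input: string, maxSizeMB = 50): string | null {
  if (!["/", "./", "../", "file://"].some((p) => input.startsWith(p))) return null;
  const path = input.replace(/^file:\/\//, "");
  const dot = path.lastIndexOf(".");
  if (dot === -1 || !VIDEO_EXTENSIONS.has(path.slice(dot).toLowerCase())) return null;
  if (statSync(path).size > maxSizeMB * 1024 * 1024) {
    throw new Error(`Video exceeds ${maxSizeMB}MB limit`);
  }
  return path;
}
```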
+ **Video frame extraction (YouTube + local):** Use `timestamp` and/or `frames` to pull visuals for scanning.
+
+ ```typescript
+ // Single frame at an exact time
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })
+
+ // Range scan (default 6 frames)
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })
+
+ // Custom density across a range
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })
+
+ // N frames at 5s intervals starting from a single timestamp
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41", frames: 5 })
+
+ // Whole-video sampling (no timestamp)
+ fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })
+ ```
+
+ The same `timestamp`/`frames` syntax works with local file paths (e.g. `/path/to/video.mp4`).
+
+ Requirements: YouTube frame extraction needs `yt-dlp` + `ffmpeg`. Local video frames need `ffmpeg` (and `ffprobe`, bundled with ffmpeg, for whole-video sampling).
+
+ Common errors include missing binaries, private/age-restricted videos, region blocks, live streams, expired stream URLs (403), and timestamps beyond the video duration.
+
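Under the hood, pulling a frame reduces to an ffmpeg invocation; for YouTube, yt-dlp first resolves a direct stream URL to use as the input. A minimal sketch (paths and the helper name are illustrative):

```typescript
import { execFileSync } from "node:child_process";

// Fast-seek to `seconds` (-ss before -i), grab one frame as a JPEG.
function extractFrame(input: string, seconds: number, outPath: string): void {
  execFileSync(
    "ffmpeg",
    ["-ss", String(seconds), "-i", input, "-frames:v", "1", "-q:v", "2", "-y", outPath],
    { timeout: 60_000 }, // a timeout kill here is what the `killed`-flag fix detects
  );
}

extractFrame("/path/to/video.mp4", 90, "/tmp/frame-90s.jpg");
```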
+ **Gemini extraction fallback:** When Readability fails or a site blocks bot traffic (403, 429), the extension automatically retries via Gemini URL Context (API) or Gemini Web. This handles SPAs, JS-heavy pages, and anti-bot protections that the HTTP pipeline can't.
+
  **PDF handling:** When fetching a PDF URL, the extension extracts text and saves it as a markdown file in `~/Downloads/`. The agent can then use `read` to access specific sections without loading 200K+ chars into context.
 
  ### get_search_content
@@ -161,7 +236,11 @@ Browse stored search results interactively.
  ### fetch_content routing
 
  ```
- fetch_content(url)
+ fetch_content(url_or_path, prompt?)
+
+ ├── Local video file? ──→ Gemini API → Gemini Web
+ │                              ↓
+ │                  Video analysis (prompt forwarded)
 
  ├── github.com code URL? ──→ Clone repo (gh/git --depth 1)
  │                              │
@@ -177,30 +256,41 @@ fetch_content(url)
  │                       Return content + local
  │                       path for read/bash
 
+ ├── YouTube URL? ──→ Gemini Web → Gemini API → Perplexity
+ │                         ↓ (prompt forwarded)
+ │              Transcript + visual descriptions
+
  ├── PDF? ──→ unpdf → Save to ~/Downloads/
 
- ├── Plain text? ──→ Return directly
+ ├── Plain text/markdown/JSON? ──→ Return directly
 
  └── HTML ──→ Readability → Markdown
 
          [if fails]
 
          RSC Parser → Markdown
+
+         [if all fail]
+
+         Gemini URL Context → Gemini Web extraction
  ```
 
- ### web_search with includeContent
+ ### web_search routing
 
  ```
- Agent Request → Perplexity API → Synthesized Answer + Citations
-
-     [if includeContent: true]
-
-     Background Fetch (3 concurrent)
-     (uses same routing as above)
-
-     Agent Notification (triggerTurn)
+ web_search(query, provider?)
+
+ ├── provider = "perplexity" ──→ Perplexity API
+ ├── provider = "gemini"     ──→ Gemini API → Gemini Web
+ └── provider = "auto"
+     ├── Perplexity key?   ──→ Perplexity API
+     ├── Gemini API key?   ──→ Gemini API (grounded search)
+     ├── Chrome cookies?   ──→ Gemini Web (prompted citations)
+     └── Error
  ```
 
+ When `includeContent: true`, sources are fetched in the background using the fetch_content routing above, and the agent receives a notification when ready.
+
  ## Configuration
 
  All config lives in `~/.pi/web-search.json`:
@@ -208,16 +298,29 @@ All config lives in `~/.pi/web-search.json`:
  ```json
  {
    "perplexityApiKey": "pplx-...",
+   "geminiApiKey": "AIza...",
+   "searchProvider": "auto",
    "githubClone": {
      "enabled": true,
      "maxRepoSizeMB": 350,
      "cloneTimeoutSeconds": 30,
      "clonePath": "/tmp/pi-github-repos"
+   },
+   "youtube": {
+     "enabled": true,
+     "preferredModel": "gemini-2.5-flash"
+   },
+   "video": {
+     "enabled": true,
+     "preferredModel": "gemini-2.5-flash",
+     "maxSizeMB": 50
    }
  }
  ```
 
- All `githubClone` fields are optional with the defaults shown above. Set `"enabled": false` to disable GitHub cloning entirely and fall through to normal HTML extraction.
+ All fields are optional. `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. Set `"enabled": false` under `githubClone`, `youtube`, or `video` to disable those features.
+
+ `searchProvider` controls the `web_search` default: `"auto"` (Perplexity → Gemini API → Gemini Web), `"perplexity"`, or `"gemini"` (API → Web).
 
  ## Rate Limits
 
@@ -231,7 +334,15 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
  |------|---------|
  | `index.ts` | Extension entry, tool definitions, commands, widget |
  | `perplexity.ts` | Perplexity API client, rate limiting |
- | `extract.ts` | URL fetching, content extraction routing |
+ | `gemini-search.ts` | Gemini search providers (Web + API with grounding), search routing |
+ | `extract.ts` | URL/file path routing, HTTP extraction, Gemini fallback orchestration |
+ | `gemini-url-context.ts` | Gemini URL Context + Web extraction fallbacks |
+ | `video-extract.ts` | Local video file detection, upload, Gemini Web/API analysis |
+ | `youtube-extract.ts` | YouTube URL detection, three-tier extraction orchestrator |
+ | `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
+ | `gemini-web.ts` | Gemini Web client (cookie auth, StreamGenerate) |
+ | `gemini-api.ts` | Gemini REST API client (generateContent, file upload) |
+ | `utils.ts` | Shared formatting (`formatSeconds`) and error helpers for frame extraction |
  | `github-extract.ts` | GitHub URL parser, clone cache, content generation |
  | `github-api.ts` | GitHub API fallback for oversized repos and commit SHAs |
  | `pdf-extract.ts` | PDF text extraction, saves to markdown |
@@ -242,12 +353,16 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
 
  ## Limitations
 
- - Content extraction works best on article-style pages
- - Heavy JS sites may not extract well (no browser rendering), though Next.js App Router pages with RSC flight data are supported
+ - Content extraction works best on article-style pages; JS-heavy sites fall back to Gemini extraction when available
+ - Gemini extraction fallback requires either a Gemini API key or Chrome login to Google
  - PDFs are extracted as text (no OCR for scanned documents)
  - Max response size: 20MB for PDFs, 5MB for HTML
  - Max inline content: 30,000 chars per URL (larger content stored for retrieval via get_search_content)
  - GitHub cloning requires `gh` CLI for private repos (public repos fall back to `git clone`)
  - GitHub branch names with slashes (e.g. `feature/foo`) may resolve the wrong file path; the clone still succeeds and the agent can navigate manually
  - Non-code GitHub URLs (issues, PRs, wiki, etc.) fall through to normal Readability extraction
+ - YouTube extraction via Gemini Web requires macOS (Chrome cookie decryption is OS-specific); other platforms fall through to Gemini API or Perplexity
+ - YouTube private/age-restricted videos may fail on all paths
+ - Gemini can process videos up to ~1 hour at default resolution; longer videos may be truncated
+ - First-time Chrome cookie access may trigger a macOS Keychain permission dialog
  - Requires Pi restart after config file changes
package/chrome-cookies.ts ADDED
@@ -0,0 +1,240 @@
+ import { execFile } from "node:child_process";
+ import { pbkdf2Sync, createDecipheriv } from "node:crypto";
+ import { copyFileSync, existsSync, mkdtempSync, rmSync } from "node:fs";
+ import { tmpdir, homedir, platform } from "node:os";
+ import { join } from "node:path";
+
+ export type CookieMap = Record<string, string>;
+
+ const GOOGLE_ORIGINS = [
+   "https://gemini.google.com",
+   "https://accounts.google.com",
+   "https://www.google.com",
+ ];
+
+ const ALL_COOKIE_NAMES = new Set([
+   "__Secure-1PSID",
+   "__Secure-1PSIDTS",
+   "__Secure-1PSIDCC",
+   "__Secure-1PAPISID",
+   "NID",
+   "AEC",
+   "SOCS",
+   "__Secure-BUCKET",
+   "__Secure-ENID",
+   "SID",
+   "HSID",
+   "SSID",
+   "APISID",
+   "SAPISID",
+   "__Secure-3PSID",
+   "__Secure-3PSIDTS",
+   "__Secure-3PAPISID",
+   "SIDCC",
+ ]);
+
+ const CHROME_COOKIES_PATH = join(
+   homedir(),
+   "Library/Application Support/Google/Chrome/Default/Cookies",
+ );
+
+ export async function getGoogleCookies(): Promise<{ cookies: CookieMap; warnings: string[] } | null> {
+   if (platform() !== "darwin") return null;
+   if (!existsSync(CHROME_COOKIES_PATH)) return null;
+
+   const warnings: string[] = [];
+
+   const password = await readKeychainPassword();
+   if (!password) {
+     warnings.push("Could not read Chrome Safe Storage password from Keychain");
+     return { cookies: {}, warnings };
+   }
+
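+   // Derive Chrome's "Safe Storage" key the way Chromium's os_crypt does on
+   // macOS: PBKDF2-SHA1 over the Keychain password, salt "saltysalt",
+   // 1003 iterations, 16-byte AES-128 key.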
+   const key = pbkdf2Sync(password, "saltysalt", 1003, 16, "sha1");
+   const tempDir = mkdtempSync(join(tmpdir(), "pi-chrome-cookies-"));
+
+   try {
+     const tempDb = join(tempDir, "Cookies");
+     copyFileSync(CHROME_COOKIES_PATH, tempDb);
+     copySidecar(CHROME_COOKIES_PATH, tempDb, "-wal");
+     copySidecar(CHROME_COOKIES_PATH, tempDb, "-shm");
+
+     const metaVersion = await readMetaVersion(tempDb);
+     const stripHash = metaVersion >= 24;
+
+     const hosts = GOOGLE_ORIGINS.map((o) => new URL(o).hostname);
+     const rows = await queryCookieRows(tempDb, hosts);
+     if (!rows) {
+       warnings.push("Failed to query Chrome cookie database");
+       return { cookies: {}, warnings };
+     }
+
+     const cookies: CookieMap = {};
+     for (const row of rows) {
+       const name = row.name as string;
+       if (!ALL_COOKIE_NAMES.has(name)) continue;
+       if (cookies[name]) continue;
+
+       let value = typeof row.value === "string" && row.value.length > 0 ? row.value : null;
+       if (!value) {
+         const encrypted = row.encrypted_value;
+         if (encrypted instanceof Uint8Array) {
+           value = decryptCookieValue(encrypted, key, stripHash);
+         }
+       }
+       if (value) cookies[name] = value;
+     }
+
+     return { cookies, warnings };
+   } finally {
+     rmSync(tempDir, { recursive: true, force: true });
+   }
+ }
+
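+ // Encrypted values carry a 3-byte version prefix ("v10"-style), then AES-128-CBC
+ // ciphertext with an IV of 16 spaces. Cookie DB meta version >= 24 additionally
+ // prepends a 32-byte SHA-256 of the host key to the plaintext; stripHash drops it.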
+ function decryptCookieValue(encrypted: Uint8Array, key: Buffer, stripHash: boolean): string | null {
+   const buf = Buffer.from(encrypted);
+   if (buf.length < 3) return null;
+
+   const prefix = buf.subarray(0, 3).toString("utf8");
+   if (!/^v\d\d$/.test(prefix)) return null;
+
+   const ciphertext = buf.subarray(3);
+   if (!ciphertext.length) return "";
+
+   try {
+     const iv = Buffer.alloc(16, 0x20);
+     const decipher = createDecipheriv("aes-128-cbc", key, iv);
+     decipher.setAutoPadding(false);
+     const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
+     const unpadded = removePkcs7Padding(plaintext);
+     const bytes = stripHash && unpadded.length >= 32 ? unpadded.subarray(32) : unpadded;
+     const decoded = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
+     let i = 0;
+     while (i < decoded.length && decoded.charCodeAt(i) < 0x20) i++;
+     return decoded.slice(i);
+   } catch {
+     return null;
+   }
+ }
+
+ function removePkcs7Padding(buf: Buffer): Buffer {
+   if (!buf.length) return buf;
+   const padding = buf[buf.length - 1];
+   if (!padding || padding > 16) return buf;
+   return buf.subarray(0, buf.length - padding);
+ }
+
+ function readKeychainPassword(): Promise<string | null> {
+   return new Promise((resolve) => {
+     execFile(
+       "security",
+       ["find-generic-password", "-w", "-a", "Chrome", "-s", "Chrome Safe Storage"],
+       { timeout: 5000 },
+       (err, stdout) => {
+         if (err) { resolve(null); return; }
+         resolve(stdout.trim() || null);
+       },
+     );
+   });
+ }
+
+ let sqliteModule: typeof import("node:sqlite") | null = null;
+
+ async function importSqlite(): Promise<typeof import("node:sqlite") | null> {
+   if (sqliteModule) return sqliteModule;
+   const orig = process.emitWarning.bind(process);
+   process.emitWarning = ((warning: string | Error, ...args: unknown[]) => {
+     const msg = typeof warning === "string" ? warning : warning?.message ?? "";
+     if (msg.includes("SQLite is an experimental feature")) return;
+     return (orig as Function)(warning, ...args);
+   }) as typeof process.emitWarning;
+   try {
+     sqliteModule = await import("node:sqlite");
+     return sqliteModule;
+   } catch {
+     return null;
+   } finally {
+     process.emitWarning = orig;
+   }
+ }
+
+ function supportsReadBigInts(): boolean {
+   const [major, minor] = process.versions.node.split(".").map(Number);
+   if (major > 24) return true;
+   if (major < 24) return false;
+   return minor >= 4;
+ }
+
+ async function readMetaVersion(dbPath: string): Promise<number> {
+   const sqlite = await importSqlite();
+   if (!sqlite) return 0;
+   const opts: Record<string, unknown> = { readOnly: true };
+   if (supportsReadBigInts()) opts.readBigInts = true;
+   const db = new sqlite.DatabaseSync(dbPath, opts);
+   try {
+     const rows = db.prepare("SELECT value FROM meta WHERE key = 'version'").all() as Array<Record<string, unknown>>;
+     const val = rows[0]?.value;
+     if (typeof val === "number") return Math.floor(val);
+     if (typeof val === "bigint") return Number(val);
+     if (typeof val === "string") return parseInt(val, 10) || 0;
+     return 0;
+   } catch {
+     return 0;
+   } finally {
+     db.close();
+   }
+ }
+
+ async function queryCookieRows(
+   dbPath: string,
+   hosts: string[],
+ ): Promise<Array<Record<string, unknown>> | null> {
+   const sqlite = await importSqlite();
+   if (!sqlite) return null;
+
+   const clauses: string[] = [];
+   for (const host of hosts) {
+     for (const candidate of expandHosts(host)) {
+       const esc = candidate.replaceAll("'", "''");
+       clauses.push(`host_key = '${esc}'`);
+       clauses.push(`host_key = '.${esc}'`);
+       clauses.push(`host_key LIKE '%.${esc}'`);
+     }
+   }
+   const where = clauses.join(" OR ");
+
+   const opts: Record<string, unknown> = { readOnly: true };
+   if (supportsReadBigInts()) opts.readBigInts = true;
+   const db = new sqlite.DatabaseSync(dbPath, opts);
+   try {
+     return db
+       .prepare(
+         `SELECT name, value, host_key, encrypted_value FROM cookies WHERE (${where}) ORDER BY expires_utc DESC`,
+       )
+       .all() as Array<Record<string, unknown>>;
+   } catch {
+     return null;
+   } finally {
+     db.close();
+   }
+ }
+
+ function expandHosts(host: string): string[] {
+   const parts = host.split(".").filter(Boolean);
+   if (parts.length <= 1) return [host];
+   const candidates = new Set<string>();
+   candidates.add(host);
+   for (let i = 1; i <= parts.length - 2; i++) {
+     const c = parts.slice(i).join(".");
+     if (c) candidates.add(c);
+   }
+   return Array.from(candidates);
+ }
+
+ function copySidecar(srcDb: string, targetDb: string, suffix: string): void {
+   const sidecar = `${srcDb}${suffix}`;
+   if (!existsSync(sidecar)) return;
+   try {
+     copyFileSync(sidecar, `${targetDb}${suffix}`);
+   } catch {}
+ }