pi-web-access 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,7 +2,65 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
- ## [Unreleased]
5
+ ## [0.7.0] - 2026-02-03
6
+
7
+ ### Added
8
+ - **Multi-provider web search**: `web_search` now supports Perplexity, Gemini API (with Google Search grounding), and Gemini Web (cookie auth) as search providers. New `provider` parameter (`auto`, `perplexity`, `gemini`) controls selection. In `auto` mode (default): Perplexity → Gemini API → Gemini Web. Backwards-compatible — existing Perplexity users see no change.
9
+ - **Gemini API grounded search**: Structured citations via `groundingMetadata` with source URIs and text-to-source mappings. Google proxy URLs are resolved via HEAD redirects. Configured via `GEMINI_API_KEY` or `geminiApiKey` in config.
10
+ - **Gemini Web search**: Zero-config web search for users signed into Google in Chrome. Prompt instructs Gemini to cite sources; URLs extracted from markdown response.
11
+ - **Gemini extraction fallback**: When `fetch_content` fails (HTTP 403/429, Readability fails, network errors), automatically retries via Gemini URL Context API then Gemini Web extraction. Each has an independent 60s timeout. Handles SPAs, JS-heavy pages, and anti-bot protections.
12
+ - **Local video file analysis**: `fetch_content` accepts file paths to video files (MP4, MOV, WebM, AVI, etc.). Detected by path prefix (`/`, `./`, `../`, `file://`), validated by extension and 50MB limit. Two-tier fallback: Gemini API (resumable upload via Files API with proper MIME types, poll-until-active and cleanup) → Gemini Web (free, cookie auth).
13
+ - **Video prompt parameter**: `fetch_content` gains optional `prompt` parameter for asking specific questions about video content. Threads through YouTube and local video extraction. Without prompt, uses default extraction (transcript + visual descriptions).
14
+ - **Video thumbnails**: YouTube results include the video thumbnail (fetched from `img.youtube.com`). Local video results include a frame extracted via ffmpeg (at ~1 second). Returned as image content parts alongside text — the agent sees the thumbnail as vision context.
15
+ - **Configurable frame extraction**: `frames` parameter (1-12) on `fetch_content` for pulling visual frames from YouTube or local video. Works in five modes: frames alone (sample across entire video), single timestamp (one frame), single+frames (N frames at 5s intervals), range (default 6 frames), range+frames (N frames across the range). Endpoint-inclusive distribution with 5-second minimum spacing.
16
+ - **Video duration in responses**: Frame extraction results include the video duration for context.
17
+ - `searchProvider` config option in `~/.pi/web-search.json` for global provider default
18
+ - `video` config section: `enabled`, `preferredModel`, `maxSizeMB`
19
+
20
+ ### Changed
21
+ - `PerplexityResponse` renamed to `SearchResponse` (shared interface for all search providers)
22
+ - Extracted HTTP pipeline from `extractContent` into `extractViaHttp` for cleaner Gemini fallback orchestration
23
+ - `getApiKey()`, `API_BASE`, `DEFAULT_MODEL` exported from `gemini-api.ts` for use by search and URL Context modules
24
+ - `isPerplexityAvailable()` added to `perplexity.ts` as non-throwing API key check
25
+ - Content-type routing in `extract.ts`: only `text/html` and `application/xhtml+xml` go through Readability; all other text types (`text/markdown`, `application/json`, `text/csv`, etc.) returned directly. Fixes the OpenAI cookbook `.md` URL that returned "Untitled (30 chars)".
26
+ - Title extraction for non-HTML content: `extractTextTitle()` pulls from markdown `#`/`##` headings, falls back to URL filename
27
+ - Combined `yt-dlp --print duration -g` call fetches stream URL and duration in a single invocation, reused across all frame extraction paths via `streamInfo` passthrough
28
+ - Shared helpers in `utils.ts` (`formatSeconds`, error mapping) eliminate circular imports and duplication across youtube-extract.ts and video-extract.ts
29
+
30
+ ### Fixed
31
+ - `fetch_content` TUI rendered `undefined/undefined URLs` during progress updates (renderResult didn't handle `isPartial`, now shows a progress bar like `web_search` does)
32
+ - RSC extractor produced malformed markdown for `<pre><code>` blocks (backticks inside fenced code blocks) -- extremely common on Next.js documentation pages
33
+ - Multi-URL fetch failures rendered in green "success" color even when 0 URLs succeeded (now red)
34
+ - `web_search` queries parameter described as "parallel" in schema but execution is sequential (changed to "batch"; `urls` correctly remains "parallel")
35
+ - Proper error propagation for frame extraction: missing binaries (yt-dlp, ffmpeg, ffprobe), private/age-restricted/region-blocked videos, expired stream URLs (403), timestamp-exceeds-duration, and timeouts all produce specific user-facing messages instead of silent nulls
36
+ - `isTimeoutError` now detects `execFileSync` timeouts via the `killed` flag (SIGTERM from timeout was previously unrecognized)
37
+ - Float video durations (e.g. 15913.7s from yt-dlp) no longer produce out-of-range timestamps — durations are floored before computing frame positions
38
+ - `parseTimestamp` consistently floors results across both bare-number ("90.5" → 90) and colon ("1:30.5" → 90) paths — previously the colon path returned floats
39
+ - YouTube thumbnail assignment no longer sets `null` on the optional `thumbnail` field when fetch fails (was a type mismatch; now only assigned on success)
40
+
41
+ ### New files
42
+ - `gemini-search.ts` -- search routing + Gemini Web/API search providers with grounding
43
+ - `gemini-url-context.ts` -- URL Context API extraction + Gemini Web extraction fallback
44
+ - `video-extract.ts` -- local video file detection, Gemini Web/API analysis with Files API upload
45
+ - `utils.ts` -- shared formatting and error helpers for frame extraction
46
+
47
+ ## [0.6.0] - 2026-02-02
48
+
49
+ ### Added
50
+ - YouTube video understanding in `fetch_content` via three-tier fallback chain:
51
+ - **Gemini Web** (primary): reads Chrome session cookies from macOS Keychain + SQLite, authenticates to gemini.google.com, sends YouTube URL via StreamGenerate endpoint. Full visual + audio understanding with timestamps. Zero config needed if signed into Google in Chrome.
52
+ - **Gemini API** (secondary): direct REST calls with `GEMINI_API_KEY`. YouTube URLs passed as `file_data.file_uri`. Configure via `GEMINI_API_KEY` env var or `geminiApiKey` in `~/.pi/web-search.json`.
53
+ - **Perplexity** (fallback): uses existing `searchWithPerplexity` for a topic summary when neither Gemini path is available. Output labeled as "Summary (via Perplexity)" so the agent knows it's not a full transcript.
54
+ - YouTube URL detection for all common formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`
55
+ - Configurable via `~/.pi/web-search.json` under `youtube` key (`enabled`, `preferredModel`)
56
+ - Actionable error messages when extraction fails (directs user to sign into Chrome or set API key)
57
+ - YouTube URLs no longer fall through to HTTP/Readability (which returns garbage); returns error instead
58
+
59
+ ### New files
60
+ - `chrome-cookies.ts` -- macOS Chrome cookie extraction using Node builtins (`node:crypto`, `node:sqlite`, `child_process`)
61
+ - `gemini-web.ts` -- Gemini Web client ported from surf's gemini-client.cjs (cookie auth, StreamGenerate, model fallback)
62
+ - `gemini-api.ts` -- Gemini REST API client (generateContent, file upload/processing/cleanup for Phase 2)
63
+ - `youtube-extract.ts` -- YouTube extraction orchestrator with three-tier fallback and activity logging
6
64
 
7
65
  ## [0.5.1] - 2026-02-02
8
66
 
package/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  # Pi Web Access
6
6
 
7
- An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI, fetch and extract content from URLs, clone GitHub repos for local exploration, and read PDFs.
7
+ An extension for [Pi coding agent](https://github.com/badlogic/pi-mono/) that gives Pi web capabilities: search via Perplexity AI or Gemini, fetch and extract content from URLs, clone GitHub repos for local exploration, read PDFs, understand YouTube videos, and analyze local video files.
8
8
 
9
9
  ```typescript
10
10
  web_search({ query: "TypeScript best practices 2025" })
@@ -17,31 +17,42 @@ fetch_content({ url: "https://docs.example.com/guide" })
17
17
  pi install npm:pi-web-access
18
18
  ```
19
19
 
20
- Add your Perplexity API key:
20
+ Configure at least one search provider:
21
21
 
22
22
  ```bash
23
- # Option 1: Environment variable
24
- export PERPLEXITY_API_KEY="pplx-..."
23
+ # Option 1: Sign into gemini.google.com in Chrome (free, zero config)
25
24
 
26
- # Option 2: Config file
25
+ # Option 2: Gemini API key
26
+ echo '{"geminiApiKey": "AIza..."}' > ~/.pi/web-search.json
27
+
28
+ # Option 3: Perplexity API key
27
29
  echo '{"perplexityApiKey": "pplx-..."}' > ~/.pi/web-search.json
28
30
  ```
29
31
 
30
- Get a key at https://perplexity.ai/settings/api
32
+ All three work simultaneously. In `auto` mode (default), the extension tries Perplexity first, then Gemini API, then Gemini Web.
31
33
 
32
34
  **Requires:** Pi v0.37.3+
33
35
 
36
+ **Optional dependencies** for video frame extraction:
37
+
38
+ ```bash
39
+ brew install ffmpeg # frame extraction, video thumbnails, local video duration
40
+ brew install yt-dlp # YouTube frame extraction (stream URL + duration lookup)
41
+ ```
42
+
43
+ Without these, video content analysis (transcripts via Gemini) still works. The binaries are only needed for extracting visual frames from videos. `ffprobe` (bundled with ffmpeg) is used for local video duration lookup when sampling frames across an entire video.
44
+
34
45
  ## Tools
35
46
 
36
47
  ### web_search
37
48
 
38
- Search the web via Perplexity AI. Returns synthesized answer with source citations.
49
+ Search the web via Perplexity AI or Gemini. Returns synthesized answer with source citations.
39
50
 
40
51
  ```typescript
41
52
  // Single query
42
53
  web_search({ query: "rust async programming" })
43
54
 
44
- // Multiple queries (parallel)
55
+ // Multiple queries (batch)
45
56
  web_search({ queries: ["query 1", "query 2"] })
46
57
 
47
58
  // With options
@@ -52,12 +63,17 @@ web_search({
52
63
  domainFilter: ["github.com"] // Prefix with - to exclude
53
64
  })
54
65
 
66
+ // Explicit provider
67
+ web_search({ query: "...", provider: "gemini" }) // auto, perplexity, gemini
68
+
55
69
  // Fetch full page content (async)
56
70
  web_search({ query: "...", includeContent: true })
57
71
  ```
58
72
 
59
73
  When `includeContent: true`, sources are fetched in the background. Agent receives notification when ready.
60
74
 
75
+ Provider selection in `auto` mode: Perplexity (if key configured) → Gemini API (if key configured, uses Google Search grounding) → Gemini Web (if signed into Chrome). Gemini API returns structured citations with source mappings. Gemini Web returns markdown with embedded links.
76
+
61
77
  ### fetch_content
62
78
 
63
79
  Fetch URL(s) and extract readable content as markdown.
@@ -93,6 +109,63 @@ fetch_content({ url: "https://github.com/big/repo", forceClone: true })
93
109
 
94
110
  Repos over 350MB get a lightweight API-based view instead of a full clone. Commit SHA URLs are also handled via the API. Clones are cached for the session -- multiple files from the same repo share one clone, but clones are wiped on session change/shutdown and re-cloned as needed.
95
111
 
112
+ **YouTube videos:** YouTube URLs are automatically detected and processed via Gemini for full video understanding (visual + audio + transcript). Three-tier fallback:
113
+
114
+ ```typescript
115
+ // Returns transcript with timestamps, visual descriptions, chapter markers
116
+ fetch_content({ url: "https://youtube.com/watch?v=dQw4w9WgXcQ" })
117
+
118
+ // Ask a specific question about the video
119
+ fetch_content({ url: "https://youtube.com/watch?v=abc", prompt: "What libraries are imported?" })
120
+ ```
121
+
122
+ 1. **Gemini Web** (primary) -- reads your Chrome session cookies. Zero config if you're signed into Google.
123
+ 2. **Gemini API** (secondary) -- uses `GEMINI_API_KEY` env var or `geminiApiKey` in config.
124
+ 3. **Perplexity** (fallback) -- topic summary when neither Gemini path is available.
125
+
126
+ YouTube results include the video thumbnail as an image content part, so the agent receives visual context alongside the transcript.
127
+
128
+ Handles all YouTube URL formats: `/watch?v=`, `youtu.be/`, `/shorts/`, `/live/`, `/embed/`, `/v/`, `m.youtube.com`. Playlist-only URLs fall through to normal extraction.
129
+
130
+ **Local video files:** Pass a file path to analyze video content via Gemini. Supports MP4, MOV, WebM, AVI, and other common formats. Max 50MB (configurable).
131
+
132
+ ```typescript
133
+ // Analyze a screen recording
134
+ fetch_content({ url: "/path/to/recording.mp4" })
135
+
136
+ // Ask about specific content in the video
137
+ fetch_content({ url: "./demo.mov", prompt: "What error message appears on screen?" })
138
+ ```
139
+
140
+ Two-tier fallback: Gemini API (needs key, proper Files API with MIME types) → Gemini Web (free, needs Chrome login). File paths are detected by prefix (`/`, `./`, `../`, `file://`). If ffmpeg is installed, a frame from the video is included as a thumbnail image alongside the analysis.
141
+
142
+ **Video frame extraction (YouTube + local):** Use `timestamp` and/or `frames` to pull visuals for scanning.
143
+
144
+ ```typescript
145
+ // Single frame at an exact time
146
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41" })
147
+
148
+ // Range scan (default 6 frames)
149
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00" })
150
+
151
+ // Custom density across a range
152
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41-25:00", frames: 3 })
153
+
154
+ // N frames at 5s intervals starting from a single timestamp
155
+ fetch_content({ url: "https://youtube.com/watch?v=abc", timestamp: "23:41", frames: 5 })
156
+
157
+ // Whole-video sampling (no timestamp)
158
+ fetch_content({ url: "https://youtube.com/watch?v=abc", frames: 6 })
159
+ ```
160
+
161
+ The same `timestamp`/`frames` syntax works with local file paths (e.g. `/path/to/video.mp4`).
162
+
163
+ Requirements: YouTube frame extraction needs `yt-dlp` + `ffmpeg`. Local video frames need `ffmpeg` (and `ffprobe`, bundled with ffmpeg, for whole-video sampling).
164
+
165
+ Common errors include missing binaries, private/age-restricted videos, region blocks, live streams, expired stream URLs (403), and timestamps beyond the video duration.
166
+
167
+ **Gemini extraction fallback:** When Readability fails or a site blocks bot traffic (403, 429), the extension automatically retries via Gemini URL Context (API) or Gemini Web. This handles SPAs, JS-heavy pages, and anti-bot protections that the HTTP pipeline can't.
168
+
96
169
  **PDF handling:** When fetching a PDF URL, the extension extracts text and saves it as a markdown file in `~/Downloads/`. The agent can then use `read` to access specific sections without loading 200K+ chars into context.
97
170
 
98
171
  ### get_search_content
@@ -161,7 +234,11 @@ Browse stored search results interactively.
161
234
  ### fetch_content routing
162
235
 
163
236
  ```
164
- fetch_content(url)
237
+ fetch_content(url_or_path, prompt?)
238
+
239
+ ├── Local video file? ──→ Gemini API → Gemini Web
240
+ │ ↓
241
+ │ Video analysis (prompt forwarded)
165
242
 
166
243
  ├── github.com code URL? ──→ Clone repo (gh/git --depth 1)
167
244
  │ │
@@ -177,30 +254,41 @@ fetch_content(url)
177
254
  │ Return content + local
178
255
  │ path for read/bash
179
256
 
257
+ ├── YouTube URL? ──→ Gemini Web → Gemini API → Perplexity
258
+ │ ↓ (prompt forwarded)
259
+ │ Transcript + visual descriptions
260
+
180
261
  ├── PDF? ──→ unpdf → Save to ~/Downloads/
181
262
 
182
- ├── Plain text? ──→ Return directly
263
+ ├── Plain text/markdown/JSON? ──→ Return directly
183
264
 
184
265
  └── HTML ──→ Readability → Markdown
185
266
 
186
267
  [if fails]
187
268
 
188
269
  RSC Parser → Markdown
270
+
271
+ [if all fail]
272
+
273
+ Gemini URL Context → Gemini Web extraction
189
274
  ```
190
275
 
191
- ### web_search with includeContent
276
+ ### web_search routing
192
277
 
193
278
  ```
194
- Agent Request → Perplexity API → Synthesized Answer + Citations
195
-
196
- [if includeContent: true]
197
-
198
- Background Fetch (3 concurrent)
199
- (uses same routing as above)
200
-
201
- Agent Notification (triggerTurn)
279
+ web_search(query, provider?)
280
+
281
+ ├── provider = "perplexity" ──→ Perplexity API
282
+ ├── provider = "gemini" ──→ Gemini API → Gemini Web
283
+ └── provider = "auto"
284
+ ├── Perplexity key? ──→ Perplexity API
285
+ ├── Gemini API key? ──→ Gemini API (grounded search)
286
+ ├── Chrome cookies? ──→ Gemini Web (markdown citations)
287
+ └── Error
202
288
  ```
203
289
 
290
+ When `includeContent: true`, sources are fetched in the background using the fetch_content routing above, and the agent receives a notification when ready.
291
+
204
292
  ## Configuration
205
293
 
206
294
  All config lives in `~/.pi/web-search.json`:
@@ -208,16 +296,29 @@ All config lives in `~/.pi/web-search.json`:
208
296
  ```json
209
297
  {
210
298
  "perplexityApiKey": "pplx-...",
299
+ "geminiApiKey": "AIza...",
300
+ "searchProvider": "auto",
211
301
  "githubClone": {
212
302
  "enabled": true,
213
303
  "maxRepoSizeMB": 350,
214
304
  "cloneTimeoutSeconds": 30,
215
305
  "clonePath": "/tmp/pi-github-repos"
306
+ },
307
+ "youtube": {
308
+ "enabled": true,
309
+ "preferredModel": "gemini-2.5-flash"
310
+ },
311
+ "video": {
312
+ "enabled": true,
313
+ "preferredModel": "gemini-2.5-flash",
314
+ "maxSizeMB": 50
216
315
  }
217
316
  }
218
317
  ```
219
318
 
220
- All `githubClone` fields are optional with the defaults shown above. Set `"enabled": false` to disable GitHub cloning entirely and fall through to normal HTML extraction.
319
+ All fields are optional. `GEMINI_API_KEY` and `PERPLEXITY_API_KEY` env vars take precedence over config file values. Set `"enabled": false` under `githubClone`, `youtube`, or `video` to disable those features.
320
+
321
+ `searchProvider` controls `web_search` default: `"auto"` (Perplexity → Gemini API → Gemini Web), `"perplexity"`, or `"gemini"` (API → Web).
221
322
 
222
323
  ## Rate Limits
223
324
 
@@ -231,7 +332,15 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
231
332
  |------|---------|
232
333
  | `index.ts` | Extension entry, tool definitions, commands, widget |
233
334
  | `perplexity.ts` | Perplexity API client, rate limiting |
234
- | `extract.ts` | URL fetching, content extraction routing |
335
+ | `gemini-search.ts` | Gemini search providers (Web + API with grounding), search routing |
336
+ | `extract.ts` | URL/file path routing, HTTP extraction, Gemini fallback orchestration |
337
+ | `gemini-url-context.ts` | Gemini URL Context + Web extraction fallbacks |
338
+ | `video-extract.ts` | Local video file detection, upload, Gemini Web/API analysis |
339
+ | `youtube-extract.ts` | YouTube URL detection, three-tier extraction orchestrator |
340
+ | `chrome-cookies.ts` | macOS Chrome cookie extraction (Keychain + SQLite) |
341
+ | `gemini-web.ts` | Gemini Web client (cookie auth, StreamGenerate) |
342
+ | `gemini-api.ts` | Gemini REST API client (generateContent, file upload) |
343
+ | `utils.ts` | Shared formatting (`formatSeconds`) and error helpers for frame extraction |
235
344
  | `github-extract.ts` | GitHub URL parser, clone cache, content generation |
236
345
  | `github-api.ts` | GitHub API fallback for oversized repos and commit SHAs |
237
346
  | `pdf-extract.ts` | PDF text extraction, saves to markdown |
@@ -242,12 +351,16 @@ All `githubClone` fields are optional with the defaults shown above. Set `"enabl
242
351
 
243
352
  ## Limitations
244
353
 
245
- - Content extraction works best on article-style pages
246
- - Heavy JS sites may not extract well (no browser rendering), though Next.js App Router pages with RSC flight data are supported
354
+ - Content extraction works best on article-style pages; JS-heavy sites fall back to Gemini extraction when available
355
+ - Gemini extraction fallback requires either a Gemini API key or Chrome login to Google
247
356
  - PDFs are extracted as text (no OCR for scanned documents)
248
357
  - Max response size: 20MB for PDFs, 5MB for HTML
249
358
  - Max inline content: 30,000 chars per URL (larger content stored for retrieval via get_search_content)
250
359
  - GitHub cloning requires `gh` CLI for private repos (public repos fall back to `git clone`)
251
360
  - GitHub branch names with slashes (e.g. `feature/foo`) may resolve the wrong file path; the clone still succeeds and the agent can navigate manually
252
361
  - Non-code GitHub URLs (issues, PRs, wiki, etc.) fall through to normal Readability extraction
362
+ - YouTube extraction via Gemini Web requires macOS (Chrome cookie decryption is OS-specific); other platforms fall through to Gemini API or Perplexity
363
+ - YouTube private/age-restricted videos may fail on all paths
364
+ - Gemini can process videos up to ~1 hour at default resolution; longer videos may be truncated
365
+ - First-time Chrome cookie access may trigger a macOS Keychain permission dialog
253
366
  - Requires Pi restart after config file changes
@@ -0,0 +1,240 @@
1
+ import { execFile } from "node:child_process";
2
+ import { pbkdf2Sync, createDecipheriv } from "node:crypto";
3
+ import { copyFileSync, existsSync, mkdtempSync, rmSync } from "node:fs";
4
+ import { tmpdir, homedir, platform } from "node:os";
5
+ import { join } from "node:path";
6
+
7
/** Cookie name → value map, suitable for assembling an HTTP Cookie header. */
export type CookieMap = Record<string, string>;

// Origins whose cookie jars we scan for Google session cookies.
const GOOGLE_ORIGINS = [
  "https://gemini.google.com",
  "https://accounts.google.com",
  "https://www.google.com",
];

// Allow-list of Google auth/session cookie names to extract; any other
// cookie found for these hosts is ignored.
const ALL_COOKIE_NAMES = new Set([
  "__Secure-1PSID",
  "__Secure-1PSIDTS",
  "__Secure-1PSIDCC",
  "__Secure-1PAPISID",
  "NID",
  "AEC",
  "SOCS",
  "__Secure-BUCKET",
  "__Secure-ENID",
  "SID",
  "HSID",
  "SSID",
  "APISID",
  "SAPISID",
  "__Secure-3PSID",
  "__Secure-3PSIDTS",
  "__Secure-3PAPISID",
  "SIDCC",
]);

// SQLite cookie database of Chrome's default profile on macOS.
const CHROME_COOKIES_PATH = join(
  homedir(),
  "Library/Application Support/Google/Chrome/Default/Cookies",
);
40
+
41
/**
 * Extract Google session cookies from Chrome's default profile on macOS.
 *
 * Returns null when not on macOS or no Chrome cookie DB exists (caller should
 * fall back to another auth path). Otherwise returns whatever cookies could be
 * read plus human-readable warnings for partial failures (missing Keychain
 * password, unreadable database).
 */
export async function getGoogleCookies(): Promise<{ cookies: CookieMap; warnings: string[] } | null> {
  if (platform() !== "darwin") return null;
  if (!existsSync(CHROME_COOKIES_PATH)) return null;

  const warnings: string[] = [];

  const password = await readKeychainPassword();
  if (!password) {
    warnings.push("Could not read Chrome Safe Storage password from Keychain");
    return { cookies: {}, warnings };
  }

  // AES-128 key via Chrome's macOS Safe Storage KDF: PBKDF2-SHA1,
  // salt "saltysalt", 1003 iterations, 16-byte key.
  const key = pbkdf2Sync(password, "saltysalt", 1003, 16, "sha1");
  const tempDir = mkdtempSync(join(tmpdir(), "pi-chrome-cookies-"));

  try {
    // Work on a copy (plus any -wal/-shm sidecars) so we never touch the
    // live database that a running Chrome may hold open.
    const tempDb = join(tempDir, "Cookies");
    copyFileSync(CHROME_COOKIES_PATH, tempDb);
    copySidecar(CHROME_COOKIES_PATH, tempDb, "-wal");
    copySidecar(CHROME_COOKIES_PATH, tempDb, "-shm");

    // Cookie DB meta version >= 24: encrypted values carry a 32-byte prefix
    // that decryptCookieValue() strips (presumably a hash of the host key —
    // TODO confirm against Chromium's os_crypt source).
    const metaVersion = await readMetaVersion(tempDb);
    const stripHash = metaVersion >= 24;

    const hosts = GOOGLE_ORIGINS.map((o) => new URL(o).hostname);
    const rows = await queryCookieRows(tempDb, hosts);
    if (!rows) {
      warnings.push("Failed to query Chrome cookie database");
      return { cookies: {}, warnings };
    }

    const cookies: CookieMap = {};
    for (const row of rows) {
      const name = row.name as string;
      if (!ALL_COOKIE_NAMES.has(name)) continue;
      // Rows are ordered by expires_utc DESC, so the first hit per name wins.
      if (cookies[name]) continue;

      // Prefer the plaintext `value` column; fall back to decrypting
      // `encrypted_value` (the usual case in modern Chrome).
      let value = typeof row.value === "string" && row.value.length > 0 ? row.value : null;
      if (!value) {
        const encrypted = row.encrypted_value;
        if (encrypted instanceof Uint8Array) {
          value = decryptCookieValue(encrypted, key, stripHash);
        }
      }
      if (value) cookies[name] = value;
    }

    return { cookies, warnings };
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}
93
+
94
/**
 * Decrypt a single Chrome cookie value.
 *
 * @param encrypted Raw `encrypted_value` blob from the cookies table.
 * @param key       16-byte AES key derived from the Safe Storage password.
 * @param stripHash When true, drop the 32-byte prefix present in newer
 *                  cookie-DB schema versions (meta version >= 24).
 * @returns The decoded cookie string, "" for an empty ciphertext, or null
 *          when the blob is unrecognized or decryption/decoding fails.
 */
function decryptCookieValue(encrypted: Uint8Array, key: Buffer, stripHash: boolean): string | null {
  const buf = Buffer.from(encrypted);
  if (buf.length < 3) return null;

  // Encrypted cookies begin with a 3-byte version tag such as "v10".
  const prefix = buf.subarray(0, 3).toString("utf8");
  if (!/^v\d\d$/.test(prefix)) return null;

  const ciphertext = buf.subarray(3);
  if (!ciphertext.length) return "";

  try {
    // Chrome on macOS uses AES-128-CBC with an IV of 16 space (0x20) bytes.
    const iv = Buffer.alloc(16, 0x20);
    const decipher = createDecipheriv("aes-128-cbc", key, iv);
    // Padding is removed manually below; some blobs carry a prefix before
    // the value that would confuse automatic PKCS#7 handling.
    decipher.setAutoPadding(false);
    const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
    const unpadded = removePkcs7Padding(plaintext);
    const bytes = stripHash && unpadded.length >= 32 ? unpadded.subarray(32) : unpadded;
    // fatal: true makes invalid UTF-8 throw, which we treat as a failed decrypt.
    const decoded = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    // Drop any leading control characters left over from decryption.
    let i = 0;
    while (i < decoded.length && decoded.charCodeAt(i) < 0x20) i++;
    return decoded.slice(i);
  } catch {
    return null;
  }
}
119
+
120
+ function removePkcs7Padding(buf: Buffer): Buffer {
121
+ if (!buf.length) return buf;
122
+ const padding = buf[buf.length - 1];
123
+ if (!padding || padding > 16) return buf;
124
+ return buf.subarray(0, buf.length - padding);
125
+ }
126
+
127
+ function readKeychainPassword(): Promise<string | null> {
128
+ return new Promise((resolve) => {
129
+ execFile(
130
+ "security",
131
+ ["find-generic-password", "-w", "-a", "Chrome", "-s", "Chrome Safe Storage"],
132
+ { timeout: 5000 },
133
+ (err, stdout) => {
134
+ if (err) { resolve(null); return; }
135
+ resolve(stdout.trim() || null);
136
+ },
137
+ );
138
+ });
139
+ }
140
+
141
// Memoized node:sqlite module (null until first successful import).
let sqliteModule: typeof import("node:sqlite") | null = null;

/**
 * Dynamically import node:sqlite, suppressing Node's "SQLite is an
 * experimental feature" warning during the import. Returns null when the
 * module is unavailable (older Node). The original emitWarning is restored
 * in the finally block even if the import throws.
 */
async function importSqlite(): Promise<typeof import("node:sqlite") | null> {
  if (sqliteModule) return sqliteModule;
  const orig = process.emitWarning.bind(process);
  // Temporarily filter only the experimental-SQLite warning; everything else
  // passes through to the original emitWarning.
  process.emitWarning = ((warning: string | Error, ...args: unknown[]) => {
    const msg = typeof warning === "string" ? warning : warning?.message ?? "";
    if (msg.includes("SQLite is an experimental feature")) return;
    return (orig as Function)(warning, ...args);
  }) as typeof process.emitWarning;
  try {
    sqliteModule = await import("node:sqlite");
    return sqliteModule;
  } catch {
    return null;
  } finally {
    process.emitWarning = orig;
  }
}
160
+
161
+ function supportsReadBigInts(): boolean {
162
+ const [major, minor] = process.versions.node.split(".").map(Number);
163
+ if (major > 24) return true;
164
+ if (major < 24) return false;
165
+ return minor >= 4;
166
+ }
167
+
168
/**
 * Read the schema version from the cookie database's `meta` table.
 *
 * Returns 0 when node:sqlite is unavailable or the query fails, which
 * callers treat as "old schema" (no hash prefix stripping). The stored value
 * may surface as number, bigint (with readBigInts), or string, so all three
 * are coerced.
 */
async function readMetaVersion(dbPath: string): Promise<number> {
  const sqlite = await importSqlite();
  if (!sqlite) return 0;
  const opts: Record<string, unknown> = { readOnly: true };
  if (supportsReadBigInts()) opts.readBigInts = true;
  const db = new sqlite.DatabaseSync(dbPath, opts);
  try {
    const rows = db.prepare("SELECT value FROM meta WHERE key = 'version'").all() as Array<Record<string, unknown>>;
    const val = rows[0]?.value;
    if (typeof val === "number") return Math.floor(val);
    if (typeof val === "bigint") return Number(val);
    if (typeof val === "string") return parseInt(val, 10) || 0;
    return 0;
  } catch {
    return 0;
  } finally {
    db.close();
  }
}
187
+
188
+ async function queryCookieRows(
189
+ dbPath: string,
190
+ hosts: string[],
191
+ ): Promise<Array<Record<string, unknown>> | null> {
192
+ const sqlite = await importSqlite();
193
+ if (!sqlite) return null;
194
+
195
+ const clauses: string[] = [];
196
+ for (const host of hosts) {
197
+ for (const candidate of expandHosts(host)) {
198
+ const esc = candidate.replaceAll("'", "''");
199
+ clauses.push(`host_key = '${esc}'`);
200
+ clauses.push(`host_key = '.${esc}'`);
201
+ clauses.push(`host_key LIKE '%.${esc}'`);
202
+ }
203
+ }
204
+ const where = clauses.join(" OR ");
205
+
206
+ const opts: Record<string, unknown> = { readOnly: true };
207
+ if (supportsReadBigInts()) opts.readBigInts = true;
208
+ const db = new sqlite.DatabaseSync(dbPath, opts);
209
+ try {
210
+ return db
211
+ .prepare(
212
+ `SELECT name, value, host_key, encrypted_value FROM cookies WHERE (${where}) ORDER BY expires_utc DESC`,
213
+ )
214
+ .all() as Array<Record<string, unknown>>;
215
+ } catch {
216
+ return null;
217
+ } finally {
218
+ db.close();
219
+ }
220
+ }
221
+
222
+ function expandHosts(host: string): string[] {
223
+ const parts = host.split(".").filter(Boolean);
224
+ if (parts.length <= 1) return [host];
225
+ const candidates = new Set<string>();
226
+ candidates.add(host);
227
+ for (let i = 1; i <= parts.length - 2; i++) {
228
+ const c = parts.slice(i).join(".");
229
+ if (c) candidates.add(c);
230
+ }
231
+ return Array.from(candidates);
232
+ }
233
+
234
+ function copySidecar(srcDb: string, targetDb: string, suffix: string): void {
235
+ const sidecar = `${srcDb}${suffix}`;
236
+ if (!existsSync(sidecar)) return;
237
+ try {
238
+ copyFileSync(sidecar, `${targetDb}${suffix}`);
239
+ } catch {}
240
+ }