ai-browser-profile 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
+ """Read cookies from a Chromium browser profile and inject into another.
2
+
3
+ Sibling to the ingestors in ai_browser_profile.ingestors. Lives in the
4
+ package but is NOT wired into extract_memories() — cookies are auth
5
+ secrets and must never land in memories.db.
6
+
7
+ Public API:
8
+ read_cookies(profile, domains=None) -> list[Cookie]
9
+ inject_via_cdp(cookies, cdp_url, ...) -> int
10
+
11
+ CLI:
12
+ python -m ai_browser_profile.cookies copy \\
13
+ --from chrome:Default \\
14
+ --to cdp://127.0.0.1:9555 \\
15
+ --domains github.com,linear.app
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import hashlib
22
+ import json
23
+ import logging
24
+ import shutil
25
+ import sqlite3
26
+ import subprocess
27
+ import sys
28
+ import urllib.request
29
+ from dataclasses import dataclass
30
+ from pathlib import Path
31
+ from typing import Iterable, Optional
32
+
33
+ from ai_browser_profile.ingestors.browser_detect import (
34
+ BrowserProfile,
35
+ copy_db,
36
+ detect_browsers,
37
+ )
38
+
39
log = logging.getLogger(__name__)

# Keychain service names queried by _keychain_password(), keyed by the
# browser identifier carried on a profile.
KEYCHAIN_SERVICE = {
    "chrome": "Chrome Safe Storage",
    "arc": "Arc Safe Storage",
    "brave": "Brave Safe Storage",
    "edge": "Microsoft Edge Safe Storage",
    "chromium": "Chromium Safe Storage",
}

# Inputs to the PBKDF2-HMAC-SHA1 derivation in _derive_key().
PBKDF2_SALT = b"saltysalt"
PBKDF2_ITERATIONS = 1003
AES_KEY_LENGTH = 16

# Fixed initialisation vector passed to AES-CBC in _decrypt(): 16 spaces.
AES_IV = b" " * 16

# Translates the integer ``samesite`` column of the cookies table into the
# string stored on Cookie.same_site.
SAMESITE_MAP = {
    -1: "Unspecified",
    0: "None",
    1: "Lax",
    2: "Strict",
}
55
+
56
+
57
@dataclass
class Cookie:
    """One browser cookie as produced by read_cookies() and consumed by inject_via_cdp()."""

    # Cookie name.
    name: str
    # Cookie value in plaintext (already decrypted when read from a profile).
    value: str
    # Host the cookie belongs to; read_cookies() fills this from ``host_key``.
    domain: str
    # URL path scope; read_cookies() falls back to "/" when the column is empty.
    path: str
    # Expiry as seconds since the Unix epoch; 0.0 when no expiry was stored.
    expires: float
    # True when the cookie is flagged secure-only.
    secure: bool
    # True when the cookie is flagged HttpOnly.
    http_only: bool
    # One of the SAMESITE_MAP values ("Unspecified", "None", "Lax", "Strict").
    same_site: str
67
+
68
+
69
def _keychain_password(browser: str) -> bytes:
    """Fetch a browser's Safe Storage password from the Keychain.

    Runs ``security find-generic-password -w -s <service>`` where the service
    name is looked up in KEYCHAIN_SERVICE.

    Args:
        browser: browser identifier, a key of KEYCHAIN_SERVICE.

    Returns:
        The command's stdout with surrounding whitespace stripped, encoded to
        bytes.

    Raises:
        ValueError: ``browser`` has no entry in KEYCHAIN_SERVICE.
        RuntimeError: the ``security`` command exited with a non-zero status.
    """
    service_name = KEYCHAIN_SERVICE.get(browser)
    if not service_name:
        raise ValueError(f"No keychain service mapped for browser {browser!r}")

    command = ["security", "find-generic-password", "-w", "-s", service_name]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode == 0:
        return result.stdout.strip().encode()

    # Prefer the tool's own explanation; fall back to a generic reason.
    reason = result.stderr.strip() or "access denied"
    raise RuntimeError(f"Could not read {service_name!r} from Keychain: {reason}")
82
+
83
+
84
def _derive_key(password: bytes) -> bytes:
    """Derive the AES key from a Safe Storage password.

    Uses PBKDF2-HMAC-SHA1 with the module's PBKDF2_SALT and PBKDF2_ITERATIONS,
    producing AES_KEY_LENGTH bytes.
    """
    derived = hashlib.pbkdf2_hmac(
        "sha1",
        password,
        PBKDF2_SALT,
        PBKDF2_ITERATIONS,
        dklen=AES_KEY_LENGTH,
    )
    return derived
88
+
89
+
90
+ def _decrypt(encrypted: bytes, key: bytes, host_key: str) -> Optional[str]:
91
+ """Decrypt a Chromium cookie value. Returns None on failure."""
92
+ from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
93
+
94
+ if not encrypted:
95
+ return None
96
+ prefix = encrypted[:3]
97
+ if prefix in (b"v10", b"v11"):
98
+ payload = encrypted[3:]
99
+ else:
100
+ payload = encrypted
101
+ if len(payload) % 16 != 0:
102
+ return None
103
+ cipher = Cipher(algorithms.AES(key), modes.CBC(AES_IV))
104
+ dec = cipher.decryptor()
105
+ plain = dec.update(payload) + dec.finalize()
106
+ if not plain:
107
+ return None
108
+ pad = plain[-1]
109
+ if 1 <= pad <= 16 and plain.endswith(bytes([pad]) * pad):
110
+ plain = plain[:-pad]
111
+ # Chrome 80+ prepends SHA256(host_key) (32 bytes) to bind cookie to its host.
112
+ expected = hashlib.sha256(host_key.encode()).digest()
113
+ if plain.startswith(expected):
114
+ plain = plain[32:]
115
+ try:
116
+ return plain.decode("utf-8")
117
+ except UnicodeDecodeError:
118
+ return plain.decode("utf-8", errors="replace")
119
+
120
+
121
+ def read_cookies(
122
+ profile: BrowserProfile,
123
+ domains: Optional[Iterable[str]] = None,
124
+ ) -> list[Cookie]:
125
+ """Read and decrypt cookies from a Chromium browser profile.
126
+
127
+ Args:
128
+ profile: A Chromium profile from detect_browsers().
129
+ domains: Optional iterable of substrings; a cookie is kept if its
130
+ host_key contains any of them. None means all cookies.
131
+ """
132
+ if profile.browser in ("safari", "firefox"):
133
+ raise NotImplementedError(f"Cookie read not supported for {profile.browser}")
134
+
135
+ cookies_path = profile.path / "Cookies"
136
+ if not cookies_path.exists():
137
+ raise FileNotFoundError(f"No Cookies file at {cookies_path}")
138
+
139
+ tmp = copy_db(cookies_path)
140
+ if tmp is None:
141
+ raise RuntimeError(
142
+ f"Could not copy {cookies_path}. Grant Full Disk Access to your terminal and retry."
143
+ )
144
+
145
+ domain_filters = list(domains) if domains else None
146
+ key = _derive_key(_keychain_password(profile.browser))
147
+ cookies: list[Cookie] = []
148
+ skipped = 0
149
+ def _txt(b) -> str:
150
+ if b is None:
151
+ return ""
152
+ if isinstance(b, bytes):
153
+ return b.decode("utf-8", errors="replace")
154
+ return str(b)
155
+
156
+ try:
157
+ conn = sqlite3.connect(f"file:{tmp}?mode=ro", uri=True)
158
+ # Arc and some Chrome forks declare encrypted_value as TEXT, not BLOB,
159
+ # which makes sqlite3 try to UTF-8-decode the AES ciphertext and crash
160
+ # mid-iteration. Force everything to bytes and decode TEXT columns
161
+ # ourselves.
162
+ conn.text_factory = bytes
163
+ conn.row_factory = sqlite3.Row
164
+ rows = conn.execute(
165
+ "SELECT host_key, name, value, encrypted_value, path, expires_utc, "
166
+ "is_secure, is_httponly, samesite FROM cookies"
167
+ )
168
+ for row in rows:
169
+ host = _txt(row["host_key"])
170
+ if domain_filters and not any(d in host for d in domain_filters):
171
+ continue
172
+ value = _txt(row["value"])
173
+ if not value and row["encrypted_value"]:
174
+ value = _decrypt(row["encrypted_value"], key, host) or ""
175
+ if not value:
176
+ skipped += 1
177
+ continue
178
+ expires = 0.0
179
+ if row["expires_utc"]:
180
+ # Chromium epoch is 1601-01-01 in microseconds.
181
+ expires = (row["expires_utc"] / 1_000_000) - 11644473600
182
+ cookies.append(Cookie(
183
+ name=_txt(row["name"]),
184
+ value=value,
185
+ domain=host,
186
+ path=_txt(row["path"]) or "/",
187
+ expires=expires,
188
+ secure=bool(row["is_secure"]),
189
+ http_only=bool(row["is_httponly"]),
190
+ same_site=SAMESITE_MAP.get(row["samesite"], "Unspecified"),
191
+ ))
192
+ conn.close()
193
+ finally:
194
+ shutil.rmtree(tmp.parent, ignore_errors=True)
195
+
196
+ log.info(
197
+ "Read %d cookies from %s/%s (skipped %d undecryptable)",
198
+ len(cookies), profile.browser, profile.name, skipped,
199
+ )
200
+ return cookies
201
+
202
+
203
+ def _ws_from_cdp_url(cdp_url: str) -> str:
204
+ if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"):
205
+ return cdp_url
206
+ if cdp_url.startswith("cdp://"):
207
+ cdp_url = "http://" + cdp_url[len("cdp://"):]
208
+ base = cdp_url.rstrip("/")
209
+ with urllib.request.urlopen(f"{base}/json/version", timeout=5) as r:
210
+ return json.loads(r.read())["webSocketDebuggerUrl"]
211
+
212
+
213
+ def inject_via_cdp(
214
+ cookies: Iterable[Cookie],
215
+ cdp_url: str = "http://127.0.0.1:9222",
216
+ ) -> int:
217
+ """Inject cookies into a running Chrome via CDP Storage.setCookies.
218
+
219
+ Args:
220
+ cookies: iterable of Cookie objects.
221
+ cdp_url: base http(s) URL of the Chrome DevTools endpoint, or a
222
+ cdp://host:port shorthand, or a raw ws:// URL.
223
+
224
+ Returns: number of cookies submitted (the browser accepts them as a batch).
225
+ """
226
+ from websocket import create_connection
227
+
228
+ ws_url = _ws_from_cdp_url(cdp_url)
229
+ # Chrome 111+ enforces CDP origin checking and rejects any Origin header
230
+ # unless the target was launched with --remote-allow-origins. Suppressing
231
+ # the header bypasses the check; localhost CDP is already privileged.
232
+ ws = create_connection(ws_url, timeout=10, suppress_origin=True)
233
+ try:
234
+ batch = []
235
+ for c in cookies:
236
+ param = {
237
+ "name": c.name,
238
+ "value": c.value,
239
+ "domain": c.domain,
240
+ "path": c.path or "/",
241
+ "secure": c.secure,
242
+ "httpOnly": c.http_only,
243
+ }
244
+ if c.same_site in ("Strict", "Lax", "None"):
245
+ param["sameSite"] = c.same_site
246
+ if c.expires > 0:
247
+ param["expires"] = c.expires
248
+ batch.append(param)
249
+ if not batch:
250
+ return 0
251
+ ws.send(json.dumps({
252
+ "id": 1,
253
+ "method": "Storage.setCookies",
254
+ "params": {"cookies": batch},
255
+ }))
256
+ resp = json.loads(ws.recv())
257
+ if "error" in resp:
258
+ log.warning("Storage.setCookies failed: %s", resp["error"])
259
+ return 0
260
+ finally:
261
+ ws.close()
262
+ log.info("Injected %d cookies via CDP", len(batch))
263
+ return len(batch)
264
+
265
+
266
+ # --- helpers used by CLI and external callers ---
267
+
268
def find_profile(spec: str) -> BrowserProfile:
    """Resolve a 'browser:profile-name' spec (e.g. 'chrome:Default') to a BrowserProfile.

    A spec without a colon is taken as a browser name with the profile
    "Default".

    Raises:
        SystemExit: no detected profile of that browser has the requested
            name; the message lists the profiles that were found.
    """
    browser, sep, name = spec.partition(":")
    if not sep:
        name = "Default"

    # Scan once and reuse the result for both the lookup and the error text.
    candidates = list(detect_browsers({browser}))
    for candidate in candidates:
        if candidate.name == name:
            return candidate

    available = [(p.browser, p.name) for p in candidates]
    raise SystemExit(
        f"No profile {spec!r}. Available {browser} profiles: {available}"
    )
281
+
282
+
283
+ def _cli(argv: Optional[list[str]] = None) -> int:
284
+ parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.cookies")
285
+ sub = parser.add_subparsers(dest="cmd", required=True)
286
+
287
+ cp = sub.add_parser("copy", help="copy cookies from a local profile into a running browser via CDP")
288
+ cp.add_argument("--from", dest="src", required=True,
289
+ help="source profile, e.g. chrome:Default or arc:'Profile 1'")
290
+ cp.add_argument("--to", dest="dst", required=True,
291
+ help="target CDP endpoint, e.g. cdp://127.0.0.1:9555 or http://127.0.0.1:9555")
292
+ cp.add_argument("--domains", default=None,
293
+ help="comma-separated list of host_key substrings to include")
294
+ cp.add_argument("-v", "--verbose", action="store_true")
295
+
296
+ ls = sub.add_parser("list", help="list cookies in a local profile (counts only — no values printed)")
297
+ ls.add_argument("--from", dest="src", required=True)
298
+ ls.add_argument("--domains", default=None)
299
+
300
+ args = parser.parse_args(argv)
301
+
302
+ logging.basicConfig(
303
+ level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
304
+ format="%(levelname)s %(message)s",
305
+ )
306
+
307
+ profile = find_profile(args.src)
308
+ domain_filters = [d.strip() for d in args.domains.split(",")] if args.domains else None
309
+
310
+ cookies = read_cookies(profile, domains=domain_filters)
311
+
312
+ if args.cmd == "list":
313
+ by_host: dict[str, int] = {}
314
+ for c in cookies:
315
+ by_host[c.domain] = by_host.get(c.domain, 0) + 1
316
+ for host, n in sorted(by_host.items(), key=lambda kv: -kv[1]):
317
+ print(f" {n:4} {host}")
318
+ print(f"Total: {len(cookies)} cookies across {len(by_host)} hosts")
319
+ return 0
320
+
321
+ if args.cmd == "copy":
322
+ n = inject_via_cdp(cookies, args.dst)
323
+ print(f"Injected {n}/{len(cookies)} cookies into {args.dst}")
324
+ return 0 if n > 0 else 2
325
+
326
+ return 1
327
+
328
+
329
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(_cli())
@@ -40,6 +40,7 @@ def extract_memories(memories_db_path: str = "memories.db",
40
40
  """
41
41
  total_start = time.monotonic()
42
42
  mem = MemoryDB(memories_db_path, defer_embeddings=True)
43
+ from ai_browser_profile.ingestors.browser_detect import permission_denied_paths
43
44
  profiles = detect_browsers(allowed=browsers)
44
45
  log.info(f"Extracting memories from {len(profiles)} profiles...")
45
46
 
@@ -80,6 +81,17 @@ def extract_memories(memories_db_path: str = "memories.db",
80
81
  run_cleanup(db_path=memories_db_path)
81
82
  mem = MemoryDB(memories_db_path, defer_embeddings=True)
82
83
  interim_profile = mem.profile_text()
84
+
85
+ # Emit structured browser summary so callers can show transparency
86
+ detected_browsers = sorted(set(p.browser for p in profiles))
87
+ denied_browsers = sorted(set(
88
+ p.browser for p in profiles
89
+ if any(str(p.path) in str(denied) for denied in permission_denied_paths)
90
+ ))
91
+ print(f"BROWSERS_SCANNED: {','.join(detected_browsers)}", flush=True)
92
+ if denied_browsers:
93
+ print(f"BROWSERS_PERMISSION_DENIED: {','.join(denied_browsers)}", flush=True)
94
+
83
95
  log.info(f"Interim profile ready (WhatsApp + embeddings still running):\n{interim_profile}")
84
96
 
85
97
  # 7. WhatsApp — contacts from IndexedDB (slow, runs last)