ai-browser-profile 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai_browser_profile/cookies.py +330 -0
- package/ai_browser_profile/extract.py +12 -0
- package/ai_browser_profile/indexeddb.py +599 -0
- package/ai_browser_profile/ingestors/browser_detect.py +4 -0
- package/ai_browser_profile/localstorage.py +266 -0
- package/bin/cli.js +6 -2
- package/package.json +28 -1
- package/review/SKILL.md +1 -1
- package/skill/SKILL.md +26 -159
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Read cookies from a Chromium browser profile and inject into another.
|
|
2
|
+
|
|
3
|
+
Sibling to the ingestors in ai_browser_profile.ingestors. Lives in the
|
|
4
|
+
package but is NOT wired into extract_memories() — cookies are auth
|
|
5
|
+
secrets and must never land in memories.db.
|
|
6
|
+
|
|
7
|
+
Public API:
|
|
8
|
+
read_cookies(profile, domains=None) -> list[Cookie]
|
|
9
|
+
inject_via_cdp(cookies, cdp_url, ...) -> int
|
|
10
|
+
|
|
11
|
+
CLI:
|
|
12
|
+
python -m ai_browser_profile.cookies copy \\
|
|
13
|
+
--from chrome:Default \\
|
|
14
|
+
--to cdp://127.0.0.1:9555 \\
|
|
15
|
+
--domains github.com,linear.app
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import logging
|
|
24
|
+
import shutil
|
|
25
|
+
import sqlite3
|
|
26
|
+
import subprocess
|
|
27
|
+
import sys
|
|
28
|
+
import urllib.request
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Iterable, Optional
|
|
32
|
+
|
|
33
|
+
from ai_browser_profile.ingestors.browser_detect import (
|
|
34
|
+
BrowserProfile,
|
|
35
|
+
copy_db,
|
|
36
|
+
detect_browsers,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Module-level logger; the CLI entry point configures handlers via basicConfig.
log = logging.getLogger(__name__)

# Keychain service name looked up for each supported browser key
# (see _keychain_password, which passes it to `security ... -s <service>`).
KEYCHAIN_SERVICE = dict(
    chrome="Chrome Safe Storage",
    arc="Arc Safe Storage",
    brave="Brave Safe Storage",
    edge="Microsoft Edge Safe Storage",
    chromium="Chromium Safe Storage",
)

# Inputs to hashlib.pbkdf2_hmac("sha1", ...) in _derive_key.
PBKDF2_SALT = b"saltysalt"
PBKDF2_ITERATIONS = 1003
AES_KEY_LENGTH = 16  # bytes of derived key material requested from PBKDF2

# Fixed initialisation vector handed to AES-CBC in _decrypt.
AES_IV = b" " * 16

# Integer `samesite` column value -> label stored on Cookie.same_site.
SAMESITE_MAP = {
    -1: "Unspecified",
    0: "None",
    1: "Lax",
    2: "Strict",
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class Cookie:
    """A single cookie in the shape produced by read_cookies().

    The same object is consumed by inject_via_cdp(), which maps these
    fields onto the parameters of a CDP ``Storage.setCookies`` entry.
    """

    name: str
    value: str  # plaintext value (already decrypted when it was stored encrypted)
    domain: str  # the row's host_key, copied verbatim
    path: str
    expires: float  # seconds since the Unix epoch; 0.0 when no expiry was recorded
    secure: bool
    http_only: bool
    same_site: str  # one of the SAMESITE_MAP labels
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _keychain_password(browser: str) -> bytes:
    """Return the browser's Safe Storage password from the Keychain.

    Runs ``security find-generic-password -w -s <service>`` for the service
    name mapped to *browser* in KEYCHAIN_SERVICE and returns its stripped
    stdout, encoded to bytes.

    Args:
        browser: browser key such as ``"chrome"`` or ``"arc"``.

    Raises:
        ValueError: *browser* has no entry in KEYCHAIN_SERVICE.
        RuntimeError: the ``security`` tool is unavailable, or it exited
            non-zero (for example because access was denied).
    """
    service = KEYCHAIN_SERVICE.get(browser)
    if not service:
        raise ValueError(f"No keychain service mapped for browser {browser!r}")
    try:
        res = subprocess.run(
            ["security", "find-generic-password", "-w", "-s", service],
            capture_output=True, text=True, check=False,
        )
    except FileNotFoundError as exc:
        # Without this, a missing `security` binary surfaces as a bare
        # FileNotFoundError, which callers cannot tell apart from
        # read_cookies()' "no Cookies file" error.
        raise RuntimeError(
            f"Could not read {service!r} from Keychain: 'security' tool not found"
        ) from exc
    if res.returncode != 0:
        raise RuntimeError(
            f"Could not read {service!r} from Keychain: {res.stderr.strip() or 'access denied'}"
        )
    return res.stdout.strip().encode()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _derive_key(password: bytes) -> bytes:
    """Stretch the Safe Storage *password* into the AES key used by _decrypt.

    Uses PBKDF2-HMAC-SHA1 with the module's fixed salt, iteration count and
    key length.
    """
    derived = hashlib.pbkdf2_hmac(
        "sha1",
        password,
        PBKDF2_SALT,
        PBKDF2_ITERATIONS,
        AES_KEY_LENGTH,
    )
    return derived
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _decrypt(encrypted: bytes, key: bytes, host_key: str) -> Optional[str]:
    """Decrypt a Chromium cookie value.

    Args:
        encrypted: raw ``encrypted_value`` bytes, optionally carrying a
            ``v10``/``v11`` version prefix.
        key: AES key from _derive_key().
        host_key: the cookie's host; used to recognise and strip the
            SHA-256(host_key) prefix that newer Chrome versions prepend.

    Returns:
        The plaintext value, or None when the input is empty, is not a
        whole number of AES blocks, or does not unpad cleanly (which is
        what decrypting with the wrong key normally looks like).
    """
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    if not encrypted:
        return None

    payload = encrypted[3:] if encrypted[:3] in (b"v10", b"v11") else encrypted
    if not payload or len(payload) % 16 != 0:
        return None

    decryptor = Cipher(algorithms.AES(key), modes.CBC(AES_IV)).decryptor()
    plain = decryptor.update(payload) + decryptor.finalize()

    # Validate PKCS#7 padding. Previously a bad pad was silently ignored and
    # the garbage plaintext was returned as if it were a real cookie value,
    # contradicting the "None on failure" contract.
    pad = plain[-1]
    if not 1 <= pad <= 16 or not plain.endswith(bytes([pad]) * pad):
        return None
    plain = plain[:-pad]

    # Chrome 80+ prepends SHA256(host_key) (32 bytes) to bind cookie to its host.
    host_digest = hashlib.sha256(host_key.encode()).digest()
    if plain.startswith(host_digest):
        plain = plain[len(host_digest):]

    try:
        return plain.decode("utf-8")
    except UnicodeDecodeError:
        return plain.decode("utf-8", errors="replace")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def read_cookies(
    profile: BrowserProfile,
    domains: Optional[Iterable[str]] = None,
) -> list[Cookie]:
    """Read and decrypt cookies from a Chromium browser profile.

    The profile's ``Cookies`` database is copied with copy_db(), read from
    the copy, and the copy's parent directory is removed afterwards.

    Args:
        profile: A Chromium profile from detect_browsers().
        domains: Optional iterable of substrings; a cookie is kept if its
            host_key contains any of them. None (or nothing but blank
            entries) means all cookies. Blank entries are ignored, because
            an empty string is a substring of every host and would
            silently disable the filter.

    Returns:
        One Cookie per row that has a non-empty plain or decrypted value.

    Raises:
        NotImplementedError: the profile belongs to Safari or Firefox.
        FileNotFoundError: the profile has no ``Cookies`` file.
        RuntimeError: the database could not be copied.
    """
    if profile.browser in ("safari", "firefox"):
        raise NotImplementedError(f"Cookie read not supported for {profile.browser}")

    cookies_path = profile.path / "Cookies"
    if not cookies_path.exists():
        raise FileNotFoundError(f"No Cookies file at {cookies_path}")

    tmp = copy_db(cookies_path)
    if tmp is None:
        raise RuntimeError(
            f"Could not copy {cookies_path}. Grant Full Disk Access to your terminal and retry."
        )

    def _txt(b) -> str:
        """Coerce a column value (bytes, None, or other) to str."""
        if b is None:
            return ""
        if isinstance(b, bytes):
            return b.decode("utf-8", errors="replace")
        return str(b)

    cookies: list[Cookie] = []
    skipped = 0
    try:
        # Everything after the copy runs inside this try so the temporary
        # copy of the cookie database is removed even if the Keychain
        # lookup or the query fails.
        domain_filters = [d for d in domains if d] if domains else None
        key = _derive_key(_keychain_password(profile.browser))

        conn = sqlite3.connect(f"file:{tmp}?mode=ro", uri=True)
        try:
            # Arc and some Chrome forks declare encrypted_value as TEXT, not BLOB,
            # which makes sqlite3 try to UTF-8-decode the AES ciphertext and crash
            # mid-iteration. Force everything to bytes and decode TEXT columns
            # ourselves.
            conn.text_factory = bytes
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT host_key, name, value, encrypted_value, path, expires_utc, "
                "is_secure, is_httponly, samesite FROM cookies"
            )
            for row in rows:
                host = _txt(row["host_key"])
                if domain_filters and not any(d in host for d in domain_filters):
                    continue
                value = _txt(row["value"])
                if not value and row["encrypted_value"]:
                    value = _decrypt(row["encrypted_value"], key, host) or ""
                if not value:
                    skipped += 1
                    continue
                expires = 0.0
                if row["expires_utc"]:
                    # Chromium epoch is 1601-01-01 in microseconds.
                    expires = (row["expires_utc"] / 1_000_000) - 11644473600
                cookies.append(Cookie(
                    name=_txt(row["name"]),
                    value=value,
                    domain=host,
                    path=_txt(row["path"]) or "/",
                    expires=expires,
                    secure=bool(row["is_secure"]),
                    http_only=bool(row["is_httponly"]),
                    same_site=SAMESITE_MAP.get(row["samesite"], "Unspecified"),
                ))
        finally:
            # Close on every path, not just on success.
            conn.close()
    finally:
        shutil.rmtree(tmp.parent, ignore_errors=True)

    log.info(
        "Read %d cookies from %s/%s (skipped %d undecryptable)",
        len(cookies), profile.browser, profile.name, skipped,
    )
    return cookies
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _ws_from_cdp_url(cdp_url: str) -> str:
|
|
204
|
+
if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"):
|
|
205
|
+
return cdp_url
|
|
206
|
+
if cdp_url.startswith("cdp://"):
|
|
207
|
+
cdp_url = "http://" + cdp_url[len("cdp://"):]
|
|
208
|
+
base = cdp_url.rstrip("/")
|
|
209
|
+
with urllib.request.urlopen(f"{base}/json/version", timeout=5) as r:
|
|
210
|
+
return json.loads(r.read())["webSocketDebuggerUrl"]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def inject_via_cdp(
    cookies: Iterable[Cookie],
    cdp_url: str = "http://127.0.0.1:9222",
) -> int:
    """Inject cookies into a running Chrome via CDP Storage.setCookies.

    Args:
        cookies: iterable of Cookie objects.
        cdp_url: base http(s) URL of the Chrome DevTools endpoint, or a
            cdp://host:port shorthand, or a raw ws:// URL.

    Returns: number of cookies submitted (the browser accepts them as a
    batch), or 0 when there was nothing to send or the browser reported an
    error.
    """
    batch = []
    for c in cookies:
        param = {
            "name": c.name,
            "value": c.value,
            "domain": c.domain,
            "path": c.path or "/",
            "secure": c.secure,
            "httpOnly": c.http_only,
        }
        if c.same_site in ("Strict", "Lax", "None"):
            param["sameSite"] = c.same_site
        if c.expires > 0:
            param["expires"] = c.expires
        batch.append(param)

    # Nothing to send: skip the connection (and the websocket dependency).
    if not batch:
        return 0

    from websocket import create_connection

    request_id = 1
    ws_url = _ws_from_cdp_url(cdp_url)
    # Chrome 111+ enforces CDP origin checking and rejects any Origin header
    # unless the target was launched with --remote-allow-origins. Suppressing
    # the header bypasses the check; localhost CDP is already privileged.
    ws = create_connection(ws_url, timeout=10, suppress_origin=True)
    try:
        ws.send(json.dumps({
            "id": request_id,
            "method": "Storage.setCookies",
            "params": {"cookies": batch},
        }))
        # The socket may deliver unrelated messages before our reply; only
        # the message echoing our request id is the response to inspect.
        while True:
            resp = json.loads(ws.recv())
            if resp.get("id") == request_id:
                break
        if "error" in resp:
            log.warning("Storage.setCookies failed: %s", resp["error"])
            return 0
    finally:
        ws.close()
    log.info("Injected %d cookies via CDP", len(batch))
    return len(batch)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# --- helpers used by CLI and external callers ---
|
|
267
|
+
|
|
268
|
+
def find_profile(spec: str) -> BrowserProfile:
    """Resolve a 'browser:profile-name' spec (e.g. 'chrome:Default') to a BrowserProfile.

    A spec without a colon is taken as a browser name with the profile
    name "Default".

    Raises:
        SystemExit: no detected profile of that browser has the requested
            name; the message lists the profiles that were found.
    """
    browser, sep, name = spec.partition(":")
    if not sep:
        name = "Default"

    # Scan once and reuse the result for both the match and the error message.
    candidates = list(detect_browsers({browser}))
    for candidate in candidates:
        if candidate.name == name:
            return candidate

    available = [(p.browser, p.name) for p in candidates]
    raise SystemExit(
        f"No profile {spec!r}. Available {browser} profiles: {available}"
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _cli(argv: Optional[list[str]] = None) -> int:
|
|
284
|
+
parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.cookies")
|
|
285
|
+
sub = parser.add_subparsers(dest="cmd", required=True)
|
|
286
|
+
|
|
287
|
+
cp = sub.add_parser("copy", help="copy cookies from a local profile into a running browser via CDP")
|
|
288
|
+
cp.add_argument("--from", dest="src", required=True,
|
|
289
|
+
help="source profile, e.g. chrome:Default or arc:'Profile 1'")
|
|
290
|
+
cp.add_argument("--to", dest="dst", required=True,
|
|
291
|
+
help="target CDP endpoint, e.g. cdp://127.0.0.1:9555 or http://127.0.0.1:9555")
|
|
292
|
+
cp.add_argument("--domains", default=None,
|
|
293
|
+
help="comma-separated list of host_key substrings to include")
|
|
294
|
+
cp.add_argument("-v", "--verbose", action="store_true")
|
|
295
|
+
|
|
296
|
+
ls = sub.add_parser("list", help="list cookies in a local profile (counts only — no values printed)")
|
|
297
|
+
ls.add_argument("--from", dest="src", required=True)
|
|
298
|
+
ls.add_argument("--domains", default=None)
|
|
299
|
+
|
|
300
|
+
args = parser.parse_args(argv)
|
|
301
|
+
|
|
302
|
+
logging.basicConfig(
|
|
303
|
+
level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
|
|
304
|
+
format="%(levelname)s %(message)s",
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
profile = find_profile(args.src)
|
|
308
|
+
domain_filters = [d.strip() for d in args.domains.split(",")] if args.domains else None
|
|
309
|
+
|
|
310
|
+
cookies = read_cookies(profile, domains=domain_filters)
|
|
311
|
+
|
|
312
|
+
if args.cmd == "list":
|
|
313
|
+
by_host: dict[str, int] = {}
|
|
314
|
+
for c in cookies:
|
|
315
|
+
by_host[c.domain] = by_host.get(c.domain, 0) + 1
|
|
316
|
+
for host, n in sorted(by_host.items(), key=lambda kv: -kv[1]):
|
|
317
|
+
print(f" {n:4} {host}")
|
|
318
|
+
print(f"Total: {len(cookies)} cookies across {len(by_host)} hosts")
|
|
319
|
+
return 0
|
|
320
|
+
|
|
321
|
+
if args.cmd == "copy":
|
|
322
|
+
n = inject_via_cdp(cookies, args.dst)
|
|
323
|
+
print(f"Injected {n}/{len(cookies)} cookies into {args.dst}")
|
|
324
|
+
return 0 if n > 0 else 2
|
|
325
|
+
|
|
326
|
+
return 1
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(_cli())
|
|
@@ -40,6 +40,7 @@ def extract_memories(memories_db_path: str = "memories.db",
|
|
|
40
40
|
"""
|
|
41
41
|
total_start = time.monotonic()
|
|
42
42
|
mem = MemoryDB(memories_db_path, defer_embeddings=True)
|
|
43
|
+
from ai_browser_profile.ingestors.browser_detect import permission_denied_paths
|
|
43
44
|
profiles = detect_browsers(allowed=browsers)
|
|
44
45
|
log.info(f"Extracting memories from {len(profiles)} profiles...")
|
|
45
46
|
|
|
@@ -80,6 +81,17 @@ def extract_memories(memories_db_path: str = "memories.db",
|
|
|
80
81
|
run_cleanup(db_path=memories_db_path)
|
|
81
82
|
mem = MemoryDB(memories_db_path, defer_embeddings=True)
|
|
82
83
|
interim_profile = mem.profile_text()
|
|
84
|
+
|
|
85
|
+
# Emit structured browser summary so callers can show transparency
|
|
86
|
+
detected_browsers = sorted(set(p.browser for p in profiles))
|
|
87
|
+
denied_browsers = sorted(set(
|
|
88
|
+
p.browser for p in profiles
|
|
89
|
+
if any(str(p.path) in str(denied) for denied in permission_denied_paths)
|
|
90
|
+
))
|
|
91
|
+
print(f"BROWSERS_SCANNED: {','.join(detected_browsers)}", flush=True)
|
|
92
|
+
if denied_browsers:
|
|
93
|
+
print(f"BROWSERS_PERMISSION_DENIED: {','.join(denied_browsers)}", flush=True)
|
|
94
|
+
|
|
83
95
|
log.info(f"Interim profile ready (WhatsApp + embeddings still running):\n{interim_profile}")
|
|
84
96
|
|
|
85
97
|
# 7. WhatsApp — contacts from IndexedDB (slow, runs last)
|