ai-browser-profile 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
+ """Read cookies from a Chromium browser profile and inject into another.
2
+
3
+ Sibling to the ingestors in ai_browser_profile.ingestors. Lives in the
4
+ package but is NOT wired into extract_memories() — cookies are auth
5
+ secrets and must never land in memories.db.
6
+
7
+ Public API:
8
+ read_cookies(profile, domains=None) -> list[Cookie]
9
+ inject_via_cdp(cookies, cdp_url, ...) -> int
10
+
11
+ CLI:
12
+ python -m ai_browser_profile.cookies copy \\
13
+ --from chrome:Default \\
14
+ --to cdp://127.0.0.1:9555 \\
15
+ --domains github.com,linear.app
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import hashlib
22
+ import json
23
+ import logging
24
+ import shutil
25
+ import sqlite3
26
+ import subprocess
27
+ import sys
28
+ import urllib.request
29
+ from dataclasses import dataclass
30
+ from pathlib import Path
31
+ from typing import Iterable, Optional
32
+
33
+ from ai_browser_profile.ingestors.browser_detect import (
34
+ BrowserProfile,
35
+ copy_db,
36
+ detect_browsers,
37
+ )
38
+
39
+ log = logging.getLogger(__name__)
40
+
41
# Keychain "generic password" service name for each supported browser.
# Keys are the BrowserProfile.browser identifiers passed to
# _keychain_password(); a browser missing from this map cannot be decrypted.
KEYCHAIN_SERVICE = {
    "chrome": "Chrome Safe Storage",
    "arc": "Arc Safe Storage",
    "brave": "Brave Safe Storage",
    "edge": "Microsoft Edge Safe Storage",
    "chromium": "Chromium Safe Storage",
}

# PBKDF2-HMAC-SHA1 parameters used by _derive_key() to turn the Keychain
# secret into the AES key.
# NOTE(review): these look like Chromium's macOS os_crypt values — confirm
# before reusing this module on another platform.
PBKDF2_SALT = b"saltysalt"
PBKDF2_ITERATIONS = 1003
AES_KEY_LENGTH = 16  # bytes, i.e. AES-128
# Fixed CBC initialisation vector handed to the cipher in _decrypt().
AES_IV = b" " * 16

# Maps the integer `samesite` column of the cookies table to the string stored
# in Cookie.same_site; unknown integers fall back to "Unspecified" at the
# lookup site.
SAMESITE_MAP = {-1: "Unspecified", 0: "None", 1: "Lax", 2: "Strict"}
55
+
56
+
57
@dataclass
class Cookie:
    """One decrypted browser cookie, as produced by read_cookies()."""

    name: str
    value: str  # plaintext value (already decrypted when it was stored encrypted)
    domain: str  # the cookies table's host_key, unmodified
    path: str  # "/" when the stored path is empty
    expires: float  # Unix timestamp in seconds; 0.0 when expires_utc is unset
    secure: bool
    http_only: bool
    same_site: str  # one of the SAMESITE_MAP values
67
+
68
+
69
def _keychain_password(browser: str) -> bytes:
    """Fetch a browser's Safe Storage secret from the Keychain.

    Shells out to ``security find-generic-password -w -s <service>``.

    Args:
        browser: browser identifier; must be a key of KEYCHAIN_SERVICE.

    Returns:
        The secret printed by ``security``, whitespace-stripped and encoded
        to bytes.

    Raises:
        ValueError: no Keychain service is mapped for ``browser``.
        RuntimeError: ``security`` exited non-zero (e.g. access was denied).
    """
    service = KEYCHAIN_SERVICE.get(browser)
    if not service:
        raise ValueError(f"No keychain service mapped for browser {browser!r}")

    command = ["security", "find-generic-password", "-w", "-s", service]
    proc = subprocess.run(command, capture_output=True, text=True, check=False)
    if proc.returncode == 0:
        return proc.stdout.strip().encode()

    raise RuntimeError(
        f"Could not read {service!r} from Keychain: {proc.stderr.strip() or 'access denied'}"
    )
82
+
83
+
84
def _derive_key(password: bytes) -> bytes:
    """Stretch the Keychain secret into the AES key via PBKDF2-HMAC-SHA1.

    Salt, iteration count and key length come from the module constants
    PBKDF2_SALT, PBKDF2_ITERATIONS and AES_KEY_LENGTH.
    """
    derived = hashlib.pbkdf2_hmac(
        hash_name="sha1",
        password=password,
        salt=PBKDF2_SALT,
        iterations=PBKDF2_ITERATIONS,
        dklen=AES_KEY_LENGTH,
    )
    return derived
88
+
89
+
90
def _decrypt(encrypted: bytes, key: bytes, host_key: str) -> Optional[str]:
    """Decrypt a Chromium cookie value. Returns None on failure.

    Args:
        encrypted: raw ``encrypted_value`` blob; an optional ``v10``/``v11``
            version prefix is stripped before decryption.
        key: AES key from _derive_key().
        host_key: the cookie's host; used to recognise and strip the
            SHA-256(host_key) prefix that newer Chrome versions prepend to
            the plaintext.

    Returns:
        The plaintext cookie value, or None when the blob is empty, is not a
        whole number of AES blocks, has invalid PKCS#7 padding (the usual
        symptom of a wrong key), or does not decode as UTF-8. Returning None
        in those cases lets the caller count the cookie as undecryptable
        instead of injecting a corrupted secret.
    """
    if not encrypted:
        return None
    payload = encrypted[3:] if encrypted[:3] in (b"v10", b"v11") else encrypted
    # CBC ciphertext is always a positive whole number of 16-byte blocks.
    if not payload or len(payload) % 16 != 0:
        return None

    # Imported lazily, after the cheap guards, so the third-party dependency
    # is only needed when there is actually something to decrypt.
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    decryptor = Cipher(algorithms.AES(key), modes.CBC(AES_IV)).decryptor()
    plain = decryptor.update(payload) + decryptor.finalize()

    # Validate and strip PKCS#7 padding; garbage here means the key was wrong.
    pad = plain[-1]
    if not (1 <= pad <= 16 and plain.endswith(bytes([pad]) * pad)):
        return None
    plain = plain[:-pad]

    # Chrome 80+ prepends SHA256(host_key) (32 bytes) to bind cookie to its host.
    expected = hashlib.sha256(host_key.encode()).digest()
    if plain.startswith(expected):
        plain = plain[len(expected):]

    try:
        return plain.decode("utf-8")
    except UnicodeDecodeError:
        return None
119
+
120
+
121
def read_cookies(
    profile: BrowserProfile,
    domains: Optional[Iterable[str]] = None,
) -> list[Cookie]:
    """Read and decrypt cookies from a Chromium browser profile.

    The profile's ``Cookies`` SQLite file is copied with copy_db() and the
    copy is opened read-only. Because that copy contains auth secrets, it is
    removed again on every exit path — including a failed Keychain lookup or
    an error while iterating rows.

    Args:
        profile: A Chromium profile from detect_browsers().
        domains: Optional iterable of substrings; a cookie is kept if its
            host_key contains any of them. None means all cookies.

    Returns:
        Cookie objects for every matching row with a non-empty value. Rows
        whose value is empty or cannot be decrypted are skipped and counted
        in the log line.

    Raises:
        NotImplementedError: the profile is a Safari or Firefox profile.
        FileNotFoundError: the profile has no ``Cookies`` file.
        RuntimeError: the file could not be copied, or the Keychain secret
            could not be read.
        ValueError: no Keychain service is mapped for the profile's browser.
    """
    if profile.browser in ("safari", "firefox"):
        raise NotImplementedError(f"Cookie read not supported for {profile.browser}")

    cookies_path = profile.path / "Cookies"
    if not cookies_path.exists():
        raise FileNotFoundError(f"No Cookies file at {cookies_path}")

    tmp = copy_db(cookies_path)
    if tmp is None:
        raise RuntimeError(
            f"Could not copy {cookies_path}. Grant Full Disk Access to your terminal and retry."
        )

    def _txt(b) -> str:
        # With text_factory=bytes every TEXT column arrives as bytes.
        if b is None:
            return ""
        if isinstance(b, bytes):
            return b.decode("utf-8", errors="replace")
        return str(b)

    cookies: list[Cookie] = []
    skipped = 0
    conn = None
    try:
        # Everything that can fail after the copy exists lives inside this
        # try, so the finally below always deletes the copied database.
        domain_filters = list(domains) if domains else None
        key = _derive_key(_keychain_password(profile.browser))

        conn = sqlite3.connect(f"file:{tmp}?mode=ro", uri=True)
        # Arc and some Chrome forks declare encrypted_value as TEXT, not BLOB,
        # which makes sqlite3 try to UTF-8-decode the AES ciphertext and crash
        # mid-iteration. Force everything to bytes and decode TEXT columns
        # ourselves.
        conn.text_factory = bytes
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT host_key, name, value, encrypted_value, path, expires_utc, "
            "is_secure, is_httponly, samesite FROM cookies"
        )
        for row in rows:
            host = _txt(row["host_key"])
            if domain_filters and not any(d in host for d in domain_filters):
                continue
            # Prefer the plaintext column; fall back to decrypting the blob.
            value = _txt(row["value"])
            if not value and row["encrypted_value"]:
                value = _decrypt(row["encrypted_value"], key, host) or ""
            if not value:
                skipped += 1
                continue
            expires = 0.0
            if row["expires_utc"]:
                # Chromium epoch is 1601-01-01 in microseconds.
                expires = (row["expires_utc"] / 1_000_000) - 11644473600
            cookies.append(Cookie(
                name=_txt(row["name"]),
                value=value,
                domain=host,
                path=_txt(row["path"]) or "/",
                expires=expires,
                secure=bool(row["is_secure"]),
                http_only=bool(row["is_httponly"]),
                same_site=SAMESITE_MAP.get(row["samesite"], "Unspecified"),
            ))
    finally:
        if conn is not None:
            conn.close()
        # NOTE(review): removes the copy's whole parent directory — this
        # presumes copy_db() places the copy in its own temp dir; confirm.
        shutil.rmtree(tmp.parent, ignore_errors=True)

    log.info(
        "Read %d cookies from %s/%s (skipped %d undecryptable)",
        len(cookies), profile.browser, profile.name, skipped,
    )
    return cookies
201
+
202
+
203
def _ws_from_cdp_url(cdp_url: str) -> str:
    """Resolve a CDP endpoint spec to a websocket debugger URL.

    ``ws://`` and ``wss://`` URLs are returned untouched. Anything else is
    treated as the HTTP base of a DevTools endpoint (``cdp://host:port`` is
    shorthand for ``http://host:port``) and the websocket URL is looked up
    from its ``/json/version`` document.
    """
    if cdp_url.startswith(("ws://", "wss://")):
        return cdp_url

    http_base = cdp_url
    if http_base.startswith("cdp://"):
        http_base = "http://" + http_base[len("cdp://"):]

    version_endpoint = f"{http_base.rstrip('/')}/json/version"
    with urllib.request.urlopen(version_endpoint, timeout=5) as response:
        version_info = json.loads(response.read())
    return version_info["webSocketDebuggerUrl"]
211
+
212
+
213
def inject_via_cdp(
    cookies: Iterable[Cookie],
    cdp_url: str = "http://127.0.0.1:9222",
) -> int:
    """Inject cookies into a running Chrome via CDP Storage.setCookies.

    The batch is built before any connection is made, so an empty input
    returns 0 without touching the network. After sending, frames are read
    until the one answering this request arrives; unrelated CDP events that
    happen to arrive first are ignored rather than mistaken for the reply.

    Args:
        cookies: iterable of Cookie objects.
        cdp_url: base http(s) URL of the Chrome DevTools endpoint, or a
            cdp://host:port shorthand, or a raw ws:// URL.

    Returns: number of cookies submitted (the browser accepts them as a
        batch), or 0 when there was nothing to send or the browser answered
        with an error.

    Raises:
        TimeoutError: no matching reply arrived within 20 seconds.
    """
    batch = []
    for c in cookies:
        param = {
            "name": c.name,
            "value": c.value,
            "domain": c.domain,
            "path": c.path or "/",
            "secure": c.secure,
            "httpOnly": c.http_only,
        }
        # "Unspecified" is not sent; the browser applies its own default.
        if c.same_site in ("Strict", "Lax", "None"):
            param["sameSite"] = c.same_site
        # expires <= 0 marks a session cookie: leave the field out.
        if c.expires > 0:
            param["expires"] = c.expires
        batch.append(param)
    if not batch:
        return 0

    import time

    from websocket import create_connection

    ws_url = _ws_from_cdp_url(cdp_url)
    # Chrome 111+ enforces CDP origin checking and rejects any Origin header
    # unless the target was launched with --remote-allow-origins. Suppressing
    # the header bypasses the check; localhost CDP is already privileged.
    ws = create_connection(ws_url, timeout=10, suppress_origin=True)
    request_id = 1
    try:
        ws.send(json.dumps({
            "id": request_id,
            "method": "Storage.setCookies",
            "params": {"cookies": batch},
        }))
        deadline = time.monotonic() + 20
        while True:
            resp = json.loads(ws.recv())
            if resp.get("id") == request_id:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError("CDP Storage.setCookies timed out")
        if "error" in resp:
            log.warning("Storage.setCookies failed: %s", resp["error"])
            return 0
    finally:
        ws.close()
    log.info("Injected %d cookies via CDP", len(batch))
    return len(batch)
264
+
265
+
266
+ # --- helpers used by CLI and external callers ---
267
+
268
def find_profile(spec: str) -> BrowserProfile:
    """Resolve a 'browser:profile-name' spec (e.g. 'chrome:Default') to a BrowserProfile.

    A spec without a colon selects that browser's 'Default' profile. Installed
    profiles are scanned once; the same scan feeds the "available" list in
    the error message.

    Raises:
        SystemExit: no profile of that browser has the requested name.
    """
    browser, sep, name = spec.partition(":")
    if not sep:
        name = "Default"

    candidates = list(detect_browsers({browser}))
    for candidate in candidates:
        if candidate.name == name:
            return candidate

    available = [(p.browser, p.name) for p in candidates]
    raise SystemExit(
        f"No profile {spec!r}. Available {browser} profiles: {available}"
    )
281
+
282
+
283
def _cli(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point for ``python -m ai_browser_profile.cookies``.

    Sub-commands:
        copy: read cookies from a local profile and inject them via CDP.
        list: print per-host cookie counts (never values).

    Args:
        argv: argument list; None lets argparse read sys.argv.

    Returns:
        Process exit code: 0 on success, 2 when ``copy`` injected nothing,
        1 for an unrecognised command.
    """
    parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.cookies")
    sub = parser.add_subparsers(dest="cmd", required=True)

    cp = sub.add_parser("copy", help="copy cookies from a local profile into a running browser via CDP")
    cp.add_argument("--from", dest="src", required=True,
                    help="source profile, e.g. chrome:Default or arc:'Profile 1'")
    cp.add_argument("--to", dest="dst", required=True,
                    help="target CDP endpoint, e.g. cdp://127.0.0.1:9555 or http://127.0.0.1:9555")
    cp.add_argument("--domains", default=None,
                    help="comma-separated list of host_key substrings to include")
    cp.add_argument("-v", "--verbose", action="store_true")

    ls = sub.add_parser("list", help="list cookies in a local profile (counts only — no values printed)")
    ls.add_argument("--from", dest="src", required=True)
    ls.add_argument("--domains", default=None)

    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
        format="%(levelname)s %(message)s",
    )

    domain_filters = None
    if args.domains is not None:
        # Drop empty entries (trailing comma, doubled comma): an empty
        # substring is contained in every host and would silently widen the
        # filter to *all* cookies.
        domain_filters = [d.strip() for d in args.domains.split(",") if d.strip()]
        if not domain_filters:
            parser.error("--domains was given but contains no domain names")

    profile = find_profile(args.src)
    cookies = read_cookies(profile, domains=domain_filters)

    if args.cmd == "list":
        by_host: dict[str, int] = {}
        for c in cookies:
            by_host[c.domain] = by_host.get(c.domain, 0) + 1
        for host, n in sorted(by_host.items(), key=lambda kv: -kv[1]):
            print(f" {n:4} {host}")
        print(f"Total: {len(cookies)} cookies across {len(by_host)} hosts")
        return 0

    if args.cmd == "copy":
        n = inject_via_cdp(cookies, args.dst)
        print(f"Injected {n}/{len(cookies)} cookies into {args.dst}")
        return 0 if n > 0 else 2

    return 1
327
+
328
+
329
+ if __name__ == "__main__":
330
+ sys.exit(_cli())
@@ -40,6 +40,7 @@ def extract_memories(memories_db_path: str = "memories.db",
40
40
  """
41
41
  total_start = time.monotonic()
42
42
  mem = MemoryDB(memories_db_path, defer_embeddings=True)
43
+ from ai_browser_profile.ingestors.browser_detect import permission_denied_paths
43
44
  profiles = detect_browsers(allowed=browsers)
44
45
  log.info(f"Extracting memories from {len(profiles)} profiles...")
45
46
 
@@ -80,6 +81,17 @@ def extract_memories(memories_db_path: str = "memories.db",
80
81
  run_cleanup(db_path=memories_db_path)
81
82
  mem = MemoryDB(memories_db_path, defer_embeddings=True)
82
83
  interim_profile = mem.profile_text()
84
+
85
+ # Emit structured browser summary so callers can show transparency
86
+ detected_browsers = sorted(set(p.browser for p in profiles))
87
+ denied_browsers = sorted(set(
88
+ p.browser for p in profiles
89
+ if any(str(p.path) in str(denied) for denied in permission_denied_paths)
90
+ ))
91
+ print(f"BROWSERS_SCANNED: {','.join(detected_browsers)}", flush=True)
92
+ if denied_browsers:
93
+ print(f"BROWSERS_PERMISSION_DENIED: {','.join(denied_browsers)}", flush=True)
94
+
83
95
  log.info(f"Interim profile ready (WhatsApp + embeddings still running):\n{interim_profile}")
84
96
 
85
97
  # 7. WhatsApp — contacts from IndexedDB (slow, runs last)
@@ -13,6 +13,9 @@ log = logging.getLogger(__name__)
13
13
 
14
14
  APP_SUPPORT = Path.home() / "Library" / "Application Support"
15
15
 
16
+ # Populated by copy_db() when a file can't be read due to TCC permissions
17
+ permission_denied_paths: list[Path] = []
18
+
16
19
 
17
20
  @dataclass
18
21
  class BrowserProfile:
@@ -89,6 +92,7 @@ def copy_db(src: Path) -> Optional[Path]:
89
92
  return dst
90
93
  except PermissionError:
91
94
  log.warning(f"Permission denied reading {src} — grant Full Disk Access or skip")
95
+ permission_denied_paths.append(src)
92
96
  return None
93
97
 
94
98
 
@@ -0,0 +1,266 @@
1
+ """Read localStorage from a Chromium browser profile and inject into another.
2
+
3
+ Sibling to cookies.py. Like cookies.py, this is NOT wired into
4
+ extract_memories() — localStorage values can include auth tokens and
5
+ must not land in memories.db.
6
+
7
+ Public API:
8
+ read_localstorage(profile, origins=None) -> dict[origin, dict[key, value]]
9
+ inject_localstorage_via_cdp(data, cdp_url, ...) -> int
10
+
11
+ CLI:
12
+ python -m ai_browser_profile.localstorage copy \\
13
+ --from chrome:Profile\\ 1 \\
14
+ --to cdp://127.0.0.1:9555 \\
15
+ --origins chatgpt.com,notion.so
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import json
22
+ import logging
23
+ import shutil
24
+ import sys
25
+ import tempfile
26
+ import time
27
+ from pathlib import Path
28
+ from typing import Iterable, Optional
29
+
30
+ from ai_browser_profile.ingestors.browser_detect import BrowserProfile
31
+ from ai_browser_profile.cookies import _ws_from_cdp_url, find_profile
32
+
33
+ log = logging.getLogger(__name__)
34
+
35
+
36
def read_localstorage(
    profile: BrowserProfile,
    origins: Optional[Iterable[str]] = None,
) -> dict[str, dict[str, str]]:
    """Read localStorage from a Chromium profile's LevelDB.

    The ``Local Storage/leveldb`` directory is copied into a fresh temp
    directory, parsed there with ccl_chromium_reader, and the temp copy is
    deleted again before returning.

    Args:
        profile: Chromium profile from detect_browsers().
        origins: Optional iterable of substrings; an origin is kept if any
            substring matches its storage_key (e.g. 'chatgpt.com'
            matches 'https://chatgpt.com'). None = all origins.

    Returns: dict mapping origin (e.g. 'https://chatgpt.com') to dict of key/value.

    Raises:
        NotImplementedError: the profile is a Safari or Firefox profile.
        FileNotFoundError: the profile has no ``Local Storage/leveldb``.
        RuntimeError: the LevelDB directory could not be copied.
    """
    if profile.browser in ("safari", "firefox"):
        raise NotImplementedError(f"localStorage read not supported for {profile.browser}")

    ls_dir = profile.path / "Local Storage" / "leveldb"
    if not ls_dir.exists():
        raise FileNotFoundError(f"No Local Storage/leveldb at {ls_dir}")

    workdir = Path(tempfile.mkdtemp(prefix="ai_browser_profile_ls_"))
    snapshot = workdir / "leveldb"
    try:
        shutil.copytree(ls_dir, snapshot)
    except Exception as e:
        shutil.rmtree(workdir, ignore_errors=True)
        raise RuntimeError(f"Could not copy {ls_dir}: {e}") from e

    origin_filters = list(origins) if origins else None
    out: dict[str, dict[str, str]] = {}
    skipped = 0

    try:
        from ccl_chromium_reader import ccl_chromium_localstorage

        store = ccl_chromium_localstorage.LocalStoreDb(snapshot)
        for rec in store.iter_all_records():
            # Best effort per record: one malformed entry must not abort the
            # whole read, so it is only counted as skipped.
            try:
                origin = rec.storage_key or ""
                key = rec.script_key or ""
                raw = rec.value
                if raw is None or not (origin and key):
                    continue
                if origin_filters and all(f not in origin for f in origin_filters):
                    continue
                if isinstance(raw, bytes):
                    try:
                        text = raw.decode("utf-8")
                    except UnicodeDecodeError:
                        skipped += 1
                        continue
                elif isinstance(raw, str):
                    text = raw
                else:
                    text = str(raw)
                # A later record for the same origin/key overwrites the earlier one.
                out.setdefault(origin, {})[key] = text
            except Exception:
                skipped += 1
    finally:
        shutil.rmtree(workdir, ignore_errors=True)

    total = sum(map(len, out.values()))
    log.info(
        "Read %d localStorage items across %d origins from %s/%s (skipped %d)",
        total, len(out), profile.browser, profile.name, skipped,
    )
    return out
103
+
104
+
105
def _cdp_send(ws, msg_id: int, method: str,
              params: Optional[dict] = None,
              session_id: Optional[str] = None,
              timeout: float = 20.0) -> dict:
    """Send a CDP message and drain events until the matching reply arrives.

    Args:
        ws: connected websocket exposing ``send(str)`` and ``recv()``.
        msg_id: id to stamp on the request; the reply is matched on it.
        method: CDP method name, e.g. "Target.createTarget".
        params: method parameters; omitted from the message when empty.
        session_id: flat-mode session to address; omitted when falsy.
        timeout: overall seconds to keep reading frames for the reply. This
            is checked between frames, so one blocking ``recv()`` is still
            bounded only by the socket's own timeout.

    Returns:
        The decoded reply object whose "id" equals ``msg_id``.

    Raises:
        TimeoutError: no matching reply was seen before the deadline.
    """
    msg: dict = {"id": msg_id, "method": method}
    if params:
        msg["params"] = params
    if session_id:
        msg["sessionId"] = session_id
    ws.send(json.dumps(msg))

    # Monotonic clock: a wall-clock adjustment must not shorten or extend the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = json.loads(ws.recv())
        if resp.get("id") == msg_id:
            return resp
    raise TimeoutError(f"CDP {method} timed out")
121
+
122
+
123
def inject_localstorage_via_cdp(
    data: dict[str, dict[str, str]],
    cdp_url: str = "http://127.0.0.1:9222",
    load_wait_sec: float = 4.0,
) -> int:
    """Write localStorage entries into a running Chrome, one tab per origin.

    For every origin a new tab is opened on that origin (so the script runs
    same-origin), the function sleeps ``load_wait_sec``, a batch of
    ``localStorage.setItem`` calls is run through Runtime.evaluate, and the
    tab is closed again.

    Args:
        data: dict of {origin -> {key: value}}. Origin must be http(s)://...
        cdp_url: base http(s) URL of the Chrome DevTools endpoint or a
            cdp://host:port shorthand.
        load_wait_sec: pause between opening the tab and evaluating the
            script. There is no load-event listener; the script's own
            try/catch tolerates a page that is not fully initialised.

    Returns:
        Total number of items the injected scripts reported as written.
    """
    from websocket import create_connection

    ws = create_connection(_ws_from_cdp_url(cdp_url), timeout=15, suppress_origin=True)
    last_id = 0
    written = 0

    def rpc(method: str, params: dict, session_id: Optional[str] = None) -> dict:
        # Every request on the shared socket gets the next sequential id.
        nonlocal last_id
        last_id += 1
        return _cdp_send(ws, last_id, method, params, session_id=session_id)

    try:
        for origin, items in data.items():
            if not items:
                continue
            if not origin.startswith("http"):
                log.warning("Skipping non-http origin %r", origin)
                continue
            # Partitioned-storage keys such as
            # 'https://www.youtube.com/^0https://openai.com' carry a '^'
            # marker (Chromium's third-party storage partitioning) and cannot
            # be opened as a top-level URL.
            if "^" in origin:
                log.info("Skipping partitioned origin %r", origin)
                continue
            page_url = origin.rstrip("/") + "/"

            target_id = None
            try:
                created = rpc("Target.createTarget", {"url": page_url})
                target_id = created.get("result", {}).get("targetId")
                if not target_id:
                    log.warning("createTarget failed for %s: %s", origin, created.get("error"))
                    continue

                attached = rpc("Target.attachToTarget",
                               {"targetId": target_id, "flatten": True})
                session_id = attached.get("result", {}).get("sessionId")
                if not session_id:
                    log.warning("attachToTarget failed for %s", origin)
                    continue

                time.sleep(load_wait_sec)

                # The items are inlined as a JS object literal. Each setItem
                # has its own try/catch so one rejected key does not stop the
                # rest; the script returns how many succeeded.
                script = "".join((
                    "(function(){try{var items=", json.dumps(items), ";",
                    "var n=0;for(var k in items){try{localStorage.setItem(k,items[k]);n++;}catch(e){}}",
                    "return n;}catch(e){return 'ERROR:'+e.toString();}})()",
                ))
                evaluated = rpc(
                    "Runtime.evaluate",
                    {"expression": script, "returnByValue": True},
                    session_id=session_id,
                )
                count = evaluated.get("result", {}).get("result", {}).get("value")
                if not isinstance(count, int):
                    # Either the script's 'ERROR:...' string or no value at all.
                    log.warning(" %s: %s", origin, count)
                else:
                    written += count
                    log.info(" %s: set %d/%d items", origin, count, len(items))
            finally:
                # Always try to close the tab, including on the early
                # `continue` paths; failing to close is not fatal.
                if target_id:
                    try:
                        rpc("Target.closeTarget", {"targetId": target_id})
                    except Exception:
                        pass
    finally:
        ws.close()

    log.info("Injected %d localStorage items total", written)
    return written
216
+
217
+
218
def _cli(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point for ``python -m ai_browser_profile.localstorage``.

    Sub-commands:
        copy: read localStorage from a local profile and inject it via CDP.
        list: print per-origin item counts (never values).

    Args:
        argv: argument list; None lets argparse read sys.argv.

    Returns:
        Process exit code: 0 on success, 2 when ``copy`` wrote nothing,
        1 for an unrecognised command.
    """
    parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.localstorage")
    sub = parser.add_subparsers(dest="cmd", required=True)

    cp = sub.add_parser("copy", help="copy localStorage from a local profile into a running browser via CDP")
    cp.add_argument("--from", dest="src", required=True,
                    help="source profile, e.g. chrome:Default or 'chrome:Profile 1'")
    cp.add_argument("--to", dest="dst", required=True,
                    help="target CDP endpoint, e.g. cdp://127.0.0.1:9555")
    cp.add_argument("--origins", default=None,
                    help="comma-separated host substrings (e.g. 'chatgpt.com,notion.so')")
    cp.add_argument("--load-wait", type=float, default=4.0,
                    help="seconds to wait after opening each tab before injecting (default 4)")
    cp.add_argument("-v", "--verbose", action="store_true")

    ls = sub.add_parser("list", help="list localStorage origins (counts only — no values printed)")
    ls.add_argument("--from", dest="src", required=True)
    ls.add_argument("--origins", default=None)

    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
        format="%(levelname)s %(message)s",
    )

    origin_filters = None
    if args.origins is not None:
        # Drop empty entries (trailing comma, doubled comma): an empty
        # substring matches every origin and would silently widen the filter
        # to *all* localStorage data.
        origin_filters = [o.strip() for o in args.origins.split(",") if o.strip()]
        if not origin_filters:
            parser.error("--origins was given but contains no origin names")

    profile = find_profile(args.src)
    data = read_localstorage(profile, origins=origin_filters)

    if args.cmd == "list":
        for origin, items in sorted(data.items(), key=lambda kv: -len(kv[1])):
            print(f" {len(items):4} {origin}")
        total = sum(len(v) for v in data.values())
        print(f"Total: {total} items across {len(data)} origins")
        return 0

    if args.cmd == "copy":
        n = inject_localstorage_via_cdp(data, args.dst, load_wait_sec=args.load_wait)
        total = sum(len(v) for v in data.values())
        print(f"Injected {n}/{total} localStorage items into {args.dst}")
        return 0 if n > 0 else 2

    return 1
263
+
264
+
265
+ if __name__ == "__main__":
266
+ sys.exit(_cli())
package/bin/cli.js CHANGED
@@ -25,11 +25,15 @@ const COPY_TARGETS = [
25
25
  // Never overwrite these during update
26
26
  const NEVER_OVERWRITE = new Set(['memories.db', '.venv', 'scripts', 'config.json']);
27
27
 
28
- // Core Python deps (tier 1) — enough for tag search, SQL, extraction
29
- // ccl_chromium_reader is only on GitHub, not PyPI
28
+ // Core Python deps (tier 1) — enough for tag search, SQL, extraction,
29
+ // plus cookies + localStorage sync (cryptography for AES-CBC decrypt of
30
+ // Chromium cookie blobs, websocket-client for CDP injection).
31
+ // ccl_chromium_reader is only on GitHub, not PyPI.
30
32
  const CORE_DEPS = [
31
33
  'git+https://github.com/cclgroupltd/ccl_chromium_reader.git',
32
34
  'numpy',
35
+ 'cryptography',
36
+ 'websocket-client',
33
37
  ];
34
38
 
35
39
  // Embedding deps (tier 2) — optional, for semantic search
package/package.json CHANGED
@@ -1,10 +1,16 @@
1
1
  {
2
2
  "name": "ai-browser-profile",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "ai-browser-profile": "bin/cli.js"
7
7
  },
8
+ "scripts": {
9
+ "dev": "next dev",
10
+ "build": "next build",
11
+ "start": "next start",
12
+ "typecheck": "tsc --noEmit"
13
+ },
8
14
  "files": [
9
15
  "bin/",
10
16
  "ai_browser_profile/**/*.py",
@@ -36,5 +42,26 @@
36
42
  "homepage": "https://github.com/m13v/ai-browser-profile",
37
43
  "engines": {
38
44
  "node": ">=16"
45
+ },
46
+ "devDependencies": {
47
+ "@assistant-ui/react": "^0.12.25",
48
+ "@google/generative-ai": "^0.24.1",
49
+ "@m13v/seo-components": "^0.40.0",
50
+ "@remotion/player": "^4.0.446",
51
+ "@seo/components": "npm:@m13v/seo-components@^0.40.0",
52
+ "@supabase/supabase-js": "^2.103.3",
53
+ "@tailwindcss/postcss": "^4",
54
+ "@types/node": "^20",
55
+ "@types/react": "^19",
56
+ "@types/react-dom": "^19",
57
+ "framer-motion": "^12.38.0",
58
+ "lottie-react": "^2.4.1",
59
+ "next": "16.2.2",
60
+ "posthog-js": "^1.369.2",
61
+ "react": "19.2.4",
62
+ "react-dom": "19.2.4",
63
+ "remotion": "^4.0.446",
64
+ "tailwindcss": "^4",
65
+ "typescript": "^5"
39
66
  }
40
67
  }
package/review/SKILL.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: memory-review
3
- description: "Review and clean unreviewed memories in the database. Removes junk, merges duplicates, fixes miskeyed data, and marks good entries as reviewed. Run periodically after extraction."
3
+ description: "Use when the user says 'review memories', 'clean up the memory database', 'process unreviewed memories', 'memory cleanup', 'merge duplicate memories', or wants periodic LLM-powered post-ingestion review. Removes junk, merges duplicates, fixes miskeyed data, marks good entries as reviewed."
4
4
  ---
5
5
 
6
6
  # Memory Review
package/skill/SKILL.md CHANGED
@@ -1,180 +1,47 @@
1
1
  ---
2
2
  name: ai-browser-profile
3
- description: "Query the user's AI browser profile: identity, accounts, tools, contacts, addresses, payments extracted from browser data. Use when you need context about the user to help with any task: form filling, emailing, booking, payments, or any task where knowing the user's info helps."
3
+ description: "Query the user's browser-extracted profile: identity, accounts, tools, contacts, addresses, payments. Use when the user asks about their own info or you need personal context."
4
4
  ---
5
5
 
6
6
  # AI Browser Profile
7
7
 
8
- A self-ranking database of everything learned about the user from browser data. Memories are ranked by how often they're accessed vs how often they appear in search results — frequently useful memories rise, noise sinks.
8
+ Locally extracted profile of the user built from their browser data (autofill, saved logins, history, bookmarks, WhatsApp, LinkedIn). Stored in `~/ai-browser-profile/memories.db`. Nothing leaves the machine.
9
9
 
10
- ## Quick Reference
10
+ ## When to use `query_browser_profile`
11
11
 
12
- | Item | Value |
13
- |------|-------|
14
- | Database | `~/ai-browser-profile/memories.db` |
15
- | Module | `~/ai-browser-profile/ai_browser_profile/` |
16
- | Python | `~/ai-browser-profile/.venv/bin/python` |
17
- | Rebuild | `~/ai-browser-profile/.venv/bin/python ~/ai-browser-profile/extract.py` |
12
+ Use this tool proactively whenever the user asks about themselves or you need personal context:
18
13
 
19
- ## How to Use
14
+ | User asks... | Query |
15
+ |---|---|
16
+ | "What's my email?" | query: "email address", tags: ["contact_info"] |
17
+ | "What accounts do I have?" | query: "saved accounts", tags: ["account"] |
18
+ | "What tools do I use?" | query: "tools and services", tags: ["tool"] |
19
+ | "Find contact X" | query: "X", tags: ["contact"] |
20
+ | "What's my address?" | query: "home address", tags: ["address"] |
21
+ | "What card do I use?" | query: "payment card", tags: ["payment"] |
22
+ | "Who am I?" / profile | query: "profile", tags: ["identity"] |
20
23
 
21
- ### User profile (start here)
24
+ ## Tool parameters
22
25
 
23
- Get a compact overview of the user — name, emails, addresses, accounts, tools, contacts. This is deterministic (no LLM) and computed from the database. Use it as baseline context before doing any task.
24
-
25
- ```python
26
- import sys, os
27
- sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
28
- from ai_browser_profile import MemoryDB
29
-
30
- mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
31
- print(mem.profile_text()) # markdown formatted, ~1.5KB
32
- mem.close()
33
- ```
34
-
35
- The profile shows: name, all known emails, phone numbers, handles, addresses, payment info, companies, top tools/services, accounts grouped by email, Notion projects, and contact count. Values are ranked by frequency across browser profiles — higher frequency = more likely to be the user's own data.
36
-
37
- ### Search by tags
38
-
39
- ```python
40
- import sys, os
41
- sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
42
- from ai_browser_profile import MemoryDB
43
-
44
- mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
45
-
46
- # Search returns results ranked by hit_rate (accessed/appeared), then counts
47
- # accessed_count and appeared_count are auto-incremented on every search call
48
- results = mem.search(["identity", "contact_info"], limit=10)
49
- for r in results:
50
- print(f'{r["key"]}: {r["value"]}')
51
-
52
- mem.close()
53
26
  ```
54
-
55
- ### Semantic search (natural language)
56
-
57
- ```python
58
- # Find memories by meaning, not just keywords
59
- results = mem.semantic_search("what products does the user build")
60
- for r in results[:5]:
61
- print(f'{r["key"]}: {r["value"][:80]} (sim={r["similarity"]:.3f})')
62
-
63
- # Falls back to text_search() if embeddings not installed
64
- # Install with: npx ai-browser-profile install-embeddings
65
- ```
66
-
67
- ### Quick SQL queries
68
-
69
- ```bash
70
- sqlite3 ~/ai-browser-profile/memories.db
71
- ```
72
-
73
- ```sql
74
- -- All identity info
75
- SELECT m.key, m.value FROM memories m
76
- JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'identity'
77
- AND m.superseded_by IS NULL;
78
-
79
- -- All contact info (emails, phones)
80
- SELECT m.key, m.value, m.source FROM memories m
81
- JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'contact_info'
82
- AND m.superseded_by IS NULL;
83
-
84
- -- All contacts
85
- SELECT m.key, m.value FROM memories m
86
- JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'contact'
87
- AND m.superseded_by IS NULL
88
- ORDER BY m.accessed_count DESC;
89
-
90
- -- Most accessed memories (the ones that proved useful)
91
- SELECT key, value, accessed_count, appeared_count,
92
- CAST(accessed_count AS REAL) / MAX(appeared_count, 1) AS hit_rate
93
- FROM memories WHERE accessed_count > 0
94
- ORDER BY hit_rate DESC;
95
-
96
- -- Search by key pattern
97
- SELECT key, value FROM memories WHERE key LIKE 'account:%'
98
- AND superseded_by IS NULL;
27
+ query_browser_profile(
28
+ query: string, // natural language query
29
+ tags?: string[] // optional: identity, contact_info, account, tool,
30
+ // address, payment, contact, work, knowledge
31
+ )
99
32
  ```
100
33
 
101
- ## Canonical Tags
102
-
103
- | Tag | What it covers | Example keys |
104
- |-----|---------------|-------------|
105
- | `identity` | Name, DOB, gender, job title, language | `first_name`, `last_name`, `full_name`, `date_of_birth` |
106
- | `contact_info` | Email addresses, phone numbers | `email`, `phone` |
107
- | `address` | Physical addresses | `street_address`, `city`, `state`, `zip`, `country` |
108
- | `payment` | Card holder names, expiry | `card_holder_name`, `card_expiry`, `card_nickname` |
109
- | `account` | Service accounts, login credentials | `account:{domain}` |
110
- | `tool` | Tools/services used (from history) | `tool:GitHub`, `tool:Slack`, `tool:Stripe` |
111
- | `contact` | People the user knows | `contact:{Name}`, `linkedin:{Name}` |
112
- | `work` | Work-related (company, LinkedIn) | `company`, `linkedin:*` |
113
- | `knowledge` | Interests, skills, projects, products | `product:*`, `project:*`, `interest:*` |
114
- | `communication` | Messaging platforms | `tool:Slack`, `tool:WhatsApp` |
115
- | `social` | Social platforms | `tool:LinkedIn`, `tool:X/Twitter` |
116
- | `finance` | Financial tools | `tool:Stripe`, `tool:QuickBooks` |
117
-
118
- ## Ranking System
119
-
120
- Every `search()`, `semantic_search()`, and `text_search()` call automatically increments both `appeared_count` and `accessed_count` for all returned results. No manual `mark_accessed()` calls needed.
121
-
122
- **hit_rate** = `accessed_count / appeared_count`
123
-
124
- Memories that are frequently returned by searches rise in ranking. The system is fully automatic — no manual curation or agent instrumentation needed.
34
+ Returns ranked results from the local database. Results are self-ranking — frequently accessed ones surface automatically.
125
35
 
126
- ## Semantic Dedup
36
+ ## Full profile
127
37
 
128
- On `upsert()`, near-duplicate memories (cosine similarity >= 0.92 with same key prefix) are automatically superseded. This prevents storing "Screen recording tool for compliance" and "Screen recording tool launched on Product Hunt for compliance use cases" as separate entries.
129
-
130
- ## Task-Specific Tag Queries
131
-
132
- | Task | Tags to search |
133
- |------|---------------|
134
- | Fill out a form | `["identity", "contact_info", "address"]` |
135
- | Send an email | `["contact_info", "communication"]` + search contact by name |
136
- | Book a flight/hotel | `["identity", "address", "payment"]` |
137
- | Log into a service | `["account"]` |
138
- | Invoice a client | `["identity", "work", "address", "payment"]` |
139
- | Find a contact | `["contact"]` + filter by key pattern |
140
- | Dev/deploy task | `["account", "tool"]` |
141
- | Social media post | `["account", "social"]` |
142
- | Research question | `mem.semantic_search("your question here")` |
143
-
144
- ## Rebuilding Memories
145
-
146
- To refresh from latest browser data:
147
-
148
- ```bash
149
- cd ~/ai-browser-profile
150
- source .venv/bin/activate
151
- python extract.py # full scan
152
- python extract.py --browsers arc chrome # specific browsers
153
- python extract.py --no-indexeddb --no-localstorage # fast, skip LevelDB
38
+ To get the complete user profile in one call:
154
39
  ```
155
-
156
- ### Backfill embeddings (after install-embeddings)
157
-
158
- ```python
159
- import sys, os
160
- sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
161
- from ai_browser_profile import MemoryDB
162
- mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
163
- n = mem.backfill_embeddings()
164
- print(f"Embedded {n} memories")
165
- mem.close()
40
+ query_browser_profile(query: "full profile")
166
41
  ```
167
42
 
168
- Rebuilding with `extract.py` reads browser files directly (History, Login Data, Web Data, IndexedDB, Local Storage). The memory database preserves `appeared_count` and `accessed_count` across rebuilds via UPSERT logic — rankings are never lost.
169
-
170
- ## Dependencies
43
+ Returns name, emails, phone, addresses, payment info, companies, top tools, accounts.
171
44
 
172
- **Core** (installed by `npx ai-browser-profile init`):
173
- - `ccl_chromium_reader` — IndexedDB + Local Storage LevelDB files
174
- - `numpy` — vector math for cosine similarity
45
+ ## Availability
175
46
 
176
- **Embeddings** (optional, installed by `npx ai-browser-profile install-embeddings`):
177
- - `onnxruntime` — ONNX model inference
178
- - `huggingface_hub` — model downloading
179
- - `tokenizers` — text tokenization
180
- - Model: nomic-embed-text-v1.5 (~131MB, downloads on first use)
47
+ Requires browser data extraction during onboarding. If queries return no results, call `extract_browser_profile` to re-run the extraction — it uses the native Swift extractor built into the app (no external tools needed).