ai-browser-profile 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai_browser_profile/cookies.py +330 -0
- package/ai_browser_profile/extract.py +12 -0
- package/ai_browser_profile/ingestors/browser_detect.py +4 -0
- package/ai_browser_profile/localstorage.py +266 -0
- package/bin/cli.js +6 -2
- package/package.json +28 -1
- package/review/SKILL.md +1 -1
- package/skill/SKILL.md +26 -159
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Read cookies from a Chromium browser profile and inject into another.
|
|
2
|
+
|
|
3
|
+
Sibling to the ingestors in ai_browser_profile.ingestors. Lives in the
|
|
4
|
+
package but is NOT wired into extract_memories() — cookies are auth
|
|
5
|
+
secrets and must never land in memories.db.
|
|
6
|
+
|
|
7
|
+
Public API:
|
|
8
|
+
read_cookies(profile, domains=None) -> list[Cookie]
|
|
9
|
+
inject_via_cdp(cookies, cdp_url, ...) -> int
|
|
10
|
+
|
|
11
|
+
CLI:
|
|
12
|
+
python -m ai_browser_profile.cookies copy \\
|
|
13
|
+
--from chrome:Default \\
|
|
14
|
+
--to cdp://127.0.0.1:9555 \\
|
|
15
|
+
--domains github.com,linear.app
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import logging
|
|
24
|
+
import shutil
|
|
25
|
+
import sqlite3
|
|
26
|
+
import subprocess
|
|
27
|
+
import sys
|
|
28
|
+
import urllib.request
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Iterable, Optional
|
|
32
|
+
|
|
33
|
+
from ai_browser_profile.ingestors.browser_detect import (
|
|
34
|
+
BrowserProfile,
|
|
35
|
+
copy_db,
|
|
36
|
+
detect_browsers,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
log = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
KEYCHAIN_SERVICE = {
|
|
42
|
+
"chrome": "Chrome Safe Storage",
|
|
43
|
+
"arc": "Arc Safe Storage",
|
|
44
|
+
"brave": "Brave Safe Storage",
|
|
45
|
+
"edge": "Microsoft Edge Safe Storage",
|
|
46
|
+
"chromium": "Chromium Safe Storage",
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
PBKDF2_SALT = b"saltysalt"
|
|
50
|
+
PBKDF2_ITERATIONS = 1003
|
|
51
|
+
AES_KEY_LENGTH = 16
|
|
52
|
+
AES_IV = b" " * 16
|
|
53
|
+
|
|
54
|
+
SAMESITE_MAP = {-1: "Unspecified", 0: "None", 1: "Lax", 2: "Strict"}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class Cookie:
    """One cookie read from a Chromium profile, ready to be injected via CDP."""

    # Cookie name and its plaintext value (decrypted by read_cookies if needed).
    name: str
    value: str
    # The row's host_key, e.g. ".github.com".
    domain: str
    path: str
    # Unix timestamp in seconds; 0.0 when the source row carries no expiry.
    expires: float
    secure: bool
    http_only: bool
    # One of the SAMESITE_MAP values: "Unspecified", "None", "Lax" or "Strict".
    same_site: str
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _keychain_password(browser: str) -> bytes:
    """Fetch a browser's "Safe Storage" password from the macOS Keychain.

    Args:
        browser: Key into KEYCHAIN_SERVICE (e.g. "chrome", "arc").

    Returns:
        The password printed by ``security find-generic-password -w``, as bytes.

    Raises:
        ValueError: If no Keychain service name is mapped for ``browser``.
        RuntimeError: If the ``security`` tool is missing or exits non-zero
            (for example when Keychain access is denied).
    """
    service = KEYCHAIN_SERVICE.get(browser)
    if not service:
        raise ValueError(f"No keychain service mapped for browser {browser!r}")
    try:
        res = subprocess.run(
            ["security", "find-generic-password", "-w", "-s", service],
            capture_output=True, text=True, check=False,
        )
    except FileNotFoundError as err:
        # Report a missing `security` binary the same way as any other
        # Keychain failure instead of leaking a bare FileNotFoundError.
        raise RuntimeError(
            f"Could not read {service!r} from Keychain: 'security' tool not found"
        ) from err
    if res.returncode != 0:
        raise RuntimeError(
            f"Could not read {service!r} from Keychain: {res.stderr.strip() or 'access denied'}"
        )
    # Remove only the line terminator added by the tool; any other whitespace
    # would be part of the secret and must survive.
    return res.stdout.rstrip("\r\n").encode()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _derive_key(password: bytes) -> bytes:
    """Derive the AES key for cookie values from the Safe Storage password.

    Runs PBKDF2-HMAC-SHA1 over ``password`` using the module-level salt,
    iteration count and key length constants.
    """
    derived = hashlib.pbkdf2_hmac(
        "sha1",
        password,
        PBKDF2_SALT,
        PBKDF2_ITERATIONS,
        dklen=AES_KEY_LENGTH,
    )
    return derived
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _decrypt(encrypted: bytes, key: bytes, host_key: str) -> Optional[str]:
|
|
91
|
+
"""Decrypt a Chromium cookie value. Returns None on failure."""
|
|
92
|
+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
|
93
|
+
|
|
94
|
+
if not encrypted:
|
|
95
|
+
return None
|
|
96
|
+
prefix = encrypted[:3]
|
|
97
|
+
if prefix in (b"v10", b"v11"):
|
|
98
|
+
payload = encrypted[3:]
|
|
99
|
+
else:
|
|
100
|
+
payload = encrypted
|
|
101
|
+
if len(payload) % 16 != 0:
|
|
102
|
+
return None
|
|
103
|
+
cipher = Cipher(algorithms.AES(key), modes.CBC(AES_IV))
|
|
104
|
+
dec = cipher.decryptor()
|
|
105
|
+
plain = dec.update(payload) + dec.finalize()
|
|
106
|
+
if not plain:
|
|
107
|
+
return None
|
|
108
|
+
pad = plain[-1]
|
|
109
|
+
if 1 <= pad <= 16 and plain.endswith(bytes([pad]) * pad):
|
|
110
|
+
plain = plain[:-pad]
|
|
111
|
+
# Chrome 80+ prepends SHA256(host_key) (32 bytes) to bind cookie to its host.
|
|
112
|
+
expected = hashlib.sha256(host_key.encode()).digest()
|
|
113
|
+
if plain.startswith(expected):
|
|
114
|
+
plain = plain[32:]
|
|
115
|
+
try:
|
|
116
|
+
return plain.decode("utf-8")
|
|
117
|
+
except UnicodeDecodeError:
|
|
118
|
+
return plain.decode("utf-8", errors="replace")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def read_cookies(
    profile: BrowserProfile,
    domains: Optional[Iterable[str]] = None,
) -> list[Cookie]:
    """Read and decrypt cookies from a Chromium browser profile.

    Args:
        profile: A Chromium profile from detect_browsers().
        domains: Optional iterable of substrings; a cookie is kept if its
            host_key contains any of them. A single string is treated as one
            substring. None (or an empty iterable) means all cookies.

    Returns:
        The cookies that have a non-empty value after decryption. Rows whose
        value is empty or cannot be decrypted are skipped and counted in the
        log message.

    Raises:
        NotImplementedError: For Safari and Firefox profiles.
        FileNotFoundError: If the profile has no ``Cookies`` database.
        RuntimeError: If the database cannot be copied, or the Keychain
            password cannot be read.
    """
    if profile.browser in ("safari", "firefox"):
        raise NotImplementedError(f"Cookie read not supported for {profile.browser}")

    cookies_path = profile.path / "Cookies"
    if not cookies_path.exists():
        raise FileNotFoundError(f"No Cookies file at {cookies_path}")

    # A bare string is one filter, not an iterable of single characters
    # (which would match almost every host).
    if isinstance(domains, str):
        domains = [domains]
    domain_filters = list(domains) if domains else None

    tmp = copy_db(cookies_path)
    if tmp is None:
        raise RuntimeError(
            f"Could not copy {cookies_path}. Grant Full Disk Access to your terminal and retry."
        )

    cookies: list[Cookie] = []
    skipped = 0

    def _txt(b) -> str:
        # Columns arrive as bytes because of text_factory below.
        if b is None:
            return ""
        if isinstance(b, bytes):
            return b.decode("utf-8", errors="replace")
        return str(b)

    try:
        # Inside the try so a Keychain failure still removes the temp copy.
        key = _derive_key(_keychain_password(profile.browser))
        conn = sqlite3.connect(f"file:{tmp}?mode=ro", uri=True)
        try:
            # Arc and some Chrome forks declare encrypted_value as TEXT, not
            # BLOB, which makes sqlite3 try to UTF-8-decode the AES ciphertext
            # and crash mid-iteration. Force everything to bytes and decode
            # TEXT columns ourselves.
            conn.text_factory = bytes
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT host_key, name, value, encrypted_value, path, expires_utc, "
                "is_secure, is_httponly, samesite FROM cookies"
            )
            for row in rows:
                host = _txt(row["host_key"])
                if domain_filters and not any(d in host for d in domain_filters):
                    continue
                value = _txt(row["value"])
                if not value and row["encrypted_value"]:
                    value = _decrypt(row["encrypted_value"], key, host) or ""
                if not value:
                    skipped += 1
                    continue
                expires = 0.0
                if row["expires_utc"]:
                    # Chromium epoch is 1601-01-01 in microseconds.
                    expires = (row["expires_utc"] / 1_000_000) - 11644473600
                cookies.append(Cookie(
                    name=_txt(row["name"]),
                    value=value,
                    domain=host,
                    path=_txt(row["path"]) or "/",
                    expires=expires,
                    secure=bool(row["is_secure"]),
                    http_only=bool(row["is_httponly"]),
                    same_site=SAMESITE_MAP.get(row["samesite"], "Unspecified"),
                ))
        finally:
            # Close even when the query or a row conversion raises.
            conn.close()
    finally:
        shutil.rmtree(tmp.parent, ignore_errors=True)

    log.info(
        "Read %d cookies from %s/%s (skipped %d undecryptable)",
        len(cookies), profile.browser, profile.name, skipped,
    )
    return cookies
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _ws_from_cdp_url(cdp_url: str) -> str:
|
|
204
|
+
if cdp_url.startswith("ws://") or cdp_url.startswith("wss://"):
|
|
205
|
+
return cdp_url
|
|
206
|
+
if cdp_url.startswith("cdp://"):
|
|
207
|
+
cdp_url = "http://" + cdp_url[len("cdp://"):]
|
|
208
|
+
base = cdp_url.rstrip("/")
|
|
209
|
+
with urllib.request.urlopen(f"{base}/json/version", timeout=5) as r:
|
|
210
|
+
return json.loads(r.read())["webSocketDebuggerUrl"]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def inject_via_cdp(
    cookies: Iterable[Cookie],
    cdp_url: str = "http://127.0.0.1:9222",
) -> int:
    """Inject cookies into a running Chrome via CDP Storage.setCookies.

    Args:
        cookies: iterable of Cookie objects.
        cdp_url: base http(s) URL of the Chrome DevTools endpoint, or a
            cdp://host:port shorthand, or a raw ws:// URL.

    Returns: number of cookies submitted (the browser accepts them as a batch),
        or 0 when there is nothing to submit or the browser reports an error.

    Raises:
        TimeoutError: If no reply to the request arrives within 20 seconds
            while other frames keep arriving.
    """
    batch = []
    for c in cookies:
        param = {
            "name": c.name,
            "value": c.value,
            "domain": c.domain,
            "path": c.path or "/",
            "secure": c.secure,
            "httpOnly": c.http_only,
        }
        if c.same_site in ("Strict", "Lax", "None"):
            param["sameSite"] = c.same_site
        if c.expires > 0:
            param["expires"] = c.expires
        batch.append(param)
    if not batch:
        # Nothing to send, so do not open a debugger connection at all.
        return 0

    import time

    from websocket import create_connection

    ws_url = _ws_from_cdp_url(cdp_url)
    # Chrome 111+ enforces CDP origin checking and rejects any Origin header
    # unless the target was launched with --remote-allow-origins. Suppressing
    # the header bypasses the check; localhost CDP is already privileged.
    ws = create_connection(ws_url, timeout=10, suppress_origin=True)
    try:
        ws.send(json.dumps({
            "id": 1,
            "method": "Storage.setCookies",
            "params": {"cookies": batch},
        }))
        # Skip frames that are not the reply to our request (e.g. events) so
        # an unrelated message is never mistaken for success.
        deadline = time.monotonic() + 20
        while True:
            resp = json.loads(ws.recv())
            if resp.get("id") == 1:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError("CDP Storage.setCookies timed out")
        if "error" in resp:
            log.warning("Storage.setCookies failed: %s", resp["error"])
            return 0
    finally:
        ws.close()
    log.info("Injected %d cookies via CDP", len(batch))
    return len(batch)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# --- helpers used by CLI and external callers ---
|
|
267
|
+
|
|
268
|
+
def find_profile(spec: str) -> BrowserProfile:
    """Resolve a 'browser:profile-name' spec (e.g. 'chrome:Default') to a BrowserProfile.

    A spec without a colon selects that browser's "Default" profile.

    Raises:
        SystemExit: If no detected profile of that browser has the given name;
            the message lists the profiles that were found.
    """
    browser, sep, name = spec.partition(":")
    if not sep:
        name = "Default"

    # Scan once and reuse the result for both the lookup and the error
    # message, so the two always describe the same set of profiles.
    candidates = list(detect_browsers({browser}))
    for candidate in candidates:
        if candidate.name == name:
            return candidate

    available = [(p.browser, p.name) for p in candidates]
    raise SystemExit(
        f"No profile {spec!r}. Available {browser} profiles: {available}"
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _cli(argv: Optional[list[str]] = None) -> int:
|
|
284
|
+
parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.cookies")
|
|
285
|
+
sub = parser.add_subparsers(dest="cmd", required=True)
|
|
286
|
+
|
|
287
|
+
cp = sub.add_parser("copy", help="copy cookies from a local profile into a running browser via CDP")
|
|
288
|
+
cp.add_argument("--from", dest="src", required=True,
|
|
289
|
+
help="source profile, e.g. chrome:Default or arc:'Profile 1'")
|
|
290
|
+
cp.add_argument("--to", dest="dst", required=True,
|
|
291
|
+
help="target CDP endpoint, e.g. cdp://127.0.0.1:9555 or http://127.0.0.1:9555")
|
|
292
|
+
cp.add_argument("--domains", default=None,
|
|
293
|
+
help="comma-separated list of host_key substrings to include")
|
|
294
|
+
cp.add_argument("-v", "--verbose", action="store_true")
|
|
295
|
+
|
|
296
|
+
ls = sub.add_parser("list", help="list cookies in a local profile (counts only — no values printed)")
|
|
297
|
+
ls.add_argument("--from", dest="src", required=True)
|
|
298
|
+
ls.add_argument("--domains", default=None)
|
|
299
|
+
|
|
300
|
+
args = parser.parse_args(argv)
|
|
301
|
+
|
|
302
|
+
logging.basicConfig(
|
|
303
|
+
level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
|
|
304
|
+
format="%(levelname)s %(message)s",
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
profile = find_profile(args.src)
|
|
308
|
+
domain_filters = [d.strip() for d in args.domains.split(",")] if args.domains else None
|
|
309
|
+
|
|
310
|
+
cookies = read_cookies(profile, domains=domain_filters)
|
|
311
|
+
|
|
312
|
+
if args.cmd == "list":
|
|
313
|
+
by_host: dict[str, int] = {}
|
|
314
|
+
for c in cookies:
|
|
315
|
+
by_host[c.domain] = by_host.get(c.domain, 0) + 1
|
|
316
|
+
for host, n in sorted(by_host.items(), key=lambda kv: -kv[1]):
|
|
317
|
+
print(f" {n:4} {host}")
|
|
318
|
+
print(f"Total: {len(cookies)} cookies across {len(by_host)} hosts")
|
|
319
|
+
return 0
|
|
320
|
+
|
|
321
|
+
if args.cmd == "copy":
|
|
322
|
+
n = inject_via_cdp(cookies, args.dst)
|
|
323
|
+
print(f"Injected {n}/{len(cookies)} cookies into {args.dst}")
|
|
324
|
+
return 0 if n > 0 else 2
|
|
325
|
+
|
|
326
|
+
return 1
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
if __name__ == "__main__":
|
|
330
|
+
sys.exit(_cli())
|
|
@@ -40,6 +40,7 @@ def extract_memories(memories_db_path: str = "memories.db",
|
|
|
40
40
|
"""
|
|
41
41
|
total_start = time.monotonic()
|
|
42
42
|
mem = MemoryDB(memories_db_path, defer_embeddings=True)
|
|
43
|
+
from ai_browser_profile.ingestors.browser_detect import permission_denied_paths
|
|
43
44
|
profiles = detect_browsers(allowed=browsers)
|
|
44
45
|
log.info(f"Extracting memories from {len(profiles)} profiles...")
|
|
45
46
|
|
|
@@ -80,6 +81,17 @@ def extract_memories(memories_db_path: str = "memories.db",
|
|
|
80
81
|
run_cleanup(db_path=memories_db_path)
|
|
81
82
|
mem = MemoryDB(memories_db_path, defer_embeddings=True)
|
|
82
83
|
interim_profile = mem.profile_text()
|
|
84
|
+
|
|
85
|
+
# Emit structured browser summary so callers can show transparency
|
|
86
|
+
detected_browsers = sorted(set(p.browser for p in profiles))
|
|
87
|
+
denied_browsers = sorted(set(
|
|
88
|
+
p.browser for p in profiles
|
|
89
|
+
if any(str(p.path) in str(denied) for denied in permission_denied_paths)
|
|
90
|
+
))
|
|
91
|
+
print(f"BROWSERS_SCANNED: {','.join(detected_browsers)}", flush=True)
|
|
92
|
+
if denied_browsers:
|
|
93
|
+
print(f"BROWSERS_PERMISSION_DENIED: {','.join(denied_browsers)}", flush=True)
|
|
94
|
+
|
|
83
95
|
log.info(f"Interim profile ready (WhatsApp + embeddings still running):\n{interim_profile}")
|
|
84
96
|
|
|
85
97
|
# 7. WhatsApp — contacts from IndexedDB (slow, runs last)
|
|
@@ -13,6 +13,9 @@ log = logging.getLogger(__name__)
|
|
|
13
13
|
|
|
14
14
|
APP_SUPPORT = Path.home() / "Library" / "Application Support"
|
|
15
15
|
|
|
16
|
+
# Populated by copy_db() when a file can't be read due to TCC permissions
|
|
17
|
+
permission_denied_paths: list[Path] = []
|
|
18
|
+
|
|
16
19
|
|
|
17
20
|
@dataclass
|
|
18
21
|
class BrowserProfile:
|
|
@@ -89,6 +92,7 @@ def copy_db(src: Path) -> Optional[Path]:
|
|
|
89
92
|
return dst
|
|
90
93
|
except PermissionError:
|
|
91
94
|
log.warning(f"Permission denied reading {src} — grant Full Disk Access or skip")
|
|
95
|
+
permission_denied_paths.append(src)
|
|
92
96
|
return None
|
|
93
97
|
|
|
94
98
|
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Read localStorage from a Chromium browser profile and inject into another.
|
|
2
|
+
|
|
3
|
+
Sibling to cookies.py. Like cookies.py, this is NOT wired into
|
|
4
|
+
extract_memories() — localStorage values can include auth tokens and
|
|
5
|
+
must not land in memories.db.
|
|
6
|
+
|
|
7
|
+
Public API:
|
|
8
|
+
read_localstorage(profile, origins=None) -> dict[origin, dict[key, value]]
|
|
9
|
+
inject_localstorage_via_cdp(data, cdp_url, ...) -> int
|
|
10
|
+
|
|
11
|
+
CLI:
|
|
12
|
+
python -m ai_browser_profile.localstorage copy \\
|
|
13
|
+
--from chrome:Profile\\ 1 \\
|
|
14
|
+
--to cdp://127.0.0.1:9555 \\
|
|
15
|
+
--origins chatgpt.com,notion.so
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import json
|
|
22
|
+
import logging
|
|
23
|
+
import shutil
|
|
24
|
+
import sys
|
|
25
|
+
import tempfile
|
|
26
|
+
import time
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Iterable, Optional
|
|
29
|
+
|
|
30
|
+
from ai_browser_profile.ingestors.browser_detect import BrowserProfile
|
|
31
|
+
from ai_browser_profile.cookies import _ws_from_cdp_url, find_profile
|
|
32
|
+
|
|
33
|
+
log = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def read_localstorage(
    profile: BrowserProfile,
    origins: Optional[Iterable[str]] = None,
) -> dict[str, dict[str, str]]:
    """Read localStorage from a Chromium profile's LevelDB.

    The LevelDB directory is copied to a temporary location first and the copy
    is removed again before returning.

    Args:
        profile: Chromium profile from detect_browsers().
        origins: Optional iterable of substrings; an origin is kept if any
                 substring matches its storage_key (e.g. 'chatgpt.com'
                 matches 'https://chatgpt.com'). A single string is treated
                 as one substring. None = all origins.

    Returns: dict mapping origin (e.g. 'https://chatgpt.com') to dict of key/value.

    Raises:
        NotImplementedError: For Safari and Firefox profiles.
        FileNotFoundError: If the profile has no ``Local Storage/leveldb``.
        RuntimeError: If the LevelDB directory cannot be copied.
    """
    if profile.browser in ("safari", "firefox"):
        raise NotImplementedError(f"localStorage read not supported for {profile.browser}")

    ls_dir = profile.path / "Local Storage" / "leveldb"
    if not ls_dir.exists():
        raise FileNotFoundError(f"No Local Storage/leveldb at {ls_dir}")

    # A bare string is one filter, not an iterable of single characters.
    if isinstance(origins, str):
        origins = [origins]
    origin_filters = list(origins) if origins else None

    tmp = Path(tempfile.mkdtemp(prefix="ai_browser_profile_ls_"))
    tmp_ls = tmp / "leveldb"
    try:
        shutil.copytree(ls_dir, tmp_ls)
    except Exception as e:
        shutil.rmtree(tmp, ignore_errors=True)
        raise RuntimeError(f"Could not copy {ls_dir}: {e}") from e

    out: dict[str, dict[str, str]] = {}
    skipped = 0

    try:
        from ccl_chromium_reader import ccl_chromium_localstorage

        ldb = ccl_chromium_localstorage.LocalStoreDb(tmp_ls)
        try:
            for record in ldb.iter_all_records():
                # Best effort: one unreadable record must not abort the scan.
                try:
                    origin = record.storage_key or ""
                    key = record.script_key or ""
                    value = record.value
                    if not origin or not key or value is None:
                        continue
                    if origin_filters and not any(f in origin for f in origin_filters):
                        continue
                    if isinstance(value, bytes):
                        try:
                            value = value.decode("utf-8")
                        except UnicodeDecodeError:
                            skipped += 1
                            continue
                    elif not isinstance(value, str):
                        value = str(value)
                    out.setdefault(origin, {})[key] = value
                except Exception:
                    skipped += 1
                    continue
        finally:
            # Release the LevelDB file handles before the copy is deleted.
            ldb.close()
    finally:
        shutil.rmtree(tmp, ignore_errors=True)

    total = sum(len(v) for v in out.values())
    log.info(
        "Read %d localStorage items across %d origins from %s/%s (skipped %d)",
        total, len(out), profile.browser, profile.name, skipped,
    )
    return out
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _cdp_send(ws, msg_id: int, method: str,
|
|
106
|
+
params: Optional[dict] = None,
|
|
107
|
+
session_id: Optional[str] = None) -> dict:
|
|
108
|
+
"""Send a CDP message and drain events until the matching reply arrives."""
|
|
109
|
+
msg: dict = {"id": msg_id, "method": method}
|
|
110
|
+
if params:
|
|
111
|
+
msg["params"] = params
|
|
112
|
+
if session_id:
|
|
113
|
+
msg["sessionId"] = session_id
|
|
114
|
+
ws.send(json.dumps(msg))
|
|
115
|
+
deadline = time.time() + 20
|
|
116
|
+
while time.time() < deadline:
|
|
117
|
+
resp = json.loads(ws.recv())
|
|
118
|
+
if resp.get("id") == msg_id:
|
|
119
|
+
return resp
|
|
120
|
+
raise TimeoutError(f"CDP {method} timed out")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def inject_localstorage_via_cdp(
    data: dict[str, dict[str, str]],
    cdp_url: str = "http://127.0.0.1:9222",
    load_wait_sec: float = 4.0,
) -> int:
    """Inject localStorage into a running Chrome via per-origin tabs.

    For each origin: opens a new tab to that origin (so the JS context is
    same-origin), waits for load, evaluates a localStorage.setItem batch via
    Runtime.evaluate, then closes the tab. Returns total items written.

    Args:
        data: dict of {origin -> {key: value}}. Origin must be http(s)://...
        cdp_url: base http(s) URL of the Chrome DevTools endpoint or a
                 cdp://host:port shorthand.
        load_wait_sec: how long to wait between tab open and the JS eval to
                       let the page initialize (no Page.loadEventFired listener
                       yet — keep simple, race-tolerant via the JS try/catch).
    """
    if not any(data.values()):
        # No origin has any item, so do not open a debugger connection at all.
        return 0

    from websocket import create_connection

    ws_url = _ws_from_cdp_url(cdp_url)
    ws = create_connection(ws_url, timeout=15, suppress_origin=True)
    msg_id = 0
    total_set = 0

    try:
        for origin, items in data.items():
            if not items:
                continue
            if not origin.startswith("http"):
                log.warning("Skipping non-http origin %r", origin)
                continue
            # Skip partitioned-storage origins like
            # 'https://www.youtube.com/^0https://openai.com'. The '^' marker
            # is Chromium's third-party storage partitioning; we can't navigate
            # to that as a top-level URL.
            if "^" in origin:
                log.info("Skipping partitioned origin %r", origin)
                continue
            url = origin.rstrip("/") + "/"

            target_id = None
            try:
                msg_id += 1
                r = _cdp_send(ws, msg_id, "Target.createTarget", {"url": url})
                target_id = r.get("result", {}).get("targetId")
                if not target_id:
                    log.warning("createTarget failed for %s: %s", origin, r.get("error"))
                    continue

                msg_id += 1
                r = _cdp_send(ws, msg_id, "Target.attachToTarget",
                              {"targetId": target_id, "flatten": True})
                session_id = r.get("result", {}).get("sessionId")
                if not session_id:
                    log.warning("attachToTarget failed for %s", origin)
                    continue

                time.sleep(load_wait_sec)

                # Inline the items as a JS object literal. localStorage
                # coerces non-string values to strings, and read_localstorage
                # has already stringified every value.
                expr = (
                    "(function(){try{var items=" + json.dumps(items) + ";"
                    "var n=0;for(var k in items){try{localStorage.setItem(k,items[k]);n++;}catch(e){}}"
                    "return n;}catch(e){return 'ERROR:'+e.toString();}})()"
                )
                msg_id += 1
                r = _cdp_send(
                    ws, msg_id, "Runtime.evaluate",
                    {"expression": expr, "returnByValue": True},
                    session_id=session_id,
                )
                value = r.get("result", {}).get("result", {}).get("value")
                if isinstance(value, int):
                    total_set += value
                    log.info("  %s: set %d/%d items", origin, value, len(items))
                else:
                    log.warning("  %s: %s", origin, value)
            finally:
                # Always try to close the tab we opened; a failure here is
                # not fatal, but leave a trace instead of hiding it.
                if target_id:
                    try:
                        msg_id += 1
                        _cdp_send(ws, msg_id, "Target.closeTarget", {"targetId": target_id})
                    except Exception:
                        log.debug("closeTarget failed for %s", origin, exc_info=True)
    finally:
        ws.close()

    log.info("Injected %d localStorage items total", total_set)
    return total_set
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _cli(argv: Optional[list[str]] = None) -> int:
|
|
219
|
+
parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.localstorage")
|
|
220
|
+
sub = parser.add_subparsers(dest="cmd", required=True)
|
|
221
|
+
|
|
222
|
+
cp = sub.add_parser("copy", help="copy localStorage from a local profile into a running browser via CDP")
|
|
223
|
+
cp.add_argument("--from", dest="src", required=True,
|
|
224
|
+
help="source profile, e.g. chrome:Default or 'chrome:Profile 1'")
|
|
225
|
+
cp.add_argument("--to", dest="dst", required=True,
|
|
226
|
+
help="target CDP endpoint, e.g. cdp://127.0.0.1:9555")
|
|
227
|
+
cp.add_argument("--origins", default=None,
|
|
228
|
+
help="comma-separated host substrings (e.g. 'chatgpt.com,notion.so')")
|
|
229
|
+
cp.add_argument("--load-wait", type=float, default=4.0,
|
|
230
|
+
help="seconds to wait after opening each tab before injecting (default 4)")
|
|
231
|
+
cp.add_argument("-v", "--verbose", action="store_true")
|
|
232
|
+
|
|
233
|
+
ls = sub.add_parser("list", help="list localStorage origins (counts only — no values printed)")
|
|
234
|
+
ls.add_argument("--from", dest="src", required=True)
|
|
235
|
+
ls.add_argument("--origins", default=None)
|
|
236
|
+
|
|
237
|
+
args = parser.parse_args(argv)
|
|
238
|
+
|
|
239
|
+
logging.basicConfig(
|
|
240
|
+
level=logging.DEBUG if getattr(args, "verbose", False) else logging.INFO,
|
|
241
|
+
format="%(levelname)s %(message)s",
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
profile = find_profile(args.src)
|
|
245
|
+
origin_filters = [o.strip() for o in args.origins.split(",")] if args.origins else None
|
|
246
|
+
|
|
247
|
+
data = read_localstorage(profile, origins=origin_filters)
|
|
248
|
+
|
|
249
|
+
if args.cmd == "list":
|
|
250
|
+
for origin, items in sorted(data.items(), key=lambda kv: -len(kv[1])):
|
|
251
|
+
print(f" {len(items):4} {origin}")
|
|
252
|
+
total = sum(len(v) for v in data.values())
|
|
253
|
+
print(f"Total: {total} items across {len(data)} origins")
|
|
254
|
+
return 0
|
|
255
|
+
|
|
256
|
+
if args.cmd == "copy":
|
|
257
|
+
n = inject_localstorage_via_cdp(data, args.dst, load_wait_sec=args.load_wait)
|
|
258
|
+
total = sum(len(v) for v in data.values())
|
|
259
|
+
print(f"Injected {n}/{total} localStorage items into {args.dst}")
|
|
260
|
+
return 0 if n > 0 else 2
|
|
261
|
+
|
|
262
|
+
return 1
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
if __name__ == "__main__":
|
|
266
|
+
sys.exit(_cli())
|
package/bin/cli.js
CHANGED
|
@@ -25,11 +25,15 @@ const COPY_TARGETS = [
|
|
|
25
25
|
// Never overwrite these during update
|
|
26
26
|
const NEVER_OVERWRITE = new Set(['memories.db', '.venv', 'scripts', 'config.json']);
|
|
27
27
|
|
|
28
|
-
// Core Python deps (tier 1) — enough for tag search, SQL, extraction
|
|
29
|
-
//
|
|
28
|
+
// Core Python deps (tier 1) — enough for tag search, SQL, extraction,
|
|
29
|
+
// plus cookies + localStorage sync (cryptography for AES-CBC decrypt of
|
|
30
|
+
// Chromium cookie blobs, websocket-client for CDP injection).
|
|
31
|
+
// ccl_chromium_reader is only on GitHub, not PyPI.
|
|
30
32
|
const CORE_DEPS = [
|
|
31
33
|
'git+https://github.com/cclgroupltd/ccl_chromium_reader.git',
|
|
32
34
|
'numpy',
|
|
35
|
+
'cryptography',
|
|
36
|
+
'websocket-client',
|
|
33
37
|
];
|
|
34
38
|
|
|
35
39
|
// Embedding deps (tier 2) — optional, for semantic search
|
package/package.json
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-browser-profile",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"ai-browser-profile": "bin/cli.js"
|
|
7
7
|
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"dev": "next dev",
|
|
10
|
+
"build": "next build",
|
|
11
|
+
"start": "next start",
|
|
12
|
+
"typecheck": "tsc --noEmit"
|
|
13
|
+
},
|
|
8
14
|
"files": [
|
|
9
15
|
"bin/",
|
|
10
16
|
"ai_browser_profile/**/*.py",
|
|
@@ -36,5 +42,26 @@
|
|
|
36
42
|
"homepage": "https://github.com/m13v/ai-browser-profile",
|
|
37
43
|
"engines": {
|
|
38
44
|
"node": ">=16"
|
|
45
|
+
},
|
|
46
|
+
"devDependencies": {
|
|
47
|
+
"@assistant-ui/react": "^0.12.25",
|
|
48
|
+
"@google/generative-ai": "^0.24.1",
|
|
49
|
+
"@m13v/seo-components": "^0.40.0",
|
|
50
|
+
"@remotion/player": "^4.0.446",
|
|
51
|
+
"@seo/components": "npm:@m13v/seo-components@^0.40.0",
|
|
52
|
+
"@supabase/supabase-js": "^2.103.3",
|
|
53
|
+
"@tailwindcss/postcss": "^4",
|
|
54
|
+
"@types/node": "^20",
|
|
55
|
+
"@types/react": "^19",
|
|
56
|
+
"@types/react-dom": "^19",
|
|
57
|
+
"framer-motion": "^12.38.0",
|
|
58
|
+
"lottie-react": "^2.4.1",
|
|
59
|
+
"next": "16.2.2",
|
|
60
|
+
"posthog-js": "^1.369.2",
|
|
61
|
+
"react": "19.2.4",
|
|
62
|
+
"react-dom": "19.2.4",
|
|
63
|
+
"remotion": "^4.0.446",
|
|
64
|
+
"tailwindcss": "^4",
|
|
65
|
+
"typescript": "^5"
|
|
39
66
|
}
|
|
40
67
|
}
|
package/review/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: memory-review
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use when the user says 'review memories', 'clean up the memory database', 'process unreviewed memories', 'memory cleanup', 'merge duplicate memories', or wants periodic LLM-powered post-ingestion review. Removes junk, merges duplicates, fixes miskeyed data, marks good entries as reviewed."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Memory Review
|
package/skill/SKILL.md
CHANGED
|
@@ -1,180 +1,47 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ai-browser-profile
|
|
3
|
-
description: "Query the user's
|
|
3
|
+
description: "Query the user's browser-extracted profile: identity, accounts, tools, contacts, addresses, payments. Use when the user asks about their own info or you need personal context."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# AI Browser Profile
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Locally-extracted profile of the user built from their browser data (autofill, saved logins, history, bookmarks, WhatsApp, LinkedIn). Stored in `~/ai-browser-profile/memories.db`. Nothing leaves the machine.
|
|
9
9
|
|
|
10
|
-
##
|
|
10
|
+
## When to use `query_browser_profile`
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|------|-------|
|
|
14
|
-
| Database | `~/ai-browser-profile/memories.db` |
|
|
15
|
-
| Module | `~/ai-browser-profile/ai_browser_profile/` |
|
|
16
|
-
| Python | `~/ai-browser-profile/.venv/bin/python` |
|
|
17
|
-
| Rebuild | `~/ai-browser-profile/.venv/bin/python ~/ai-browser-profile/extract.py` |
|
|
12
|
+
Use this tool proactively whenever the user asks about themselves or you need personal context:
|
|
18
13
|
|
|
19
|
-
|
|
14
|
+
| User asks... | Query |
|
|
15
|
+
|---|---|
|
|
16
|
+
| "What's my email?" | query: "email address", tags: ["contact_info"] |
|
|
17
|
+
| "What accounts do I have?" | query: "saved accounts", tags: ["account"] |
|
|
18
|
+
| "What tools do I use?" | query: "tools and services", tags: ["tool"] |
|
|
19
|
+
| "Find contact X" | query: "X", tags: ["contact"] |
|
|
20
|
+
| "What's my address?" | query: "home address", tags: ["address"] |
|
|
21
|
+
| "What card do I use?" | query: "payment card", tags: ["payment"] |
|
|
22
|
+
| "Who am I?" / profile | query: "profile", tags: ["identity"] |
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
## Tool parameters
|
|
22
25
|
|
|
23
|
-
Get a compact overview of the user — name, emails, addresses, accounts, tools, contacts. This is deterministic (no LLM) and computed from the database. Use it as baseline context before doing any task.
|
|
24
|
-
|
|
25
|
-
```python
|
|
26
|
-
import sys, os
|
|
27
|
-
sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
|
|
28
|
-
from ai_browser_profile import MemoryDB
|
|
29
|
-
|
|
30
|
-
mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
|
|
31
|
-
print(mem.profile_text()) # markdown formatted, ~1.5KB
|
|
32
|
-
mem.close()
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
The profile shows: name, all known emails, phone numbers, handles, addresses, payment info, companies, top tools/services, accounts grouped by email, Notion projects, and contact count. Values are ranked by frequency across browser profiles — higher frequency = more likely to be the user's own data.
|
|
36
|
-
|
|
37
|
-
### Search by tags
|
|
38
|
-
|
|
39
|
-
```python
|
|
40
|
-
import sys, os
|
|
41
|
-
sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
|
|
42
|
-
from ai_browser_profile import MemoryDB
|
|
43
|
-
|
|
44
|
-
mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
|
|
45
|
-
|
|
46
|
-
# Search returns results ranked by hit_rate (accessed/appeared), then counts
|
|
47
|
-
# accessed_count and appeared_count are auto-incremented on every search call
|
|
48
|
-
results = mem.search(["identity", "contact_info"], limit=10)
|
|
49
|
-
for r in results:
|
|
50
|
-
print(f'{r["key"]}: {r["value"]}')
|
|
51
|
-
|
|
52
|
-
mem.close()
|
|
53
26
|
```
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
results = mem.semantic_search("what products does the user build")
|
|
60
|
-
for r in results[:5]:
|
|
61
|
-
print(f'{r["key"]}: {r["value"][:80]} (sim={r["similarity"]:.3f})')
|
|
62
|
-
|
|
63
|
-
# Falls back to text_search() if embeddings not installed
|
|
64
|
-
# Install with: npx ai-browser-profile install-embeddings
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
### Quick SQL queries
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
sqlite3 ~/ai-browser-profile/memories.db
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
```sql
|
|
74
|
-
-- All identity info
|
|
75
|
-
SELECT m.key, m.value FROM memories m
|
|
76
|
-
JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'identity'
|
|
77
|
-
AND m.superseded_by IS NULL;
|
|
78
|
-
|
|
79
|
-
-- All contact info (emails, phones)
|
|
80
|
-
SELECT m.key, m.value, m.source FROM memories m
|
|
81
|
-
JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'contact_info'
|
|
82
|
-
AND m.superseded_by IS NULL;
|
|
83
|
-
|
|
84
|
-
-- All contacts
|
|
85
|
-
SELECT m.key, m.value FROM memories m
|
|
86
|
-
JOIN memory_tags t ON m.id = t.memory_id WHERE t.tag = 'contact'
|
|
87
|
-
AND m.superseded_by IS NULL
|
|
88
|
-
ORDER BY m.accessed_count DESC;
|
|
89
|
-
|
|
90
|
-
-- Most accessed memories (the ones that proved useful)
|
|
91
|
-
SELECT key, value, accessed_count, appeared_count,
|
|
92
|
-
CAST(accessed_count AS REAL) / MAX(appeared_count, 1) AS hit_rate
|
|
93
|
-
FROM memories WHERE accessed_count > 0
|
|
94
|
-
ORDER BY hit_rate DESC;
|
|
95
|
-
|
|
96
|
-
-- Search by key pattern
|
|
97
|
-
SELECT key, value FROM memories WHERE key LIKE 'account:%'
|
|
98
|
-
AND superseded_by IS NULL;
|
|
27
|
+
query_browser_profile(
|
|
28
|
+
query: string, // natural language query
|
|
29
|
+
tags?: string[] // optional: identity, contact_info, account, tool,
|
|
30
|
+
// address, payment, contact, work, knowledge
|
|
31
|
+
)
|
|
99
32
|
```
|
|
100
33
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
| Tag | What it covers | Example keys |
|
|
104
|
-
|-----|---------------|-------------|
|
|
105
|
-
| `identity` | Name, DOB, gender, job title, language | `first_name`, `last_name`, `full_name`, `date_of_birth` |
|
|
106
|
-
| `contact_info` | Email addresses, phone numbers | `email`, `phone` |
|
|
107
|
-
| `address` | Physical addresses | `street_address`, `city`, `state`, `zip`, `country` |
|
|
108
|
-
| `payment` | Card holder names, expiry | `card_holder_name`, `card_expiry`, `card_nickname` |
|
|
109
|
-
| `account` | Service accounts, login credentials | `account:{domain}` |
|
|
110
|
-
| `tool` | Tools/services used (from history) | `tool:GitHub`, `tool:Slack`, `tool:Stripe` |
|
|
111
|
-
| `contact` | People the user knows | `contact:{Name}`, `linkedin:{Name}` |
|
|
112
|
-
| `work` | Work-related (company, LinkedIn) | `company`, `linkedin:*` |
|
|
113
|
-
| `knowledge` | Interests, skills, projects, products | `product:*`, `project:*`, `interest:*` |
|
|
114
|
-
| `communication` | Messaging platforms | `tool:Slack`, `tool:WhatsApp` |
|
|
115
|
-
| `social` | Social platforms | `tool:LinkedIn`, `tool:X/Twitter` |
|
|
116
|
-
| `finance` | Financial tools | `tool:Stripe`, `tool:QuickBooks` |
|
|
117
|
-
|
|
118
|
-
## Ranking System
|
|
119
|
-
|
|
120
|
-
Every `search()`, `semantic_search()`, and `text_search()` call automatically increments both `appeared_count` and `accessed_count` for all returned results. No manual `mark_accessed()` calls needed.
|
|
121
|
-
|
|
122
|
-
**hit_rate** = `accessed_count / appeared_count`
|
|
123
|
-
|
|
124
|
-
Memories that are frequently returned by searches rise in ranking. The system is fully automatic — no manual curation or agent instrumentation needed.
|
|
34
|
+
Returns ranked results from the local database. Results are self-ranking — frequently accessed ones surface automatically.
|
|
125
35
|
|
|
126
|
-
##
|
|
36
|
+
## Full profile
|
|
127
37
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
## Task-Specific Tag Queries
|
|
131
|
-
|
|
132
|
-
| Task | Tags to search |
|
|
133
|
-
|------|---------------|
|
|
134
|
-
| Fill out a form | `["identity", "contact_info", "address"]` |
|
|
135
|
-
| Send an email | `["contact_info", "communication"]` + search contact by name |
|
|
136
|
-
| Book a flight/hotel | `["identity", "address", "payment"]` |
|
|
137
|
-
| Log into a service | `["account"]` |
|
|
138
|
-
| Invoice a client | `["identity", "work", "address", "payment"]` |
|
|
139
|
-
| Find a contact | `["contact"]` + filter by key pattern |
|
|
140
|
-
| Dev/deploy task | `["account", "tool"]` |
|
|
141
|
-
| Social media post | `["account", "social"]` |
|
|
142
|
-
| Research question | `mem.semantic_search("your question here")` |
|
|
143
|
-
|
|
144
|
-
## Rebuilding Memories
|
|
145
|
-
|
|
146
|
-
To refresh from latest browser data:
|
|
147
|
-
|
|
148
|
-
```bash
|
|
149
|
-
cd ~/ai-browser-profile
|
|
150
|
-
source .venv/bin/activate
|
|
151
|
-
python extract.py # full scan
|
|
152
|
-
python extract.py --browsers arc chrome # specific browsers
|
|
153
|
-
python extract.py --no-indexeddb --no-localstorage # fast, skip LevelDB
|
|
38
|
+
To get the complete user profile in one call:
|
|
154
39
|
```
|
|
155
|
-
|
|
156
|
-
### Backfill embeddings (after install-embeddings)
|
|
157
|
-
|
|
158
|
-
```python
|
|
159
|
-
import sys, os
|
|
160
|
-
sys.path.insert(0, os.path.expanduser("~/ai-browser-profile"))
|
|
161
|
-
from ai_browser_profile import MemoryDB
|
|
162
|
-
mem = MemoryDB(os.path.expanduser("~/ai-browser-profile/memories.db"))
|
|
163
|
-
n = mem.backfill_embeddings()
|
|
164
|
-
print(f"Embedded {n} memories")
|
|
165
|
-
mem.close()
|
|
40
|
+
query_browser_profile(query: "full profile")
|
|
166
41
|
```
|
|
167
42
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
## Dependencies
|
|
43
|
+
Returns name, emails, phone, addresses, payment info, companies, top tools, and accounts.
|
|
171
44
|
|
|
172
|
-
|
|
173
|
-
- `ccl_chromium_reader` — IndexedDB + Local Storage LevelDB files
|
|
174
|
-
- `numpy` — vector math for cosine similarity
|
|
45
|
+
## Availability
|
|
175
46
|
|
|
176
|
-
|
|
177
|
-
- `onnxruntime` — ONNX model inference
|
|
178
|
-
- `huggingface_hub` — model downloading
|
|
179
|
-
- `tokenizers` — text tokenization
|
|
180
|
-
- Model: nomic-embed-text-v1.5 (~131MB, downloads on first use)
|
|
47
|
+
Requires browser data extraction during onboarding. If queries return no results, call `extract_browser_profile` to re-run the extraction — it uses the native Swift extractor built into the app (no external tools needed).
|