ai-browser-profile 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -214,14 +214,19 @@ def inject_via_cdp(
214
214
  cookies: Iterable[Cookie],
215
215
  cdp_url: str = "http://127.0.0.1:9222",
216
216
  ) -> int:
217
- """Inject cookies into a running Chrome via CDP Storage.setCookies.
217
+ """Inject cookies into a running Chrome via CDP.
218
+
219
+ Tries Storage.setCookies at the browser root first. If the browser has
220
+ zero Page targets that command fails with "Browser context management is
221
+ not supported" — in that case we fall back to opening a stub about:blank
222
+ tab, attaching to it, and using Network.setCookies on that session.
218
223
 
219
224
  Args:
220
225
  cookies: iterable of Cookie objects.
221
226
  cdp_url: base http(s) URL of the Chrome DevTools endpoint, or a
222
227
  cdp://host:port shorthand, or a raw ws:// URL.
223
228
 
224
- Returns: number of cookies submitted (the browser accepts them as a batch).
229
+ Returns: number of cookies actually accepted by Chrome.
225
230
  """
226
231
  from websocket import create_connection
227
232
 
@@ -230,6 +235,22 @@ def inject_via_cdp(
230
235
  # unless the target was launched with --remote-allow-origins. Suppressing
231
236
  # the header bypasses the check; localhost CDP is already privileged.
232
237
  ws = create_connection(ws_url, timeout=10, suppress_origin=True)
238
+ msg_id = 0
239
+
240
+ def _send(method, params=None, session_id=None):
241
+ nonlocal msg_id
242
+ msg_id += 1
243
+ msg = {"id": msg_id, "method": method}
244
+ if params:
245
+ msg["params"] = params
246
+ if session_id:
247
+ msg["sessionId"] = session_id
248
+ ws.send(json.dumps(msg))
249
+ while True:
250
+ resp = json.loads(ws.recv())
251
+ if resp.get("id") == msg_id:
252
+ return resp
253
+
233
254
  try:
234
255
  batch = []
235
256
  for c in cookies:
@@ -248,19 +269,49 @@ def inject_via_cdp(
248
269
  batch.append(param)
249
270
  if not batch:
250
271
  return 0
251
- ws.send(json.dumps({
252
- "id": 1,
253
- "method": "Storage.setCookies",
254
- "params": {"cookies": batch},
255
- }))
256
- resp = json.loads(ws.recv())
257
- if "error" in resp:
258
- log.warning("Storage.setCookies failed: %s", resp["error"])
272
+
273
+ # First: try Storage.setCookies at the browser root. Works when at
274
+ # least one Page target exists (i.e. any tab is open).
275
+ resp = _send("Storage.setCookies", {"cookies": batch})
276
+ err = resp.get("error", {})
277
+ if not err:
278
+ log.info("Injected %d cookies via Storage.setCookies", len(batch))
279
+ return len(batch)
280
+
281
+ # Fallback: open a stub tab so the browser context is materialised,
282
+ # then use Network.setCookies on its session.
283
+ msg = err.get("message", "")
284
+ if "Browser context management is not supported" not in msg:
285
+ log.warning("Storage.setCookies failed: %s", err)
259
286
  return 0
287
+
288
+ log.info("Storage.setCookies unavailable (no tabs); opening stub tab and retrying via Network.setCookies")
289
+ target_id = None
290
+ try:
291
+ r = _send("Target.createTarget", {"url": "about:blank"})
292
+ target_id = r.get("result", {}).get("targetId")
293
+ if not target_id:
294
+ log.warning("Couldn't create stub tab: %s", r)
295
+ return 0
296
+ r = _send("Target.attachToTarget", {"targetId": target_id, "flatten": True})
297
+ session_id = r.get("result", {}).get("sessionId")
298
+ if not session_id:
299
+ log.warning("Couldn't attach to stub tab: %s", r)
300
+ return 0
301
+ r = _send("Network.setCookies", {"cookies": batch}, session_id=session_id)
302
+ if r.get("error"):
303
+ log.warning("Network.setCookies failed: %s", r["error"])
304
+ return 0
305
+ log.info("Injected %d cookies via Network.setCookies (per-tab fallback)", len(batch))
306
+ return len(batch)
307
+ finally:
308
+ if target_id:
309
+ try:
310
+ _send("Target.closeTarget", {"targetId": target_id})
311
+ except Exception:
312
+ pass
260
313
  finally:
261
314
  ws.close()
262
- log.info("Injected %d cookies via CDP", len(batch))
263
- return len(batch)
264
315
 
265
316
 
266
317
  # --- helpers used by CLI and external callers ---
@@ -0,0 +1,599 @@
1
+ """Read IndexedDB from a Chromium browser profile and inject into another.
2
+
3
+ Sibling to cookies.py / localstorage.py. Many modern web apps (Linear, Figma,
4
+ Notion's offline mode, Slack web) store their auth/session state in
5
+ IndexedDB rather than cookies or localStorage, so syncing cookies alone is
6
+ not enough to "log in" the destination Chrome. This module fills that gap:
7
+ it reads structured records from the source profile's IndexedDB LevelDB
8
+ store (via ccl_chromium_reader), then re-creates them in the destination
9
+ Chrome via CDP Runtime.evaluate using the standard IndexedDB JS API.
10
+
11
+ Public API:
12
+ read_indexeddb(profile, origins=None) -> dict[origin, list[IdbDbDump]]
13
+ inject_indexeddb_via_cdp(data, cdp_url, ...) -> (injected, total)
14
+
15
+ CLI:
16
+ python -m ai_browser_profile.indexeddb copy \\
17
+ --from arc:Default \\
18
+ --to cdp://127.0.0.1:9655 \\
19
+ --origins linear.app,figma.com
20
+
21
+ Like cookies.py / localstorage.py, this module is NOT wired into
22
+ extract_memories() — IndexedDB values frequently contain auth secrets and
23
+ must never land in memories.db.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import argparse
29
+ import json
30
+ import logging
31
+ import shutil
32
+ import sys
33
+ import tempfile
34
+ import time
35
+ from dataclasses import dataclass
36
+ from pathlib import Path
37
+ from typing import Any, Iterable, Optional
38
+ from urllib.parse import urlparse
39
+
40
+ from ai_browser_profile.ingestors.browser_detect import BrowserProfile, detect_browsers
41
+
42
+ log = logging.getLogger(__name__)
43
+
44
+
45
+ # --- IDB record reading from on-disk LevelDB ---------------------------------
46
+
47
+
48
@dataclass
class IdbRecord:
    """A single key/value pair read from one IndexedDB object store.

    Both fields hold JSON-safe Python values so the record can be inlined
    into a JavaScript expression with ``json.dumps``.
    """

    # JSON-safe Python value (str/int/float/None/list/dict)
    key: Any
    # JSON-safe Python value
    value: Any
52
+
53
+
54
@dataclass
class IdbDbDump:
    """All records read from one IndexedDB database of one origin."""

    # Database name, e.g. "linear-schema"
    name: str
    # Origin the database belongs to, e.g. "https://linear.app"
    origin: str
    # Object store name -> records read from that store
    stores: dict[str, list[IdbRecord]]
59
+
60
+
61
def _serialize_value(val: Any, depth: int = 0) -> Any:
    """Convert a ccl IndexedDB value to a JSON-safe Python structure.

    Mirrors the helper in ingestors/indexeddb.py but is duplicated here to
    keep this module self-contained (it ships independently of the ingestor
    pipeline).

    Args:
        val: value to convert.
        depth: current nesting level; callers normally leave this at 0.

    Returns:
        ``None``/bool/int/float/str unchanged; bytes-like values decoded as
        UTF-8 (``None`` when they are not valid UTF-8); dicts with stringified
        keys; lists for any list/tuple/set/frozenset; the unwrapped ``.value``
        for wrapper objects; ``str(val)`` for anything else. Anything nested
        deeper than 30 levels is replaced by ``None``.
    """
    # Depth cap guards against pathological or self-referential structures.
    if depth > 30:
        return None
    if val is None or isinstance(val, (bool, int, float, str)):
        return val
    if isinstance(val, (bytes, bytearray)):
        # Best-effort decode; binary auth blobs are rare in IDB but possible.
        try:
            return bytes(val).decode("utf-8")
        except UnicodeDecodeError:
            return None
    if isinstance(val, dict):
        return {str(k): _serialize_value(v, depth + 1) for k, v in val.items()}
    if isinstance(val, (list, tuple, set, frozenset)):
        return [_serialize_value(v, depth + 1) for v in val]
    if hasattr(val, "value"):
        # Wrapper object: serialize whatever it wraps.
        return _serialize_value(val.value, depth + 1)
    # Fallback: stringify unknown custom types (Date, Blob refs, etc.).
    return str(val)
88
+
89
+
90
def _extract_idb_key(key: Any) -> Any:
    """Extract a JSON-safe key from a ccl IdbKey.

    ccl's IdbKey instances render their value via repr like
    "<IdbKey linear_9bb29732457ddb5aa4b71132af9e8b43>". We unwrap that. For
    proper structured key access we use the value-carrying attributes when
    present.

    Args:
        key: the record key object, or ``None`` when the record has no key.

    Returns:
        ``None`` for a missing key; otherwise the serialized attribute value,
        or, as a fallback, the unwrapped string form converted to int/float
        when it parses as a number.
    """
    # A missing key must stay None: stringifying it would yield the literal
    # "None", which the injector would then use as a real explicit key.
    if key is None:
        return None
    # Probe the attribute names an IdbKey may expose its value under.
    for attr in ("raw_value", "value", "_value"):
        if hasattr(key, attr):
            return _serialize_value(getattr(key, attr))
    text = str(key)
    if text.startswith("<IdbKey ") and text.endswith(">"):
        text = text[len("<IdbKey "):-1]
    # Try numeric first (UUID and string keys remain str).
    try:
        return float(text) if "." in text else int(text)
    except (TypeError, ValueError):
        return text
112
+
113
+
114
def _copy_dir(src: Path) -> Path:
    """Snapshot a LevelDB directory into a temp location.

    LevelDB doesn't tolerate concurrent readers when the owning Chrome has
    an exclusive lock on it, and we want a stable snapshot even if Chrome
    later writes. Copy first, read second.

    Args:
        src: directory to snapshot.

    Returns:
        Path of the copy, named like ``src`` and located inside a freshly
        created temp directory. The caller owns that temp directory (the
        returned path's ``parent``) and must remove it.

    Raises:
        Whatever ``shutil.copytree`` raises (e.g. the source vanished
        mid-copy). The temp directory is removed before re-raising.
    """
    tmp = Path(tempfile.mkdtemp(prefix="ai_browser_profile_idb_"))
    dst = tmp / src.name
    try:
        shutil.copytree(src, dst)
    except BaseException:
        # Don't leak the temp dir when the copy fails part-way; the caller
        # never receives a path it could clean up.
        shutil.rmtree(tmp, ignore_errors=True)
        raise
    return dst
125
+
126
+
127
def _idb_dir_to_origin(name: str) -> Optional[str]:
    """Map a Chromium IndexedDB directory name back to an origin string.

    Chrome encodes the origin in the dir name as ``<scheme>_<host>_<port>``,
    e.g. 'https_linear.app_0.indexeddb.leveldb' -> 'https://linear.app'.
    A port of 0 means "no explicit port" and is omitted from the result for
    every scheme.

    Args:
        name: directory name, with or without the ``.indexeddb.leveldb`` /
            ``.indexeddb.blob`` suffix.

    Returns:
        The origin string, or ``None`` when the name does not have the
        ``<scheme>_<host>_<port>`` shape.
    """
    base = name
    for suffix in (".indexeddb.leveldb", ".indexeddb.blob"):
        if base.endswith(suffix):
            base = base[: -len(suffix)]
            break
    # The port is the last "_"-separated field; the host may contain "_".
    head, sep, port = base.rpartition("_")
    if not sep or not port.isdigit():
        return None
    scheme, sep, host = head.partition("_")
    if not sep:
        return None
    if port == "0":
        return f"{scheme}://{host}"
    return f"{scheme}://{host}:{port}"
151
+
152
+
153
def read_indexeddb(
    profile: BrowserProfile,
    origins: Optional[Iterable[str]] = None,
) -> dict[str, list[IdbDbDump]]:
    """Read IndexedDB databases from a Chromium profile.

    Every ``*.indexeddb.leveldb`` directory under ``<profile>/IndexedDB`` is
    snapshotted to a temp location (together with its sibling ``.blob``
    directory when present), parsed with ccl_chromium_reader, and the
    snapshot is removed again. A failure on one origin, database, store or
    record is logged/skipped and does not abort the rest.

    Args:
        profile: source BrowserProfile (must be Chromium-family).
        origins: optional iterable of substrings to keep (e.g. "linear.app").
            An origin is kept when any substring occurs in the decoded origin
            string (``scheme://host[:port]``). Blank entries are ignored.

    Returns:
        Mapping of origin URL -> list of IdbDbDump. Empty when the profile is
        Safari/Firefox or has no IndexedDB directory. Only stores with at
        least one record, and databases with at least one such store, are
        included.
    """
    from ccl_chromium_reader import ccl_chromium_indexeddb

    if profile.browser in ("safari", "firefox"):
        log.warning("IndexedDB sync only supports Chromium browsers; got %s", profile.browser)
        return {}

    idb_root = profile.path / "IndexedDB"
    if not idb_root.exists():
        log.warning("No IndexedDB dir at %s", idb_root)
        return {}

    origin_filter: Optional[list[str]] = (
        [o.strip() for o in origins if o and o.strip()] if origins else None
    )

    out: dict[str, list[IdbDbDump]] = {}
    skipped_dbs = 0

    for leveldb_dir in sorted(idb_root.glob("*.indexeddb.leveldb")):
        origin = _idb_dir_to_origin(leveldb_dir.name)
        if origin is None:
            continue
        if origin_filter and not any(f in origin for f in origin_filter):
            continue

        blob_dir = leveldb_dir.parent / leveldb_dir.name.replace(".leveldb", ".blob")

        tmp_db: Optional[Path] = None
        tmp_blob: Optional[Path] = None
        try:
            # Snapshot inside the try: Chrome may delete/compact files while
            # we copy, and one failed snapshot must neither abort the other
            # origins nor leak the snapshot that already succeeded.
            tmp_db = _copy_dir(leveldb_dir)
            if blob_dir.exists():
                tmp_blob = _copy_dir(blob_dir)

            wrapper = ccl_chromium_indexeddb.WrappedIndexDB(
                str(tmp_db),
                str(tmp_blob) if tmp_blob else None,
            )

            origin_dumps: list[IdbDbDump] = []
            for db_id in wrapper.database_ids:
                try:
                    db = wrapper[db_id.name, db_id.origin]
                except Exception as e:
                    log.debug("Skipping db %r (%s): %s", db_id.name, db_id.origin, e)
                    skipped_dbs += 1
                    continue

                stores: dict[str, list[IdbRecord]] = {}
                for sn in list(db.object_store_names):
                    try:
                        store = db.get_object_store_by_name(sn)
                    except Exception:
                        continue
                    recs: list[IdbRecord] = []
                    try:
                        for rec in store.iterate_records():
                            try:
                                val = _serialize_value(getattr(rec, "value", None))
                                if val is None:
                                    # Tombstones (deletions) — skip
                                    continue
                                key = _extract_idb_key(getattr(rec, "key", None))
                                recs.append(IdbRecord(key=key, value=val))
                            except Exception:
                                continue
                    except Exception as e:
                        # Some stores have ccl-unsupported value formats;
                        # log and continue rather than aborting the whole DB.
                        # Records collected so far for this store are dropped.
                        log.debug("Store %r/%r read failed: %s", db_id.name, sn, e)
                        continue
                    if recs:
                        stores[sn] = recs

                if stores:
                    origin_dumps.append(IdbDbDump(name=db_id.name, origin=origin, stores=stores))

            if origin_dumps:
                out.setdefault(origin, []).extend(origin_dumps)
        except Exception as e:
            log.warning("Failed to open %s: %s", leveldb_dir, e)
        finally:
            # _copy_dir returns <tempdir>/<name>; remove the whole temp dir.
            for snapshot in (tmp_db, tmp_blob):
                if snapshot is not None:
                    shutil.rmtree(snapshot.parent, ignore_errors=True)

    total_dbs = sum(len(v) for v in out.values())
    total_records = sum(
        len(recs)
        for dbs in out.values() for db in dbs for recs in db.stores.values()
    )
    log.info(
        "Read %d IndexedDB records across %d databases / %d origins from %s/%s (skipped %d unreadable dbs)",
        total_records, total_dbs, len(out), profile.browser, profile.name, skipped_dbs,
    )
    return out
261
+
262
+
263
+ # --- CDP injection -----------------------------------------------------------
264
+
265
+
266
+ # JS that runs inside the destination Chrome's page context (same-origin tab)
267
+ # and replays the records into IndexedDB via the standard JS API. Returns a
268
+ # JSON-able report with per-db success/error counts.
269
# NOTE: the placeholder token must appear exactly once below; it is replaced
# with the JSON payload before the script is evaluated.
_INJECT_JS = r"""
(async () => {
  const payload = __PAYLOAD__; // { dbs: [ { name, stores: { storeName: [ {key, value}, ... ] } } ] }
  const summary = [];

  for (const dbDump of payload.dbs) {
    const storeNames = Object.keys(dbDump.stores);
    let opened, openErr = null;

    // Step 1: open (creating stores if missing). We force a version bump only
    // when stores are missing — otherwise we open the current version.
    try {
      opened = await new Promise((resolve, reject) => {
        const tryOpen = (forceVersion) => {
          const req = forceVersion
            ? indexedDB.open(dbDump.name, forceVersion)
            : indexedDB.open(dbDump.name);
          req.onupgradeneeded = (e) => {
            const d = e.target.result;
            for (const sn of storeNames) {
              if (!d.objectStoreNames.contains(sn)) {
                // We don't know the original keyPath/autoIncrement reliably
                // from ccl; use out-of-line keys (no keyPath) so we can always
                // pass an explicit key on store.put(). Apps that auto-resolve
                // by `id` field still work because the value object usually
                // contains an `id` matching the key.
                try { d.createObjectStore(sn); } catch (err) { /* ignore */ }
              }
            }
          };
          req.onsuccess = (e) => {
            const d = e.target.result;
            // If all our stores already exist, we're done.
            let missing = storeNames.filter(n => !d.objectStoreNames.contains(n));
            if (missing.length === 0 || forceVersion) {
              resolve(d);
            } else {
              const next = (d.version || 1) + 1;
              d.close();
              tryOpen(next);
            }
          };
          req.onerror = () => reject(req.error || new Error("open failed"));
          req.onblocked = () => reject(new Error("open blocked"));
        };
        tryOpen(undefined);
      });
    } catch (e) {
      openErr = String(e && e.message ? e.message : e);
      summary.push({ db: dbDump.name, opened: false, error: openErr, written: 0, errored: 0 });
      continue;
    }

    // Step 2: write records into each existing store. We use a separate
    // transaction per store so a failure in one doesn't abort the rest.
    let totalWritten = 0, totalErrored = 0;
    for (const sn of storeNames) {
      if (!opened.objectStoreNames.contains(sn)) {
        totalErrored += dbDump.stores[sn].length;
        continue;
      }
      const recs = dbDump.stores[sn];
      if (!recs.length) continue;
      const txResult = await new Promise((resolve) => {
        let tx;
        try { tx = opened.transaction(sn, "readwrite"); }
        catch (e) { resolve({ written: 0, errored: recs.length, fatal: String(e) }); return; }
        const store = tx.objectStore(sn);
        let written = 0, errored = 0;
        tx.oncomplete = () => resolve({ written, errored });
        // An errored/aborted transaction is rolled back as a whole, so none
        // of the puts issued below persisted — report every record as failed.
        const rolledBack = () => resolve({ written: 0, errored: recs.length });
        tx.onerror = rolledBack;
        tx.onabort = rolledBack;
        for (const rec of recs) {
          try {
            // Try out-of-line put first (we created the store without keyPath).
            // If the *existing* store has a keyPath, this throws DataError,
            // and we fall back to an in-line put (key embedded in value).
            try {
              if (rec.key !== null && rec.key !== undefined) {
                store.put(rec.value, rec.key);
              } else {
                store.put(rec.value);
              }
            } catch (eOut) {
              // keyPath store — try without explicit key
              try { store.put(rec.value); }
              catch (eIn) { errored += 1; continue; }
            }
            written += 1;
          } catch (e) {
            errored += 1;
          }
        }
      });
      totalWritten += txResult.written;
      totalErrored += txResult.errored;
    }
    opened.close();
    summary.push({ db: dbDump.name, opened: true, written: totalWritten, errored: totalErrored });
  }

  return JSON.stringify({ summary });
})()
"""
375
+
376
+
377
def _ws_from_cdp_url(cdp_url: str) -> str:
    """Resolve a CDP endpoint to the browser-target WebSocket URL.

    Args:
        cdp_url: an http(s) base URL of the DevTools endpoint, a
            ``cdp://host:port`` shorthand (treated as ``http://host:port``),
            or a raw ``ws://`` / ``wss://`` URL, which is returned unchanged.

    Returns:
        The ``webSocketDebuggerUrl`` reported by ``<base>/json/version``, or
        ``cdp_url`` itself when it already is a WebSocket URL.

    Raises:
        urllib.error.URLError: the endpoint could not be reached.
        KeyError: the response has no ``webSocketDebuggerUrl`` field.
    """
    import urllib.request

    # Already a WebSocket URL: nothing to look up.
    if cdp_url.startswith(("ws://", "wss://")):
        return cdp_url
    if cdp_url.startswith("cdp://"):
        cdp_url = "http://" + cdp_url[len("cdp://"):]
    base = cdp_url.rstrip("/")
    # Context manager closes the HTTP response instead of leaving it to GC.
    with urllib.request.urlopen(f"{base}/json/version", timeout=5) as resp:
        info = json.loads(resp.read())
    return info["webSocketDebuggerUrl"]
386
+
387
+
388
def _cdp_send(ws, msg_id: int, method: str,
              params: Optional[dict] = None,
              session_id: Optional[str] = None) -> dict:
    """Send one CDP command and return the response carrying the same id.

    Args:
        ws: connected WebSocket object exposing ``send(str)`` and ``recv()``.
        msg_id: id to tag the command with; the matching response is returned.
        method: CDP method name, e.g. "Target.createTarget".
        params: command parameters; omitted from the message when empty.
        session_id: target session to address; omitted when empty.

    Returns:
        The decoded response message (it may contain "result" or "error").

    Raises:
        TimeoutError: no matching response was seen before the 30 s deadline.
            The deadline is only checked between received messages; a
            blocking ``recv()`` is bounded by the socket's own timeout.
    """
    msg: dict = {"id": msg_id, "method": method}
    if params:
        msg["params"] = params
    if session_id:
        msg["sessionId"] = session_id
    ws.send(json.dumps(msg))
    # Monotonic clock: a wall-clock adjustment must not stretch or cut the wait.
    deadline = time.monotonic() + 30
    while time.monotonic() < deadline:
        resp = json.loads(ws.recv())
        # Events and responses to other commands are skipped.
        if resp.get("id") == msg_id:
            return resp
    raise TimeoutError(f"CDP {method} timed out")
403
+
404
+
405
def inject_indexeddb_via_cdp(
    data: dict[str, list[IdbDbDump]],
    cdp_url: str = "http://127.0.0.1:9655",
    load_wait_sec: float = 4.0,
) -> tuple[int, int]:
    """Inject IndexedDB records into a running Chrome via per-origin tabs.

    For each origin we open a new tab at that origin (so the JS context is
    same-origin), wait for initial load to let the destination site bootstrap
    its own IDB schema, then run a single Runtime.evaluate that replays all
    of our records. The tab is closed again afterwards.

    Origins that do not start with "http" or that contain "^" (partitioned
    storage) are skipped and not counted. A failure while processing one
    origin is logged and the remaining origins are still attempted.

    Args:
        data: mapping of origin URL -> database dumps, as returned by
            read_indexeddb().
        cdp_url: DevTools endpoint of the destination browser.
        load_wait_sec: seconds to wait after opening each tab before
            injecting.

    Returns:
        ``(written, total)``: records the page reported as written, and the
        number of records that were attempted.

    Raises:
        Errors from resolving or connecting to the CDP endpoint propagate;
        per-origin errors do not.
    """
    from websocket import create_connection

    ws_url = _ws_from_cdp_url(cdp_url)
    ws = create_connection(ws_url, timeout=20, suppress_origin=True)
    msg_id = 0
    total_records = 0
    total_written = 0

    def send(method: str, params: Optional[dict] = None,
             session_id: Optional[str] = None) -> dict:
        """Issue one CDP command with a fresh message id."""
        nonlocal msg_id
        msg_id += 1
        return _cdp_send(ws, msg_id, method, params, session_id=session_id)

    try:
        for origin, dumps in data.items():
            if not dumps:
                continue
            if not origin.startswith("http"):
                log.warning("Skipping non-http origin %r", origin)
                continue
            if "^" in origin:
                log.info("Skipping partitioned origin %r", origin)
                continue

            origin_total = sum(
                len(recs) for db in dumps for recs in db.stores.values()
            )
            total_records += origin_total

            url = origin.rstrip("/") + "/"
            target_id = None
            try:
                r = send("Target.createTarget", {"url": url})
                target_id = r.get("result", {}).get("targetId")
                if not target_id:
                    log.warning("Couldn't create tab for %s: %s", origin, r)
                    continue

                r = send("Target.attachToTarget",
                         {"targetId": target_id, "flatten": True})
                session_id = r.get("result", {}).get("sessionId")
                if not session_id:
                    log.warning("Couldn't attach to tab for %s: %s", origin, r)
                    continue

                # Let the destination site finish its initial bootstrap (it
                # may create its own IDB schema with the canonical keyPath /
                # version; we then add to it).
                time.sleep(load_wait_sec)

                # Serialize the data to JSON and inline into the JS expression.
                # Records can be large; CDP accepts multi-MB expressions.
                payload = {
                    "dbs": [
                        {
                            "name": db.name,
                            "stores": {
                                sn: [{"key": rec.key, "value": rec.value} for rec in recs]
                                for sn, recs in db.stores.items()
                            },
                        }
                        for db in dumps
                    ]
                }
                expression = _INJECT_JS.replace("__PAYLOAD__", json.dumps(payload))

                r = send(
                    "Runtime.evaluate",
                    {
                        "expression": expression,
                        "awaitPromise": True,
                        "returnByValue": True,
                        "timeout": 60000,
                    },
                    session_id=session_id,
                )
                # Protocol-level failure (as opposed to a JS exception):
                # surface it instead of silently reporting zero writes.
                if r.get("error"):
                    log.warning(" %s: Runtime.evaluate failed: %s", origin, r["error"])
                    continue
                outcome = r.get("result", {})
                exc = outcome.get("exceptionDetails")
                if exc:
                    log.warning(" %s: JS error %s", origin, exc.get("text") or exc)
                    continue
                value = outcome.get("result", {}).get("value")
                try:
                    summary = json.loads(value).get("summary", []) if isinstance(value, str) else []
                except Exception:
                    summary = []
                origin_written = 0
                for s in summary:
                    if s.get("opened"):
                        origin_written += s.get("written", 0)
                        if s.get("errored"):
                            log.warning(" %s/%s: %d errored", origin, s.get("db"), s.get("errored"))
                    else:
                        log.warning(" %s/%s: open failed (%s)", origin, s.get("db"), s.get("error"))
                total_written += origin_written
                log.info(" %s: wrote %d/%d records", origin, origin_written, origin_total)
            except Exception as e:
                # One origin timing out or returning garbage must not abort
                # the remaining origins.
                log.warning(" %s: injection failed: %s", origin, e)
            finally:
                if target_id:
                    try:
                        send("Target.closeTarget", {"targetId": target_id})
                    except Exception as e:
                        # Best-effort cleanup; a stray tab is not fatal.
                        log.debug("Couldn't close tab for %s: %s", origin, e)
    finally:
        ws.close()

    log.info("Injected %d/%d IndexedDB records total", total_written, total_records)
    return total_written, total_records
524
+
525
+
526
+ # --- CLI ---------------------------------------------------------------------
527
+
528
+
529
def _find_profile(spec: str) -> BrowserProfile:
    """Resolve a ``browser[:profile]`` spec to a detected BrowserProfile.

    Args:
        spec: e.g. "arc:Default" or "chrome:Profile 1". Without a colon the
            profile name defaults to "Default".

    Returns:
        The first detected profile of that browser with a matching name.

    Raises:
        SystemExit: no such profile; the message lists the profiles that
            were detected for that browser.
    """
    if ":" in spec:
        browser, name = spec.split(":", 1)
    else:
        browser, name = spec, "Default"
    # Scan once and reuse the result for both the lookup and the error text.
    candidates = list(detect_browsers({browser}))
    for candidate in candidates:
        if candidate.name == name:
            return candidate
    available = [(p.browser, p.name) for p in candidates]
    raise SystemExit(
        f"No profile {spec!r}. Available {browser} profiles: {available}"
    )
541
+
542
+
543
def _cli(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point for the ``copy`` and ``list`` subcommands.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: for ``list`` always 0; for ``copy`` 0 when at
        least one record was written and 2 otherwise; 1 for any other
        command value.
    """
    parser = argparse.ArgumentParser(prog="python -m ai_browser_profile.indexeddb")
    commands = parser.add_subparsers(dest="cmd", required=True)

    copy_cmd = commands.add_parser(
        "copy",
        help="copy IndexedDB databases from a local profile into a running browser via CDP",
    )
    copy_cmd.add_argument(
        "--from", dest="src", required=True,
        help="source profile, e.g. arc:Default or 'chrome:Profile 1'",
    )
    copy_cmd.add_argument(
        "--to", dest="dst", required=True,
        help="target CDP endpoint, e.g. cdp://127.0.0.1:9655 or http://127.0.0.1:9655",
    )
    copy_cmd.add_argument(
        "--origins", default=None,
        help="comma-separated host substrings (e.g. 'linear.app,figma.com')",
    )
    copy_cmd.add_argument(
        "--load-wait", type=float, default=4.0,
        help="seconds to wait after opening each tab before injecting (default 4)",
    )
    copy_cmd.add_argument("-v", "--verbose", action="store_true")

    list_cmd = commands.add_parser(
        "list",
        help="list IndexedDB databases in a local profile (counts only)",
    )
    list_cmd.add_argument("--from", dest="src", required=True)
    list_cmd.add_argument("--origins", default=None)

    args = parser.parse_args(argv)

    # Only "copy" defines --verbose, hence the getattr default.
    wants_debug = getattr(args, "verbose", False)
    logging.basicConfig(
        level=logging.DEBUG if wants_debug else logging.INFO,
        format="%(levelname)s %(message)s",
    )

    profile = _find_profile(args.src)
    origin_filters = None
    if args.origins:
        origin_filters = [part.strip() for part in args.origins.split(",")]

    data = read_indexeddb(profile, origins=origin_filters)

    if args.cmd == "list":
        grand_total = 0
        db_count = 0
        for origin, dumps in sorted(data.items()):
            total = sum(len(recs) for d in dumps for recs in d.stores.values())
            grand_total += total
            db_count += len(dumps)
            print(f" {total:5} {origin} ({len(dumps)} db)")
            for d in dumps:
                store_summaries = ", ".join(
                    f"{sn}={len(recs)}" for sn, recs in d.stores.items()
                )
                print(f" db={d.name!r} stores: {store_summaries}")
        print(f"Total: {grand_total} records across {db_count} databases / {len(data)} origins")
        return 0

    if args.cmd == "copy":
        written, total = inject_indexeddb_via_cdp(data, args.dst, load_wait_sec=args.load_wait)
        print(f"Injected {written}/{total} IndexedDB records into {args.dst}")
        if written > 0:
            return 0
        return 2

    return 1
596
+
597
+
598
# Script entry point: run the CLI and use its return value as the process
# exit status.
if __name__ == "__main__":
    sys.exit(_cli())
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-browser-profile",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "ai-browser-profile": "bin/cli.js"
@@ -46,9 +46,9 @@
46
46
  "devDependencies": {
47
47
  "@assistant-ui/react": "^0.12.25",
48
48
  "@google/generative-ai": "^0.24.1",
49
- "@m13v/seo-components": "^0.40.0",
49
+ "@m13v/seo-components": "^0.41.1",
50
50
  "@remotion/player": "^4.0.446",
51
- "@seo/components": "npm:@m13v/seo-components@^0.40.0",
51
+ "@seo/components": "npm:@m13v/seo-components@^0.41.1",
52
52
  "@supabase/supabase-js": "^2.103.3",
53
53
  "@tailwindcss/postcss": "^4",
54
54
  "@types/node": "^20",