ai-browser-profile 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -143,6 +143,22 @@ def read_cookies(
143
143
  )
144
144
 
145
145
  domain_filters = list(domains) if domains else None
146
+
147
+ def _host_matches(host: str) -> bool:
148
+ # Domain-suffix match: 'x.com' matches 'x.com' / 'api.x.com' but not 'fedex.com'.
149
+ # Cookie host_keys often start with '.' for "all subdomains" — strip that.
150
+ h = host or ""
151
+ if "://" in h:
152
+ h = h.split("://", 1)[1]
153
+ h = h.split("/", 1)[0].split(":", 1)[0].lstrip(".").lower()
154
+ for f in (domain_filters or []):
155
+ ff = (f or "").strip().lstrip(".").lower()
156
+ if not ff:
157
+ continue
158
+ if h == ff or h.endswith("." + ff):
159
+ return True
160
+ return False
161
+
146
162
  key = _derive_key(_keychain_password(profile.browser))
147
163
  cookies: list[Cookie] = []
148
164
  skipped = 0
@@ -167,7 +183,7 @@ def read_cookies(
167
183
  )
168
184
  for row in rows:
169
185
  host = _txt(row["host_key"])
170
- if domain_filters and not any(d in host for d in domain_filters):
186
+ if domain_filters and not _host_matches(host):
171
187
  continue
172
188
  value = _txt(row["value"])
173
189
  if not value and row["encrypted_value"]:
@@ -179,6 +179,21 @@ def read_indexeddb(
179
179
  [o.strip() for o in origins if o and o.strip()] if origins else None
180
180
  )
181
181
 
182
+ def _host_matches(origin: str) -> bool:
183
+ # Domain-suffix match: filter 'x.com' matches 'x.com' and
184
+ # 'api.x.com' but NOT 'fedex.com' / 'swiftpackageindex.com'.
185
+ h = origin or ""
186
+ if "://" in h:
187
+ h = h.split("://", 1)[1]
188
+ h = h.split("/", 1)[0].split(":", 1)[0].lstrip(".").lower()
189
+ for f in (origin_filter or []):
190
+ ff = (f or "").strip().lstrip(".").lower()
191
+ if not ff:
192
+ continue
193
+ if h == ff or h.endswith("." + ff):
194
+ return True
195
+ return False
196
+
182
197
  # Defaults to skip even when no explicit filter is given:
183
198
  # chrome-extension:// — extensions, not portable across browsers
184
199
  # localhost / 127.* — dev servers, irrelevant across machines
@@ -211,7 +226,7 @@ def read_indexeddb(
211
226
  if origin is None:
212
227
  continue
213
228
  if origin_filter:
214
- if not any(f in origin for f in origin_filter):
229
+ if not _host_matches(origin):
215
230
  continue
216
231
  else:
217
232
  # No explicit filter — apply default safety skips.
@@ -442,12 +457,16 @@ def inject_indexeddb_via_cdp(
442
457
  cdp_url: str = "http://127.0.0.1:9655",
443
458
  load_wait_sec: float = 4.0,
444
459
  ) -> tuple[int, int]:
445
- """Inject IndexedDB records into a running Chrome via per-origin tabs.
460
+ """Inject IndexedDB records into a running Chrome via a single reused tab.
446
461
 
447
- Returns (written, total). For each origin we open a new tab at that
448
- origin (so the JS context is same-origin), wait for initial load to let
449
- the destination site bootstrap its own IDB schema, then run a single
450
- Runtime.evaluate that replays all of our records.
462
+ Returns (written, total). Opens ONE tab at the start, hides it off-screen,
463
+ then navigates that same tab through each origin in sequence. For each
464
+ origin: navigate, wait for bootstrap, run a single Runtime.evaluate that
465
+ replays the IDB records via the standard JS API. Closes the tab at end.
466
+
467
+ This replaces the previous pattern of opening one visible tab per origin
468
+ (which produced a flood of tab open/close churn when many domains were in
469
+ the import list).
451
470
  """
452
471
  from websocket import create_connection
453
472
 
@@ -456,8 +475,46 @@ def inject_indexeddb_via_cdp(
456
475
  msg_id = 0
457
476
  total_records = 0
458
477
  total_written = 0
478
+ target_id: Optional[str] = None
479
+ session_id: Optional[str] = None
459
480
 
460
481
  try:
482
+ # Open ONE reusable tab. We start at about:blank and navigate it
483
+ # per origin below; reusing the tab is what eliminates the visible
484
+ # "open a tab per origin" UX issue when many domains are in scope.
485
+ msg_id += 1
486
+ r = _cdp_send(ws, msg_id, "Target.createTarget", {"url": "about:blank"})
487
+ target_id = r.get("result", {}).get("targetId")
488
+ if not target_id:
489
+ log.warning("createTarget(about:blank) failed: %s", r.get("error"))
490
+ return 0, 0
491
+
492
+ msg_id += 1
493
+ r = _cdp_send(ws, msg_id, "Target.attachToTarget",
494
+ {"targetId": target_id, "flatten": True})
495
+ session_id = r.get("result", {}).get("sessionId")
496
+ if not session_id:
497
+ log.warning("attachToTarget(about:blank) failed: %s", r.get("error"))
498
+ return 0, 0
499
+
500
+ # Hide the import window off-screen so the user doesn't see it bounce
501
+ # through every origin. Best-effort.
502
+ try:
503
+ msg_id += 1
504
+ w = _cdp_send(ws, msg_id, "Browser.getWindowForTarget",
505
+ {"targetId": target_id})
506
+ window_id = w.get("result", {}).get("windowId")
507
+ if window_id:
508
+ msg_id += 1
509
+ _cdp_send(ws, msg_id, "Browser.setWindowBounds", {
510
+ "windowId": window_id,
511
+ "bounds": {"left": -32000, "top": -32000,
512
+ "width": 800, "height": 600,
513
+ "windowState": "normal"},
514
+ })
515
+ except Exception as e:
516
+ log.debug("Could not hide import window: %s", e)
517
+
461
518
  for origin, dumps in data.items():
462
519
  if not dumps:
463
520
  continue
@@ -474,84 +531,75 @@ def inject_indexeddb_via_cdp(
474
531
  total_records += origin_total
475
532
 
476
533
  url = origin.rstrip("/") + "/"
477
- target_id = None
478
- session_id = None
479
- try:
480
- msg_id += 1
481
- r = _cdp_send(ws, msg_id, "Target.createTarget", {"url": url})
482
- target_id = r.get("result", {}).get("targetId")
483
- if not target_id:
484
- log.warning("Couldn't create tab for %s: %s", origin, r)
485
- continue
486
534
 
487
- msg_id += 1
488
- r = _cdp_send(ws, msg_id, "Target.attachToTarget",
489
- {"targetId": target_id, "flatten": True})
490
- session_id = r.get("result", {}).get("sessionId")
491
- if not session_id:
492
- log.warning("Couldn't attach to tab for %s: %s", origin, r)
493
- continue
535
+ # Navigate the SAME tab to this origin. No new tab is created.
536
+ msg_id += 1
537
+ nav = _cdp_send(ws, msg_id, "Page.navigate", {"url": url},
538
+ session_id=session_id)
539
+ err = nav.get("result", {}).get("errorText") or nav.get("error")
540
+ if err:
541
+ log.warning(" %s: navigate failed (%s)", origin, err)
542
+ continue
494
543
 
495
- # Let the destination site finish its initial bootstrap (it
496
- # may create its own IDB schema with the canonical keyPath /
497
- # version; we then add to it).
498
- time.sleep(load_wait_sec)
499
-
500
- # Serialize the data to JSON and inline into the JS expression.
501
- # Records can be large; CDP accepts multi-MB expressions.
502
- payload = {
503
- "dbs": [
504
- {
505
- "name": db.name,
506
- "stores": {
507
- sn: [{"key": r.key, "value": r.value} for r in recs]
508
- for sn, recs in db.stores.items()
509
- },
510
- }
511
- for db in dumps
512
- ]
513
- }
514
- expression = _INJECT_JS.replace("__PAYLOAD__", json.dumps(payload))
544
+ # Let the destination site finish its initial bootstrap (it may
545
+ # create its own IDB schema with the canonical keyPath/version;
546
+ # we then add to it).
547
+ time.sleep(load_wait_sec)
515
548
 
516
- msg_id += 1
517
- r = _cdp_send(
518
- ws, msg_id, "Runtime.evaluate",
549
+ # Serialize the data to JSON and inline into the JS expression.
550
+ # Records can be large; CDP accepts multi-MB expressions.
551
+ payload = {
552
+ "dbs": [
519
553
  {
520
- "expression": expression,
521
- "awaitPromise": True,
522
- "returnByValue": True,
523
- "timeout": 60000,
524
- },
525
- session_id=session_id,
526
- )
527
- result = r.get("result", {}).get("result", {})
528
- exc = r.get("result", {}).get("exceptionDetails")
529
- if exc:
530
- log.warning(" %s: JS error %s", origin, exc.get("text") or exc)
531
- continue
532
- value = result.get("value")
533
- try:
534
- summary = json.loads(value).get("summary", []) if isinstance(value, str) else []
535
- except Exception:
536
- summary = []
537
- origin_written = 0
538
- for s in summary:
539
- if s.get("opened"):
540
- origin_written += s.get("written", 0)
541
- if s.get("errored"):
542
- log.warning(" %s/%s: %d errored", origin, s.get("db"), s.get("errored"))
543
- else:
544
- log.warning(" %s/%s: open failed (%s)", origin, s.get("db"), s.get("error"))
545
- total_written += origin_written
546
- log.info(" %s: wrote %d/%d records", origin, origin_written, origin_total)
547
- finally:
548
- if target_id:
549
- try:
550
- msg_id += 1
551
- _cdp_send(ws, msg_id, "Target.closeTarget", {"targetId": target_id})
552
- except Exception:
553
- pass
554
+ "name": db.name,
555
+ "stores": {
556
+ sn: [{"key": r.key, "value": r.value} for r in recs]
557
+ for sn, recs in db.stores.items()
558
+ },
559
+ }
560
+ for db in dumps
561
+ ]
562
+ }
563
+ expression = _INJECT_JS.replace("__PAYLOAD__", json.dumps(payload))
564
+
565
+ msg_id += 1
566
+ r = _cdp_send(
567
+ ws, msg_id, "Runtime.evaluate",
568
+ {
569
+ "expression": expression,
570
+ "awaitPromise": True,
571
+ "returnByValue": True,
572
+ "timeout": 60000,
573
+ },
574
+ session_id=session_id,
575
+ )
576
+ result = r.get("result", {}).get("result", {})
577
+ exc = r.get("result", {}).get("exceptionDetails")
578
+ if exc:
579
+ log.warning(" %s: JS error %s", origin, exc.get("text") or exc)
580
+ continue
581
+ value = result.get("value")
582
+ try:
583
+ summary = json.loads(value).get("summary", []) if isinstance(value, str) else []
584
+ except Exception:
585
+ summary = []
586
+ origin_written = 0
587
+ for s in summary:
588
+ if s.get("opened"):
589
+ origin_written += s.get("written", 0)
590
+ if s.get("errored"):
591
+ log.warning(" %s/%s: %d errored", origin, s.get("db"), s.get("errored"))
592
+ else:
593
+ log.warning(" %s/%s: open failed (%s)", origin, s.get("db"), s.get("error"))
594
+ total_written += origin_written
595
+ log.info(" %s: wrote %d/%d records", origin, origin_written, origin_total)
554
596
  finally:
597
+ if target_id:
598
+ try:
599
+ msg_id += 1
600
+ _cdp_send(ws, msg_id, "Target.closeTarget", {"targetId": target_id})
601
+ except Exception:
602
+ pass
555
603
  ws.close()
556
604
 
557
605
  log.info("Injected %d/%d IndexedDB records total", total_written, total_records)
@@ -33,6 +33,29 @@ from ai_browser_profile.cookies import _ws_from_cdp_url, find_profile
33
33
  log = logging.getLogger(__name__)
34
34
 
35
35
 
36
+ def _host_matches(host: str, filters: list[str]) -> bool:
37
+ """Domain-suffix match: filter 'x.com' matches 'x.com' and 'sub.x.com',
38
+ but NOT 'fedex.com' or 'swiftpackageindex.com'.
39
+
40
+ Accepts either a raw host like 'example.com' or a full origin like
41
+ 'https://example.com' / 'https://example.com:8080'.
42
+ """
43
+ if not host:
44
+ return False
45
+ h = host
46
+ if "://" in h:
47
+ h = h.split("://", 1)[1]
48
+ h = h.split("/", 1)[0].split(":", 1)[0] # strip path and port
49
+ h = h.lstrip(".").lower()
50
+ for f in filters:
51
+ ff = f.strip().lstrip(".").lower()
52
+ if not ff:
53
+ continue
54
+ if h == ff or h.endswith("." + ff):
55
+ return True
56
+ return False
57
+
58
+
36
59
  def read_localstorage(
37
60
  profile: BrowserProfile,
38
61
  origins: Optional[Iterable[str]] = None,
@@ -77,7 +100,7 @@ def read_localstorage(
77
100
  value = record.value
78
101
  if not origin or not key or value is None:
79
102
  continue
80
- if origin_filters and not any(f in origin for f in origin_filters):
103
+ if origin_filters and not _host_matches(origin, origin_filters):
81
104
  continue
82
105
  if isinstance(value, bytes):
83
106
  try:
@@ -125,19 +148,23 @@ def inject_localstorage_via_cdp(
125
148
  cdp_url: str = "http://127.0.0.1:9222",
126
149
  load_wait_sec: float = 4.0,
127
150
  ) -> int:
128
- """Inject localStorage into a running Chrome via per-origin tabs.
151
+ """Inject localStorage into a running Chrome by reusing a single hidden tab.
152
+
153
+ Opens ONE tab at the start, hides it off-screen, then navigates that same
154
+ tab through each origin in sequence to run a localStorage.setItem batch in
155
+ the page's JS context. Closes the tab at the end. Returns total items
156
+ written.
129
157
 
130
- For each origin: opens a new tab to that origin (so the JS context is
131
- same-origin), waits for load, evaluates a localStorage.setItem batch via
132
- Runtime.evaluate, then closes the tab. Returns total items written.
158
+ This replaces the previous pattern of opening one visible tab per origin
159
+ (which produced a flood of tab open/close churn when many domains were in
160
+ the import list).
133
161
 
134
162
  Args:
135
163
  data: dict of {origin -> {key: value}}. Origin must be http(s)://...
136
164
  cdp_url: base http(s) URL of the Chrome DevTools endpoint or a
137
165
  cdp://host:port shorthand.
138
- load_wait_sec: how long to wait between tab open and the JS eval to
139
- let the page initialize (no Page.loadEventFired listener
140
- yet — keep simple, race-tolerant via the JS try/catch).
166
+ load_wait_sec: seconds to wait after navigating between origins before
167
+ injecting (lets the destination page initialize).
141
168
  """
142
169
  from websocket import create_connection
143
170
 
@@ -145,8 +172,47 @@ def inject_localstorage_via_cdp(
145
172
  ws = create_connection(ws_url, timeout=15, suppress_origin=True)
146
173
  msg_id = 0
147
174
  total_set = 0
175
+ target_id: Optional[str] = None
176
+ session_id: Optional[str] = None
148
177
 
149
178
  try:
179
+ # Open ONE reusable tab. We start at about:blank and navigate it
180
+ # per origin below; reusing the tab is what eliminates the visible
181
+ # "29 tabs flashing open" UX issue.
182
+ msg_id += 1
183
+ r = _cdp_send(ws, msg_id, "Target.createTarget", {"url": "about:blank"})
184
+ target_id = r.get("result", {}).get("targetId")
185
+ if not target_id:
186
+ log.warning("createTarget(about:blank) failed: %s", r.get("error"))
187
+ return 0
188
+
189
+ msg_id += 1
190
+ r = _cdp_send(ws, msg_id, "Target.attachToTarget",
191
+ {"targetId": target_id, "flatten": True})
192
+ session_id = r.get("result", {}).get("sessionId")
193
+ if not session_id:
194
+ log.warning("attachToTarget(about:blank) failed: %s", r.get("error"))
195
+ return 0
196
+
197
+ # Move the tab's window way off-screen so the user doesn't see it
198
+ # bounce through every origin. Best-effort; some Chrome builds reject
199
+ # negative window bounds, in which case we just stay on-screen.
200
+ try:
201
+ msg_id += 1
202
+ w = _cdp_send(ws, msg_id, "Browser.getWindowForTarget",
203
+ {"targetId": target_id})
204
+ window_id = w.get("result", {}).get("windowId")
205
+ if window_id:
206
+ msg_id += 1
207
+ _cdp_send(ws, msg_id, "Browser.setWindowBounds", {
208
+ "windowId": window_id,
209
+ "bounds": {"left": -32000, "top": -32000,
210
+ "width": 800, "height": 600,
211
+ "windowState": "normal"},
212
+ })
213
+ except Exception as e:
214
+ log.debug("Could not hide import window: %s", e)
215
+
150
216
  for origin, items in data.items():
151
217
  if not items:
152
218
  continue
@@ -162,53 +228,44 @@ def inject_localstorage_via_cdp(
162
228
  continue
163
229
  url = origin.rstrip("/") + "/"
164
230
 
165
- target_id = None
166
- try:
167
- msg_id += 1
168
- r = _cdp_send(ws, msg_id, "Target.createTarget", {"url": url})
169
- target_id = r.get("result", {}).get("targetId")
170
- if not target_id:
171
- log.warning("createTarget failed for %s: %s", origin, r.get("error"))
172
- continue
173
-
174
- msg_id += 1
175
- r = _cdp_send(ws, msg_id, "Target.attachToTarget",
176
- {"targetId": target_id, "flatten": True})
177
- session_id = r.get("result", {}).get("sessionId")
178
- if not session_id:
179
- log.warning("attachToTarget failed for %s", origin)
180
- continue
181
-
182
- time.sleep(load_wait_sec)
231
+ # Navigate the SAME tab to this origin. No new tab is created.
232
+ msg_id += 1
233
+ nav = _cdp_send(ws, msg_id, "Page.navigate", {"url": url},
234
+ session_id=session_id)
235
+ err = nav.get("result", {}).get("errorText") or nav.get("error")
236
+ if err:
237
+ log.warning(" %s: navigate failed (%s)", origin, err)
238
+ continue
183
239
 
184
- # Inline the items as a JS object literal; localStorage rejects
185
- # non-string values implicitly by coercion (we already string-
186
- # coerced in read_localstorage).
187
- expr = (
188
- "(function(){try{var items=" + json.dumps(items) + ";"
189
- "var n=0;for(var k in items){try{localStorage.setItem(k,items[k]);n++;}catch(e){}}"
190
- "return n;}catch(e){return 'ERROR:'+e.toString();}})()"
191
- )
192
- msg_id += 1
193
- r = _cdp_send(
194
- ws, msg_id, "Runtime.evaluate",
195
- {"expression": expr, "returnByValue": True},
196
- session_id=session_id,
197
- )
198
- value = r.get("result", {}).get("result", {}).get("value")
199
- if isinstance(value, int):
200
- total_set += value
201
- log.info(" %s: set %d/%d items", origin, value, len(items))
202
- else:
203
- log.warning(" %s: %s", origin, value)
204
- finally:
205
- if target_id:
206
- try:
207
- msg_id += 1
208
- _cdp_send(ws, msg_id, "Target.closeTarget", {"targetId": target_id})
209
- except Exception:
210
- pass
240
+ time.sleep(load_wait_sec)
241
+
242
+ # Inline the items as a JS object literal; localStorage rejects
243
+ # non-string values implicitly by coercion (we already string-
244
+ # coerced in read_localstorage).
245
+ expr = (
246
+ "(function(){try{var items=" + json.dumps(items) + ";"
247
+ "var n=0;for(var k in items){try{localStorage.setItem(k,items[k]);n++;}catch(e){}}"
248
+ "return n;}catch(e){return 'ERROR:'+e.toString();}})()"
249
+ )
250
+ msg_id += 1
251
+ r = _cdp_send(
252
+ ws, msg_id, "Runtime.evaluate",
253
+ {"expression": expr, "returnByValue": True},
254
+ session_id=session_id,
255
+ )
256
+ value = r.get("result", {}).get("result", {}).get("value")
257
+ if isinstance(value, int):
258
+ total_set += value
259
+ log.info(" %s: set %d/%d items", origin, value, len(items))
260
+ else:
261
+ log.warning(" %s: %s", origin, value)
211
262
  finally:
263
+ if target_id:
264
+ try:
265
+ msg_id += 1
266
+ _cdp_send(ws, msg_id, "Target.closeTarget", {"targetId": target_id})
267
+ except Exception:
268
+ pass
212
269
  ws.close()
213
270
 
214
271
  log.info("Injected %d localStorage items total", total_set)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-browser-profile",
3
- "version": "1.0.9",
3
+ "version": "1.0.11",
4
4
  "description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "ai-browser-profile": "bin/cli.js"