ai-browser-profile 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -143,6 +143,22 @@ def read_cookies(
143
143
  )
144
144
 
145
145
  domain_filters = list(domains) if domains else None
146
+
147
def _host_matches(host: str) -> bool:
    """Return True when *host* equals, or is a subdomain of, a domain filter.

    Suffix matching is done on whole labels: a filter of 'x.com' accepts
    'x.com' and 'api.x.com' but rejects 'fedex.com'. The filters are read
    from the enclosing scope's ``domain_filters``.
    """
    candidate = host or ""
    # Drop an optional scheme, then any path and port.
    _, scheme_sep, after_scheme = candidate.partition("://")
    if scheme_sep:
        candidate = after_scheme
    candidate = candidate.split("/", 1)[0]
    candidate = candidate.split(":", 1)[0]
    # Cookie host_keys often start with '.' for "all subdomains" — strip that.
    candidate = candidate.lstrip(".").lower()

    wanted_domains = (
        (raw or "").strip().lstrip(".").lower()
        for raw in (domain_filters or [])
    )
    return any(
        bool(wanted)
        and (candidate == wanted or candidate.endswith("." + wanted))
        for wanted in wanted_domains
    )
161
+
146
162
  key = _derive_key(_keychain_password(profile.browser))
147
163
  cookies: list[Cookie] = []
148
164
  skipped = 0
@@ -167,7 +183,7 @@ def read_cookies(
167
183
  )
168
184
  for row in rows:
169
185
  host = _txt(row["host_key"])
170
- if domain_filters and not any(d in host for d in domain_filters):
186
+ if domain_filters and not _host_matches(host):
171
187
  continue
172
188
  value = _txt(row["value"])
173
189
  if not value and row["encrypted_value"]:
@@ -179,15 +179,65 @@ def read_indexeddb(
179
179
  [o.strip() for o in origins if o and o.strip()] if origins else None
180
180
  )
181
181
 
182
def _host_matches(origin: str) -> bool:
    """Tell whether *origin*'s host falls under one of the origin filters.

    Matching is by domain suffix on label boundaries: the filter 'x.com'
    matches 'x.com' and 'api.x.com' but NOT 'fedex.com' or
    'swiftpackageindex.com'. Filters come from the enclosing scope's
    ``origin_filter``.
    """
    text = origin or ""
    if "://" in text:
        # Keep only what follows the scheme separator.
        text = text.split("://", 1)[1]
    without_path = text.split("/", 1)[0]
    without_port = without_path.split(":", 1)[0]
    hostname = without_port.lstrip(".").lower()

    for entry in (origin_filter or []):
        suffix = (entry or "").strip().lstrip(".").lower()
        # Blank filters can never match; exact host or dotted suffix can.
        if suffix and (hostname == suffix or hostname.endswith("." + suffix)):
            return True
    return False
196
+
197
+ # Defaults to skip even when no explicit filter is given:
198
+ # chrome-extension:// — extensions, not portable across browsers
199
+ # localhost / 127.* — dev servers, irrelevant across machines
200
+ # file:// — local file URLs
201
+ SKIP_PREFIXES = (
202
+ "chrome-extension://",
203
+ "http://localhost",
204
+ "https://localhost",
205
+ "http://127.",
206
+ "https://127.",
207
+ "file://",
208
+ )
209
+ # Skip pathologically large origins by default (e.g. kapwing video editor
210
+ # which stores 2 GB of project blobs). Caller can still ask for them
211
+ # explicitly via origin_filter.
212
+ MAX_LEVELDB_BYTES = 200 * 1024 * 1024 # 200 MB
213
+
214
+ def _dir_size(p) -> int:
215
+ try:
216
+ return sum(f.stat().st_size for f in p.rglob("*") if f.is_file())
217
+ except Exception:
218
+ return 0
219
+
182
220
  out: dict[str, list[IdbDbDump]] = {}
183
221
  skipped_dbs = 0
222
+ skipped_origins = 0
184
223
 
185
224
  for leveldb_dir in sorted(idb_root.glob("*.indexeddb.leveldb")):
186
225
  origin = _idb_dir_to_origin(leveldb_dir.name)
187
226
  if origin is None:
188
227
  continue
189
- if origin_filter and not any(f in origin for f in origin_filter):
190
- continue
228
+ if origin_filter:
229
+ if not _host_matches(origin):
230
+ continue
231
+ else:
232
+ # No explicit filter — apply default safety skips.
233
+ if any(origin.startswith(p) for p in SKIP_PREFIXES):
234
+ skipped_origins += 1
235
+ continue
236
+ size = _dir_size(leveldb_dir)
237
+ if size > MAX_LEVELDB_BYTES:
238
+ log.info("skipping oversized IndexedDB %s (%.1f MB)", origin, size/1024/1024)
239
+ skipped_origins += 1
240
+ continue
191
241
 
192
242
  blob_dir = leveldb_dir.parent / leveldb_dir.name.replace(".leveldb", ".blob")
193
243
 
@@ -33,6 +33,29 @@ from ai_browser_profile.cookies import _ws_from_cdp_url, find_profile
33
33
  log = logging.getLogger(__name__)
34
34
 
35
35
 
36
+ def _host_matches(host: str, filters: list[str]) -> bool:
37
+ """Domain-suffix match: filter 'x.com' matches 'x.com' and 'sub.x.com',
38
+ but NOT 'fedex.com' or 'swiftpackageindex.com'.
39
+
40
+ Accepts either a raw host like 'example.com' or a full origin like
41
+ 'https://example.com' / 'https://example.com:8080'.
42
+ """
43
+ if not host:
44
+ return False
45
+ h = host
46
+ if "://" in h:
47
+ h = h.split("://", 1)[1]
48
+ h = h.split("/", 1)[0].split(":", 1)[0] # strip path and port
49
+ h = h.lstrip(".").lower()
50
+ for f in filters:
51
+ ff = f.strip().lstrip(".").lower()
52
+ if not ff:
53
+ continue
54
+ if h == ff or h.endswith("." + ff):
55
+ return True
56
+ return False
57
+
58
+
36
59
  def read_localstorage(
37
60
  profile: BrowserProfile,
38
61
  origins: Optional[Iterable[str]] = None,
@@ -77,7 +100,7 @@ def read_localstorage(
77
100
  value = record.value
78
101
  if not origin or not key or value is None:
79
102
  continue
80
- if origin_filters and not any(f in origin for f in origin_filters):
103
+ if origin_filters and not _host_matches(origin, origin_filters):
81
104
  continue
82
105
  if isinstance(value, bytes):
83
106
  try:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-browser-profile",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "ai-browser-profile": "bin/cli.js"