ai-browser-profile 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -143,6 +143,22 @@ def read_cookies(
|
|
|
143
143
|
)
|
|
144
144
|
|
|
145
145
|
domain_filters = list(domains) if domains else None
|
|
146
|
+
|
|
147
|
+
def _host_matches(host: str) -> bool:
    """Return True when *host* equals, or is a subdomain of, a domain filter.

    Domain-suffix match on label boundaries: filter 'x.com' matches 'x.com'
    and 'api.x.com' but not 'fedex.com'. Reads the enclosing-scope
    ``domain_filters``; an empty or missing filter list matches nothing.

    Both the host and each filter entry are reduced to a bare lower-cased
    hostname first, so a filter written as '.x.com', 'x.com:443' or
    'https://x.com/' behaves the same as 'x.com'.
    """

    def _bare_host(value) -> str:
        # Reduce a host / origin / URL to its bare lower-cased hostname.
        text = (value or "").strip()
        if "://" in text:
            text = text.split("://", 1)[1]
        # Drop any path, then any port.
        text = text.split("/", 1)[0].split(":", 1)[0]
        # Cookie host_keys often start with '.' for "all subdomains"; a
        # fully-qualified name may also end with the root dot.
        return text.strip(".").lower()

    target = _bare_host(host)
    if not target:
        return False
    for entry in (domain_filters or []):
        wanted = _bare_host(entry)
        if not wanted:
            # Blank / None filter entries are ignored rather than matching all.
            continue
        # The "." prefix keeps the suffix test on a label boundary.
        if target == wanted or target.endswith("." + wanted):
            return True
    return False
|
|
161
|
+
|
|
146
162
|
key = _derive_key(_keychain_password(profile.browser))
|
|
147
163
|
cookies: list[Cookie] = []
|
|
148
164
|
skipped = 0
|
|
@@ -167,7 +183,7 @@ def read_cookies(
|
|
|
167
183
|
)
|
|
168
184
|
for row in rows:
|
|
169
185
|
host = _txt(row["host_key"])
|
|
170
|
-
if domain_filters and not
|
|
186
|
+
if domain_filters and not _host_matches(host):
|
|
171
187
|
continue
|
|
172
188
|
value = _txt(row["value"])
|
|
173
189
|
if not value and row["encrypted_value"]:
|
|
@@ -179,15 +179,65 @@ def read_indexeddb(
|
|
|
179
179
|
[o.strip() for o in origins if o and o.strip()] if origins else None
|
|
180
180
|
)
|
|
181
181
|
|
|
182
|
+
def _host_matches(origin: str) -> bool:
    """Return True when *origin*'s host equals, or is a subdomain of, a filter.

    Domain-suffix match on label boundaries: filter 'x.com' matches 'x.com'
    and 'api.x.com' but NOT 'fedex.com' / 'swiftpackageindex.com'. Reads the
    enclosing-scope ``origin_filter``; an empty or missing filter list
    matches nothing.

    Both the origin and each filter entry are reduced to a bare lower-cased
    hostname first, so a filter written as a full origin
    ('https://x.com', 'https://x.com:8080/') behaves the same as 'x.com'
    instead of silently matching nothing.
    """

    def _bare_host(value) -> str:
        # Reduce a host / origin / URL to its bare lower-cased hostname.
        text = (value or "").strip()
        if "://" in text:
            text = text.split("://", 1)[1]
        # Drop any path, then any port.
        text = text.split("/", 1)[0].split(":", 1)[0]
        # Ignore a leading "all subdomains" dot and a trailing root dot.
        return text.strip(".").lower()

    target = _bare_host(origin)
    if not target:
        return False
    for entry in (origin_filter or []):
        wanted = _bare_host(entry)
        if not wanted:
            # Blank / None filter entries are ignored rather than matching all.
            continue
        # The "." prefix keeps the suffix test on a label boundary.
        if target == wanted or target.endswith("." + wanted):
            return True
    return False
|
|
196
|
+
|
|
197
|
+
# Defaults to skip even when no explicit filter is given:
|
|
198
|
+
# chrome-extension:// — extensions, not portable across browsers
|
|
199
|
+
# localhost / 127.* — dev servers, irrelevant across machines
|
|
200
|
+
# file:// — local file URLs
|
|
201
|
+
SKIP_PREFIXES = (
|
|
202
|
+
"chrome-extension://",
|
|
203
|
+
"http://localhost",
|
|
204
|
+
"https://localhost",
|
|
205
|
+
"http://127.",
|
|
206
|
+
"https://127.",
|
|
207
|
+
"file://",
|
|
208
|
+
)
|
|
209
|
+
# Skip pathologically large origins by default (e.g. kapwing video editor
|
|
210
|
+
# which stores 2 GB of project blobs). Caller can still ask for them
|
|
211
|
+
# explicitly via origin_filter.
|
|
212
|
+
MAX_LEVELDB_BYTES = 200 * 1024 * 1024 # 200 MB
|
|
213
|
+
|
|
214
|
+
def _dir_size(p) -> int:
|
|
215
|
+
try:
|
|
216
|
+
return sum(f.stat().st_size for f in p.rglob("*") if f.is_file())
|
|
217
|
+
except Exception:
|
|
218
|
+
return 0
|
|
219
|
+
|
|
182
220
|
out: dict[str, list[IdbDbDump]] = {}
|
|
183
221
|
skipped_dbs = 0
|
|
222
|
+
skipped_origins = 0
|
|
184
223
|
|
|
185
224
|
for leveldb_dir in sorted(idb_root.glob("*.indexeddb.leveldb")):
|
|
186
225
|
origin = _idb_dir_to_origin(leveldb_dir.name)
|
|
187
226
|
if origin is None:
|
|
188
227
|
continue
|
|
189
|
-
if origin_filter
|
|
190
|
-
|
|
228
|
+
if origin_filter:
|
|
229
|
+
if not _host_matches(origin):
|
|
230
|
+
continue
|
|
231
|
+
else:
|
|
232
|
+
# No explicit filter — apply default safety skips.
|
|
233
|
+
if any(origin.startswith(p) for p in SKIP_PREFIXES):
|
|
234
|
+
skipped_origins += 1
|
|
235
|
+
continue
|
|
236
|
+
size = _dir_size(leveldb_dir)
|
|
237
|
+
if size > MAX_LEVELDB_BYTES:
|
|
238
|
+
log.info("skipping oversized IndexedDB %s (%.1f MB)", origin, size/1024/1024)
|
|
239
|
+
skipped_origins += 1
|
|
240
|
+
continue
|
|
191
241
|
|
|
192
242
|
blob_dir = leveldb_dir.parent / leveldb_dir.name.replace(".leveldb", ".blob")
|
|
193
243
|
|
|
@@ -33,6 +33,29 @@ from ai_browser_profile.cookies import _ws_from_cdp_url, find_profile
|
|
|
33
33
|
log = logging.getLogger(__name__)
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
def _host_matches(host: str, filters: list[str]) -> bool:
|
|
37
|
+
"""Domain-suffix match: filter 'x.com' matches 'x.com' and 'sub.x.com',
|
|
38
|
+
but NOT 'fedex.com' or 'swiftpackageindex.com'.
|
|
39
|
+
|
|
40
|
+
Accepts either a raw host like 'example.com' or a full origin like
|
|
41
|
+
'https://example.com' / 'https://example.com:8080'.
|
|
42
|
+
"""
|
|
43
|
+
if not host:
|
|
44
|
+
return False
|
|
45
|
+
h = host
|
|
46
|
+
if "://" in h:
|
|
47
|
+
h = h.split("://", 1)[1]
|
|
48
|
+
h = h.split("/", 1)[0].split(":", 1)[0] # strip path and port
|
|
49
|
+
h = h.lstrip(".").lower()
|
|
50
|
+
for f in filters:
|
|
51
|
+
ff = f.strip().lstrip(".").lower()
|
|
52
|
+
if not ff:
|
|
53
|
+
continue
|
|
54
|
+
if h == ff or h.endswith("." + ff):
|
|
55
|
+
return True
|
|
56
|
+
return False
|
|
57
|
+
|
|
58
|
+
|
|
36
59
|
def read_localstorage(
|
|
37
60
|
profile: BrowserProfile,
|
|
38
61
|
origins: Optional[Iterable[str]] = None,
|
|
@@ -77,7 +100,7 @@ def read_localstorage(
|
|
|
77
100
|
value = record.value
|
|
78
101
|
if not origin or not key or value is None:
|
|
79
102
|
continue
|
|
80
|
-
if origin_filters and not
|
|
103
|
+
if origin_filters and not _host_matches(origin, origin_filters):
|
|
81
104
|
continue
|
|
82
105
|
if isinstance(value, bytes):
|
|
83
106
|
try:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-browser-profile",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "Extract user identity (name, emails, accounts, addresses, payments) from browser data into a self-ranking SQLite database. Install as a Claude Code agent skill.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"ai-browser-profile": "bin/cli.js"
|