ai-browser-profile 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,196 @@
1
+ """Lazy-loading embedding model + vector storage for semantic search.
2
+
3
+ Uses a plain SQLite table for embeddings (no sqlite-vec extension needed)
4
+ and computes cosine similarity in Python via numpy.
5
+
6
+ Model: nomic-embed-text-v1.5 (768-dim, 2K context, MTEB ~65).
7
+ Runtime: ONNX Runtime (~50MB) with pre-built quantized model (~131MB).
8
+ No PyTorch, transformers, scipy, or sklearn needed.
9
+
10
+ Nomic uses task prefixes: "search_document: " for stored texts,
11
+ "search_query: " for queries.
12
+ """
13
+
14
+ import logging
15
+ import struct
16
+ from typing import Optional
17
+
18
+ log = logging.getLogger(__name__)
19
+
20
# Lazily populated module state: both stay None until the first successful
# model load fills them in.
_session = None
_tokenizer = None

# Embedding model configuration.
EMBEDDING_DIM = 768                          # vector width
MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"  # Hugging Face repo id
ONNX_FILE = "onnx/model_quantized.onnx"      # model file inside that repo
MAX_LENGTH = 512                             # tokenizer truncation length
28
+
29
+
30
def _load_model():
    """Load the ONNX model and tokenizer on first use (~131MB download).

    The session and tokenizer are built into locals and only published to the
    module globals once BOTH are ready, so a failure half-way through (for
    example a tokenizer that fails to parse) can never leave the module in a
    state where the session is set but the tokenizer is still None.

    Returns:
        True when the session and tokenizer are available, False when a
        dependency is missing or loading failed (a warning is logged).
    """
    global _session, _tokenizer
    if _session is not None and _tokenizer is not None:
        return True
    try:
        import onnxruntime as ort
        from huggingface_hub import hf_hub_download
        from tokenizers import Tokenizer

        onnx_path = hf_hub_download(MODEL_NAME, ONNX_FILE)
        tok_path = hf_hub_download(MODEL_NAME, "tokenizer.json")

        # Prefer CoreML when the runtime offers it, with CPU as fallback.
        if 'CoreMLExecutionProvider' in ort.get_available_providers():
            providers = ['CoreMLExecutionProvider', 'CPUExecutionProvider']
        else:
            providers = ['CPUExecutionProvider']
        session = ort.InferenceSession(onnx_path, providers=providers)

        tokenizer = Tokenizer.from_file(tok_path)
        # Only truncate; padding is done dynamically per-batch in _embed_raw
        tokenizer.enable_truncation(max_length=MAX_LENGTH)

        # Publish atomically: both globals or neither.
        _session, _tokenizer = session, tokenizer

        log.info(f"Loaded embedding model: {MODEL_NAME} (ONNX, {_session.get_providers()})")
        return True
    except ImportError as e:
        log.warning(f"ONNX runtime dependencies not installed — semantic search disabled: {e}")
        return False
    except Exception as e:
        log.warning(f"Failed to load embedding model: {e}")
        return False
60
+
61
+
62
def _embed_raw(texts: list[str]) -> list[Optional[list[float]]]:
    """Embed pre-prefixed texts via ONNX Runtime. Returns normalized vectors.

    Texts are processed in batches of 32. Each batch is padded to the longest
    sequence in that batch, run through the session, mean-pooled over the
    attended tokens and L2-normalized.

    Args:
        texts: Strings that already carry their task prefix.

    Returns:
        One list of floats per input text, in input order. An empty input
        yields an empty list.
    """
    import numpy as np

    results = []
    batch_size = 32
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        encoded = _tokenizer.encode_batch(batch)

        # Dynamic padding: pad to max length in this batch, not MAX_LENGTH
        max_len = max(len(e.ids) for e in encoded)
        input_ids = np.zeros((len(encoded), max_len), dtype=np.int64)
        attention_mask = np.zeros((len(encoded), max_len), dtype=np.int64)
        for j, e in enumerate(encoded):
            seq_len = len(e.ids)
            input_ids[j, :seq_len] = e.ids
            attention_mask[j, :seq_len] = e.attention_mask
        token_type_ids = np.zeros_like(input_ids)

        outputs = _session.run(None, {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
        })

        last_hidden = outputs[0]  # (batch, seq, 768)
        mask = attention_mask[:, :, None].astype(np.float32)
        # Guard both divisions: a row with no attended tokens (or a zero
        # pooled vector) would otherwise produce NaNs that poison every
        # later similarity computation. Such rows come out as zero vectors.
        token_counts = np.maximum(mask.sum(axis=1), 1.0)
        emb = (last_hidden * mask).sum(axis=1) / token_counts
        norms = np.linalg.norm(emb, axis=1, keepdims=True)
        emb = emb / np.where(norms > 0, norms, 1.0)

        results.extend(emb.tolist())

    return results
98
+
99
+
100
def is_available() -> bool:
    """Report whether the embedding model is (or can be) loaded.

    Delegates to the lazy loader, so the first call may trigger the load.
    """
    loaded = _load_model()
    return loaded
103
+
104
+
105
def embed_text(text: str, prefix: str = "search_document") -> Optional[list[float]]:
    """Embed one string, or return None when the model is unavailable.

    Args:
        text: The text to embed.
        prefix: "search_document" for storing, "search_query" for searching.

    Returns:
        The embedding vector for ``"{prefix}: {text}"``, or None if the model
        could not be loaded or no vector was produced.
    """
    if _load_model():
        vectors = _embed_raw([f"{prefix}: {text}"])
        if vectors:
            return vectors[0]
    return None
116
+
117
+
118
def embed_batch(texts: list[str], prefix: str = "search_document") -> list[Optional[list[float]]]:
    """Embed several texts at once.

    Args:
        texts: The texts to embed.
        prefix: Task prefix prepended to every text as ``"{prefix}: "``.

    Returns:
        One vector per text, or a list of None placeholders of the same
        length when the model is unavailable.
    """
    if _load_model():
        return _embed_raw([f"{prefix}: {t}" for t in texts])
    return [None] * len(texts)
124
+
125
+
126
def _serialize_vec(vec: list[float]) -> bytes:
    """Pack a float vector into bytes suitable for a SQLite BLOB column.

    Each element is written as a native 4-byte float (struct code ``f``).
    """
    packer = struct.Struct(f"{len(vec)}f")
    return packer.pack(*vec)
129
+
130
+
131
def _deserialize_vec(blob: bytes) -> list[float]:
    """Unpack a BLOB written by the serializer back into a list of floats.

    The element count is derived from the blob length (4 bytes per float).
    """
    count, _remainder = divmod(len(blob), 4)
    values = struct.unpack(f"{count}f", blob)
    return list(values)
135
+
136
+
137
def setup_embeddings_table(conn) -> bool:
    """Ensure the memory_embeddings table exists (plain SQLite, no extensions).

    Args:
        conn: An open database connection exposing ``execute`` and ``commit``.

    Returns:
        True when the table exists after the call, False if creating it
        failed (the error is logged as a warning).
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS memory_embeddings (
            memory_id INTEGER PRIMARY KEY,
            embedding BLOB NOT NULL
        )
    """
    try:
        conn.execute(ddl)
        conn.commit()
    except Exception as e:
        log.warning(f"Failed to create embeddings table: {e}")
        return False
    return True
151
+
152
+
153
def store_embedding(conn, memory_id: int, vec: list[float]):
    """Insert or replace the embedding row for one memory.

    Best-effort: any failure is logged at debug level and swallowed. The
    statement is executed but not committed here.

    Args:
        conn: Open database connection.
        memory_id: Primary key of the memory the vector belongs to.
        vec: Embedding vector to serialize and store.
    """
    sql = "INSERT OR REPLACE INTO memory_embeddings (memory_id, embedding) VALUES (?, ?)"
    try:
        payload = _serialize_vec(vec)
        conn.execute(sql, (memory_id, payload))
    except Exception as e:
        log.debug(f"Failed to store embedding for {memory_id}: {e}")
162
+
163
+
164
def cosine_search(conn, query_vec: list[float], limit: int = 20,
                  threshold: float = 0.5) -> list[tuple[int, float]]:
    """Search for similar memories by embedding.

    Computes cosine similarity in Python (vectors are pre-normalized, so the
    dot product is the cosine similarity).

    Stored rows whose blob is not exactly the byte length of the query vector
    (different embedding dimension, truncated or otherwise malformed data) are
    skipped instead of aborting the whole search.

    Args:
        conn: Open database connection containing ``memory_embeddings``.
        query_vec: Query embedding.
        limit: Maximum number of results to return.
        threshold: Minimum similarity a row needs to be included.

    Returns:
        [(memory_id, similarity)] sorted by similarity descending. Empty when
        numpy is missing, the table cannot be read, or nothing matches.
    """
    try:
        import numpy as np
    except ImportError:
        log.warning("numpy not available for cosine search")
        return []

    try:
        rows = conn.execute("SELECT memory_id, embedding FROM memory_embeddings").fetchall()
    except Exception as e:
        # Typically the table does not exist yet; keep this quiet but visible.
        log.debug(f"Could not read memory_embeddings: {e}")
        return []

    if not rows:
        return []

    q = np.array(query_vec, dtype=np.float32)
    expected_bytes = q.nbytes

    results = []
    skipped = 0
    for mem_id, blob in rows:
        if not isinstance(blob, (bytes, bytearray, memoryview)) or len(blob) != expected_bytes:
            skipped += 1
            continue
        vec = np.frombuffer(blob, dtype=np.float32)
        # Dot product of normalized vectors = cosine similarity
        sim = float(np.dot(q, vec))
        if sim >= threshold:
            results.append((mem_id, sim))

    if skipped:
        log.debug(f"Skipped {skipped} embeddings with unexpected size during cosine search")

    results.sort(key=lambda x: -x[1])
    return results[:limit]
@@ -0,0 +1,108 @@
1
+ """Orchestrate memory extraction from all browser sources."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Optional, Set
6
+
7
+ from ai_browser_profile.db import MemoryDB
8
+ from ai_browser_profile.embeddings import setup_embeddings_table
9
+ from ai_browser_profile.ingestors.browser_detect import detect_browsers
10
+ from ai_browser_profile.ingestors.webdata import ingest_webdata
11
+ from ai_browser_profile.ingestors.history import ingest_history
12
+ from ai_browser_profile.ingestors.logins import ingest_logins
13
+ from ai_browser_profile.ingestors.bookmarks import ingest_bookmarks
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+
18
def _timed(name, func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, logging start and elapsed time.

    Args:
        name: Label used in the log lines.
        func: Callable to run.

    Returns:
        Whatever ``func`` returns. Exceptions from ``func`` propagate.
    """
    log.info(f"[{name}] starting...")
    started = time.monotonic()
    outcome = func(*args, **kwargs)
    elapsed = time.monotonic() - started
    log.info(f"[{name}] done in {elapsed:.1f}s")
    return outcome
26
+
27
+
28
def extract_memories(memories_db_path: str = "memories.db",
                     browsers: Optional[Set[str]] = None,
                     skip_indexeddb: bool = False,
                     skip_localstorage: bool = False,
                     skip_notion: bool = False) -> MemoryDB:
    """Build the memories database directly from browser files.

    Runs the ingestors in order (autofill, history, bookmarks, logins,
    LinkedIn, Notion), logs an interim profile, then runs the slow WhatsApp
    step and finally backfills embeddings.

    Args:
        memories_db_path: Output database path.
        browsers: Set of browser names to scan (None = all).
        skip_indexeddb: Skip IndexedDB extraction (requires ccl_chromium_reader).
        skip_localstorage: Skip Local Storage extraction (requires ccl_chromium_reader).
        skip_notion: Skip the Notion ingestor.

    Returns:
        The open MemoryDB for ``memories_db_path``. Note this may be a
        different instance from the one created at the start, because the
        database is closed and reopened around the interim cleanup.
    """
    total_start = time.monotonic()
    mem = MemoryDB(memories_db_path, defer_embeddings=True)
    profiles = detect_browsers(allowed=browsers)
    log.info(f"Extracting memories from {len(profiles)} profiles...")

    # 1. Autofill — saved form data, addresses, credit cards
    _timed("Autofill", ingest_webdata, mem)

    # 2. History — tools and services used
    _timed("History", ingest_history, mem, profiles)

    # 3. Bookmarks — interests and saved links
    _timed("Bookmarks", ingest_bookmarks, mem, profiles)

    # 4. Logins — saved accounts per site
    _timed("Logins", ingest_logins, mem, profiles)

    # 5. LinkedIn — connections from Local Storage
    if not skip_localstorage:
        try:
            from ai_browser_profile.ingestors.localstorage import ingest_localstorage
            _timed("LinkedIn", ingest_localstorage, mem, profiles)
        except ImportError:
            log.warning("ccl_chromium_reader not installed — skipping LinkedIn")

    # 6. Notion — workspace contacts, pages, meetings
    if not skip_notion:
        try:
            from ai_browser_profile.ingestors.notion import ingest_notion
            _timed("Notion", ingest_notion, mem)
        except Exception as e:
            log.warning(f"Notion ingestor failed: {e}")

    # -- Interim profile: core data is ready, show it before slow steps --
    mem.conn.commit()
    # Run cleanup before showing profile so noise is removed. The cleanup
    # module is imported lazily; if it is not importable, skip the cleanup
    # (like the other optional steps) rather than aborting after all the
    # ingestion work has already been done.
    try:
        from clean import run_cleanup
    except ImportError as e:
        log.warning(f"Cleanup module not available — skipping interim cleanup: {e}")
    else:
        log.info("Running interim cleanup...")
        # Cleanup opens the database by path, so release our handle first
        # and reopen afterwards.
        mem.close()
        run_cleanup(db_path=memories_db_path)
        mem = MemoryDB(memories_db_path, defer_embeddings=True)
    interim_profile = mem.profile_text()
    log.info(f"Interim profile ready (WhatsApp + embeddings still running):\n{interim_profile}")

    # 7. WhatsApp — contacts from IndexedDB (slow, runs last)
    if not skip_indexeddb:
        try:
            from ai_browser_profile.ingestors.indexeddb import ingest_indexeddb
            _timed("WhatsApp", ingest_indexeddb, mem, profiles)
        except ImportError:
            log.warning("ccl_chromium_reader not installed — skipping WhatsApp")

    mem.conn.commit()

    # 8. Embeddings — backfill all at once (loads ONNX model once, batches efficiently)
    mem._vec_ready = setup_embeddings_table(mem.conn)
    mem._defer_embeddings = False
    if mem._vec_ready:
        _timed("Embeddings", mem.backfill_embeddings)

    total_elapsed = time.monotonic() - total_start
    stats = mem.stats()
    log.info(
        f"Memories: {stats['total_memories']} total, "
        f"tags: {', '.join(f'{t}={c}' for t, c in list(stats['by_tag'].items())[:10])}"
    )
    log.info(f"Total extraction time: {total_elapsed:.1f}s")
    return mem
File without changes
@@ -0,0 +1,185 @@
1
+ """Ingest bookmark memories from browser bookmark files."""
2
+
3
+ import json
4
+ import plistlib
5
+ import shutil
6
+ import sqlite3
7
+ import logging
8
+ from pathlib import Path
9
+
10
+ from ai_browser_profile.db import MemoryDB
11
+ from ai_browser_profile.ingestors.browser_detect import BrowserProfile, copy_db, domain
12
+ from ai_browser_profile.ingestors.constants import SERVICE_NAMES
13
+
14
+ log = logging.getLogger(__name__)
15
+
16
# URL prefixes of browser-internal or non-web bookmarks; skipped entirely.
_SKIP_SCHEMES = (
    "chrome://",
    "chrome-extension://",
    "about:",
    "javascript:",
    "edge://",
    "brave://",
)

# Default bookmarks shipped with browsers; treated as noise.
_SKIP_DOMAINS = {
    "www.apple.com",
    "apple.com",
    "support.apple.com",
}
_SKIP_URLS = {
    "https://www.google.com/",
    "http://www.google.com/",
    "https://google.com/",
    "http://google.com/",
}

# Tag -> keywords looked for in the domain of an otherwise unknown bookmark.
_TAG_KEYWORDS = {
    "tool": {
        "github", "gitlab", "stackoverflow", "stackexchange", "dev", "codepen",
        "codesandbox", "npm", "pypi", "crates", "packagist", "brew",
    },
    "knowledge": {
        "docs", "wiki", "learn", "tutorial", "guide", "course", "edu",
        "arxiv", "scholar", "paper", "blog", "medium", "substack",
    },
    "social": {
        "twitter", "x.com", "linkedin", "facebook", "instagram", "reddit",
        "mastodon", "threads", "tiktok", "youtube",
    },
}
32
+
33
+
34
def _should_skip(url: str) -> bool:
    """Decide whether a bookmark URL is noise and should be ignored.

    A URL is skipped when it starts with one of the skip schemes, is one of
    the exact skip URLs, resolves to a skipped domain, or has no domain.
    """
    if url.startswith(_SKIP_SCHEMES) or url in _SKIP_URLS:
        return True
    host = domain(url)
    return host in _SKIP_DOMAINS or not host
46
+
47
+
48
def _infer_tags(d: str) -> list[str]:
    """Infer tags for an unknown bookmark domain.

    Every bookmark gets the "knowledge" tag; further tags are added when one
    of the tag's keywords matches the lower-cased domain.

    Matching rules:
      * A keyword containing a dot (e.g. "x.com") is a domain name: it
        matches only that domain or its subdomains, so "netflix.com" or
        "box.com" are not mistaken for "x.com".
      * Any other keyword matches as a substring of the domain.

    Args:
        d: Domain of the bookmark.

    Returns:
        The tags in sorted order, so the result is deterministic across runs.
    """
    host = d.lower()

    def _matches(keyword: str) -> bool:
        if "." in keyword:
            return host == keyword or host.endswith("." + keyword)
        return keyword in host

    tags = {"knowledge"}
    for tag, keywords in _TAG_KEYWORDS.items():
        if any(_matches(kw) for kw in keywords):
            tags.add(tag)
    return sorted(tags)
58
+
59
+
60
def _walk_chromium_bookmarks(node: dict, out: list[dict]):
    """Collect bookmarks from a Chromium bookmark tree, depth-first.

    Args:
        node: A tree node; nodes of type "url" are bookmarks, and any node
            may carry a "children" list that is descended into.
        out: List that receives ``{"url", "title"}`` dicts for every
            bookmark that is not filtered out.
    """
    is_bookmark = node.get("type") == "url"
    if is_bookmark:
        link = node.get("url", "")
        label = node.get("name", "")
        if link and not _should_skip(link):
            out.append({"url": link, "title": label})
    for sub in node.get("children", []):
        _walk_chromium_bookmarks(sub, out)
69
+
70
+
71
def _chromium_bookmarks(profile: BrowserProfile) -> list[dict]:
    """Load bookmarks from a Chromium profile's JSON ``Bookmarks`` file.

    Walks the "bookmark_bar", "other" and "synced" roots when present.

    Returns:
        List of ``{"url", "title"}`` dicts; empty when the file is missing or
        cannot be read (a warning is logged in the latter case).
    """
    source = profile.path / "Bookmarks"
    if not source.exists():
        return []
    try:
        with open(source, "r", encoding="utf-8") as fh:
            data = json.load(fh)
        collected: list[dict] = []
        roots = data.get("roots", {})
        for section in ("bookmark_bar", "other", "synced"):
            if section in roots:
                _walk_chromium_bookmarks(roots[section], collected)
        return collected
    except Exception as e:
        log.warning(f"Failed to read Bookmarks for {profile.browser}/{profile.name}: {e}")
        return []
88
+
89
+
90
def _walk_safari_bookmarks(node: dict, out: list[dict]):
    """Collect bookmarks from a Safari bookmark plist tree, depth-first.

    Args:
        node: A plist node; nodes whose "WebBookmarkType" is
            "WebBookmarkTypeLeaf" are bookmarks, and any node may carry a
            "Children" list that is descended into.
        out: List that receives ``{"url", "title"}`` dicts for every
            bookmark that is not filtered out.
    """
    if node.get("WebBookmarkType", "") == "WebBookmarkTypeLeaf":
        link = node.get("URLString", "")
        label = node.get("URIDictionary", {}).get("title", "")
        if link and not _should_skip(link):
            out.append({"url": link, "title": label})
    for sub in node.get("Children", []):
        _walk_safari_bookmarks(sub, out)
100
+
101
+
102
def _safari_bookmarks(profile: BrowserProfile) -> list[dict]:
    """Load bookmarks from Safari's ``Bookmarks.plist``.

    Returns:
        List of ``{"url", "title"}`` dicts; empty when the file is missing or
        cannot be read (a warning is logged in the latter case).
    """
    source = profile.path / "Bookmarks.plist"
    if not source.exists():
        return []
    try:
        with open(source, "rb") as fh:
            tree = plistlib.load(fh)
        collected: list[dict] = []
        _walk_safari_bookmarks(tree, collected)
        return collected
    except Exception as e:
        log.warning(f"Failed to read Safari Bookmarks.plist: {e}")
        return []
116
+
117
+
118
def _firefox_bookmarks(profile: BrowserProfile) -> list[dict]:
    """Read bookmarks from Firefox places.sqlite.

    Works on a temporary copy of the database, opened read-only. Both the
    connection and the temporary directory are released in ``finally``, so
    nothing leaks when the query or row processing fails.

    Returns:
        List of ``{"url", "title"}`` dicts for bookmark rows
        (``moz_bookmarks.type = 1``) that are not filtered out; empty when
        the database is unavailable or cannot be read (a warning is logged in
        the latter case).
    """
    tmp = copy_db(profile.path / "places.sqlite")
    if not tmp:
        return []
    conn = None
    try:
        conn = sqlite3.connect(f"file:{tmp}?mode=ro", uri=True)
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT b.title, p.url FROM moz_bookmarks b "
            "JOIN moz_places p ON b.fk = p.id "
            "WHERE b.type = 1"
        ).fetchall()
        out: list[dict] = []
        for row in rows:
            url = row["url"] or ""
            title = row["title"] or ""
            if url and not _should_skip(url):
                out.append({"url": url, "title": title})
        return out
    except Exception as e:
        log.warning(f"Failed to read Firefox bookmarks: {e}")
        return []
    finally:
        # Close before deleting the copy so the file handle is released.
        if conn is not None:
            conn.close()
        shutil.rmtree(tmp.parent, ignore_errors=True)
144
+
145
+
146
def ingest_bookmarks(mem: MemoryDB, profiles: list[BrowserProfile]):
    """Turn bookmarks from every browser profile into memories.

    Bookmarks are gathered per browser family, de-duplicated by URL (the
    first title seen wins) and then upserted: domains found in SERVICE_NAMES
    go to the service's ``tool:`` entry, all other domains get a
    ``bookmark:`` entry with inferred tags and confidence 0.6.

    Args:
        mem: Memory database receiving the upserts.
        profiles: Browser profiles to read bookmarks from.
    """
    # Gather bookmarks from every profile, dispatching on browser family
    gathered: list[dict] = []
    for profile in profiles:
        family = profile.browser
        if family in ("arc", "chrome", "brave", "edge"):
            gathered.extend(_chromium_bookmarks(profile))
        elif family == "safari":
            gathered.extend(_safari_bookmarks(profile))
        elif family == "firefox":
            gathered.extend(_firefox_bookmarks(profile))

    # One entry per URL; setdefault keeps the first title encountered
    seen_urls: dict[str, str] = {}
    for entry in gathered:
        seen_urls.setdefault(entry["url"], entry["title"])

    known_count = 0
    unknown_count = 0

    for url, title in seen_urls.items():
        d = domain(url)
        if not d:
            continue

        if d not in SERVICE_NAMES:
            # Unknown domain: create bookmark entry
            mem.upsert(f"bookmark:{d}", title or d, _infer_tags(d),
                       confidence=0.6, source=f"bookmark:{url}")
            unknown_count += 1
            continue

        # Known service: boost the tool entry
        service = SERVICE_NAMES[d]
        mem.upsert(f"tool:{service}", title or service, ["account", "tool"],
                   source=f"bookmark:{d}")
        known_count += 1

    log.info(f" Bookmarks: {len(seen_urls)} unique, {known_count} known services, {unknown_count} new domains")
@@ -0,0 +1,100 @@
1
+ """Detect installed browsers and their profiles."""
2
+
3
+ import shutil
4
+ import sqlite3
5
+ import tempfile
6
+ import logging
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Optional, Set
10
+ from urllib.parse import urlparse
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
# Per-user application data directory under the home folder.
APP_SUPPORT = Path.home().joinpath("Library", "Application Support")
15
+
16
+
17
@dataclass
class BrowserProfile:
    """One browser profile found on disk."""

    # Browser identifier: "arc", "chrome", "safari", "firefox", "brave", "edge"
    browser: str
    # Profile name, e.g. "Default", "Profile 1"
    name: str
    # Full path to the profile directory
    path: Path
22
+
23
+
24
def _chromium_profiles(browser: str, base: Path) -> list[BrowserProfile]:
    """List the profiles of a Chromium-based browser under ``base``.

    A profile is a directory named "Default" or "Profile ..." that contains a
    "History" or "IndexedDB" entry. When none qualifies but a "Default"
    directory exists, that directory is returned as the only profile.

    Args:
        browser: Browser identifier stored on each returned profile.
        base: The browser's user-data directory.

    Returns:
        Profiles in sorted directory order; empty if ``base`` does not exist.
    """
    if not base.exists():
        return []

    found = [
        BrowserProfile(browser=browser, name=entry.name, path=entry)
        for entry in sorted(base.iterdir())
        if entry.is_dir()
        and (entry.name == "Default" or entry.name.startswith("Profile "))
        and ((entry / "History").exists() or (entry / "IndexedDB").exists())
    ]

    if not found:
        fallback = base / "Default"
        if fallback.exists():
            found.append(BrowserProfile(browser=browser, name="Default", path=fallback))

    return found
41
+
42
+
43
def detect_browsers(allowed: Optional[Set[str]] = None) -> list[BrowserProfile]:
    """Find browser profiles on this machine.

    Args:
        allowed: Browser names to include. None (or an empty set) means all
            browsers.

    Returns:
        Profiles for the Chromium-based browsers first, then Safari, then
        Firefox.
    """
    def _wanted(browser_name: str) -> bool:
        return not allowed or browser_name in allowed

    chromium_bases = {
        "arc": APP_SUPPORT / "Arc" / "User Data",
        "chrome": APP_SUPPORT / "Google" / "Chrome",
        "brave": APP_SUPPORT / "BraveSoftware" / "Brave-Browser",
        "edge": APP_SUPPORT / "Microsoft Edge",
    }

    profiles: list[BrowserProfile] = []
    for name, base in chromium_bases.items():
        if _wanted(name):
            profiles.extend(_chromium_profiles(name, base))

    # Safari keeps a single profile directory
    if _wanted("safari"):
        safari_dir = Path.home() / "Library" / "Safari"
        if safari_dir.exists():
            profiles.append(BrowserProfile(browser="safari", name="Default", path=safari_dir))

    # Firefox: every profile directory that has a places.sqlite
    if _wanted("firefox"):
        firefox_base = APP_SUPPORT / "Firefox" / "Profiles"
        if firefox_base.exists():
            profiles.extend(
                BrowserProfile(browser="firefox", name=entry.name, path=entry)
                for entry in sorted(firefox_base.iterdir())
                if entry.is_dir() and (entry / "places.sqlite").exists()
            )

    log.info(f"Detected {len(profiles)} browser profiles: {[(p.browser, p.name) for p in profiles]}")
    return profiles
75
+
76
+
77
def copy_db(src: Path) -> Optional[Path]:
    """Copy a SQLite DB to temp dir to avoid browser locks.

    The database is copied into a fresh temporary directory together with its
    "-wal" and "-shm" sidecar files when they exist. On success the caller
    owns that directory (the parent of the returned path) and is responsible
    for deleting it. On failure the temporary directory is removed here, so
    failed copies do not pile up in the temp folder.

    Args:
        src: Path of the database file to copy.

    Returns:
        Path of the copied database, or None when ``src`` does not exist or
        cannot be read because of missing permissions.

    Raises:
        Any non-permission error raised while copying.
    """
    if not src.exists():
        return None
    tmp: Optional[Path] = None
    try:
        tmp = Path(tempfile.mkdtemp(prefix="ai_browser_profile_"))
        dst = tmp / src.name
        shutil.copy2(src, dst)
        for suffix in ("-wal", "-shm"):
            sidecar = src.parent / (src.name + suffix)
            if sidecar.exists():
                shutil.copy2(sidecar, tmp / sidecar.name)
        return dst
    except PermissionError:
        if tmp is not None:
            shutil.rmtree(tmp, ignore_errors=True)
        log.warning(f"Permission denied reading {src} — grant Full Disk Access or skip")
        return None
    except Exception:
        # Unexpected failure: still propagate it, but do not leak the temp dir.
        if tmp is not None:
            shutil.rmtree(tmp, ignore_errors=True)
        raise
93
+
94
+
95
def domain(url: str) -> str:
    """Return the network-location part of ``url``.

    This is ``urlparse(url).netloc`` as-is, so it includes any user info and
    port present in the URL. An empty string is returned when the URL cannot
    be parsed.
    """
    try:
        parsed = urlparse(url)
    except Exception:
        return ""
    return parsed.netloc