evolutiondb-browser-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ """evolutiondb-browser-sync — read local browser history (Chrome,
2
+ Edge, Firefox, Brave, Arc, Vivaldi) directly from each browser's
3
+ on-disk SQLite store and pour the entries into EvolutionDB
4
+ long-term memory.
5
+
6
+ No OAuth, no extension install, no remote API. The browser's own
7
+ data lives in a local file the user already owns, so the sync
8
+ just opens it read-only and copies the rows out."""
9
+
10
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ from .sync import main
2
+
3
# Entry point for `python -m browser_sync`: main()'s integer result
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,212 @@
1
+ """
2
+ Open a browser history SQLite store in read-only mode and yield
3
+ normalised visit records. Two on-disk schemas exist in the wild —
4
+ the Chromium family (Chrome, Edge, Brave, Arc, Vivaldi, Opera) and
5
+ Mozilla Firefox. Each gets its own SELECT; the records they
6
+ produce share a single shape so the sync loop downstream does not
7
+ care which browser they came from.
8
+
9
+ Locking
10
+ -------
11
+ A running browser holds an exclusive WAL lock on its History /
12
+ places.sqlite file. We snapshot to a temp file with `shutil.copy`
13
+ first; on Windows that step can race a busy browser and raise
14
+ `PermissionError`, in which case we fall back to SQLite's URI
15
+ `mode=ro&immutable=1` open which often succeeds on the original.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import hashlib
20
+ import shutil
21
+ import sqlite3
22
+ import sys
23
+ import tempfile
24
+ from datetime import datetime, timedelta, timezone
25
+ from pathlib import Path
26
+ from typing import Dict, Iterator, Optional
27
+
28
+ from .scanner import CHROMIUM, FIREFOX, Profile
29
+
30
+
31
+ _CHROME_EPOCH = datetime(1601, 1, 1, tzinfo=timezone.utc)
32
+ _SKIP_PREFIXES = (
33
+ "chrome://", "chrome-extension://", "chrome-search://",
34
+ "chrome-untrusted://", "edge://", "extension://",
35
+ "moz-extension://", "about:", "view-source:",
36
+ "file://", "data:", "javascript:", "blob:",
37
+ )
38
+
39
+
40
+ # ---------------------------------------------------------------- #
41
+ # Snapshot + connect #
42
+ # ---------------------------------------------------------------- #
43
+ def _snapshot(src: Path) -> Path:
44
+ tmp = Path(tempfile.mkstemp(suffix=".db",
45
+ prefix="evosql-bs-")[1])
46
+ try:
47
+ shutil.copy(src, tmp)
48
+ # WAL sidecars carry uncommitted history; copying them too
49
+ # avoids "no such table: visits" on freshly-opened browsers.
50
+ for sfx in ("-wal", "-shm"):
51
+ side = src.parent / (src.name + sfx)
52
+ if side.is_file():
53
+ try:
54
+ shutil.copy(side, tmp.parent / (tmp.name + sfx))
55
+ except OSError:
56
+ pass
57
+ return tmp
58
+ except PermissionError:
59
+ # Windows + browser exclusively locked. Cleanup, signal
60
+ # caller to fall back to URI immutable open of the original.
61
+ try:
62
+ tmp.unlink(missing_ok=True)
63
+ except Exception:
64
+ pass
65
+ raise
66
+
67
+
68
def _remove_snapshot(snap: Path) -> None:
    """Delete a snapshot file and its WAL sidecars, ignoring failures."""
    for sfx in ("", "-wal", "-shm"):
        try:
            (snap.parent / (snap.name + sfx)).unlink()
        except OSError:
            pass


class _SnapshotConnection(sqlite3.Connection):
    """SQLite connection that deletes its temp snapshot when closed."""

    # Temp copy backing this connection; stays None for live files.
    _snapshot_path: Optional[Path] = None

    def close(self) -> None:
        try:
            super().close()
        finally:
            snap, self._snapshot_path = self._snapshot_path, None
            if snap is not None:
                _remove_snapshot(snap)


def _connect(src: Path) -> sqlite3.Connection:
    """Open the history database at *src* for reading.

    Normally a temp snapshot is opened; it is removed automatically
    when the returned connection is closed, so repeated sync passes do
    not pile up copies of the user's history in the temp directory.

    If the snapshot cannot be taken because the file is locked
    (`PermissionError`), the original is opened directly with
    `mode=ro&immutable=1` as a last resort.

    Args:
        src: Path of the live history database.

    Returns:
        An open `sqlite3.Connection`; the caller must close it.

    Raises:
        OSError: the snapshot failed for a reason other than a lock.
        sqlite3.Error: SQLite could not open the file.
    """
    try:
        snap = _snapshot(src)
    except PermissionError:
        # immutable=1 tells SQLite to skip locking and assume the file
        # never changes, so rows that exist only in the -wal file may
        # not be visible -- still better than nothing on a busy host.
        # as_uri() percent-encodes the path and uses forward slashes,
        # which is what SQLite's URI parser expects.
        uri = f"{src.resolve().as_uri()}?mode=ro&immutable=1"
        return sqlite3.connect(uri, uri=True)

    try:
        conn = sqlite3.connect(snap, factory=_SnapshotConnection)
    except sqlite3.Error:
        _remove_snapshot(snap)
        raise
    conn._snapshot_path = snap
    return conn
78
+
79
+
80
+ # ---------------------------------------------------------------- #
81
+ # Timestamp normalisation #
82
+ # ---------------------------------------------------------------- #
83
+ def _chrome_us_to_iso(us: int) -> Optional[str]:
84
+ if not us or us < 0:
85
+ return None
86
+ try:
87
+ dt = _CHROME_EPOCH + timedelta(microseconds=int(us))
88
+ return dt.isoformat(timespec="seconds").replace("+00:00", "Z")
89
+ except (OverflowError, OSError):
90
+ return None
91
+
92
+
93
+ def _firefox_us_to_iso(us: int) -> Optional[str]:
94
+ if not us or us < 0:
95
+ return None
96
+ try:
97
+ dt = datetime.fromtimestamp(int(us) / 1_000_000,
98
+ tz=timezone.utc)
99
+ return dt.isoformat(timespec="seconds").replace("+00:00", "Z")
100
+ except (OverflowError, OSError, ValueError):
101
+ return None
102
+
103
+
104
+ # ---------------------------------------------------------------- #
105
+ # Filtering #
106
+ # ---------------------------------------------------------------- #
107
+ def _keep(url: str) -> bool:
108
+ if not url:
109
+ return False
110
+ lower = url.lower()
111
+ if lower.startswith(_SKIP_PREFIXES):
112
+ return False
113
+ return True
114
+
115
+
116
+ # ---------------------------------------------------------------- #
117
+ # Record builder #
118
+ # ---------------------------------------------------------------- #
119
+ def _short_url(url: str, n: int = 80) -> str:
120
+ return url if len(url) <= n else url[: n - 1] + "…"
121
+
122
+
123
def _build_record(profile: Profile, url: str, title: str,
                  visit_count: int, last_iso: str) -> Dict:
    """Build the normalised visit record stored in memory.

    Args:
        profile: Profile the row was read from.
        url: Visited URL, stored verbatim.
        title: Page title; blank or missing becomes "(no title)".
        visit_count: Visit counter from the browser; a missing (NULL)
            value is treated as 0.
        last_iso: ISO 8601 timestamp of the latest visit.

    Returns:
        A dict with a human-readable "fact" sentence plus the
        structured fields (browser, profile, url, title, visit_count,
        last_visited_at, url_hash, tags).
    """
    title = (title or "").strip() or "(no title)"
    # Normalise once so the sentence and the structured field agree;
    # previously a NULL count was rendered as "None visits".
    count = int(visit_count or 0)
    plural = "" if count == 1 else "s"
    fact = (f"Visited [{profile.browser}] \"{title}\" "
            f"({_short_url(url)}) — last on {last_iso}, "
            f"{count} visit{plural}")
    # Short, stable digest of the full URL; used as an identifier, not
    # for any security purpose.
    url_hash = hashlib.sha1(url.encode("utf-8")).hexdigest()[:16]
    return {
        "fact": fact,
        "source": "browser",
        "kind": "visit",
        "browser": profile.browser,
        "browser_family": profile.family,
        "profile": profile.profile_id,
        "url": url,
        "title": title,
        "visit_count": count,
        "last_visited_at": last_iso,
        "url_hash": url_hash,
        "tags": ["browser", "history", profile.browser],
    }
144
+
145
+
146
+ # ---------------------------------------------------------------- #
147
+ # Per-family SELECT #
148
+ # ---------------------------------------------------------------- #
149
def _iter_chromium(profile: Profile,
                   since_iso: Optional[str]) -> Iterator[Dict]:
    """Yield visit records from a Chromium-family history database.

    Reads `urls(url, title, visit_count, last_visit_time)`, where
    `last_visit_time` is microseconds since 1601. Rows with a skipped
    URL, an unusable timestamp, or a timestamp strictly older than
    *since_iso* are dropped. A database error on the query is reported
    on stderr and ends the iteration without raising.
    """
    db = _connect(profile.history_path)
    try:
        try:
            rows = db.execute(
                "SELECT url, title, visit_count, last_visit_time "
                "FROM urls WHERE last_visit_time > 0")
        except sqlite3.DatabaseError as exc:
            print(f"[browser-sync] {profile.profile_id} "
                  f"DB error: {exc}", file=sys.stderr, flush=True)
            return
        for url, title, visit_count, last_visit in rows:
            if not _keep(url):
                continue
            stamp = _chrome_us_to_iso(last_visit)
            if stamp is None:
                continue
            # ISO strings in the same UTC format compare chronologically.
            if since_iso and stamp < since_iso:
                continue
            yield _build_record(profile, url, title, visit_count, stamp)
    finally:
        # Runs when the generator is exhausted or closed early.
        db.close()
175
+
176
+
177
def _iter_firefox(profile: Profile,
                  since_iso: Optional[str]) -> Iterator[Dict]:
    """Yield visit records from a Firefox `places.sqlite` database.

    Reads `moz_places(url, title, visit_count, last_visit_date)`, where
    `last_visit_date` is Unix microseconds. Rows with a skipped URL, an
    unusable timestamp, or a timestamp strictly older than *since_iso*
    are dropped. A database error on the query is reported on stderr
    and ends the iteration without raising.
    """
    db = _connect(profile.history_path)
    try:
        try:
            rows = db.execute(
                "SELECT url, title, visit_count, last_visit_date "
                "FROM moz_places WHERE last_visit_date IS NOT NULL")
        except sqlite3.DatabaseError as exc:
            print(f"[browser-sync] {profile.profile_id} "
                  f"DB error: {exc}", file=sys.stderr, flush=True)
            return
        for url, title, visit_count, last_visit in rows:
            if not _keep(url):
                continue
            stamp = _firefox_us_to_iso(last_visit)
            if stamp is None:
                continue
            # ISO strings in the same UTC format compare chronologically.
            if since_iso and stamp < since_iso:
                continue
            yield _build_record(profile, url, title, visit_count, stamp)
    finally:
        # Runs when the generator is exhausted or closed early.
        db.close()
202
+
203
+
204
def iter_visits(profile: Profile,
                since_iso: Optional[str] = None) -> Iterator[Dict]:
    """Yield normalised visit records for *profile*.

    When *since_iso* (a UTC ISO 8601 string) is given, rows whose
    latest visit is older than it are left out. A profile whose family
    is neither Chromium nor Firefox yields nothing.
    """
    readers = {CHROMIUM: _iter_chromium, FIREFOX: _iter_firefox}
    reader = readers.get(profile.family)
    if reader is not None:
        yield from reader(profile, since_iso)
@@ -0,0 +1,223 @@
1
+ """
2
+ Enumerate every browser profile the current user owns across the
3
+ three desktop OSes. Produces one `Profile` record (family, browser,
4
+ profile_id, history_path) per discovered profile.
5
+
6
+ Supports
7
+ --------
8
+ Chromium family Chrome, Edge, Brave, Arc, Vivaldi, Chromium
9
+ Mozilla family Firefox (incl. Developer / Nightly / ESR)
10
+
11
+ Cross-OS path handling
12
+ ----------------------
13
+ macOS ~/Library/Application Support/<vendor>/...
14
+ Linux ~/.config/<vendor>/... + snap + flatpak sandbox paths
15
+ Windows %LOCALAPPDATA% or %APPDATA%\\<vendor>\\...
16
+
17
+ Firefox is special: the profile directory has a random id; the
18
+ active profile names live in profiles.ini next to the directory.
19
+ We enumerate every *.default* directory plus whatever profiles.ini
20
+ lists so users with multiple profiles all flow through.
21
+ """
22
+ from __future__ import annotations
23
+
24
+ import configparser
25
+ import os
26
+ import sys
27
+ from dataclasses import dataclass
28
+ from pathlib import Path
29
+ from typing import Iterator, List
30
+
31
+
32
# Schema-family identifiers stored in `Profile.family`.
CHROMIUM = "chromium"
FIREFOX = "firefox"


@dataclass(frozen=True)
class Profile:
    """One discovered browser profile and the location of its history DB."""

    # Schema family: CHROMIUM or FIREFOX.
    family: str
    # Browser id: "chrome", "edge", "brave", "arc", "vivaldi", "opera"
    # or "firefox".
    browser: str
    # Display id built as "<label>/<directory name>", e.g.
    # "Chrome/Default", "Edge/Profile 1", "Firefox/<random id>.default".
    profile_id: str
    # Location of the `History` / `places.sqlite` file.
    history_path: Path
42
+
43
+
44
+ # ---------------------------------------------------------------- #
45
+ # Per-platform vendor roots #
46
+ # ---------------------------------------------------------------- #
47
+ def _chromium_roots() -> List[tuple]:
48
+ """Return list of (browser_id, browser_label, user_data_root)
49
+ candidates for every Chromium-family browser known to the
50
+ current OS."""
51
+ home = Path.home()
52
+ p = sys.platform
53
+ out: List[tuple] = []
54
+
55
+ if p == "darwin":
56
+ base = home / "Library/Application Support"
57
+ out += [
58
+ ("chrome", "Chrome", base / "Google/Chrome"),
59
+ ("chrome", "Chrome Beta", base / "Google/Chrome Beta"),
60
+ ("chrome", "Chromium", base / "Chromium"),
61
+ ("edge", "Edge", base / "Microsoft Edge"),
62
+ ("brave", "Brave", base / "BraveSoftware/Brave-Browser"),
63
+ ("arc", "Arc", base / "Arc/User Data"),
64
+ ("vivaldi", "Vivaldi", base / "Vivaldi"),
65
+ ("opera", "Opera", base / "com.operasoftware.Opera"),
66
+ ]
67
+ elif p.startswith("linux"):
68
+ cfg = home / ".config"
69
+ out += [
70
+ ("chrome", "Chrome", cfg / "google-chrome"),
71
+ ("chrome", "Chrome Beta", cfg / "google-chrome-beta"),
72
+ ("chrome", "Chromium", cfg / "chromium"),
73
+ ("edge", "Edge", cfg / "microsoft-edge"),
74
+ ("brave", "Brave",
75
+ cfg / "BraveSoftware/Brave-Browser"),
76
+ ("vivaldi", "Vivaldi", cfg / "vivaldi"),
77
+ ("opera", "Opera", cfg / "opera"),
78
+ ]
79
+ # snap + flatpak sandboxed paths
80
+ snap = home / "snap"
81
+ out += [
82
+ ("chrome", "Chromium (snap)",
83
+ snap / "chromium/common/chromium"),
84
+ ]
85
+ flat = home / ".var/app"
86
+ out += [
87
+ ("chrome", "Chromium (flatpak)",
88
+ flat / "org.chromium.Chromium/config/chromium"),
89
+ ("brave", "Brave (flatpak)",
90
+ flat / "com.brave.Browser/config/BraveSoftware/"
91
+ "Brave-Browser"),
92
+ ]
93
+ elif p == "win32":
94
+ local = Path(os.environ.get("LOCALAPPDATA", str(home)))
95
+ roaming = Path(os.environ.get("APPDATA", str(home)))
96
+ out += [
97
+ ("chrome", "Chrome",
98
+ local / "Google/Chrome/User Data"),
99
+ ("chrome", "Chromium",
100
+ local / "Chromium/User Data"),
101
+ ("edge", "Edge",
102
+ local / "Microsoft/Edge/User Data"),
103
+ ("brave", "Brave",
104
+ local / "BraveSoftware/Brave-Browser/User Data"),
105
+ ("vivaldi", "Vivaldi",
106
+ local / "Vivaldi/User Data"),
107
+ ("opera", "Opera",
108
+ roaming / "Opera Software/Opera Stable"),
109
+ ]
110
+ _ = roaming # quiet linter; reserved for future use
111
+ return out
112
+
113
+
114
+ def _firefox_roots() -> List[Path]:
115
+ """Return list of Firefox-family base directories (Mozilla/Firefox
116
+ or equivalent) for the current OS. The actual profile lives one
117
+ or two levels deeper."""
118
+ home = Path.home()
119
+ p = sys.platform
120
+ if p == "darwin":
121
+ base = home / "Library/Application Support"
122
+ return [
123
+ base / "Firefox",
124
+ base / "Firefox Developer Edition",
125
+ base / "Firefox Nightly",
126
+ ]
127
+ if p.startswith("linux"):
128
+ out = [
129
+ home / ".mozilla/firefox",
130
+ home / "snap/firefox/common/.mozilla/firefox",
131
+ home / ".var/app/org.mozilla.firefox/.mozilla/firefox",
132
+ ]
133
+ return out
134
+ if p == "win32":
135
+ roaming = Path(os.environ.get("APPDATA", str(home)))
136
+ return [
137
+ roaming / "Mozilla/Firefox",
138
+ roaming / "Mozilla/Firefox Developer Edition",
139
+ ]
140
+ return []
141
+
142
+
143
+ # ---------------------------------------------------------------- #
144
+ # Discovery #
145
+ # ---------------------------------------------------------------- #
146
def _iter_chromium() -> Iterator[Profile]:
    """Yield a `Profile` for every Chromium-family profile found.

    Within each existing user-data root, a sub-directory counts as a
    profile when it is named "Default" or starts with "Profile" and
    contains a `History` file. Directories are visited in sorted order.
    """
    for browser_id, browser_label, root in _chromium_roots():
        if not root.is_dir():
            continue
        for entry in sorted(root.iterdir()):
            name = entry.name
            looks_like_profile = (name == "Default"
                                  or name.startswith("Profile"))
            if not (entry.is_dir() and looks_like_profile):
                continue
            history = entry / "History"
            if not history.is_file():
                continue
            yield Profile(
                family=CHROMIUM,
                browser=browser_id,
                profile_id=f"{browser_label}/{name}",
                history_path=history,
            )
164
+
165
+
166
def _iter_firefox() -> Iterator[Profile]:
    """Yield a `Profile` for every Firefox profile found.

    For each existing Firefox root, candidate directories come from two
    sources that are merged and de-duplicated:

    * the `Path` entries of the `[Profile*]` sections in
      `profiles.ini` (relative to the root unless `IsRelative` is not
      "1"), and
    * a directory scan of both `<root>/Profiles` and `<root>` itself,
      so discovery still works when the ini is missing, stale or
      unreadable. The scan of `<root>` matters on Linux, where profile
      directories sit directly next to `profiles.ini`.

    A candidate is only yielded when it contains `places.sqlite`.
    """
    for root in _firefox_roots():
        if not root.is_dir():
            continue

        candidates: set = set()

        ini = root / "profiles.ini"
        if ini.is_file():
            try:
                # interpolation=None: a '%' inside a path must be taken
                # literally instead of aborting the whole parse.
                cfg = configparser.ConfigParser(interpolation=None)
                cfg.read(ini, encoding="utf-8")
                for section in cfg.sections():
                    if not section.startswith("Profile"):
                        continue
                    rel = cfg.get(section, "Path", fallback="")
                    if not rel:
                        continue
                    is_relative = cfg.get(section, "IsRelative",
                                          fallback="1") == "1"
                    listed = (root / rel) if is_relative else Path(rel)
                    if listed.is_dir():
                        candidates.add(listed.resolve())
            except (configparser.Error, OSError, UnicodeDecodeError):
                # A broken ini must not stop discovery; the directory
                # scan below still finds the profiles.
                pass

        for parent in (root / "Profiles", root):
            if not parent.is_dir():
                continue
            try:
                children = list(parent.iterdir())
            except OSError:
                continue
            for child in children:
                if child.is_dir():
                    candidates.add(child.resolve())

        for prof_dir in sorted(candidates):
            hist = prof_dir / "places.sqlite"
            # Non-profile directories picked up by the scan are
            # filtered out here because they have no places.sqlite.
            if hist.is_file():
                yield Profile(
                    family=FIREFOX,
                    browser="firefox",
                    profile_id=f"Firefox/{prof_dir.name}",
                    history_path=hist,
                )
208
+
209
+
210
def discover_profiles(
        browser_filter: List[str] = None) -> List[Profile]:
    """Return every browser profile visible on this machine.

    Chromium-family profiles come first, then Firefox profiles. When
    *browser_filter* is non-empty, only profiles whose `browser` id
    matches one of its entries (trimmed, case-insensitive) are kept,
    e.g. ``['chrome', 'firefox']``.
    """
    found: List[Profile] = [*_iter_chromium(), *_iter_firefox()]
    if not browser_filter:
        return found
    wanted = {item.strip().lower() for item in browser_filter
              if item.strip()}
    return [prof for prof in found if prof.browser in wanted]
browser_sync/state.py ADDED
@@ -0,0 +1,130 @@
1
+ """Memory backend for browser-sync. Same template as the sibling
2
+ connector packages — read-write to EvolutionDB via psycopg over the
3
+ PostgreSQL wire protocol. The browser sync stores one row per
4
+ unique URL per profile (keyed by sha1 of the URL) and one
5
+ per-profile watermark holding the ISO timestamp of the most recent
6
+ visit it has already imported."""
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ import time
12
+ from typing import Dict, Optional
13
+
14
+
15
+ def _e(s: str) -> str:
16
+ if not isinstance(s, str):
17
+ s = str(s)
18
+ s = s.replace("\r", " ").replace("\n", " ").replace("\t", " ")
19
+ return s.replace("'", "''")
20
+
21
+
22
+ def _parse_value(raw):
23
+ if isinstance(raw, dict):
24
+ return raw
25
+ if not raw:
26
+ return {}
27
+ try:
28
+ return json.loads(raw)
29
+ except (TypeError, ValueError):
30
+ return {}
31
+
32
+
33
class MemoryStore:
    """Read/write access to one EvolutionDB memory store via psycopg.

    Holds a single autocommit connection and transparently reconnects
    when it drops. Instances can be used as context managers; `close()`
    releases the connection.

    Statements are assembled as text with values passed through `_e`.
    NOTE(review): `store` is interpolated as a bare identifier without
    validation -- it is expected to come from trusted configuration.
    """

    # How many times an operation is tried when the connection is dead.
    _RECONNECT_ATTEMPTS = 3
    # Base delay; the wait grows linearly with the attempt number.
    _RECONNECT_BACKOFF_SEC = 0.5

    def __init__(self, host: str, port: int, user: str, password: str,
                 database: str, store: str, namespace: str):
        """Connect and make sure the memory store exists.

        Raises:
            RuntimeError: psycopg is not installed.
            psycopg.Error: the initial connection failed.
        """
        try:
            import psycopg
        except ImportError as exc:
            raise RuntimeError(
                "evolutiondb-browser-sync requires psycopg. Install "
                "with `pip install psycopg[binary]>=3.1`.") from exc

        self.psycopg = psycopg
        self._conn_kwargs = dict(
            host=host, port=port, user=user, password=password,
            dbname=database, autocommit=True,
        )
        self.store = store
        self.namespace = namespace
        self.conn = self._connect()

        # Best effort: the statement fails when the store already
        # exists, which is the normal case on every run after the first.
        try:
            with self.conn.cursor() as cur:
                cur.execute(f"CREATE MEMORY STORE {self.store}")
        except Exception:
            pass

    def __enter__(self) -> "MemoryStore":
        return self

    def __exit__(self, *exc_info) -> None:
        self.close()

    def close(self) -> None:
        """Close the database connection; safe to call repeatedly."""
        try:
            self.conn.close()
        except Exception:
            pass

    def _connect(self):
        """Open a new connection from the stored parameters."""
        return self.psycopg.connect(**self._conn_kwargs)

    def _is_dead(self, exc: BaseException) -> bool:
        """Return True when *exc* indicates a lost connection."""
        return isinstance(exc, (self.psycopg.OperationalError,
                                self.psycopg.InterfaceError))

    def _retry(self, fn):
        """Run ``fn(cursor)``, reconnecting on a dead connection.

        Errors that are not connection failures propagate immediately.
        After `_RECONNECT_ATTEMPTS` failed tries the last connection
        error is raised.
        """
        last = None
        for attempt in range(self._RECONNECT_ATTEMPTS):
            try:
                with self.conn.cursor() as cur:
                    return fn(cur)
            except Exception as exc:
                if not self._is_dead(exc):
                    raise
                last = exc
                print(f"[browser-sync] db connection lost "
                      f"(attempt {attempt + 1}): {exc}",
                      file=sys.stderr, flush=True)
                try:
                    self.conn.close()
                except Exception:
                    pass
                if attempt + 1 < self._RECONNECT_ATTEMPTS:
                    time.sleep(self._RECONNECT_BACKOFF_SEC *
                               (attempt + 1))
                try:
                    self.conn = self._connect()
                except Exception as reconn:
                    # Keep the closed connection; the next attempt (or
                    # the next call) fails on it and reconnects again.
                    last = reconn
                    continue
        raise last  # type: ignore[misc]

    def _put(self, key: str, value: str) -> None:
        """Upsert *value* under *key* in this store's namespace."""
        statement = (
            f"MEMORY PUT INTO {self.store} VALUES "
            f"('{_e(self.namespace)}','{_e(key)}','{_e(value)}')")
        self._retry(lambda cur: cur.execute(statement))

    # ---------- watermark (per-profile) ----------
    @staticmethod
    def _wm_key(profile_id: str) -> str:
        """Return the memory key that holds a profile's watermark."""
        safe = profile_id.replace("/", "_").replace(" ", "_")
        return f"browser_state_{safe}"

    def get_watermark_iso(self, profile_id: str) -> Optional[str]:
        """Return the stored `last_visited_at` for a profile, or None."""
        key = self._wm_key(profile_id)

        def run(cur):
            cur.execute(
                f"SELECT mem_value FROM __mem_{self.store} "
                f"WHERE mem_namespace = '{_e(self.namespace)}' "
                f"AND mem_key = '{_e(key)}'")
            rows = cur.fetchall()
            if not rows:
                return None
            v = _parse_value(rows[0][0]).get("last_visited_at")
            return str(v) if v else None
        return self._retry(run)

    def set_watermark_iso(self, profile_id: str,
                          last_iso: str) -> None:
        """Store *last_iso* as the profile's watermark."""
        value = json.dumps({"last_visited_at": last_iso,
                            "saved_at": time.time()})
        self._put(self._wm_key(profile_id), value)

    # ---------- records ----------
    def put_record(self, key: str, record: Dict) -> None:
        """Store *record* as JSON under *key*."""
        self._put(key, json.dumps(record, ensure_ascii=False))
browser_sync/sync.py ADDED
@@ -0,0 +1,228 @@
1
+ """
2
+ evolutiondb-browser-sync — daemon that snapshots local browser
3
+ history and pours visit records into the evolutiondb-memory store.
4
+
5
+ Modes
6
+ -----
7
+ --once one sync pass over every discovered profile,
8
+ then exit.
9
+ --interval SECONDS daemon mode. Each pass is incremental.
10
+ --since "30d" first-run wall-clock floor when the profile
11
+ has no stored watermark.
12
+ --browser chrome,edge,firefox
13
+ restrict to a subset of browsers.
14
+ --dry-run do everything except write to memory.
15
+
16
+ No --auth subcommand exists. Browser history lives in files the
17
+ user already owns, so there is no OAuth flow to start.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import json
23
+ import os
24
+ import re
25
+ import signal
26
+ import sys
27
+ import time
28
+ from datetime import datetime, timedelta, timezone
29
+ from pathlib import Path
30
+ from typing import Dict, List, Optional
31
+
32
+ from . import scanner as scan_mod
33
+ from . import extract as ext_mod
34
+ from . import state as state_mod
35
+
36
+
37
+ # ---------------------------------------------------------------- #
38
+ # Config + .env loader (same shape as sibling syncs) #
39
+ # ---------------------------------------------------------------- #
40
+ def _load_dotenv(path: Path) -> None:
41
+ if not path.exists():
42
+ return
43
+ for raw in path.read_text(encoding="utf-8").splitlines():
44
+ line = raw.strip()
45
+ if not line or line.startswith("#") or "=" not in line:
46
+ continue
47
+ k, _, v = line.partition("=")
48
+ k, v = k.strip(), v.strip().strip('"').strip("'")
49
+ if v:
50
+ os.environ.setdefault(k, v)
51
+
52
+
53
class Config:
    """Runtime settings read from environment variables.

    Each attribute falls back to a built-in default when its variable
    is unset. `EVOSQL_PORT` and `BROWSER_POLL_INTERVAL` are converted
    with `int()`, so a non-numeric value raises `ValueError`.
    """

    def __init__(self):
        env = os.environ.get
        # Database connection.
        self.evosql_host = env("EVOSQL_HOST", "127.0.0.1")
        self.evosql_port = int(env("EVOSQL_PORT", "5433"))
        self.evosql_user = env("EVOSQL_USER", "admin")
        self.evosql_pass = env("EVOSQL_PASSWORD", "admin")
        self.evosql_db = env("EVOSQL_DATABASE", "evosql")
        # Memory namespace and store name.
        self.user_id = env("MCP_USER_ID", "default_user")
        self.store = env("BROWSER_MEMORY_STORE", "mcp_mem")
        # Poll interval in seconds.
        self.poll_secs = int(env("BROWSER_POLL_INTERVAL", "1800"))
66
+
67
+
68
_SINCE_RE = re.compile(r"^\s*(\d+)\s*([smhdw])\s*$", re.I)


def parse_since(text: str) -> str:
    """Turn a relative age such as "30d" into an absolute UTC timestamp.

    Args:
        text: A non-negative integer followed by a unit letter
            (s, m, h, d or w; case-insensitive), e.g. "1h", "7d".

    Returns:
        The instant that long before now, as a second-resolution
        ISO 8601 string ending in "Z".

    Raises:
        ValueError: *text* is malformed, or the span is too large to
            represent.
    """
    m = _SINCE_RE.match(text)
    if not m:
        raise ValueError(
            f"--since must be like '1h', '7d', got {text!r}")
    n, unit = int(m.group(1)), m.group(2).lower()
    keyword = {
        "s": "seconds",
        "m": "minutes",
        "h": "hours",
        "d": "days",
        "w": "weeks",
    }[unit]
    try:
        # Build only the requested delta: constructing all five up
        # front made e.g. "200000000s" fail because weeks=200000000
        # overflows timedelta.
        floor = datetime.now(timezone.utc) - timedelta(**{keyword: n})
    except OverflowError as exc:
        raise ValueError(
            f"--since value {text!r} is too far in the past") from exc
    return floor.isoformat(timespec="seconds").replace("+00:00", "Z")
86
+
87
+
88
+ # ---------------------------------------------------------------- #
89
+ # Pass #
90
+ # ---------------------------------------------------------------- #
91
+ def _record_key(record: Dict) -> str:
92
+ """Stable per (browser, profile, url) key so re-syncing the same
93
+ URL upserts the watermark / visit_count instead of multiplying
94
+ rows in the memory store."""
95
+ profile_slug = (record["profile"]
96
+ .replace("/", "_").replace(" ", "_"))
97
+ return (f"browser_visit_{record['browser']}_"
98
+ f"{profile_slug}_{record['url_hash']}")
99
+
100
+
101
def sync_once(cfg: Config, *, since_iso: Optional[str],
              browser_filter: Optional[List[str]],
              dry_run: bool = False) -> Dict[str, int]:
    """Run one sync pass over every discovered profile.

    For each profile the stored watermark (or, when there is none,
    *since_iso*) is used as the lower bound. Every visit at or after it
    is written to the memory store, and the watermark is then advanced
    to the newest visit seen. A profile that fails is reported on
    stderr and counted; its watermark is left untouched.

    Args:
        cfg: Connection and store settings.
        since_iso: Lower bound for profiles without a watermark.
        browser_filter: Browser ids to restrict the pass to, or None.
        dry_run: When true, no database connection is opened and
            nothing is written; visits are only counted.

    Returns:
        Counters with the keys "profiles", "visits", "skipped" and
        "errors" ("skipped" is reported but never incremented here).

    Raises:
        Exception: whatever the store raises while connecting or while
            reading/writing a watermark; only per-profile extraction
            and record writes are caught.
    """
    profiles = scan_mod.discover_profiles(browser_filter)
    counters = {"profiles": 0, "visits": 0,
                "skipped": 0, "errors": 0}

    store: Optional[state_mod.MemoryStore] = None
    if not dry_run:
        store = state_mod.MemoryStore(
            host=cfg.evosql_host, port=cfg.evosql_port,
            user=cfg.evosql_user, password=cfg.evosql_pass,
            database=cfg.evosql_db, store=cfg.store,
            namespace=cfg.user_id,
        )

    try:
        for prof in profiles:
            counters["profiles"] += 1
            wm = (store.get_watermark_iso(prof.profile_id)
                  if store is not None else None)
            floor = wm or since_iso
            latest = floor or ""
            try:
                for record in ext_mod.iter_visits(prof, since_iso=floor):
                    key = _record_key(record)
                    if store is not None:
                        store.put_record(key, record)
                    counters["visits"] += 1
                    # Same-format UTC ISO strings sort chronologically.
                    if record["last_visited_at"] > latest:
                        latest = record["last_visited_at"]
            except Exception as exc:  # noqa: BLE001
                print(f"[browser-sync] {prof.profile_id} failed: {exc}",
                      file=sys.stderr, flush=True)
                counters["errors"] += 1
                continue
            if store is not None and latest and latest != (wm or ""):
                store.set_watermark_iso(prof.profile_id, latest)
    finally:
        # A new store is opened on every pass; without this each pass
        # of the daemon loop would leave a database connection open.
        if store is not None:
            try:
                store.conn.close()
            except Exception:  # noqa: BLE001
                pass

    return counters
142
+
143
+
144
+ # ---------------------------------------------------------------- #
145
+ # Signals #
146
+ # ---------------------------------------------------------------- #
147
+ _stop = False
148
+
149
+
150
+ def _install_signal_handlers() -> None:
151
+ def _handler(_signum, _frame):
152
+ global _stop
153
+ _stop = True
154
+ print("[browser-sync] stop requested, finishing current pass",
155
+ file=sys.stderr, flush=True)
156
+ for s in (signal.SIGTERM, signal.SIGINT):
157
+ try:
158
+ signal.signal(s, _handler)
159
+ except (ValueError, OSError):
160
+ pass
161
+
162
+
163
+ # ---------------------------------------------------------------- #
164
+ # CLI #
165
+ # ---------------------------------------------------------------- #
166
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list; None means `sys.argv[1:]`.

    Returns:
        Process exit status: 0 on success, 2 for invalid `--since` or
        invalid numeric configuration, 4 when a sync pass failed. In
        daemon mode the status of the last pass is returned.
    """
    parser = argparse.ArgumentParser(
        prog="evolutiondb-browser-sync",
        description="Sync browser history into EvolutionDB memory.")
    parser.add_argument("--once", action="store_true")
    parser.add_argument("--interval", type=int)
    parser.add_argument("--since", default="7d")
    parser.add_argument(
        "--browser", default="",
        help="Comma list (chrome,edge,firefox,brave,arc,vivaldi,"
             "opera). Empty means every detected browser.")
    parser.add_argument("--list", action="store_true",
                        help="Print every detected profile and exit.")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--env-file", default=".env")
    args = parser.parse_args(argv)

    # Load the .env file before Config() reads the environment.
    _load_dotenv(Path(args.env_file).expanduser())

    browser_filter = (
        [b for b in args.browser.split(",") if b.strip()]
        if args.browser else None)

    if args.list:
        profiles = scan_mod.discover_profiles(browser_filter)
        for p in profiles:
            print(f"{p.browser:10s}  {p.profile_id:40s}  "
                  f"{p.history_path}")
        if not profiles:
            print("(no profiles detected)")
        return 0

    try:
        since_iso = parse_since(args.since)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    try:
        cfg = Config()
    except ValueError as exc:
        # A non-numeric EVOSQL_PORT / BROWSER_POLL_INTERVAL is a user
        # error; report it like a bad --since instead of a traceback.
        print(f"error: invalid configuration: {exc}", file=sys.stderr)
        return 2
    _install_signal_handlers()

    def run_pass() -> int:
        """Run one pass and print its outcome as one JSON line."""
        try:
            counts = sync_once(cfg, since_iso=since_iso,
                               browser_filter=browser_filter,
                               dry_run=args.dry_run)
            print(json.dumps({"ok": True, **counts}), flush=True)
            return 0
        except Exception as exc:  # noqa: BLE001
            print(json.dumps({"ok": False, "error": str(exc)}),
                  flush=True)
            return 4

    # Without --interval (or with --once) this is a one-shot run.
    if args.once or not args.interval:
        return run_pass()

    # Daemon mode; intervals below one minute are raised to 60 s.
    interval = max(60, int(args.interval))
    rc = 0
    while not _stop:
        rc = run_pass()
        # Sleep in one-second slices so a stop request is honoured
        # promptly instead of after the full interval.
        for _ in range(interval):
            if _stop:
                break
            time.sleep(1)
    return rc
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: evolutiondb-browser-sync
3
+ Version: 0.1.0
4
+ Summary: Sync browser history (Chrome, Edge, Firefox) into EvolutionDB long-term memory.
5
+ Author-email: alptekin topal <topal.alptekin@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/alptekin/evolutiondb
8
+ Project-URL: Repository, https://github.com/alptekin/evolutiondb
9
+ Project-URL: Source, https://github.com/alptekin/evolutiondb/tree/main/client/browser-sync
10
+ Project-URL: Issues, https://github.com/alptekin/evolutiondb/issues
11
+ Keywords: evolutiondb,browser,history,chrome,edge,firefox,long-term-memory,agent-memory,mcp
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Operating System :: POSIX :: Linux
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Programming Language :: Python :: 3.13
26
+ Classifier: Topic :: Database
27
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
28
+ Requires-Python: >=3.9
29
+ Requires-Dist: psycopg[binary]>=3.1
@@ -0,0 +1,11 @@
1
+ browser_sync/__init__.py,sha256=Q_VVL-1Ove9j8DowqW0AYIa2QQbROunVMW0xq6MPv6g,413
2
+ browser_sync/__main__.py,sha256=8Ap-X_Be9dePdOFaAWS6BNHL2kg3FQHkMgQ6Hufk7jg,80
3
+ browser_sync/extract.py,sha256=10xCWtSADqBuRmjfNJldstnAw3ih_t3DAzTxIEc4yBQ,7972
4
+ browser_sync/scanner.py,sha256=xj_quelH4sJL4uxiA83XEPIZy4xeBSuDuns4aYuwQvI,8328
5
+ browser_sync/state.py,sha256=5BnSiTKwENvEyIUunFhCfUT7_4Q5Mv7gnOZBslEXUOY,4553
6
+ browser_sync/sync.py,sha256=JESNnblQSEB7IKwKma9AfH5Bi9th6FedZLJciEXPzBQ,8215
7
+ evolutiondb_browser_sync-0.1.0.dist-info/METADATA,sha256=k4lxKp2up8lSfdqu7KLs-62f7wyuSHWuP9KxsELP23Q,1408
8
+ evolutiondb_browser_sync-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ evolutiondb_browser_sync-0.1.0.dist-info/entry_points.txt,sha256=8CbFmjK0Ls5Usp86mI_VNCT2PHL3bmtJifBFZpLfc2M,72
10
+ evolutiondb_browser_sync-0.1.0.dist-info/top_level.txt,sha256=ZOT504CJKp9k-UKUagrC2S2-YZkSVCiGOfMzwbkcgFg,13
11
+ evolutiondb_browser_sync-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ evolutiondb-browser-sync = browser_sync.__main__:main
@@ -0,0 +1 @@
1
+ browser_sync