ai-browser-profile 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/ai_browser_profile/__init__.py +6 -0
- package/ai_browser_profile/db.py +929 -0
- package/ai_browser_profile/embeddings.py +196 -0
- package/ai_browser_profile/extract.py +108 -0
- package/ai_browser_profile/ingestors/__init__.py +0 -0
- package/ai_browser_profile/ingestors/bookmarks.py +185 -0
- package/ai_browser_profile/ingestors/browser_detect.py +100 -0
- package/ai_browser_profile/ingestors/constants.py +208 -0
- package/ai_browser_profile/ingestors/history.py +123 -0
- package/ai_browser_profile/ingestors/indexeddb.py +203 -0
- package/ai_browser_profile/ingestors/localstorage.py +66 -0
- package/ai_browser_profile/ingestors/logins.py +46 -0
- package/ai_browser_profile/ingestors/messages.py +151 -0
- package/ai_browser_profile/ingestors/notion.py +313 -0
- package/ai_browser_profile/ingestors/webdata.py +134 -0
- package/autofill/SKILL.md +252 -0
- package/bin/cli.js +315 -0
- package/clean.py +295 -0
- package/extract.py +53 -0
- package/package.json +40 -0
- package/review/SKILL.md +171 -0
- package/review/run.sh +82 -0
- package/setup/SKILL.md +177 -0
- package/skill/SKILL.md +180 -0
- package/whatsapp/SKILL.md +321 -0
package/README.md
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# ai-browser-profile
|
|
2
|
+
|
|
3
|
+
Extract what your browser knows about you into a self-ranking SQLite database. It reads autofill, login data, browsing history, bookmarks, WhatsApp contacts, LinkedIn connections, and Notion workspaces — directly from local browser files.
|
|
4
|
+
|
|
5
|
+
## What it extracts
|
|
6
|
+
|
|
7
|
+
| Source | Data | Browser files |
|
|
8
|
+
|--------|------|---------------|
|
|
9
|
+
| **Web Data** | Autofill, addresses, credit cards | `Web Data` SQLite |
|
|
10
|
+
| **Login Data** | Accounts, emails, usernames | `Login Data` SQLite |
|
|
11
|
+
| **History** | Tool/service usage frequency | `History` SQLite |
|
|
12
|
+
| **Bookmarks** | Interests, saved tools | `Bookmarks` JSON |
|
|
13
|
+
| **IndexedDB** | WhatsApp contacts | LevelDB via `ccl_chromium_reader` |
|
|
14
|
+
| **Local Storage** | LinkedIn connections | LevelDB via `ccl_chromium_reader` |
|
|
15
|
+
| **Notion** | Workspace users, pages | IndexedDB |
|
|
16
|
+
|
|
17
|
+
Supported browsers: Arc, Chrome, Brave, Edge, Safari, Firefox.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npx ai-browser-profile init # sets up ~/ai-browser-profile, Python venv, core deps
|
|
23
|
+
npx ai-browser-profile install-embeddings # optional: semantic search (~180MB)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Requires Python 3.10+ and Node.js 16+. macOS only (reads from `~/Library/Application Support/`).
|
|
27
|
+
|
|
28
|
+
`npx ai-browser-profile init` creates `~/ai-browser-profile/` with a Python venv, installs the core dependencies, and symlinks the Claude Code skills into `~/.claude/skills/`.
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
cd ~/ai-browser-profile && source .venv/bin/activate
|
|
34
|
+
python extract.py # scan all browsers
|
|
35
|
+
python extract.py --browsers arc chrome # specific browsers
|
|
36
|
+
python extract.py --no-indexeddb --no-localstorage # skip LevelDB (faster)
|
|
37
|
+
python extract.py --output /path/to/memories.db # custom output path
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
To update after a new release:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
npx ai-browser-profile update # updates code, preserves memories.db
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Python API
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from ai_browser_profile import MemoryDB, extract_memories
|
|
50
|
+
|
|
51
|
+
# Extract from browsers
|
|
52
|
+
mem = extract_memories("memories.db")
|
|
53
|
+
|
|
54
|
+
# Query
|
|
55
|
+
mem.search(tags=["identity", "contact_info"])
|
|
56
|
+
mem.text_search("github")
|
|
57
|
+
mem.semantic_search("what tools do I use most")
|
|
58
|
+
|
|
59
|
+
# Profile summary
|
|
60
|
+
print(mem.profile_text())
|
|
61
|
+
|
|
62
|
+
# History + supersession chain
|
|
63
|
+
mem.history("email")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## How it works
|
|
67
|
+
|
|
68
|
+
**Self-ranking** — each memory tracks `appeared_count` (how often it was seen during extraction) and `accessed_count` (how often it was queried). The ratio `accessed_count / appeared_count` is the `hit_rate`, used to surface the most relevant memories.
|
|
69
|
+
|
|
70
|
+
**Semantic dedup** — new entries are compared against existing ones using [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) embeddings (768-dim, ONNX Runtime). If a new entry has cosine similarity >= 0.92 to an existing entry with the same key prefix, the old entry is superseded rather than duplicated.
|
|
71
|
+
|
|
72
|
+
**Key schema** — memories use structured keys with cardinality rules:
|
|
73
|
+
- **Single-value** (`first_name`, `last_name`, `full_name`, ...): new values automatically supersede old ones
|
|
74
|
+
- **Multi-value** (`email`, `phone`, `account:github.com`, `tool:vscode`, ...): multiple values coexist
|
|
75
|
+
|
|
76
|
+
**Entity linking** — accounts sharing the same username/email are automatically linked via `same_identity` relations.
|
|
77
|
+
|
|
78
|
+
## Schema
|
|
79
|
+
|
|
80
|
+
```sql
|
|
81
|
+
memories (id, key, value, confidence, source, appeared_count, accessed_count,
|
|
82
|
+
created_at, last_appeared_at, last_accessed_at, superseded_by,
|
|
83
|
+
superseded_at, search_text, reviewed_at)
|
|
84
|
+
|
|
85
|
+
memory_tags (memory_id, tag) -- identity, contact_info, address, payment,
|
|
86
|
+
-- account, tool, contact, work, knowledge,
|
|
87
|
+
-- communication, social, finance
|
|
88
|
+
|
|
89
|
+
memory_links (source_id, target_id, relation, created_at)
|
|
90
|
+
|
|
91
|
+
memory_embeddings (memory_id, embedding) -- 768-dim BLOB
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Project structure
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
extract.py # CLI entry point
|
|
98
|
+
ai_browser_profile/
|
|
99
|
+
__init__.py # exports MemoryDB, extract_memories
|
|
100
|
+
db.py # MemoryDB: schema, upsert, search, profile
|
|
101
|
+
embeddings.py # ONNX Runtime embeddings + cosine search
|
|
102
|
+
extract.py # extraction orchestrator
|
|
103
|
+
ingestors/
|
|
104
|
+
browser_detect.py # find browser profiles
|
|
105
|
+
constants.py # lookup maps, browser paths
|
|
106
|
+
webdata.py # autofill, addresses, credit cards
|
|
107
|
+
history.py # browsing history → tool usage
|
|
108
|
+
logins.py # saved logins → accounts
|
|
109
|
+
bookmarks.py # bookmarks → interests
|
|
110
|
+
indexeddb.py # WhatsApp contacts
|
|
111
|
+
localstorage.py # LinkedIn connections
|
|
112
|
+
notion.py # Notion workspace data
|
|
113
|
+
messages.py # message extraction
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT
|