ai-browser-profile 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/ai_browser_profile/__init__.py +6 -0
- package/ai_browser_profile/db.py +929 -0
- package/ai_browser_profile/embeddings.py +196 -0
- package/ai_browser_profile/extract.py +108 -0
- package/ai_browser_profile/ingestors/__init__.py +0 -0
- package/ai_browser_profile/ingestors/bookmarks.py +185 -0
- package/ai_browser_profile/ingestors/browser_detect.py +100 -0
- package/ai_browser_profile/ingestors/constants.py +208 -0
- package/ai_browser_profile/ingestors/history.py +123 -0
- package/ai_browser_profile/ingestors/indexeddb.py +203 -0
- package/ai_browser_profile/ingestors/localstorage.py +66 -0
- package/ai_browser_profile/ingestors/logins.py +46 -0
- package/ai_browser_profile/ingestors/messages.py +151 -0
- package/ai_browser_profile/ingestors/notion.py +313 -0
- package/ai_browser_profile/ingestors/webdata.py +134 -0
- package/autofill/SKILL.md +252 -0
- package/bin/cli.js +315 -0
- package/clean.py +295 -0
- package/extract.py +53 -0
- package/package.json +40 -0
- package/review/SKILL.md +171 -0
- package/review/run.sh +82 -0
- package/setup/SKILL.md +177 -0
- package/skill/SKILL.md +180 -0
- package/whatsapp/SKILL.md +321 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: autofill-profiles
|
|
3
|
+
description: "Extract structured autofill data (names, emails, phones, addresses, companies) from Chromium browser 'Web Data' SQLite files. Use when: 'autofill data', 'browser addresses', 'saved addresses', 'autofill profiles', 'who is this person', 'extract contact info from browser', 'browser PII', 'form data'."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Autofill Profile Extraction
|
|
7
|
+
|
|
8
|
+
Extract structured personal data (names, emails, phones, addresses, companies) from Chromium-based browsers' `Web Data` SQLite files. Works with Arc, Chrome, Brave, and Edge.
|
|
9
|
+
|
|
10
|
+
## Where the Data Lives
|
|
11
|
+
|
|
12
|
+
Every Chromium browser profile has a `Web Data` SQLite file:
|
|
13
|
+
|
|
14
|
+
| Browser | Path |
|
|
15
|
+
|---------|------|
|
|
16
|
+
| Arc | `~/Library/Application Support/Arc/User Data/{Profile}/Web Data` |
|
|
17
|
+
| Chrome | `~/Library/Application Support/Google/Chrome/{Profile}/Web Data` |
|
|
18
|
+
| Brave | `~/Library/Application Support/BraveSoftware/Brave-Browser/{Profile}/Web Data` |
|
|
19
|
+
| Edge | `~/Library/Application Support/Microsoft Edge/{Profile}/Web Data` |
|
|
20
|
+
|
|
21
|
+
Where `{Profile}` is `Default`, `Profile 1`, `Profile 2`, etc.
|
|
22
|
+
|
|
23
|
+
## Schema
|
|
24
|
+
|
|
25
|
+
### Structured Address Profiles
|
|
26
|
+
|
|
27
|
+
The modern Chromium schema stores address profiles across two tables:
|
|
28
|
+
|
|
29
|
+
**`addresses`** — profile metadata:
|
|
30
|
+
```sql
|
|
31
|
+
CREATE TABLE addresses (
|
|
32
|
+
guid VARCHAR PRIMARY KEY,
|
|
33
|
+
use_count INTEGER NOT NULL DEFAULT 0,
|
|
34
|
+
use_date INTEGER NOT NULL DEFAULT 0, -- Unix timestamp
|
|
35
|
+
date_modified INTEGER NOT NULL DEFAULT 0,
|
|
36
|
+
language_code VARCHAR,
|
|
37
|
+
label VARCHAR,
|
|
38
|
+
initial_creator_id INTEGER DEFAULT 0,
|
|
39
|
+
last_modifier_id INTEGER DEFAULT 0,
|
|
40
|
+
record_type INTEGER -- 0=local, 1=synced from Google account
|
|
41
|
+
);
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**`address_type_tokens`** — the actual field values:
|
|
45
|
+
```sql
|
|
46
|
+
CREATE TABLE address_type_tokens (
|
|
47
|
+
guid VARCHAR, -- FK to addresses.guid
|
|
48
|
+
type INTEGER, -- field type code (see mapping below)
|
|
49
|
+
value VARCHAR, -- the actual data
|
|
50
|
+
verification_status INTEGER DEFAULT 0,
|
|
51
|
+
observations BLOB,
|
|
52
|
+
PRIMARY KEY (guid, type)
|
|
53
|
+
);
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Type Code Mapping
|
|
57
|
+
|
|
58
|
+
| Type | Field | Example |
|
|
59
|
+
|------|-------|---------|
|
|
60
|
+
| 3 | First name | Matthew |
|
|
61
|
+
| 4 | Middle name | |
|
|
62
|
+
| 5 | Last name | Diakonov |
|
|
63
|
+
| 7 | Full name | Matthew Diakonov |
|
|
64
|
+
| 9 | Email | i@m13v.com |
|
|
65
|
+
| 14 | Phone | +1 650-796-1489 |
|
|
66
|
+
| 33 | City | San Francisco |
|
|
67
|
+
| 34 | State | California |
|
|
68
|
+
| 35 | ZIP | 94117 |
|
|
69
|
+
| 36 | Country | US |
|
|
70
|
+
| 60 | Company | Mediar, Inc. |
|
|
71
|
+
| 77 | Street address | 546 Fillmore st. |
|
|
72
|
+
| 79 | Address line 2 | Apt 4B |
|
|
73
|
+
| 103 | Street name | Marina Boulevard |
|
|
74
|
+
| 104 | House number | 2 |
|
|
75
|
+
| 109 | Family name (alt) | Diakonov |
|
|
76
|
+
| 142 | Full street (alt) | Marina Boulevard 2 |
|
|
77
|
+
|
|
78
|
+
Types not listed (32, 81, 105, 107, 108, 110, 116, 135, 136, 140, 141, 143, 144, 151-153, 156-157, 166-167) are usually empty — they hold name affixes, honorifics, and address subcomponents for i18n.
|
|
79
|
+
|
|
80
|
+
### Form Autofill Entries
|
|
81
|
+
|
|
82
|
+
The **`autofill`** table stores raw form field values the user has typed:
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
CREATE TABLE autofill (
|
|
86
|
+
name VARCHAR, -- HTML field name or id
|
|
87
|
+
value VARCHAR, -- what the user typed
|
|
88
|
+
value_lower VARCHAR, -- lowercased for lookup
|
|
89
|
+
date_created INTEGER,
|
|
90
|
+
date_last_used INTEGER,
|
|
91
|
+
count INTEGER DEFAULT 1,
|
|
92
|
+
PRIMARY KEY (name, value)
|
|
93
|
+
);
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Common field names: `email`, `firstName`, `lastName`, `name`, `phone`, `city`, `state`, `zip`, `company`, `username`, `address`, `identifier`.
|
|
97
|
+
|
|
98
|
+
### Credit Cards (encrypted)
|
|
99
|
+
|
|
100
|
+
```sql
|
|
101
|
+
CREATE TABLE credit_cards (
|
|
102
|
+
guid VARCHAR PRIMARY KEY,
|
|
103
|
+
name_on_card VARCHAR,
|
|
104
|
+
expiration_month INTEGER,
|
|
105
|
+
expiration_year INTEGER,
|
|
106
|
+
card_number_encrypted BLOB, -- AES-encrypted, requires OS keychain
|
|
107
|
+
date_modified INTEGER,
|
|
108
|
+
origin VARCHAR,
|
|
109
|
+
use_count INTEGER,
|
|
110
|
+
use_date INTEGER,
|
|
111
|
+
billing_address_id VARCHAR,
|
|
112
|
+
nickname VARCHAR
|
|
113
|
+
);
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Card numbers are AES-encrypted and require macOS Keychain access to decrypt. `name_on_card`, `expiration_month`, `expiration_year`, and `nickname` are plaintext.
|
|
117
|
+
|
|
118
|
+
## Extraction Workflow
|
|
119
|
+
|
|
120
|
+
### Step 1: Copy the database (avoid browser locks)
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# "~" is not expanded inside double quotes, so spell the home directory as $HOME.
cp "$HOME/Library/Application Support/Arc/User Data/Default/Web Data" /tmp/webdata.db
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Step 2: Extract structured address profiles
|
|
127
|
+
|
|
128
|
+
```sql
|
|
129
|
+
-- All address profiles with non-empty fields
|
|
130
|
+
SELECT a.guid, a.use_count, a.record_type, t.type, t.value
|
|
131
|
+
FROM addresses a
|
|
132
|
+
JOIN address_type_tokens t ON a.guid = t.guid
|
|
133
|
+
WHERE t.value != ''
|
|
134
|
+
ORDER BY a.use_count DESC, a.guid, t.type;
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Step 3: Build structured profiles (Python)
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import sqlite3, shutil, tempfile
|
|
141
|
+
from pathlib import Path
|
|
142
|
+
|
|
143
|
+
# Field-type codes from `address_type_tokens.type` mapped to profile keys.
# Codes that are not listed here are skipped during extraction.
TYPE_MAP = {
    3: "first_name", 4: "middle_name", 5: "last_name", 7: "full_name",
    9: "email", 14: "phone",
    33: "city", 34: "state", 35: "zip", 36: "country",
    60: "company", 77: "street_address", 79: "address_line_2",
    103: "street_name", 104: "house_number", 109: "family_name",
    142: "full_street",
}


def extract_address_profiles(webdata_path: Path) -> list[dict]:
    """Extract structured address profiles from a Chromium Web Data file.

    The database (plus any ``-wal`` / ``-shm`` sidecar files) is copied into a
    temporary directory and opened read-only there, so the original file is
    never opened by SQLite.

    Args:
        webdata_path: Path to the ``Web Data`` SQLite file (``str`` also accepted).

    Returns:
        One dict per row of ``addresses`` with ``guid``, ``use_count``,
        ``use_date``, ``record_type`` ("synced" when the column is 1, otherwise
        "local") and one key per non-empty token whose type is in ``TYPE_MAP``.
        Sorted by ``use_count``, highest first. An empty list is returned when
        reading the copy fails; the error is printed.

    Raises:
        OSError: if the database file itself cannot be copied.
    """
    webdata_path = Path(webdata_path)
    tmp = Path(tempfile.mkdtemp())
    profiles = []
    try:
        dst = tmp / "Web Data"
        shutil.copy2(webdata_path, dst)
        for suffix in ("-wal", "-shm"):
            sidecar = webdata_path.parent / (webdata_path.name + suffix)
            if sidecar.exists():
                # SQLite looks for sidecars next to the copy, named after the
                # copy, so use dst's name rather than the source's name.
                shutil.copy2(sidecar, tmp / (dst.name + suffix))

        conn = None
        try:
            conn = sqlite3.connect(f"file:{dst}?mode=ro", uri=True)
            conn.row_factory = sqlite3.Row

            addresses = {}
            for row in conn.execute("SELECT guid, use_count, use_date, record_type FROM addresses"):
                addresses[row["guid"]] = {
                    "guid": row["guid"],
                    "use_count": row["use_count"],
                    "use_date": row["use_date"],
                    "record_type": "synced" if row["record_type"] == 1 else "local",
                }

            for row in conn.execute("SELECT guid, type, value FROM address_type_tokens WHERE value != ''"):
                guid = row["guid"]
                if guid not in addresses:
                    # Token row without a matching addresses row: nothing to attach it to.
                    continue
                field = TYPE_MAP.get(row["type"])
                if field:
                    addresses[guid][field] = row["value"]

            profiles = sorted(addresses.values(), key=lambda x: x["use_count"], reverse=True)
        except Exception as e:
            # Best-effort: report the problem and fall through with no profiles.
            print(f"Error: {e}")
        finally:
            if conn is not None:
                conn.close()
    finally:
        # Always remove the scratch copy, including when the copy step itself raised.
        shutil.rmtree(tmp, ignore_errors=True)

    return profiles
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Step 4: Extract form autofill entries
|
|
195
|
+
|
|
196
|
+
```sql
|
|
197
|
+
-- Top autofill entries by usage
|
|
198
|
+
SELECT name, value, count FROM autofill ORDER BY count DESC LIMIT 50;
|
|
199
|
+
|
|
200
|
+
-- Emails
|
|
201
|
+
SELECT value, count FROM autofill WHERE lower(name) IN ('email', 'e-mail', 'email_address', 'emailaddress') ORDER BY count DESC;
|
|
202
|
+
|
|
203
|
+
-- Names
|
|
204
|
+
SELECT name, value, count FROM autofill WHERE lower(name) IN ('name', 'firstname', 'first_name', 'first-name', 'given-name', 'lastname', 'last_name', 'last-name', 'family-name', 'fullname', 'full_name', 'full-name') ORDER BY count DESC;
|
|
205
|
+
|
|
206
|
+
-- Phones
|
|
207
|
+
SELECT value, count FROM autofill WHERE lower(name) IN ('phone', 'tel', 'telephone', 'mobile', 'cell', 'phonenumber', 'phone_number') ORDER BY count DESC;
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Step 5: Extract credit card metadata (no card numbers)
|
|
211
|
+
|
|
212
|
+
```sql
|
|
213
|
+
SELECT name_on_card, expiration_month, expiration_year, nickname, use_count
|
|
214
|
+
FROM credit_cards
|
|
215
|
+
ORDER BY use_count DESC;
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## All Browsers at Once
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from pathlib import Path
|
|
222
|
+
|
|
223
|
+
APP_SUPPORT = Path.home() / "Library" / "Application Support"

# Browser key -> directory that contains that browser's profile folders.
BROWSER_PATHS = {
    "arc": APP_SUPPORT / "Arc" / "User Data",
    "chrome": APP_SUPPORT / "Google" / "Chrome",
    "brave": APP_SUPPORT / "BraveSoftware" / "Brave-Browser",
    "edge": APP_SUPPORT / "Microsoft Edge",
}


def find_all_webdata() -> list[tuple[str, str, Path]]:
    """Locate every ``Web Data`` file under the directories in ``BROWSER_PATHS``.

    Returns:
        ``(browser, profile_dir_name, web_data_path)`` tuples, browsers in
        ``BROWSER_PATHS`` order and profile directories in sorted order. Only
        directories named ``Default`` or starting with ``Profile `` that hold a
        ``Web Data`` entry are reported.
    """
    def _is_profile_dir(entry: Path) -> bool:
        if not entry.is_dir():
            return False
        return entry.name == "Default" or entry.name.startswith("Profile ")

    found = []
    for browser_name, root in BROWSER_PATHS.items():
        if not root.exists():
            continue
        found.extend(
            (browser_name, profile_dir.name, profile_dir / "Web Data")
            for profile_dir in sorted(root.iterdir())
            if _is_profile_dir(profile_dir) and (profile_dir / "Web Data").exists()
        )
    return found
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Notes
|
|
247
|
+
|
|
248
|
+
- **Safari** does not use `Web Data` — its autofill is in `~/Library/Safari/Form Values` (binary plist, requires Full Disk Access)
|
|
249
|
+
- **Firefox** stores autofill in `formhistory.sqlite` in the profile directory, not `Web Data`
|
|
250
|
+
- Data persists even after clearing browser history — autofill is separate
|
|
251
|
+
- Google account sync means the same profiles appear across Chrome and Arc if logged into the same account
|
|
252
|
+
- `record_type=1` (synced) profiles came from a Google account and are the most reliable identity data
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
const os = require('os');
|
|
7
|
+
const { spawnSync } = require('child_process');
|
|
8
|
+
|
|
9
|
+
// Install location (~/ai-browser-profile) and the root of the unpacked npm package.
const HOME = os.homedir();
const DEST = path.join(HOME, 'ai-browser-profile');
const PKG_ROOT = path.resolve(__dirname, '..');

// Files/dirs copied from the npm package into DEST.
const COPY_TARGETS = [
  'ai_browser_profile',
  'extract.py',
  'clean.py',
  'skill',
  'review',
  'setup',
  'autofill',
  'whatsapp',
];

// Names that update() refuses to overwrite.
const NEVER_OVERWRITE = new Set([
  'memories.db',
  '.venv',
  'scripts',
  'config.json',
]);

// Tier 1 Python deps — enough for tag search, SQL, extraction.
// ccl_chromium_reader is only on GitHub, not PyPI, hence the git+ URL.
const CORE_DEPS = [
  'git+https://github.com/cclgroupltd/ccl_chromium_reader.git',
  'numpy',
];

// Tier 2 Python deps — optional, for semantic search.
const EMBEDDING_DEPS = [
  'onnxruntime',
  'huggingface_hub',
  'tokenizers',
];
|
|
37
|
+
|
|
38
|
+
/**
 * Recursively copy the contents of `src` into `dest`.
 *
 * `dest` is created when missing, existing files are overwritten, and any
 * entry named `__pycache__` is skipped at every level.
 *
 * @param {string} src - Directory to read from.
 * @param {string} dest - Directory to write into.
 */
function copyDir(src, dest) {
  fs.mkdirSync(dest, { recursive: true });

  const wanted = fs
    .readdirSync(src, { withFileTypes: true })
    .filter((dirent) => dirent.name !== '__pycache__');

  wanted.forEach((dirent) => {
    const from = path.join(src, dirent.name);
    const to = path.join(dest, dirent.name);
    if (!dirent.isDirectory()) {
      fs.copyFileSync(from, to);
      return;
    }
    copyDir(from, to);
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Make `linkPath` a symlink to `target`, replacing whatever is there now.
 *
 * Anything already at `linkPath` (file, symlink or whole directory tree) is
 * removed first. Removal is best-effort; if it fails, the symlink call below
 * is what throws.
 *
 * @param {string} target - Path the symlink should point at.
 * @param {string} linkPath - Location of the symlink to (re)create.
 */
function linkOrRelink(target, linkPath) {
  const removeOptions = { recursive: true, force: true };
  try {
    fs.rmSync(linkPath, removeOptions);
  } catch {
    // Intentionally ignored — see the note above.
  }
  fs.symlinkSync(target, linkPath);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Pick a Python interpreter command available on PATH.
 *
 * Candidates are probed with `--version`, newest specific version first. The
 * first one reporting Python 3.10 or later wins. When none qualifies but plain
 * `python3` runs, `python3` is returned anyway as a fallback.
 *
 * @returns {string|null} Command name to invoke, or null when no usable
 *   interpreter was found.
 */
function findPython() {
  const MIN_MAJOR = 3;
  const MIN_MINOR = 10;
  const candidates = [
    'python3.13', 'python3.12', 'python3.11', 'python3.10',
    'python3', 'python',
  ];

  // Remembered so the fallback does not have to spawn python3 a second time.
  let python3Runs = false;

  for (const cmd of candidates) {
    const result = spawnSync(cmd, ['--version'], { stdio: 'pipe' });
    if (result.status !== 0) continue;
    if (cmd === 'python3') python3Runs = true;

    // Read both streams: the version banner is not always on stdout.
    const banner = `${result.stdout || ''}${result.stderr || ''}`.trim();
    const match = banner.match(/(\d+)\.(\d+)/);
    if (!match) continue;

    const major = Number.parseInt(match[1], 10);
    const minor = Number.parseInt(match[2], 10);
    // Compare as a (major, minor) pair so a higher major is never rejected
    // just because its minor number is small.
    if (major > MIN_MAJOR || (major === MIN_MAJOR && minor >= MIN_MINOR)) {
      return cmd;
    }
  }

  // Fallback: whatever python3 is available, even if older than preferred.
  return python3Runs ? 'python3' : null;
}
|
|
80
|
+
|
|
81
|
+
/**
 * @returns {string} Path of the `pip` executable inside DEST's `.venv`.
 */
function pipPath() {
  const venvBin = path.join(DEST, '.venv', 'bin');
  return path.join(venvBin, 'pip');
}
|
|
84
|
+
|
|
85
|
+
/**
 * @returns {string} Path of the `python` executable inside DEST's `.venv`.
 */
function pythonPath() {
  const venvBin = path.join(DEST, '.venv', 'bin');
  return path.join(venvBin, 'python');
}
|
|
88
|
+
|
|
89
|
+
/**
 * Write the launchd property lists into `<DEST>/launchd`.
 *
 * Currently one job is generated: `com.m13v.memory-review`, which runs
 * `<DEST>/review/run.sh` through /bin/bash on a weekly StartInterval and logs
 * to `<DEST>/review/logs`. Existing plist files are overwritten.
 */
function generatePlists() {
  // Paths come from the user's home directory and may contain characters that
  // are reserved in XML, so every interpolated value is escaped.
  const xmlEscape = (value) =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');

  const plists = [
    {
      file: 'com.m13v.memory-review.plist',
      label: 'com.m13v.memory-review',
      script: `${DEST}/review/run.sh`,
      interval: 604800, // weekly
      runAtLoad: false,
      stdoutLog: `${DEST}/review/logs/launchd-stdout.log`,
      stderrLog: `${DEST}/review/logs/launchd-stderr.log`,
    },
  ];

  const launchdDir = path.join(DEST, 'launchd');
  fs.mkdirSync(launchdDir, { recursive: true });

  for (const p of plists) {
    const xml = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>Label</key>
\t<string>${xmlEscape(p.label)}</string>
\t<key>ProgramArguments</key>
\t<array>
\t\t<string>/bin/bash</string>
\t\t<string>${xmlEscape(p.script)}</string>
\t</array>
\t<key>StartInterval</key>
\t<integer>${p.interval}</integer>
\t<key>StandardOutPath</key>
\t<string>${xmlEscape(p.stdoutLog)}</string>
\t<key>StandardErrorPath</key>
\t<string>${xmlEscape(p.stderrLog)}</string>
\t<key>EnvironmentVariables</key>
\t<dict>
\t\t<key>PATH</key>
\t\t<string>/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin</string>
\t\t<key>HOME</key>
\t\t<string>${xmlEscape(HOME)}</string>
\t</dict>
\t<key>RunAtLoad</key>
\t<${p.runAtLoad ? 'true' : 'false'}/>
</dict>
</plist>
`;
    fs.writeFileSync(path.join(launchdDir, p.file), xml);
  }
  console.log(' generated launchd plists');
}
|
|
139
|
+
|
|
140
|
+
/**
 * First-time setup.
 *
 * Copies the packaged files into DEST, generates the launchd plists, creates
 * the Python virtualenv (unless `.venv` already exists), installs the core
 * Python dependencies, and symlinks each skill directory into
 * `~/.claude/skills`. Exits the process with code 1 when no Python
 * interpreter is found or the virtualenv cannot be created.
 */
function init() {
  console.log('Setting up ai-browser-profile in', DEST);
  fs.mkdirSync(DEST, { recursive: true });

  // Copy all package files
  for (const f of COPY_TARGETS) {
    const src = path.join(PKG_ROOT, f);
    const dest = path.join(DEST, f);
    if (!fs.existsSync(src)) continue;
    if (fs.statSync(src).isDirectory()) {
      copyDir(src, dest);
    } else {
      fs.copyFileSync(src, dest);
    }
    console.log(' copied', f);
  }

  // Generate launchd plists
  generatePlists();

  // Create Python venv
  const python = findPython();
  if (!python) {
    // Message matches the minimum version that findPython() looks for.
    console.error('ERROR: python3 not found. Install Python 3.10+ and try again.');
    process.exit(1);
  }

  const venvPath = path.join(DEST, '.venv');
  if (!fs.existsSync(venvPath)) {
    console.log(' creating Python venv...');
    const venvResult = spawnSync(python, ['-m', 'venv', venvPath], { stdio: 'inherit' });
    if (venvResult.status !== 0) {
      console.error('ERROR: Failed to create Python venv');
      process.exit(1);
    }
  } else {
    console.log(' .venv exists — skipping creation');
  }

  // Upgrade pip first (old pip can't find some packages)
  console.log(' upgrading pip...');
  const pipUpgrade = spawnSync(
    pythonPath(),
    ['-m', 'pip', 'install', '--upgrade', 'pip', '-q'],
    { stdio: 'inherit' },
  );
  if (pipUpgrade.status !== 0) {
    // Not fatal: the dependency install below may still work with the existing pip.
    console.warn(' WARNING: could not upgrade pip; continuing with the existing version');
  }

  // Install core deps
  console.log(' installing core dependencies...');
  const pipResult = spawnSync(pipPath(), ['install', ...CORE_DEPS, '-q'], { stdio: 'inherit' });
  if (pipResult.status !== 0) {
    console.warn(' WARNING: Some dependencies failed to install. Check that git is available and try:');
    // Built from CORE_DEPS so the hint cannot drift from what was attempted.
    console.warn(` ${pipPath()} install ${CORE_DEPS.join(' ')}`);
  } else {
    console.log(' core dependencies installed');
  }

  // Create logs dir for review
  fs.mkdirSync(path.join(DEST, 'review', 'logs'), { recursive: true });

  // Skill symlinks: [name under ~/.claude/skills, directory under DEST]
  const skillsDir = path.join(HOME, '.claude', 'skills');
  fs.mkdirSync(skillsDir, { recursive: true });

  const links = [
    ['ai-browser-profile', 'skill'],
    ['ai-browser-profile-setup', 'setup'],
    ['memory-review', 'review'],
    ['autofill-profiles', 'autofill'],
    ['whatsapp-analysis', 'whatsapp'],
  ];

  for (const [name, dir] of links) {
    const target = path.join(DEST, dir);
    const link = path.join(skillsDir, name);
    linkOrRelink(target, link);
    console.log(` ~/.claude/skills/${name} -> ~/${path.relative(HOME, target)}`);
  }

  console.log('');
  console.log('Done! Next steps:');
  console.log(` 1. Extract browser data: ${pythonPath()} ${path.join(DEST, 'extract.py')}`);
  console.log(' 2. Tell Claude: "search my browser profile for my email"');
  console.log('');
  console.log('Optional — add semantic search (~180MB download):');
  console.log(' npx ai-browser-profile install-embeddings');
}
|
|
224
|
+
|
|
225
|
+
/**
 * Refresh an existing installation.
 *
 * Re-copies the packaged files over DEST (skipping any name listed in
 * NEVER_OVERWRITE), regenerates the launchd plists, re-creates the skill
 * symlinks under `~/.claude/skills`, and upgrades the core Python
 * dependencies. Exits the process with code 1 when DEST does not exist.
 */
function update() {
  if (!fs.existsSync(DEST)) {
    console.error('Not installed. Run: npx ai-browser-profile init');
    process.exit(1);
  }

  console.log('Updating ai-browser-profile...');

  for (const f of COPY_TARGETS) {
    if (NEVER_OVERWRITE.has(f)) {
      console.log(' skipping', f, '(user data)');
      continue;
    }
    const src = path.join(PKG_ROOT, f);
    const dest = path.join(DEST, f);
    if (!fs.existsSync(src)) continue;
    if (fs.statSync(src).isDirectory()) {
      copyDir(src, dest);
    } else {
      fs.copyFileSync(src, dest);
    }
    console.log(' updated', f);
  }

  // Regenerate launchd plists
  generatePlists();

  // Re-symlink skills. The directory is created first: without it every
  // symlink call below would fail.
  const skillsDir = path.join(HOME, '.claude', 'skills');
  fs.mkdirSync(skillsDir, { recursive: true });

  const links = [
    ['ai-browser-profile', 'skill'],
    ['ai-browser-profile-setup', 'setup'],
    ['memory-review', 'review'],
    ['autofill-profiles', 'autofill'],
    ['whatsapp-analysis', 'whatsapp'],
  ];

  for (const [name, dir] of links) {
    try {
      linkOrRelink(path.join(DEST, dir), path.join(skillsDir, name));
      console.log(` re-linked ~/.claude/skills/${name}`);
    } catch (err) {
      // Still best-effort (one bad link must not abort the update), but say so.
      console.warn(` WARNING: could not re-link ~/.claude/skills/${name}: ${err.message}`);
    }
  }

  // Upgrade core deps
  console.log(' upgrading core dependencies...');
  const pipResult = spawnSync(
    pipPath(),
    ['install', '--upgrade', ...CORE_DEPS, '-q'],
    { stdio: 'inherit' },
  );
  if (pipResult.status !== 0) {
    // Also covers a missing venv: spawnSync reports a null status when pip cannot be started.
    console.warn(' WARNING: Some dependencies failed to upgrade. Try:');
    console.warn(` ${pipPath()} install --upgrade ${CORE_DEPS.join(' ')}`);
  }

  console.log('');
  console.log('Update complete. memories.db and .venv preserved.');
}
|
|
277
|
+
|
|
278
|
+
/**
 * Install the optional embedding dependencies into the existing virtualenv.
 *
 * Exits the process with code 1 when `.venv` is missing under DEST or when
 * pip reports a failure. On success, prints how to backfill embeddings for
 * memories that are already stored.
 */
function installEmbeddings() {
  if (!fs.existsSync(path.join(DEST, '.venv'))) {
    console.error('Not installed. Run: npx ai-browser-profile init');
    process.exit(1);
  }

  console.log('Installing embedding dependencies...');
  const pip = pipPath();
  const { status } = spawnSync(pip, ['install', ...EMBEDDING_DEPS, '-q'], { stdio: 'inherit' });
  if (status !== 0) {
    console.error('Failed to install embedding dependencies. Try manually:');
    console.error(` ${pip} install ${EMBEDDING_DEPS.join(' ')}`);
    process.exit(1);
  }

  const dbFile = path.join(DEST, 'memories.db');
  const summary = [
    '',
    'Embedding dependencies installed.',
    'The model (~131MB) will download automatically on first semantic search.',
    '',
    'To backfill embeddings for existing memories:',
    ` ${pythonPath()} -c "from ai_browser_profile import MemoryDB; m = MemoryDB('${dbFile}'); print(f'Embedded {m.backfill_embeddings()} memories'); m.close()"`,
  ];
  summary.forEach((line) => console.log(line));
}
|
|
300
|
+
|
|
301
|
+
// Entry point: the first CLI argument selects the sub-command; anything else
// (including no argument) prints the usage text.
const cmd = process.argv[2];
switch (cmd) {
  case 'init':
    init();
    break;
  case 'update':
    update();
    break;
  case 'install-embeddings':
    installEmbeddings();
    break;
  default:
    [
      'ai-browser-profile — extract user identity from browser data',
      '',
      'Usage:',
      ' npx ai-browser-profile init first-time setup',
      ' npx ai-browser-profile update update code, preserve data',
      ' npx ai-browser-profile install-embeddings add semantic search (~180MB)',
    ].forEach((line) => console.log(line));
}
|