digital-brain 0.1.7 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -2
- package/bin/digital-brain.js +136 -20
- package/docs/INTEGRATIONS.md +72 -0
- package/docs/PRIVACY.md +3 -1
- package/docs/SETUP.md +33 -1
- package/examples/sample-vault/{04 People/Interpreted Relationships/Close Friend.md → 06 AI Memory/Generated Relationship Drafts/Close Friend (WhatsApp).md } +4 -3
- package/examples/sample-vault/{04 People/Interpreted Relationships/Mom.md → 06 AI Memory/Generated Relationship Drafts/Mom (WhatsApp).md } +4 -3
- package/examples/sample-vault/{08 Sources/WhatsApp/Analysis/Interpreted/Project Team.md → 06 AI Memory/Generated Relationship Drafts/Project Team (WhatsApp).md } +4 -3
- package/examples/sample-vault/06 AI Memory/Interpreted Relationship Memory.md +3 -3
- package/examples/sample-vault/06 AI Memory/Person Context Index.md +26 -0
- package/examples/sample-vault/06 AI Memory/Person Reply Context.md +26 -0
- package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Close Friend.md → Analysis/Interpreted/Close Friend (WhatsApp).md } +4 -3
- package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Mom.md → Analysis/Interpreted/Mom (WhatsApp).md } +4 -3
- package/examples/sample-vault/{04 People/Interpreted Relationships/Project Team.md → 08 Sources/Analysis/Interpreted/Project Team (WhatsApp).md } +4 -3
- package/examples/sample-vault/08 Sources/Analysis/Relationship Map.md +38 -0
- package/examples/sample-vault/08 Sources/Analysis/interpreted_relationship_models.json +175 -0
- package/examples/sample-vault/08 Sources/Analysis/person_identity_map.json +78 -0
- package/examples/sample-vault/08 Sources/Analysis/relationship_profiles.json +122 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Close Friend (WhatsApp).md +44 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Mom (WhatsApp).md +45 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Project Team (WhatsApp).md +45 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Relationship Map.md +9 -3
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/interpreted_relationship_models.json +18 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/person_identity_map.json +78 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/relationship_profiles.json +18 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Raw/2026-01-01.jsonl +6 -6
- package/lib/fs.js +7 -1
- package/package.json +2 -1
- package/scripts/digital_brain_imessage_sync.py +175 -0
- package/scripts/digital_brain_linkedin_export_import.py +214 -0
- package/scripts/digital_brain_relationship_extractor.py +189 -12
- package/scripts/digital_brain_relationship_interpreter.py +104 -15
- package/scripts/digital_brain_slack_export_import.py +181 -0
- package/scripts/digital_brain_whatsapp_mac_sync.py +37 -8
- package/templates/vault/00 Home/How AI Should Use This Vault.md +1 -1
- package/templates/vault/00 Home/Start Here.md +2 -1
- package/templates/vault/04 People/Relationship Overrides.md +2 -1
- package/templates/vault/06 AI Memory/Generated Relationship Drafts/README.md +5 -0
- package/templates/vault/06 AI Memory/Interpreted Relationship Memory.md +1 -2
- package/templates/vault/06 AI Memory/Person Context Index.md +4 -0
- package/templates/vault/06 AI Memory/Person Reply Context.md +4 -0
- package/templates/vault/08 Sources/README.md +5 -0
- package/templates/vault/08 Sources/WhatsApp/Outbound/README.md +2 -2
- package/templates/vault/AGENTS.md +5 -1
- package/templates/vault/CLAUDE.md +3 -0
- package/templates/vault/GEMINI.md +4 -0
- package/whatsapp-web/send.mjs +32 -5
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"canonicalPersonKey": "person::mom",
|
|
4
|
+
"displayName": "Mom",
|
|
5
|
+
"aliases": [
|
|
6
|
+
"Mom"
|
|
7
|
+
],
|
|
8
|
+
"sources": [
|
|
9
|
+
"WhatsApp"
|
|
10
|
+
],
|
|
11
|
+
"sourceProfiles": [
|
|
12
|
+
{
|
|
13
|
+
"sourceSystem": "WhatsApp",
|
|
14
|
+
"chatName": "Mom",
|
|
15
|
+
"displayName": "Mom (WhatsApp)",
|
|
16
|
+
"messageCount": 2,
|
|
17
|
+
"firstSeen": "2026-01-01",
|
|
18
|
+
"lastSeen": "2026-01-01",
|
|
19
|
+
"relationshipGuess": "warm personal relationship",
|
|
20
|
+
"typingStyle": {
|
|
21
|
+
"sampleSize": 1,
|
|
22
|
+
"avgChars": 24.0,
|
|
23
|
+
"avgWords": 4.0,
|
|
24
|
+
"lowercaseShare": 0.0,
|
|
25
|
+
"uppercaseShare": 0.0,
|
|
26
|
+
"questionShare": 0.0,
|
|
27
|
+
"exclamationShare": 0.0,
|
|
28
|
+
"emojiShare": 0.0,
|
|
29
|
+
"slang": [],
|
|
30
|
+
"signature": "very short"
|
|
31
|
+
},
|
|
32
|
+
"identityConfidence": "medium",
|
|
33
|
+
"identityEvidence": "WhatsApp direct chat identity"
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalMessages": 2,
|
|
37
|
+
"firstSeen": "2026-01-01",
|
|
38
|
+
"lastSeen": "2026-01-01"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"canonicalPersonKey": "person::close friend",
|
|
42
|
+
"displayName": "Close Friend",
|
|
43
|
+
"aliases": [
|
|
44
|
+
"Close Friend"
|
|
45
|
+
],
|
|
46
|
+
"sources": [
|
|
47
|
+
"WhatsApp"
|
|
48
|
+
],
|
|
49
|
+
"sourceProfiles": [
|
|
50
|
+
{
|
|
51
|
+
"sourceSystem": "WhatsApp",
|
|
52
|
+
"chatName": "Close Friend",
|
|
53
|
+
"displayName": "Close Friend (WhatsApp)",
|
|
54
|
+
"messageCount": 2,
|
|
55
|
+
"firstSeen": "2026-01-01",
|
|
56
|
+
"lastSeen": "2026-01-01",
|
|
57
|
+
"relationshipGuess": "warm personal relationship",
|
|
58
|
+
"typingStyle": {
|
|
59
|
+
"sampleSize": 1,
|
|
60
|
+
"avgChars": 30.0,
|
|
61
|
+
"avgWords": 5.0,
|
|
62
|
+
"lowercaseShare": 0.0,
|
|
63
|
+
"uppercaseShare": 0.0,
|
|
64
|
+
"questionShare": 0.0,
|
|
65
|
+
"exclamationShare": 0.0,
|
|
66
|
+
"emojiShare": 0.0,
|
|
67
|
+
"slang": [],
|
|
68
|
+
"signature": "short"
|
|
69
|
+
},
|
|
70
|
+
"identityConfidence": "medium",
|
|
71
|
+
"identityEvidence": "WhatsApp direct chat identity"
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
"totalMessages": 2,
|
|
75
|
+
"firstSeen": "2026-01-01",
|
|
76
|
+
"lastSeen": "2026-01-01"
|
|
77
|
+
}
|
|
78
|
+
]
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
[
|
|
2
2
|
{
|
|
3
3
|
"chatName": "Mom",
|
|
4
|
+
"sourceSystem": "WhatsApp",
|
|
5
|
+
"displayName": "Mom (WhatsApp)",
|
|
6
|
+
"identityName": "Mom",
|
|
7
|
+
"canonicalPersonKey": "person::mom",
|
|
8
|
+
"identityConfidence": "medium",
|
|
9
|
+
"identityEvidence": "WhatsApp direct chat identity",
|
|
4
10
|
"messageCount": 2,
|
|
5
11
|
"inbound": 1,
|
|
6
12
|
"outbound": 1,
|
|
@@ -35,6 +41,12 @@
|
|
|
35
41
|
},
|
|
36
42
|
{
|
|
37
43
|
"chatName": "Project Team",
|
|
44
|
+
"sourceSystem": "WhatsApp",
|
|
45
|
+
"displayName": "Project Team (WhatsApp)",
|
|
46
|
+
"identityName": "Project Team",
|
|
47
|
+
"canonicalPersonKey": "group::whatsapp::project team",
|
|
48
|
+
"identityConfidence": "medium",
|
|
49
|
+
"identityEvidence": "group chat kept source-specific",
|
|
38
50
|
"messageCount": 2,
|
|
39
51
|
"inbound": 1,
|
|
40
52
|
"outbound": 1,
|
|
@@ -69,6 +81,12 @@
|
|
|
69
81
|
},
|
|
70
82
|
{
|
|
71
83
|
"chatName": "Close Friend",
|
|
84
|
+
"sourceSystem": "WhatsApp",
|
|
85
|
+
"displayName": "Close Friend (WhatsApp)",
|
|
86
|
+
"identityName": "Close Friend",
|
|
87
|
+
"canonicalPersonKey": "person::close friend",
|
|
88
|
+
"identityConfidence": "medium",
|
|
89
|
+
"identityEvidence": "WhatsApp direct chat identity",
|
|
72
90
|
"messageCount": 2,
|
|
73
91
|
"inbound": 1,
|
|
74
92
|
"outbound": 1,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
{"id":"sample-1","source":"sample","timestamp":"2026-01-01T10:00:00+00:00","chatName":"Mom","isGroup":false,"fromMe":false,"author":"Mom","body":"Good morning, call me when you wake up"}
|
|
2
|
-
{"id":"sample-2","source":"sample","timestamp":"2026-01-01T10:05:00+00:00","chatName":"Mom","isGroup":false,"fromMe":true,"author":"Me","body":"Morning, will call in 10"}
|
|
3
|
-
{"id":"sample-3","source":"sample","timestamp":"2026-01-01T11:00:00+00:00","chatName":"Project Team","isGroup":true,"fromMe":false,"author":"Alex","body":"Can you send the deck before the client meeting?"}
|
|
4
|
-
{"id":"sample-4","source":"sample","timestamp":"2026-01-01T11:03:00+00:00","chatName":"Project Team","isGroup":true,"fromMe":true,"author":"Me","body":"Yes, sending the updated deck now"}
|
|
5
|
-
{"id":"sample-5","source":"sample","timestamp":"2026-01-01T20:00:00+00:00","chatName":"Close Friend","isGroup":false,"fromMe":false,"author":"Friend","body":"That was fun haha, let's plan another trip"}
|
|
6
|
-
{"id":"sample-6","source":"sample","timestamp":"2026-01-01T20:02:00+00:00","chatName":"Close Friend","isGroup":false,"fromMe":true,"author":"Me","body":"Yes absolutely, that was great"}
|
|
1
|
+
{"id":"sample-1","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T10:00:00+00:00","chatName":"Mom","isGroup":false,"fromMe":false,"author":"Mom","body":"Good morning, call me when you wake up"}
|
|
2
|
+
{"id":"sample-2","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T10:05:00+00:00","chatName":"Mom","isGroup":false,"fromMe":true,"author":"Me","body":"Morning, will call in 10"}
|
|
3
|
+
{"id":"sample-3","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T11:00:00+00:00","chatName":"Project Team","isGroup":true,"fromMe":false,"author":"Alex","body":"Can you send the deck before the client meeting?"}
|
|
4
|
+
{"id":"sample-4","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T11:03:00+00:00","chatName":"Project Team","isGroup":true,"fromMe":true,"author":"Me","body":"Yes, sending the updated deck now"}
|
|
5
|
+
{"id":"sample-5","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T20:00:00+00:00","chatName":"Close Friend","isGroup":false,"fromMe":false,"author":"Friend","body":"That was fun haha, let's plan another trip"}
|
|
6
|
+
{"id":"sample-6","source":"sample","sourceSystem":"WhatsApp","timestamp":"2026-01-01T20:02:00+00:00","chatName":"Close Friend","isGroup":false,"fromMe":true,"author":"Me","body":"Yes absolutely, that was great"}
|
package/lib/fs.js
CHANGED
|
@@ -55,5 +55,11 @@ export function readDefaultVault() {
|
|
|
55
55
|
/**
 * Record `vault` as the default vault in the global config file.
 *
 * The parent directory of the config file is created first, then the
 * config is written as pretty-printed JSON followed by a newline.
 *
 * @param {*} vault Value stored under the `defaultVault` key.
 */
export function writeDefaultVault(vault) {
  const configFile = globalConfigPath();
  const configDir = path.dirname(configFile);
  ensureDir(configDir);

  const serialized = JSON.stringify({ defaultVault: vault }, null, 2);
  writeFileAtomic(configFile, `${serialized}\n`);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Write `content` to `file` via a sibling temp file that is renamed over
 * the target, so readers never observe a half-written file.
 *
 * If writing or renaming fails, the temp file is removed (best effort) and
 * the original error is re-thrown.
 *
 * @param {string} file Destination path.
 * @param {string|Uint8Array} content Data to write.
 */
function writeFileAtomic(file, content) {
  // The pid keeps concurrent processes from sharing one temp file name.
  const temp = `${file}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(temp, content);
    fs.renameSync(temp, file);
  } catch (error) {
    try {
      fs.unlinkSync(temp);
    } catch (_) {
      // The temp file was never created or is already gone; nothing to clean.
    }
    throw error;
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "digital-brain",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Your private digital imprint for AI assistants.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"personal-ai",
|
|
30
30
|
"obsidian",
|
|
31
31
|
"whatsapp",
|
|
32
|
+
"imessage",
|
|
32
33
|
"local-first",
|
|
33
34
|
"context"
|
|
34
35
|
],
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
CORE_DATA_EPOCH_OFFSET = 978_307_200
|
|
11
|
+
DEFAULT_DB = Path.home() / "Library" / "Messages" / "chat.db"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """Sync Apple Messages into the vault's ``08 Sources/iMessage`` folder.

    Creates the ``Raw``, ``ChatsByMonth`` and ``.sync-state`` directories,
    runs one sync pass, and reports how many messages were imported.

    Raises:
        SystemExit: when the messages database file does not exist.
    """
    args = parse_args()
    if not args.db.exists():
        raise SystemExit(
            f"Apple Messages database not found: {args.db}\n"
            "Open Messages on macOS and grant Terminal Full Disk Access if macOS blocks access."
        )

    source_dir = args.vault.resolve() / "08 Sources" / "iMessage"
    raw_dir = source_dir / "Raw"
    chats_dir = source_dir / "ChatsByMonth"
    state_dir = source_dir / ".sync-state"
    for directory in (raw_dir, chats_dir, state_dir):
        directory.mkdir(parents=True, exist_ok=True)

    seen_path = state_dir / "imessage-seen-message-ids.json"
    seen = load_seen(seen_path)
    try:
        added = sync_once(args, seen, raw_dir, chats_dir)
    finally:
        # Persist ids even when the sync fails part-way: records already
        # appended to the JSONL files must not be imported again next run.
        save_seen(seen_path, seen)
    print(f"Imported {added} iMessage messages.")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def sync_once(args, seen, raw_dir, chats_dir):
    """Import every not-yet-seen text message from the messages database.

    Args:
        args: Parsed CLI namespace; reads ``db``, ``days``, ``chat``,
            ``self_name``, ``privacy_mode`` and ``markdown_mode``.
        seen: Set of already imported record ids; updated in place.
        raw_dir: Directory receiving the per-day JSONL files.
        chats_dir: Directory receiving the per-month markdown files.

    Returns:
        Number of records imported during this pass.
    """
    cutoff = None
    if args.days:
        cutoff = datetime.now(timezone.utc).timestamp() - args.days * 24 * 60 * 60
    chat_filter = args.chat.lower() if args.chat else ""

    # as_uri() percent-encodes the path, so characters such as '?', '#' or
    # '%' in it cannot be mistaken for URI syntax by SQLite.
    db_uri = f"{args.db.resolve().as_uri()}?mode=ro"
    conn = sqlite3.connect(db_uri, uri=True)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """
            SELECT m.ROWID message_pk, m.guid guid, m.date message_date,
                   m.is_from_me is_from_me, m.text text, m.service service,
                   h.id handle_id,
                   c.ROWID chat_pk, c.display_name display_name, c.chat_identifier chat_identifier
            FROM message m
            LEFT JOIN handle h ON h.ROWID = m.handle_id
            LEFT JOIN chat_message_join cmj ON cmj.message_id = m.ROWID
            LEFT JOIN chat c ON c.ROWID = cmj.chat_id
            WHERE m.text IS NOT NULL
              AND length(m.text) > 0
            ORDER BY m.date ASC, m.ROWID ASC
            """
        ).fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    added = 0
    for row in rows:
        record = row_to_record(row, args.self_name, args.privacy_mode)
        if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
            continue
        if chat_filter and chat_filter not in record["chatName"].lower():
            continue
        if record["id"] in seen:
            continue
        append_jsonl(raw_dir, record)
        append_markdown(chats_dir, record, args.markdown_mode)
        # Mark as seen only after both outputs were written.
        seen.add(record["id"])
        added += 1
    return added
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def row_to_record(row, self_name, privacy_mode):
    """Convert one query row into the record dict written to the vault.

    Args:
        row: Mapping with the columns selected by the sync query.
        self_name: Author name used for messages sent by the user.
        privacy_mode: ``"metadata-only"`` blanks the body and stores its
            SHA-256 hash instead; any other value keeps the body.

    Returns:
        The record dict; ``bodyCharCount`` always reflects the real body.
    """
    sent_at = apple_timestamp(row["message_date"])
    text = row["text"] or ""
    outgoing = bool(row["is_from_me"])
    redact = privacy_mode == "metadata-only"

    display_name = row["display_name"]
    identifier = row["chat_identifier"]
    handle = row["handle_id"]

    # A named chat, or an identifier starting with "chat", counts as a group.
    looks_like_group = bool(display_name) or str(identifier or "").startswith("chat")

    if outgoing:
        author = self_name
    else:
        author = handle or "Unknown"

    if redact:
        stored_body = ""
        body_hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
    else:
        stored_body = text
        body_hash = ""

    record = {}
    record["id"] = compound_id(row, sent_at)
    record["source"] = "Apple Messages chat.db"
    record["sourceSystem"] = "iMessage"
    record["timestamp"] = sent_at
    record["chatPk"] = row["chat_pk"]
    record["chatName"] = display_name or identifier or handle or "iMessage"
    record["chatIdentifier"] = identifier
    record["isGroup"] = looks_like_group
    record["fromMe"] = outgoing
    record["author"] = author
    record["handleId"] = handle
    record["service"] = row["service"]
    record["body"] = stored_body
    record["bodyHash"] = body_hash
    record["bodyCharCount"] = len(text)
    return record
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def apple_timestamp(value):
    """Convert a stored message date into a UTC ISO-8601 string.

    Values whose magnitude exceeds 10_000_000_000 are divided by 1e9
    (treated as nanoseconds); smaller values are used as seconds. The
    result is shifted by ``CORE_DATA_EPOCH_OFFSET`` before formatting.
    A falsy ``value`` is treated as 0.
    """
    stored = float(value or 0)
    if abs(stored) > 10_000_000_000:
        stored = stored / 1_000_000_000
    unix_seconds = stored + CORE_DATA_EPOCH_OFFSET
    moment = datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
    return moment.isoformat()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def compound_id(row, timestamp):
    """Build the dedupe id for a message row.

    The id joins, with ``::``: the literal ``imessage``, the first truthy
    of chat pk / chat identifier / handle id, the guid, the message pk and
    the timestamp. Missing parts fall back to placeholder strings.
    """
    chat_part = row["chat_pk"] or row["chat_identifier"] or row["handle_id"] or "unknown-chat"
    guid_part = row["guid"] or "no-guid"
    pk_part = row["message_pk"] or "no-pk"
    pieces = ("imessage", str(chat_part), str(guid_part), str(pk_part), timestamp)
    return "::".join(pieces)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def append_jsonl(raw_dir, record):
    """Append ``record`` as one JSON line to the file for its day.

    The file name is the first ten characters of ``record["timestamp"]``
    plus ``.jsonl``; non-ASCII characters are written unescaped.
    """
    day = record["timestamp"][:10]
    target = raw_dir / f"{day}.jsonl"
    with target.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(record, ensure_ascii=False)
        handle.write(serialized + "\n")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def append_markdown(chats_dir, record, mode):
    """Append ``record`` as a bullet line to its chat's monthly markdown file.

    Does nothing when ``mode`` is ``"none"``; every other mode is handled
    the same way. Files live under ``chats_dir/<YYYY-MM>/<chat name>.md``
    and get a heading the first time they are created.
    """
    if mode != "none":
        month_dir = chats_dir / record["timestamp"][:7]
        month_dir.mkdir(parents=True, exist_ok=True)

        chat_file = month_dir / f"{safe_filename(record['chatName'])}.md"
        if not chat_file.exists():
            heading = f"# {escape_markdown(record['chatName'])}\n\nSynced from Apple Messages.\n\n"
            write_text_atomic(chat_file, heading)

        who = escape_markdown(record["author"])
        # Collapse all whitespace runs so each message stays on one line.
        single_line = " ".join(record["body"].split())
        what = escape_markdown(single_line)
        entry = f"- {record['timestamp']} | {who}: {what}\n"
        with chat_file.open("a", encoding="utf-8") as handle:
            handle.write(entry)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def load_seen(path):
    """Load the set of already imported ids from the JSON file at ``path``.

    Returns an empty set when the file is missing or cannot be read or
    parsed (best effort: a damaged state file never aborts the sync).
    """
    seen_ids = set()
    if path.exists():
        try:
            seen_ids = set(json.loads(path.read_text(encoding="utf-8")))
        except Exception:
            seen_ids = set()
    return seen_ids
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def save_seen(path, seen):
    """Write the ids in ``seen`` to ``path`` as a sorted, indented JSON list."""
    ordered_ids = sorted(seen)
    serialized = json.dumps(ordered_ids, indent=2)
    write_text_atomic(path, serialized)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def safe_filename(value):
    """Turn a chat name into a file-name-safe string of at most 120 chars.

    The characters ``/ : \\ ? % * " < > |`` become ``-``, whitespace runs
    collapse to single spaces, and an empty result becomes ``"iMessage"``.
    """
    dash_table = dict.fromkeys(map(ord, '/:\\?%*"<>|'), "-")
    replaced = value.translate(dash_table)
    collapsed = " ".join(replaced.split()).strip()
    if not collapsed:
        collapsed = "iMessage"
    return collapsed[:120]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def escape_markdown(value):
    """Return ``str(value)`` on one line with ``\\ [ ] |`` backslash-escaped.

    Newlines and carriage returns are each replaced by a single space.
    """
    replacements = {
        "\n": " ",
        "\r": " ",
        "\\": "\\\\",
        "[": "\\[",
        "]": "\\]",
        "|": "\\|",
    }
    # A single translate pass maps each input character independently, so
    # backslashes introduced by escaping are never escaped a second time.
    return str(value).translate(str.maketrans(replacements))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def write_text_atomic(path, content):
    """Write ``content`` (UTF-8) to ``path`` via a temp file and a rename.

    The temp file sits next to ``path`` and carries a nanosecond timestamp
    in its name; it replaces ``path`` once fully written.
    """
    unique_suffix = time.time_ns()
    staging_name = f"{path.name}.{unique_suffix}.tmp"
    staging_path = path.with_name(staging_name)
    staging_path.write_text(content, encoding="utf-8")
    staging_path.replace(path)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def parse_args():
    """Parse the command-line options of the iMessage sync script.

    Returns:
        The ``argparse`` namespace with ``vault``, ``days``, ``chat``,
        ``db``, ``self_name``, ``markdown_mode`` and ``privacy_mode``.
    """
    option_specs = (
        ("--vault", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 30}),
        ("--chat", {"default": ""}),
        ("--db", {"type": Path, "default": DEFAULT_DB}),
        ("--self-name", {"default": "Me"}),
        ("--markdown-mode", {"choices": ["chat", "month", "none"], "default": "none"}),
        ("--privacy-mode", {"choices": ["standard", "metadata-only"], "default": "standard"}),
    )
    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
|
|
175
|
+
main()
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
import tempfile
|
|
6
|
+
import zipfile
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main():
    """Import a LinkedIn data archive (zip or folder) into the vault."""
    options = parse_args()
    with unpack(options.input) as archive_root:
        vault_root = options.vault.resolve()
        import_archive(vault_root, archive_root, options.days)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def import_archive(vault, source, days):
    """Import messages and connections from an unpacked LinkedIn archive.

    Args:
        vault: Resolved vault root directory.
        source: Directory holding the archive's CSV files.
        days: Only messages newer than this many days are imported; a
            falsy value disables the age filter.

    Messages go to per-day JSONL files under ``08 Sources/LinkedIn/Raw``;
    the connection index is written by ``write_connections``.
    """
    linkedin = vault / "08 Sources" / "LinkedIn"
    raw_dir = linkedin / "Raw"
    state_dir = linkedin / ".sync-state"
    people_dir = vault / "04 People"
    for directory in (raw_dir, state_dir, people_dir):
        directory.mkdir(parents=True, exist_ok=True)

    cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
    seen_path = state_dir / "linkedin-seen-message-ids.json"
    seen = load_seen(seen_path)
    imported_messages = 0
    imported_connections = write_connections(source, people_dir)

    try:
        for file in find_csvs(source, "message"):
            for row in read_csv(file):
                record = message_record(row, file)
                if not record:
                    continue
                if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
                    continue
                if record["id"] in seen:
                    continue
                append_jsonl(raw_dir, record)
                seen.add(record["id"])
                imported_messages += 1
    finally:
        # Persist ids even when a later file or row fails: records already
        # appended to the JSONL files must not be imported again next run.
        save_seen(seen_path, seen)

    print(f"Imported {imported_messages} LinkedIn messages.")
    print(f"Indexed {imported_connections} LinkedIn connections.")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def write_connections(source, people_dir):
    """Write ``LinkedIn Connections.md`` from the archive's connection CSVs.

    Rows without any usable name are skipped. The file is only written
    when at least one connection was found.

    Returns:
        Number of connections listed (0 when no matching CSV exists).
    """
    connection_files = find_csvs(source, "connection")
    if not connection_files:
        return 0

    rows = [row for csv_file in connection_files for row in read_csv(csv_file)]

    bullets = []
    for row in rows:
        given = first_value(row, ["First Name", "FirstName", "First name"])
        family = first_value(row, ["Last Name", "LastName", "Last name"])
        full_name = " ".join(filter(None, (given, family))).strip()
        if not full_name:
            # No first/last columns: fall back to a single name column.
            full_name = first_value(row, ["Name", "Full Name"])
        if not full_name:
            continue

        company = first_value(row, ["Company", "Company Name"])
        position = first_value(row, ["Position", "Title"])
        connected_on = first_value(row, ["Connected On", "ConnectedOn"])
        detail = ", ".join(filter(None, (position, company, connected_on)))
        if detail:
            bullets.append(f"- {full_name} - {detail}")
        else:
            bullets.append(f"- {full_name}")

    if bullets:
        header = ["# LinkedIn Connections", "", "Imported from LinkedIn data archive.", ""]
        document = "\n".join(header + bullets) + "\n"
        (people_dir / "LinkedIn Connections.md").write_text(document, encoding="utf-8")
    return len(bullets)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def message_record(row, file):
    """Build the vault record for one message CSV row.

    Args:
        row: Mapping of CSV column names to values.
        file: Path of the CSV file the row came from; its name is part of
            the record id.

    Returns:
        The record dict, or ``None`` when the row has no body or no
        parseable timestamp.
    """
    # Local import keeps this block self-contained within the file.
    import hashlib

    body = first_value(row, ["Content", "Message", "Body", "Text"])
    if not body:
        return None
    timestamp = parse_timestamp(first_value(row, ["Date", "Created At", "Timestamp", "Sent Date"]))
    if not timestamp:
        return None
    sender = first_value(row, ["From", "Sender", "Sender Name", "From Name"]) or "Unknown"
    recipients = first_value(row, ["To", "Recipient", "Recipients", "To Name"]) or "Unknown"
    explicit_conversation = first_value(row, ["Conversation Title", "Subject", "Conversation"])
    conversation = explicit_conversation or other_party(sender, recipients)

    # The id feeds the persisted seen-set, so it must be identical across
    # runs. Built-in hash() of a str is randomized per process, which made
    # every re-import look new; a content digest is stable.
    body_digest = hashlib.sha256(body.encode("utf-8")).hexdigest()[:16]
    return {
        "id": f"linkedin-{file.name}-{timestamp}-{sender}-{body_digest}",
        "source": "LinkedIn data archive",
        "sourceSystem": "LinkedIn",
        "timestamp": timestamp,
        "chatName": conversation,
        "isGroup": "," in recipients,
        "fromMe": False,
        "author": sender,
        "to": recipients,
        "body": body,
    }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def other_party(sender, recipients):
    """Pick the conversation partner's name from sender and recipients.

    The sender wins unless it is ``me``/``you`` (case-insensitive);
    otherwise the first comma-separated recipient is used. When both are
    self-labels or empty, the first non-empty of sender, recipients and
    ``"Unknown"`` is returned.
    """
    self_labels = ("me", "you")

    sender_is_other = bool(sender) and sender.lower() not in self_labels
    if sender_is_other:
        return sender

    recipients_are_other = bool(recipients) and recipients.lower() not in self_labels
    if recipients_are_other:
        first_recipient, _, _ = recipients.partition(",")
        return first_recipient.strip()

    for candidate in (sender, recipients):
        if candidate:
            return candidate
    return "Unknown"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def parse_timestamp(value):
    """Parse a date string from the archive into a UTC ISO-8601 string.

    Accepts the listed ``strptime`` formats (optionally followed by a
    literal ``UTC`` suffix) and anything ``datetime.fromisoformat``
    understands. Values without an explicit offset are interpreted as UTC.

    Returns:
        The ISO-8601 string, or ``""`` when ``value`` is empty or cannot
        be parsed.
    """
    if not value:
        return ""
    value = value.strip()
    # Exports may write dates as "YYYY-MM-DD HH:MM:SS UTC"; drop the suffix
    # so such rows are parsed instead of being discarded.
    if value.upper().endswith(" UTC"):
        value = value[:-4].rstrip()

    formats = (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%SZ",
        "%m/%d/%Y, %I:%M %p",
        "%m/%d/%Y",
        "%Y-%m-%d",
    )
    for fmt in formats:
        try:
            return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc).isoformat()
        except ValueError:
            continue

    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return ""
    if parsed.tzinfo is None:
        # astimezone() would treat a naive value as local time; every other
        # naive format above is read as UTC, so do the same here.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc).isoformat()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def append_jsonl(raw_dir, record):
    """Append ``record`` as a single JSON line to its day's ``.jsonl`` file.

    The day is the first ten characters of ``record["timestamp"]``.
    """
    jsonl_path = raw_dir / (record["timestamp"][:10] + ".jsonl")
    with jsonl_path.open("a", encoding="utf-8") as out:
        out.write(f"{json.dumps(record, ensure_ascii=False)}\n")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def find_csvs(source, needle):
    """Return sorted paths of all ``*.csv`` files under ``source`` whose
    file name contains ``needle`` (case-insensitive)."""
    wanted = needle.lower()
    matches = []
    for candidate in source.rglob("*.csv"):
        if wanted in candidate.name.lower():
            matches.append(candidate)
    matches.sort()
    return matches
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def read_csv(path):
    """Read the CSV file at ``path`` into a list of row dicts.

    The file is decoded as UTF-8 and a leading byte-order mark is dropped.
    """
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        return [row for row in reader]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def first_value(row, keys):
    """Return the first non-blank value of ``row`` found under ``keys``.

    Keys are tried in order with an exact match first, then again
    case-insensitively. Values are returned stripped.

    Args:
        row: Mapping of column names to values, e.g. a ``csv.DictReader``
            row (which may hold ``None`` values and a ``None`` key).
        keys: Candidate column names in priority order.

    Returns:
        The stripped value, or ``""`` when no candidate has content.
    """
    for key in keys:
        value = row.get(key)
        if isinstance(value, str):
            text = value.strip()
            if text:
                return text

    # csv.DictReader stores surplus fields under the key None; skip such
    # non-string keys, which have no .lower().
    lowered = {
        name.lower(): value
        for name, value in row.items()
        if isinstance(name, str)
    }
    for key in keys:
        value = lowered.get(key.lower())
        if isinstance(value, str):
            text = value.strip()
            if text:
                return text
    return ""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def load_seen(path):
    """Return the set of ids stored in the JSON file at ``path``.

    A missing, unreadable or unparsable file yields an empty set, so a
    damaged state file never aborts the import.
    """
    if path.exists():
        try:
            stored = json.loads(path.read_text(encoding="utf-8"))
            return set(stored)
        except Exception:
            pass
    return set()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def save_seen(path, seen):
    """Write the ids in ``seen`` to ``path`` as a sorted, indented JSON list.

    The data is written to a sibling temp file that then replaces
    ``path``. An interrupted write therefore cannot leave a truncated
    state file behind, which would otherwise be read back as "nothing
    seen" and cause every message to be imported again.
    """
    payload = json.dumps(sorted(seen), indent=2)
    staging = path.with_name(f"{path.name}.tmp")
    staging.write_text(payload, encoding="utf-8")
    staging.replace(path)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def unpack(input_path):
    """Return a context manager yielding a directory with the archive files.

    A directory input is used as-is. Anything else is opened as a zip
    file and extracted into a temporary directory, which is removed when
    the context exits.
    """
    resolved = input_path.resolve()
    if not resolved.is_dir():
        scratch = tempfile.TemporaryDirectory()
        with zipfile.ZipFile(resolved) as bundle:
            bundle.extractall(scratch.name)
        return TempContext(Path(scratch.name), scratch)
    return NullContext(resolved)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class NullContext:
    """Context manager that yields a fixed path and cleans up nothing."""

    def __init__(self, path):
        # Value handed back by __enter__.
        self.path = path

    def __enter__(self):
        """Return the stored path."""
        return self.path

    def __exit__(self, *exc_info):
        """Do nothing; returning False lets any exception propagate."""
        return False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TempContext:
    """Context manager that yields a path and cleans up a temp object on exit."""

    def __init__(self, path, temp):
        # Value handed back by __enter__.
        self.path = path
        # Object whose cleanup() is called on exit.
        self.temp = temp

    def __enter__(self):
        """Return the stored path."""
        return self.path

    def __exit__(self, *exc_info):
        """Clean up the temp object; returning False lets exceptions propagate."""
        self.temp.cleanup()
        return False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def parse_args():
    """Parse the command-line options of the LinkedIn import script.

    Returns:
        The ``argparse`` namespace with ``vault`` and ``input`` (both
        required paths) and ``days`` (integer, default 3650).
    """
    option_specs = (
        ("--vault", {"type": Path, "required": True}),
        ("--input", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 3650}),
    )
    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
main()
|