digital-brain 0.1.3 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -7
- package/bin/digital-brain.js +275 -38
- package/docs/AUTOMATIONS.md +13 -4
- package/docs/INTEGRATIONS.md +72 -0
- package/docs/PRIVACY.md +3 -1
- package/docs/SETUP.md +78 -0
- package/examples/sample-vault/{04 People/Interpreted Relationships/Close Friend.md → 06 AI Memory/Generated Relationship Drafts/Close Friend (WhatsApp).md } +4 -3
- package/examples/sample-vault/{04 People/Interpreted Relationships/Mom.md → 06 AI Memory/Generated Relationship Drafts/Mom (WhatsApp).md } +4 -3
- package/examples/sample-vault/{08 Sources/WhatsApp/Analysis/Interpreted/Project Team.md → 06 AI Memory/Generated Relationship Drafts/Project Team (WhatsApp).md } +4 -3
- package/examples/sample-vault/06 AI Memory/Interpreted Relationship Memory.md +3 -3
- package/examples/sample-vault/06 AI Memory/Person Context Index.md +26 -0
- package/examples/sample-vault/06 AI Memory/Person Reply Context.md +26 -0
- package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Close Friend.md → Analysis/Interpreted/Close Friend (WhatsApp).md } +4 -3
- package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Mom.md → Analysis/Interpreted/Mom (WhatsApp).md } +4 -3
- package/examples/sample-vault/{04 People/Interpreted Relationships/Project Team.md → 08 Sources/Analysis/Interpreted/Project Team (WhatsApp).md } +4 -3
- package/examples/sample-vault/08 Sources/Analysis/Relationship Map.md +38 -0
- package/examples/sample-vault/08 Sources/Analysis/interpreted_relationship_models.json +175 -0
- package/examples/sample-vault/08 Sources/Analysis/person_identity_map.json +78 -0
- package/examples/sample-vault/08 Sources/Analysis/relationship_profiles.json +122 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Close Friend (WhatsApp).md +44 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Mom (WhatsApp).md +45 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Project Team (WhatsApp).md +45 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Relationship Map.md +9 -3
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/interpreted_relationship_models.json +18 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/person_identity_map.json +78 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Analysis/relationship_profiles.json +18 -0
- package/examples/sample-vault/08 Sources/WhatsApp/Raw/2026-01-01.jsonl +6 -6
- package/lib/fs.js +33 -0
- package/package.json +2 -1
- package/scripts/digital_brain_imessage_sync.py +175 -0
- package/scripts/digital_brain_linkedin_export_import.py +214 -0
- package/scripts/digital_brain_relationship_extractor.py +189 -12
- package/scripts/digital_brain_relationship_interpreter.py +104 -15
- package/scripts/digital_brain_slack_export_import.py +181 -0
- package/scripts/digital_brain_whatsapp_mac_sync.py +37 -8
- package/templates/vault/00 Home/How AI Should Use This Vault.md +1 -1
- package/templates/vault/00 Home/Start Here.md +2 -1
- package/templates/vault/04 People/Relationship Overrides.md +2 -1
- package/templates/vault/06 AI Memory/Generated Relationship Drafts/README.md +5 -0
- package/templates/vault/06 AI Memory/Interpreted Relationship Memory.md +1 -2
- package/templates/vault/06 AI Memory/Person Context Index.md +4 -0
- package/templates/vault/06 AI Memory/Person Reply Context.md +4 -0
- package/templates/vault/08 Sources/README.md +5 -0
- package/templates/vault/08 Sources/WhatsApp/Outbound/README.md +2 -2
- package/templates/vault/AGENTS.md +5 -1
- package/templates/vault/CLAUDE.md +3 -0
- package/templates/vault/GEMINI.md +4 -0
- package/whatsapp-web/send.mjs +32 -5
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
CORE_DATA_EPOCH_OFFSET = 978_307_200
|
|
11
|
+
DEFAULT_DB = Path.home() / "Library" / "Messages" / "chat.db"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """Sync Apple Messages into the vault's ``08 Sources/iMessage`` tree.

    Exits with a readable message when the Messages database is missing.
    Creates the Raw, ChatsByMonth and .sync-state directories, runs one sync
    pass, and persists the set of already-imported message ids.

    Raises:
        SystemExit: when ``args.db`` does not exist.
    """
    args = parse_args()
    if not args.db.exists():
        raise SystemExit(
            f"Apple Messages database not found: {args.db}\n"
            "Open Messages on macOS and grant Terminal Full Disk Access if macOS blocks access."
        )

    vault = args.vault.resolve()
    source_dir = vault / "08 Sources" / "iMessage"
    raw_dir = source_dir / "Raw"
    chats_dir = source_dir / "ChatsByMonth"
    state_dir = source_dir / ".sync-state"
    for directory in (raw_dir, chats_dir, state_dir):
        directory.mkdir(parents=True, exist_ok=True)

    seen_path = state_dir / "imessage-seen-message-ids.json"
    seen = load_seen(seen_path)
    try:
        added = sync_once(args, seen, raw_dir, chats_dir)
    finally:
        # sync_once appends records to disk as it goes; persist the ids even
        # when it fails partway so a rerun does not write them a second time.
        save_seen(seen_path, seen)
    print(f"Imported {added} iMessage messages.")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def sync_once(args, seen, raw_dir, chats_dir):
    """Import every not-yet-seen text message from the Messages database.

    Args:
        args: parsed CLI namespace; reads ``db``, ``days``, ``chat``,
            ``self_name``, ``privacy_mode`` and ``markdown_mode``.
        seen: set of record ids already imported; mutated in place.
        raw_dir: directory receiving the per-day JSONL files.
        chats_dir: directory receiving the per-month markdown transcripts.

    Returns:
        The number of records appended during this pass.
    """
    cutoff = (
        datetime.now(timezone.utc).timestamp() - args.days * 24 * 60 * 60
        if args.days
        else None
    )
    conn = sqlite3.connect(f"file:{args.db}?mode=ro", uri=True)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """
            SELECT m.ROWID message_pk, m.guid guid, m.date message_date,
                   m.is_from_me is_from_me, m.text text, m.service service,
                   h.id handle_id,
                   c.ROWID chat_pk, c.display_name display_name, c.chat_identifier chat_identifier
            FROM message m
            LEFT JOIN handle h ON h.ROWID = m.handle_id
            LEFT JOIN chat_message_join cmj ON cmj.message_id = m.ROWID
            LEFT JOIN chat c ON c.ROWID = cmj.chat_id
            WHERE m.text IS NOT NULL
              AND length(m.text) > 0
            ORDER BY m.date ASC, m.ROWID ASC
            """
        ).fetchall()
    finally:
        # Release the read-only handle even when the query itself fails.
        conn.close()

    # Lower-case the optional chat filter once instead of once per row.
    chat_filter = args.chat.lower() if args.chat else ""
    added = 0
    for row in rows:
        record = row_to_record(row, args.self_name, args.privacy_mode)
        if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
            continue
        if chat_filter and chat_filter not in record["chatName"].lower():
            continue
        if record["id"] in seen:
            continue
        append_jsonl(raw_dir, record)
        append_markdown(chats_dir, record, args.markdown_mode)
        seen.add(record["id"])
        added += 1
    return added
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def row_to_record(row, self_name, privacy_mode):
    """Convert one joined message row into the vault's JSON record shape.

    In ``metadata-only`` privacy mode the body is blanked and replaced by its
    SHA-256 hex digest; otherwise the body is kept and the hash is empty.
    """
    timestamp = apple_timestamp(row["message_date"])
    body = row["text"] or ""
    from_me = bool(row["is_from_me"])
    handle = row["handle_id"]
    display_name = row["display_name"]
    chat_identifier = row["chat_identifier"]

    if privacy_mode == "metadata-only":
        stored_body = ""
        body_hash = hashlib.sha256(body.encode("utf-8")).hexdigest()
    else:
        stored_body = body
        body_hash = ""

    # A named chat, or an identifier starting with "chat", is treated as a group.
    looks_like_group = bool(display_name) or str(chat_identifier or "").startswith("chat")
    author = self_name if from_me else (handle or "Unknown")

    record = {}
    record["id"] = compound_id(row, timestamp)
    record["source"] = "Apple Messages chat.db"
    record["sourceSystem"] = "iMessage"
    record["timestamp"] = timestamp
    record["chatPk"] = row["chat_pk"]
    record["chatName"] = display_name or chat_identifier or handle or "iMessage"
    record["chatIdentifier"] = chat_identifier
    record["isGroup"] = looks_like_group
    record["fromMe"] = from_me
    record["author"] = author
    record["handleId"] = handle
    record["service"] = row["service"]
    record["body"] = stored_body
    record["bodyHash"] = body_hash
    record["bodyCharCount"] = len(body)
    return record
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def apple_timestamp(value):
    """Return an ISO-8601 UTC string for a Messages ``date`` column value.

    Values whose magnitude exceeds 10**10 are divided by 10**9 before the
    ``CORE_DATA_EPOCH_OFFSET`` is added; smaller values are used as seconds.
    A missing value is treated as 0.
    """
    raw = float(value or 0)
    if abs(raw) > 10_000_000_000:
        seconds = raw / 1_000_000_000
    else:
        seconds = raw
    unix_seconds = seconds + CORE_DATA_EPOCH_OFFSET
    moment = datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
    return moment.isoformat()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def compound_id(row, timestamp):
    """Build the ``::``-joined de-duplication id for a message row.

    The parts are the literal ``imessage``, the first truthy chat marker
    (chat pk, chat identifier, handle id), the message guid, the message
    primary key, and the timestamp string. Missing parts get placeholders.
    """
    chat_part = row["chat_pk"] or row["chat_identifier"] or row["handle_id"] or "unknown-chat"
    guid_part = row["guid"] or "no-guid"
    pk_part = row["message_pk"] or "no-pk"
    pieces = ("imessage", str(chat_part), str(guid_part), str(pk_part), timestamp)
    return "::".join(pieces)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def append_jsonl(raw_dir, record):
    """Append ``record`` as one JSON line to the file named after its date.

    The file name is the first ten characters of ``record['timestamp']``
    followed by ``.jsonl``; non-ASCII text is written unescaped.
    """
    day = record["timestamp"][:10]
    target = raw_dir / f"{day}.jsonl"
    with target.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(record, ensure_ascii=False)
        handle.write(serialized + "\n")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def append_markdown(chats_dir, record, mode):
    """Append one transcript bullet for ``record`` unless ``mode`` is ``"none"``.

    Transcripts live at ``<chats_dir>/<YYYY-MM>/<chat name>.md``. A new file
    first receives a heading; every call then appends a single bullet with
    the timestamp, speaker, and whitespace-collapsed body.
    """
    if mode == "none":
        return

    month_dir = chats_dir / record["timestamp"][:7]
    month_dir.mkdir(parents=True, exist_ok=True)

    chat_name = record["chatName"]
    note_path = month_dir / f"{safe_filename(chat_name)}.md"
    if not note_path.exists():
        heading = f"# {escape_markdown(chat_name)}\n\nSynced from Apple Messages.\n\n"
        write_text_atomic(note_path, heading)

    speaker = escape_markdown(record["author"])
    flattened = " ".join(record["body"].split())
    body = escape_markdown(flattened)
    entry = f"- {record['timestamp']} | {speaker}: {body}\n"
    with note_path.open("a", encoding="utf-8") as handle:
        handle.write(entry)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def load_seen(path):
    """Load the set of already-imported message ids from ``path``.

    Returns an empty set when the file is missing. An unreadable or malformed
    file also yields an empty set, but a warning is printed to stderr because
    starting from an empty set makes the next sync re-import every message.
    """
    import sys

    if not path.exists():
        return set()
    try:
        return set(json.loads(path.read_text(encoding="utf-8")))
    except (OSError, ValueError, TypeError) as error:
        # ValueError covers invalid JSON and bad UTF-8; TypeError covers
        # payloads that are not an iterable of hashable ids.
        print(f"Warning: ignoring unreadable sync state {path}: {error}", file=sys.stderr)
        return set()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def save_seen(path, seen):
    """Persist ``seen`` to ``path`` as a sorted, indented JSON array."""
    ordered_ids = sorted(seen)
    payload = json.dumps(ordered_ids, indent=2)
    write_text_atomic(path, payload)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def safe_filename(value):
    """Turn a chat name into a file-name stem of at most 120 characters.

    Each of ``/ : \\ ? % * " < > |`` becomes ``-``, runs of whitespace
    collapse to single spaces, and an empty result falls back to "iMessage".
    """
    dash_table = str.maketrans({char: "-" for char in '/:\\?%*"<>|'})
    collapsed = " ".join(value.translate(dash_table).split())
    if not collapsed:
        collapsed = "iMessage"
    return collapsed[:120]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def escape_markdown(value):
    """Return ``str(value)`` on one line with markdown-sensitive characters escaped.

    Newlines and carriage returns become spaces; backslash, ``[``, ``]`` and
    ``|`` are each prefixed with a backslash.
    """
    replacements = str.maketrans({
        "\n": " ",
        "\r": " ",
        "\\": "\\\\",
        "[": "\\[",
        "]": "\\]",
        "|": "\\|",
    })
    return str(value).translate(replacements)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` via a sibling temp file and a rename.

    The temp file is named ``<name>.<time_ns>.tmp`` next to ``path``. If the
    write or the rename fails, the temp file is removed before the error
    propagates so failed writes do not leave ``.tmp`` files behind.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        try:
            temp.unlink()
        except OSError:
            # The temp file was never created or is already gone.
            pass
        raise
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def parse_args():
    """Parse the iMessage sync command line.

    ``--vault`` is required. ``--days`` defaults to 30, ``--chat`` to an empty
    string, ``--db`` to ``DEFAULT_DB``, ``--self-name`` to "Me",
    ``--markdown-mode`` to "none" and ``--privacy-mode`` to "standard".
    """
    option_table = (
        ("--vault", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 30}),
        ("--chat", {"default": ""}),
        ("--db", {"type": Path, "default": DEFAULT_DB}),
        ("--self-name", {"default": "Me"}),
        ("--markdown-mode", {"choices": ["chat", "month", "none"], "default": "none"}),
        ("--privacy-mode", {"choices": ["standard", "metadata-only"], "default": "standard"}),
    )
    parser = argparse.ArgumentParser()
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
|
|
175
|
+
main()
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
import tempfile
|
|
6
|
+
import zipfile
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main():
    """Entry point: unpack the LinkedIn archive and import it into the vault."""
    options = parse_args()
    archive_context = unpack(options.input)
    with archive_context as source:
        vault_root = options.vault.resolve()
        import_archive(vault_root, source, options.days)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def import_archive(vault, source, days):
    """Import LinkedIn messages and connections from an unpacked archive.

    Args:
        vault: resolved vault root; output goes under ``08 Sources/LinkedIn``
            and ``04 People``.
        source: directory containing the archive's CSV files.
        days: only messages newer than this many days are imported; a falsy
            value disables the cutoff.

    Messages are appended to per-day JSONL files and skipped when their id is
    already in the persisted seen set.
    """
    linkedin = vault / "08 Sources" / "LinkedIn"
    raw_dir = linkedin / "Raw"
    state_dir = linkedin / ".sync-state"
    people_dir = vault / "04 People"
    for directory in (raw_dir, state_dir, people_dir):
        directory.mkdir(parents=True, exist_ok=True)

    cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
    seen_path = state_dir / "linkedin-seen-message-ids.json"
    seen = load_seen(seen_path)
    imported_messages = 0
    imported_connections = write_connections(source, people_dir)

    try:
        for file in find_csvs(source, "message"):
            for row in read_csv(file):
                record = message_record(row, file)
                if not record:
                    continue
                if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
                    continue
                if record["id"] in seen:
                    continue
                append_jsonl(raw_dir, record)
                seen.add(record["id"])
                imported_messages += 1
    finally:
        # Records are appended as we go; save the ids even on failure so a
        # rerun does not append the same messages again.
        save_seen(seen_path, seen)

    print(f"Imported {imported_messages} LinkedIn messages.")
    print(f"Indexed {imported_connections} LinkedIn connections.")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def write_connections(source, people_dir):
    """Write ``LinkedIn Connections.md`` from the archive's connection CSVs.

    Returns the number of connections listed. Rows without a usable name are
    skipped, and the file is only written when at least one row qualifies.
    """
    files = find_csvs(source, "connection")
    if not files:
        return 0

    rows = [row for file in files for row in read_csv(file)]
    bullets = []
    for row in rows:
        first = first_value(row, ["First Name", "FirstName", "First name"])
        last = first_value(row, ["Last Name", "LastName", "Last name"])
        joined = " ".join(part for part in [first, last] if part).strip()
        full_name = joined or first_value(row, ["Name", "Full Name"])
        company = first_value(row, ["Company", "Company Name"])
        position = first_value(row, ["Position", "Title"])
        connected_on = first_value(row, ["Connected On", "ConnectedOn"])
        if not full_name:
            continue
        detail = ", ".join(part for part in [position, company, connected_on] if part)
        suffix = f" - {detail}" if detail else ""
        bullets.append(f"- {full_name}{suffix}")

    count = len(bullets)
    if count:
        header = ["# LinkedIn Connections", "", "Imported from LinkedIn data archive.", ""]
        text = "\n".join(header + bullets) + "\n"
        (people_dir / "LinkedIn Connections.md").write_text(text, encoding="utf-8")
    return count
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def message_record(row, file):
    """Convert one LinkedIn message CSV row into a vault record.

    Args:
        row: mapping of CSV header to cell value.
        file: path of the CSV the row came from; its name is part of the id.

    Returns:
        The record dict, or ``None`` when the row has no body or no parseable
        timestamp.
    """
    import hashlib

    body = first_value(row, ["Content", "Message", "Body", "Text"])
    if not body:
        return None
    timestamp = parse_timestamp(first_value(row, ["Date", "Created At", "Timestamp", "Sent Date"]))
    if not timestamp:
        return None
    sender = first_value(row, ["From", "Sender", "Sender Name", "From Name"]) or "Unknown"
    recipients = first_value(row, ["To", "Recipient", "Recipients", "To Name"]) or "Unknown"
    explicit_conversation = first_value(row, ["Conversation Title", "Subject", "Conversation"])
    conversation = explicit_conversation or other_party(sender, recipients)
    # The builtin hash() of a str is salted per process, so ids built from it
    # differ between runs and the seen-id check never matches. A content
    # digest keeps the id stable across imports.
    body_digest = hashlib.sha256(body.encode("utf-8")).hexdigest()[:16]
    return {
        "id": f"linkedin-{file.name}-{timestamp}-{sender}-{body_digest}",
        "source": "LinkedIn data archive",
        "sourceSystem": "LinkedIn",
        "timestamp": timestamp,
        "chatName": conversation,
        "isGroup": "," in recipients,
        "fromMe": False,
        "author": sender,
        "to": recipients,
        "body": body,
    }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def other_party(sender, recipients):
    """Pick the counterpart's name for a conversation with no explicit title.

    Prefers the sender unless it is "me"/"you" (case-insensitive), then the
    first comma-separated recipient under the same rule, and finally whatever
    is non-empty, defaulting to "Unknown".
    """
    self_aliases = {"me", "you"}
    if sender and sender.lower() not in self_aliases:
        return sender
    if recipients and recipients.lower() not in self_aliases:
        first_recipient, _, _ = recipients.partition(",")
        return first_recipient.strip()
    for fallback in (sender, recipients):
        if fallback:
            return fallback
    return "Unknown"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def parse_timestamp(value):
    """Parse a timestamp cell into an ISO-8601 UTC string.

    Tries a fixed list of ``strptime`` formats, then ``datetime.fromisoformat``.
    A trailing " UTC" marker is stripped first so values such as
    ``2024-01-02 03:04:05 UTC`` are accepted. Values without a zone are
    interpreted as UTC on every path.

    Returns:
        The ISO string, or ``""`` when the value is empty or unparseable.
    """
    if not value:
        return ""
    value = value.strip()
    if value.upper().endswith(" UTC"):
        value = value[:-4].rstrip()
    formats = [
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%SZ",
        "%m/%d/%Y, %I:%M %p",
        "%m/%d/%Y",
        "%Y-%m-%d",
    ]
    for fmt in formats:
        try:
            parsed = datetime.strptime(value, fmt)
        except ValueError:
            continue
        return parsed.replace(tzinfo=timezone.utc).isoformat()
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return ""
    if parsed.tzinfo is None:
        # astimezone() on a naive value would assume the machine's local
        # zone; pin it to UTC like the strptime formats above.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc).isoformat()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def append_jsonl(raw_dir, record):
    """Append ``record`` as a JSON line to ``<raw_dir>/<YYYY-MM-DD>.jsonl``.

    The date comes from the first ten characters of ``record['timestamp']``.
    """
    date_prefix = record["timestamp"][:10]
    daily_file = raw_dir / (date_prefix + ".jsonl")
    with daily_file.open("a", encoding="utf-8") as out:
        out.write(json.dumps(record, ensure_ascii=False))
        out.write("\n")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def find_csvs(source, needle):
    """Return the sorted ``*.csv`` paths under ``source`` whose file name
    contains ``needle`` (case-insensitive)."""
    wanted = needle.lower()
    matches = []
    for candidate in source.rglob("*.csv"):
        if wanted in candidate.name.lower():
            matches.append(candidate)
    matches.sort()
    return matches
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def read_csv(path):
    """Read a CSV file (UTF-8, optional BOM) into a list of header-keyed rows."""
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        rows = [row for row in reader]
    return rows
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def first_value(row, keys):
    """Return the stripped value of the first key in ``keys`` present in ``row``.

    Exact header matches are tried first, then case-insensitive matches.
    Returns ``""`` when no key has a truthy value.

    ``csv.DictReader`` stores surplus cells of an over-long row as a list
    under the key ``None``; such non-string keys and values are ignored
    instead of crashing the lookup.
    """
    for key in keys:
        value = row.get(key)
        if value and isinstance(value, str):
            return value.strip()
    lower = {key.lower(): value for key, value in row.items() if isinstance(key, str)}
    for key in keys:
        value = lower.get(key.lower())
        if value and isinstance(value, str):
            return value.strip()
    return ""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def load_seen(path):
    """Load the set of already-imported LinkedIn message ids from ``path``.

    A missing file yields an empty set. An unreadable or malformed file also
    yields an empty set, with a warning on stderr because every message will
    then be treated as new.
    """
    import sys

    if not path.exists():
        return set()
    try:
        return set(json.loads(path.read_text(encoding="utf-8")))
    except (OSError, ValueError, TypeError) as error:
        # ValueError covers invalid JSON and bad UTF-8; TypeError covers
        # payloads that are not an iterable of hashable ids.
        print(f"Warning: ignoring unreadable sync state {path}: {error}", file=sys.stderr)
        return set()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def save_seen(path, seen):
    """Persist ``seen`` to ``path`` as a sorted, indented JSON array.

    The payload is written to a sibling ``<name>.tmp`` file and renamed over
    ``path`` so an interrupted run cannot leave a truncated state file, which
    ``load_seen`` would discard and cause a full re-import.
    """
    payload = json.dumps(sorted(seen), indent=2)
    temp = path.with_name(f"{path.name}.tmp")
    temp.write_text(payload, encoding="utf-8")
    temp.replace(path)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def unpack(input_path):
    """Return a context manager yielding a directory with the archive's files.

    A directory input is used as-is. Anything else is opened as a zip file and
    extracted into a temporary directory that is removed when the context
    exits. If opening or extracting the zip fails, the temporary directory is
    cleaned up before the error propagates.
    """
    input_path = input_path.resolve()
    if input_path.is_dir():
        return NullContext(input_path)
    temp = tempfile.TemporaryDirectory()
    try:
        with zipfile.ZipFile(input_path) as archive:
            archive.extractall(temp.name)
    except BaseException:
        # No TempContext exists yet to own the directory, so remove it here.
        temp.cleanup()
        raise
    return TempContext(Path(temp.name), temp)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class NullContext:
    """Context manager that yields a fixed path and does no cleanup."""

    def __init__(self, path):
        # Path handed back unchanged by __enter__.
        self.path = path

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        # Returning False lets any exception from the with-body propagate.
        return False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TempContext:
    """Context manager that yields ``path`` and cleans up ``temp`` on exit."""

    def __init__(self, path, temp):
        # Path handed back by __enter__.
        self.path = path
        # Object whose cleanup() is called when the context exits.
        self.temp = temp

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        self.temp.cleanup()
        # Returning False lets any exception from the with-body propagate.
        return False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def parse_args():
    """Parse the LinkedIn import command line.

    ``--vault`` and ``--input`` are required paths; ``--days`` is an integer
    defaulting to 3650.
    """
    cli = argparse.ArgumentParser()
    for required_path in ("--vault", "--input"):
        cli.add_argument(required_path, type=Path, required=True)
    cli.add_argument("--days", type=int, default=3650)
    return cli.parse_args()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
main()
|
|
@@ -3,6 +3,7 @@ import argparse
|
|
|
3
3
|
import json
|
|
4
4
|
import math
|
|
5
5
|
import re
|
|
6
|
+
import time
|
|
6
7
|
from collections import Counter, defaultdict
|
|
7
8
|
from datetime import datetime, timezone
|
|
8
9
|
from pathlib import Path
|
|
@@ -17,26 +18,36 @@ SLANG = {"lol", "lmao", "haha", "hahaha", "bro", "bruh", "wtf", "omg", "ngl", "i
|
|
|
17
18
|
def main():
|
|
18
19
|
args = parse_args()
|
|
19
20
|
vault = args.vault.resolve()
|
|
20
|
-
|
|
21
|
-
output_dir =
|
|
21
|
+
sources = vault / "08 Sources"
|
|
22
|
+
output_dir = sources / "Analysis"
|
|
22
23
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
23
|
-
messages = load_messages(
|
|
24
|
+
messages = load_messages(sources, args.days)
|
|
24
25
|
profiles = build_profiles(messages, args.min_messages)
|
|
25
|
-
|
|
26
|
+
people = build_people(profiles)
|
|
27
|
+
write_json_atomic(output_dir / "relationship_profiles.json", profiles)
|
|
28
|
+
write_json_atomic(output_dir / "person_identity_map.json", people)
|
|
26
29
|
write_markdown(output_dir / "Relationship Map.md", profiles, args.days)
|
|
30
|
+
write_people_memory(vault / "06 AI Memory" / "Person Context Index.md", people, args.days)
|
|
31
|
+
write_legacy_whatsapp_outputs(vault, output_dir)
|
|
27
32
|
print(f"Analyzed {len(messages)} messages.")
|
|
28
33
|
print(f"Wrote {len(profiles)} relationship profiles.")
|
|
34
|
+
print(f"Wrote {len(people)} canonical person records.")
|
|
29
35
|
|
|
30
36
|
|
|
31
|
-
def load_messages(
|
|
37
|
+
def load_messages(sources_dir, days):
|
|
32
38
|
cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
|
|
33
39
|
messages = []
|
|
34
|
-
|
|
40
|
+
raw_files = sorted(sources_dir.glob("*/Raw/*.jsonl"))
|
|
41
|
+
for path in raw_files:
|
|
35
42
|
with path.open("r", encoding="utf-8") as f:
|
|
36
43
|
for line in f:
|
|
37
44
|
if not line.strip():
|
|
38
45
|
continue
|
|
39
|
-
|
|
46
|
+
try:
|
|
47
|
+
record = json.loads(line)
|
|
48
|
+
except json.JSONDecodeError:
|
|
49
|
+
print(f"Skipping corrupt JSONL line in {path}")
|
|
50
|
+
continue
|
|
40
51
|
dt = datetime.fromisoformat(record["timestamp"].replace("Z", "+00:00"))
|
|
41
52
|
if cutoff and dt.timestamp() < cutoff:
|
|
42
53
|
continue
|
|
@@ -48,13 +59,15 @@ def load_messages(raw_dir, days):
|
|
|
48
59
|
def build_profiles(messages, min_messages):
|
|
49
60
|
by_chat = defaultdict(list)
|
|
50
61
|
for message in messages:
|
|
51
|
-
|
|
52
|
-
|
|
62
|
+
key = f"{message.get('sourceSystem') or source_system(message)}::{message.get('chatName') or 'Unknown Chat'}"
|
|
63
|
+
by_chat[key].append(message)
|
|
64
|
+
profiles = [profile_chat(key, items) for key, items in by_chat.items() if len(items) >= min_messages]
|
|
53
65
|
profiles.sort(key=lambda p: (p["messageCount"], p["lastSeen"]), reverse=True)
|
|
54
66
|
return profiles
|
|
55
67
|
|
|
56
68
|
|
|
57
|
-
def profile_chat(
|
|
69
|
+
def profile_chat(chat_key, messages):
|
|
70
|
+
source, chat_name = split_chat_key(chat_key)
|
|
58
71
|
count = len(messages)
|
|
59
72
|
outbound = sum(1 for m in messages if m.get("fromMe"))
|
|
60
73
|
inbound = count - outbound
|
|
@@ -72,8 +85,15 @@ def profile_chat(chat_name, messages):
|
|
|
72
85
|
sentiment = normalized_sentiment(positive, negative, count)
|
|
73
86
|
tags = infer_tags(any(m.get("isGroup") for m in messages), count, inbound, outbound, warmth, friction, operational, work, logistics, text.count("?"))
|
|
74
87
|
guess = infer_relationship(tags, count, warmth, friction, operational, work, outbound / count)
|
|
88
|
+
identity = infer_identity(source, chat_name, messages)
|
|
75
89
|
return {
|
|
76
90
|
"chatName": chat_name,
|
|
91
|
+
"sourceSystem": source,
|
|
92
|
+
"displayName": f"{chat_name} ({source})",
|
|
93
|
+
"identityName": identity["name"],
|
|
94
|
+
"canonicalPersonKey": identity["key"],
|
|
95
|
+
"identityConfidence": identity["confidence"],
|
|
96
|
+
"identityEvidence": identity["evidence"],
|
|
77
97
|
"messageCount": count,
|
|
78
98
|
"inbound": inbound,
|
|
79
99
|
"outbound": outbound,
|
|
@@ -130,12 +150,110 @@ def infer_relationship(tags, count, warmth, friction, operational, work, balance
|
|
|
130
150
|
return "general relationship, needs human labeling"
|
|
131
151
|
|
|
132
152
|
|
|
153
|
+
def infer_identity(source, chat_name, messages):
    """Derive a canonical identity for one chat.

    Group chats get a source-specific ``group::`` key. Direct chats get a
    ``person::`` key built from the most frequent usable name among the
    candidates: inbound authors for Slack/LinkedIn, first recipients of
    outbound LinkedIn messages, and the chat name itself.

    Returns:
        A dict with ``name``, ``key``, ``confidence`` and ``evidence``.
    """
    for message in messages:
        if message.get("isGroup"):
            return {
                "name": chat_name,
                "key": f"group::{source.lower()}::{normalize_identity(chat_name)}",
                "confidence": "medium",
                "evidence": "group chat kept source-specific",
            }

    author_sources = {"Slack", "LinkedIn"}
    candidates = []
    if source in author_sources:
        for message in messages:
            if not message.get("fromMe"):
                candidates.append((message.get("author") or "").strip())
    if source == "LinkedIn":
        for message in messages:
            if message.get("fromMe"):
                candidates.append((message.get("to") or "").split(",")[0].strip())
    candidates.append(chat_name)

    name = best_identity_name(candidates) or chat_name
    normalized_name = normalize_identity(name)
    same_as_chat = normalized_name == normalize_identity(chat_name)
    # A name that differs from the chat name is still "medium" when it came
    # from Slack/LinkedIn; otherwise a mismatch is "low".
    if same_as_chat or source in author_sources:
        confidence = "medium"
    else:
        confidence = "low"
    return {
        "name": name,
        "key": f"person::{normalized_name}",
        "confidence": confidence,
        "evidence": f"{source} direct chat identity",
    }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def best_identity_name(candidates):
    """Return the first candidate whose normalized form is the most frequent.

    Unusable candidates are dropped first; returns ``""`` when none remain.
    """
    usable = [name for name in candidates if usable_identity_name(name)]
    if not usable:
        return ""
    normalized = [normalize_identity(name) for name in usable]
    winner = Counter(normalized).most_common(1)[0][0]
    for name, key in zip(usable, normalized):
        if key == winner:
            return name
    return usable[0]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def usable_identity_name(value):
    """Return True when ``value`` normalizes to something other than an empty
    string or one of the generic placeholder names."""
    if not value:
        return False
    placeholders = {"me", "you", "unknown", "unknown chat", "imessage"}
    normalized = normalize_identity(value)
    return bool(normalized) and normalized not in placeholders
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def normalize_identity(value):
    """Normalize a display name into a comparison key.

    Lower-cases the text, removes ``<...>`` spans and http(s) URLs, replaces
    every run of characters other than letters, digits, ``@`` and ``+`` with
    a single space, and trims.

    Letters and digits from any script are kept. The previous ASCII-only
    class reduced names written in other scripts to an empty string, so all
    such people shared the key ``person::``. ASCII input normalizes exactly
    as before; underscores are still treated as separators.
    """
    import unicodedata

    # Compose combining sequences so "e" + accent and the precomposed
    # character produce the same key.
    text = unicodedata.normalize("NFC", str(value or "")).lower()
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"https?://\S+", " ", text)
    text = re.sub(r"(?:[^\w@+]|_)+", " ", text)
    return " ".join(text.split())
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def build_people(profiles):
    """Group non-group chat profiles by canonical person key.

    Each person record lists its aliases, sources, per-source profile
    summaries (largest first), total message count and overall date range.
    People are ordered by source count, then message count, then last seen,
    all descending.
    """
    by_person = defaultdict(list)
    for profile in profiles:
        person_key = profile.get("canonicalPersonKey")
        if not person_key or person_key.startswith("group::"):
            continue
        by_person[person_key].append(profile)

    summary_fields = (
        "sourceSystem",
        "chatName",
        "displayName",
        "messageCount",
        "firstSeen",
        "lastSeen",
        "relationshipGuess",
        "typingStyle",
        "identityConfidence",
        "identityEvidence",
    )

    people = []
    for person_key, entries in by_person.items():
        ranked = sorted(
            entries,
            key=lambda entry: (entry["messageCount"], entry["lastSeen"]),
            reverse=True,
        )
        source_names = sorted({entry["sourceSystem"] for entry in ranked})
        names = [entry.get("identityName") or entry["chatName"] for entry in ranked]
        person = {
            "canonicalPersonKey": person_key,
            "displayName": names[0],
            "aliases": sorted({name for name in names if name}),
            "sources": source_names,
            "sourceProfiles": [
                {field: entry[field] for field in summary_fields} for entry in ranked
            ],
            "totalMessages": sum(entry["messageCount"] for entry in ranked),
            "firstSeen": min(entry["firstSeen"] for entry in ranked),
            "lastSeen": max(entry["lastSeen"] for entry in ranked),
        }
        people.append(person)

    people.sort(
        key=lambda person: (len(person["sources"]), person["totalMessages"], person["lastSeen"]),
        reverse=True,
    )
    return people
|
|
247
|
+
|
|
248
|
+
|
|
133
249
|
def write_markdown(path, profiles, days):
|
|
134
250
|
lines = ["# Relationship Map", "", f"Window: last {days} days", "", "Generated signals. Treat as editable working notes.", ""]
|
|
135
251
|
for profile in profiles:
|
|
136
252
|
lines.extend([
|
|
137
|
-
f"## {profile['
|
|
253
|
+
f"## {profile['displayName']}",
|
|
138
254
|
"",
|
|
255
|
+
f"- Source: {profile['sourceSystem']}",
|
|
256
|
+
f"- Canonical person: {profile['identityName']} ({profile['canonicalPersonKey']})",
|
|
139
257
|
f"- Guess: {profile['relationshipGuess']}",
|
|
140
258
|
f"- Messages: {profile['messageCount']} ({profile['inbound']} inbound, {profile['outbound']} outbound)",
|
|
141
259
|
f"- Dates: {profile['firstSeen']} to {profile['lastSeen']}",
|
|
@@ -144,7 +262,66 @@ def write_markdown(path, profiles, days):
|
|
|
144
262
|
f"- Typing style: {typing_style_summary(profile['typingStyle'])}",
|
|
145
263
|
"",
|
|
146
264
|
])
|
|
147
|
-
path
|
|
265
|
+
write_text_atomic(path, "\n".join(lines))
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def write_people_memory(path, people, days):
    """Write the ``Person Context Index`` markdown note.

    One ``##`` section per person with key, aliases, sources, message count
    and date range, followed by one nested bullet per source profile.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    lines = [
        "# Person Context Index",
        "",
        f"Window: last {days} days",
        "",
        "Canonical people matched across sources. Treat matches as provisional unless manually confirmed.",
        "",
    ]
    for person in people:
        lines.append(f"## {person['displayName']}")
        lines.append("")
        lines.append(f"- Canonical key: `{person['canonicalPersonKey']}`")
        lines.append(f"- Aliases: {', '.join(person['aliases'])}")
        lines.append(f"- Sources: {', '.join(person['sources'])}")
        lines.append(f"- Messages: {person['totalMessages']}")
        lines.append(f"- Dates: {person['firstSeen']} to {person['lastSeen']}")
        lines.append("- Source-specific context:")
        for source in person["sourceProfiles"]:
            style = typing_style_summary(source.get("typingStyle", {}))
            lines.append(f"  - {source['sourceSystem']} / {source['chatName']}: {source['relationshipGuess']}; {source['messageCount']} messages; style {style}")
        # NOTE(review): the blank separator is emitted once per person, after
        # the source bullets. Indentation was lost in this view of the file;
        # confirm it is not meant to follow every source bullet.
        lines.append("")
    document = "\n".join(lines) + "\n"
    write_text_atomic(path, document)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def write_legacy_whatsapp_outputs(vault, output_dir):
    """Copy the analysis outputs into ``08 Sources/WhatsApp/Analysis``.

    Copies ``relationship_profiles.json``, ``person_identity_map.json`` and
    ``Relationship Map.md`` from ``output_dir`` when each exists.
    """
    legacy_dir = vault / "08 Sources" / "WhatsApp" / "Analysis"
    legacy_dir.mkdir(parents=True, exist_ok=True)
    mirrored_names = ("relationship_profiles.json", "person_identity_map.json", "Relationship Map.md")
    for name in mirrored_names:
        source = output_dir / name
        if not source.exists():
            continue
        content = source.read_text(encoding="utf-8")
        write_text_atomic(legacy_dir / name, content)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def write_json_atomic(path, data):
    """Serialize ``data`` as indented, non-ASCII-preserving JSON and write it
    to ``path`` through ``write_text_atomic``."""
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    write_text_atomic(path, serialized)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` via a sibling temp file and a rename.

    The temp file is named ``<name>.<time_ns>.tmp`` next to ``path``. If the
    write or the rename fails, the temp file is removed before the error
    propagates so failed writes do not litter the vault with ``.tmp`` files.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        try:
            temp.unlink()
        except OSError:
            # The temp file was never created or is already gone.
            pass
        raise
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def source_system(message):
    """Infer the source system from a record's free-text ``source`` field.

    Used as a fallback for records that carry no ``sourceSystem`` value.
    Recognizes Slack, LinkedIn, WhatsApp and Apple Messages / iMessage (the
    iMessage sync writes ``"Apple Messages chat.db"`` as its source); anything
    else is "Unknown".
    """
    source = message.get("source") or ""
    if "Slack" in source:
        return "Slack"
    if "LinkedIn" in source:
        return "LinkedIn"
    if "WhatsApp" in source:
        return "WhatsApp"
    if "Apple Messages" in source or "iMessage" in source:
        return "iMessage"
    return "Unknown"
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def split_chat_key(key):
    """Split a ``source::chat name`` key at the first ``::``.

    Returns ``(source, chat_name)``; a key without the separator is returned
    as ``("Unknown", key)``.
    """
    source, separator, chat_name = key.partition("::")
    if not separator:
        return "Unknown", key
    return source, chat_name
|
|
148
325
|
|
|
149
326
|
|
|
150
327
|
def score(words, lexicon):
|