digital-brain 0.1.3 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +38 -7
  2. package/bin/digital-brain.js +275 -38
  3. package/docs/AUTOMATIONS.md +13 -4
  4. package/docs/INTEGRATIONS.md +72 -0
  5. package/docs/PRIVACY.md +3 -1
  6. package/docs/SETUP.md +78 -0
  7. package/examples/sample-vault/{04 People/Interpreted Relationships/Close Friend.md → 06 AI Memory/Generated Relationship Drafts/Close Friend (WhatsApp).md } +4 -3
  8. package/examples/sample-vault/{04 People/Interpreted Relationships/Mom.md → 06 AI Memory/Generated Relationship Drafts/Mom (WhatsApp).md } +4 -3
  9. package/examples/sample-vault/{08 Sources/WhatsApp/Analysis/Interpreted/Project Team.md → 06 AI Memory/Generated Relationship Drafts/Project Team (WhatsApp).md } +4 -3
  10. package/examples/sample-vault/06 AI Memory/Interpreted Relationship Memory.md +3 -3
  11. package/examples/sample-vault/06 AI Memory/Person Context Index.md +26 -0
  12. package/examples/sample-vault/06 AI Memory/Person Reply Context.md +26 -0
  13. package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Close Friend.md → Analysis/Interpreted/Close Friend (WhatsApp).md } +4 -3
  14. package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Mom.md → Analysis/Interpreted/Mom (WhatsApp).md } +4 -3
  15. package/examples/sample-vault/{04 People/Interpreted Relationships/Project Team.md → 08 Sources/Analysis/Interpreted/Project Team (WhatsApp).md } +4 -3
  16. package/examples/sample-vault/08 Sources/Analysis/Relationship Map.md +38 -0
  17. package/examples/sample-vault/08 Sources/Analysis/interpreted_relationship_models.json +175 -0
  18. package/examples/sample-vault/08 Sources/Analysis/person_identity_map.json +78 -0
  19. package/examples/sample-vault/08 Sources/Analysis/relationship_profiles.json +122 -0
  20. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Close Friend (WhatsApp).md +44 -0
  21. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Mom (WhatsApp).md +45 -0
  22. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Project Team (WhatsApp).md +45 -0
  23. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Relationship Map.md +9 -3
  24. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/interpreted_relationship_models.json +18 -0
  25. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/person_identity_map.json +78 -0
  26. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/relationship_profiles.json +18 -0
  27. package/examples/sample-vault/08 Sources/WhatsApp/Raw/2026-01-01.jsonl +6 -6
  28. package/lib/fs.js +33 -0
  29. package/package.json +2 -1
  30. package/scripts/digital_brain_imessage_sync.py +175 -0
  31. package/scripts/digital_brain_linkedin_export_import.py +214 -0
  32. package/scripts/digital_brain_relationship_extractor.py +189 -12
  33. package/scripts/digital_brain_relationship_interpreter.py +104 -15
  34. package/scripts/digital_brain_slack_export_import.py +181 -0
  35. package/scripts/digital_brain_whatsapp_mac_sync.py +37 -8
  36. package/templates/vault/00 Home/How AI Should Use This Vault.md +1 -1
  37. package/templates/vault/00 Home/Start Here.md +2 -1
  38. package/templates/vault/04 People/Relationship Overrides.md +2 -1
  39. package/templates/vault/06 AI Memory/Generated Relationship Drafts/README.md +5 -0
  40. package/templates/vault/06 AI Memory/Interpreted Relationship Memory.md +1 -2
  41. package/templates/vault/06 AI Memory/Person Context Index.md +4 -0
  42. package/templates/vault/06 AI Memory/Person Reply Context.md +4 -0
  43. package/templates/vault/08 Sources/README.md +5 -0
  44. package/templates/vault/08 Sources/WhatsApp/Outbound/README.md +2 -2
  45. package/templates/vault/AGENTS.md +5 -1
  46. package/templates/vault/CLAUDE.md +3 -0
  47. package/templates/vault/GEMINI.md +4 -0
  48. package/whatsapp-web/send.mjs +32 -5
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import hashlib
4
+ import json
5
+ import sqlite3
6
+ import time
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
# Seconds between 1970-01-01 (Unix epoch) and 2001-01-01 UTC; added to the
# message date values read from chat.db to obtain Unix timestamps.
CORE_DATA_EPOCH_OFFSET = 978307200
# Default location of the Apple Messages database under the user's home.
DEFAULT_DB = Path.home().joinpath("Library", "Messages", "chat.db")
12
+
13
+
14
def main():
    """Run one iMessage import into the vault given on the command line.

    Exits with a readable message when the Messages database is missing.
    Creates the ``Raw``, ``ChatsByMonth`` and ``.sync-state`` folders under
    ``08 Sources/iMessage`` and prints the number of imported messages.
    """
    args = parse_args()
    if not args.db.exists():
        raise SystemExit(
            f"Apple Messages database not found: {args.db}\n"
            "Open Messages on macOS and grant Terminal Full Disk Access if macOS blocks access."
        )

    vault = args.vault.resolve()
    source_dir = vault / "08 Sources" / "iMessage"
    raw_dir = source_dir / "Raw"
    chats_dir = source_dir / "ChatsByMonth"
    state_dir = source_dir / ".sync-state"
    for directory in (raw_dir, chats_dir, state_dir):
        directory.mkdir(parents=True, exist_ok=True)

    seen_path = state_dir / "imessage-seen-message-ids.json"
    seen = load_seen(seen_path)
    try:
        added = sync_once(args, seen, raw_dir, chats_dir)
    finally:
        # Persist ids even when the sync aborts midway: records already
        # appended to the JSONL files would otherwise be imported again
        # (and duplicated) on the next run.
        save_seen(seen_path, seen)
    print(f"Imported {added} iMessage messages.")
35
+
36
+
37
def sync_once(args, seen, raw_dir, chats_dir):
    """Import not-yet-seen messages from the Messages database.

    Args:
        args: Parsed CLI namespace (``db``, ``days``, ``chat``, ``self_name``,
            ``privacy_mode``, ``markdown_mode`` are read).
        seen: Set of already imported record ids; updated in place.
        raw_dir: Directory receiving the per-day JSONL files.
        chats_dir: Directory receiving the per-month markdown notes.

    Returns:
        Number of records written during this call.
    """
    cutoff = datetime.now(timezone.utc).timestamp() - args.days * 24 * 60 * 60 if args.days else None
    # as_uri() percent-encodes characters such as "?", "#" and "%" that would
    # otherwise be parsed as URI syntax and point SQLite at the wrong file.
    db_uri = f"{args.db.resolve().as_uri()}?mode=ro"
    conn = sqlite3.connect(db_uri, uri=True)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """
            SELECT m.ROWID message_pk, m.guid guid, m.date message_date,
            m.is_from_me is_from_me, m.text text, m.service service,
            h.id handle_id,
            c.ROWID chat_pk, c.display_name display_name, c.chat_identifier chat_identifier
            FROM message m
            LEFT JOIN handle h ON h.ROWID = m.handle_id
            LEFT JOIN chat_message_join cmj ON cmj.message_id = m.ROWID
            LEFT JOIN chat c ON c.ROWID = cmj.chat_id
            WHERE m.text IS NOT NULL
            AND length(m.text) > 0
            ORDER BY m.date ASC, m.ROWID ASC
            """
        ).fetchall()
    finally:
        # Always release the handle, including when the query itself fails
        # (for example when macOS denies access to chat.db).
        conn.close()

    added = 0
    for row in rows:
        record = row_to_record(row, args.self_name, args.privacy_mode)
        if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
            continue
        if args.chat and args.chat.lower() not in record["chatName"].lower():
            continue
        if record["id"] in seen:
            continue
        append_jsonl(raw_dir, record)
        append_markdown(chats_dir, record, args.markdown_mode)
        seen.add(record["id"])
        added += 1
    return added
72
+
73
+
74
def row_to_record(row, self_name, privacy_mode):
    """Convert one joined chat.db row into the JSON-serialisable record dict.

    In ``metadata-only`` privacy mode the body is blanked and replaced by its
    SHA-256 digest; otherwise the body is stored and the digest left empty.
    """
    iso_time = apple_timestamp(row["message_date"])
    text = row["text"] or ""
    sent_by_me = bool(row["is_from_me"])
    title = row["display_name"]
    identifier = row["chat_identifier"]
    handle = row["handle_id"]

    if privacy_mode == "metadata-only":
        stored_body = ""
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    else:
        stored_body = text
        digest = ""

    if sent_by_me:
        author = self_name
    else:
        author = handle or "Unknown"

    # A chat counts as a group when it has a display name or its identifier
    # starts with "chat".
    looks_like_group = bool(title) or str(identifier or "").startswith("chat")

    record = {
        "id": compound_id(row, iso_time),
        "source": "Apple Messages chat.db",
        "sourceSystem": "iMessage",
        "timestamp": iso_time,
        "chatPk": row["chat_pk"],
        "chatName": title or identifier or handle or "iMessage",
        "chatIdentifier": identifier,
        "isGroup": looks_like_group,
        "fromMe": sent_by_me,
        "author": author,
        "handleId": handle,
        "service": row["service"],
        "body": stored_body,
        "bodyHash": digest,
        "bodyCharCount": len(text),
    }
    return record
96
+
97
+
98
def apple_timestamp(value):
    """Return an ISO-8601 UTC timestamp for a chat.db message date value.

    Values whose magnitude exceeds 10_000_000_000 are divided by 1e9 before
    use; smaller values are used as-is. ``None``/0 map to the reference date.
    """
    ticks = float(value or 0)
    if abs(ticks) > 10_000_000_000:
        ticks = ticks / 1_000_000_000
    unix_seconds = ticks + CORE_DATA_EPOCH_OFFSET
    moment = datetime.fromtimestamp(unix_seconds, tz=timezone.utc)
    return moment.isoformat()
102
+
103
+
104
def compound_id(row, timestamp):
    """Build the ``::``-joined dedupe id for a message row.

    The id combines a chat discriminator (chat pk, else chat identifier, else
    handle id), the message guid, the message pk and the ISO timestamp, with
    placeholder strings for missing parts.
    """
    chat_part = row["chat_pk"] or row["chat_identifier"] or row["handle_id"] or "unknown-chat"
    guid_part = row["guid"] or "no-guid"
    pk_part = row["message_pk"] or "no-pk"
    return f"imessage::{chat_part}::{guid_part}::{pk_part}::{timestamp}"
112
+
113
+
114
def append_jsonl(raw_dir, record):
    """Append ``record`` as one JSON line to the file named after its date.

    The file name is the first ten characters of ``record['timestamp']``
    (``YYYY-MM-DD``) plus ``.jsonl``.
    """
    day = record["timestamp"][:10]
    target = raw_dir / f"{day}.jsonl"
    serialized = json.dumps(record, ensure_ascii=False)
    with target.open("a", encoding="utf-8") as handle:
        handle.write(serialized + "\n")
117
+
118
+
119
def append_markdown(chats_dir, record, mode):
    """Append the message as a bullet line to the chat's monthly note.

    Does nothing when ``mode`` is ``"none"``. Otherwise the note lives at
    ``<chats_dir>/<YYYY-MM>/<safe chat name>.md`` and gets a heading the
    first time it is created.
    """
    if mode == "none":
        return
    month_dir = chats_dir / record["timestamp"][:7]
    month_dir.mkdir(parents=True, exist_ok=True)
    chat_name = record["chatName"]
    note = month_dir / f"{safe_filename(chat_name)}.md"
    if not note.exists():
        heading = f"# {escape_markdown(chat_name)}\n\nSynced from Apple Messages.\n\n"
        write_text_atomic(note, heading)
    author = escape_markdown(record["author"])
    # Collapse all whitespace runs so each message stays on a single line.
    single_line = " ".join(record["body"].split())
    entry = f"- {record['timestamp']} | {author}: {escape_markdown(single_line)}\n"
    with note.open("a", encoding="utf-8") as handle:
        handle.write(entry)
131
+
132
+
133
def load_seen(path):
    """Load the set of already imported record ids from ``path``.

    Returns an empty set when the file is missing, unreadable, not valid
    JSON, or does not contain a JSON list. Only string entries are kept so
    that a hand-edited or damaged state file cannot inject odd values (a
    bare JSON string used to be exploded into single characters).
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, bad encoding or malformed JSON: start fresh.
        return set()
    if not isinstance(payload, list):
        return set()
    return {item for item in payload if isinstance(item, str)}
140
+
141
+
142
def save_seen(path, seen):
    """Persist the seen-id set to ``path`` as a sorted, indented JSON list."""
    ordered_ids = sorted(seen)
    payload = json.dumps(ordered_ids, indent=2)
    write_text_atomic(path, payload)
144
+
145
+
146
def safe_filename(value):
    """Turn a chat name into a file-name-safe string of at most 120 chars.

    Path-hostile characters become ``-``, whitespace runs collapse to one
    space, and an empty result falls back to ``"iMessage"``.
    """
    forbidden = '/:\\?%*"<>|'
    table = str.maketrans({char: "-" for char in forbidden})
    collapsed = " ".join(value.translate(table).split()).strip()
    if not collapsed:
        collapsed = "iMessage"
    return collapsed[:120]
149
+
150
+
151
def escape_markdown(value):
    """Return ``value`` as single-line text with ``\\``, ``[``, ``]``, ``|`` escaped.

    Line breaks are replaced by spaces; the four special characters are
    prefixed with a backslash.
    """
    replacements = str.maketrans({
        "\n": " ",
        "\r": " ",
        "\\": "\\\\",
        "[": "\\[",
        "]": "\\]",
        "|": "\\|",
    })
    return str(value).translate(replacements)
154
+
155
+
156
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` through a temporary sibling and rename.

    The text is first written (UTF-8) to ``<name>.<time_ns>.tmp`` next to the
    target and then moved over it, so readers never see a half-written file.
    If writing or renaming fails, the temporary file is removed before the
    error is re-raised.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        # Don't leave stray *.tmp files behind in the vault on failure.
        try:
            temp.unlink()
        except OSError:
            pass
        raise
160
+
161
+
162
def parse_args():
    """Parse the iMessage sync command-line options from ``sys.argv``."""
    option_specs = (
        ("--vault", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 30}),
        ("--chat", {"default": ""}),
        ("--db", {"type": Path, "default": DEFAULT_DB}),
        ("--self-name", {"default": "Me"}),
        ("--markdown-mode", {"choices": ["chat", "month", "none"], "default": "none"}),
        ("--privacy-mode", {"choices": ["standard", "metadata-only"], "default": "standard"}),
    )
    cli = argparse.ArgumentParser()
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
172
+
173
+
174
+ if __name__ == "__main__":
175
+ main()
@@ -0,0 +1,214 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import csv
4
+ import json
5
+ import tempfile
6
+ import zipfile
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+
11
def main():
    """Import the LinkedIn archive named on the command line into the vault."""
    options = parse_args()
    with unpack(options.input) as archive_root:
        vault_root = options.vault.resolve()
        import_archive(vault_root, archive_root, options.days)
15
+
16
+
17
def import_archive(vault, source, days):
    """Import messages and connections from an unpacked LinkedIn archive.

    Args:
        vault: Resolved vault root directory.
        source: Directory holding the archive's CSV files.
        days: Only messages newer than this many days are imported; a falsy
            value disables the cutoff.

    Writes per-day JSONL files under ``08 Sources/LinkedIn/Raw``, the
    connections note under ``04 People`` and prints a short summary.
    """
    linkedin = vault / "08 Sources" / "LinkedIn"
    raw_dir = linkedin / "Raw"
    state_dir = linkedin / ".sync-state"
    people_dir = vault / "04 People"
    for directory in (raw_dir, state_dir, people_dir):
        directory.mkdir(parents=True, exist_ok=True)

    cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
    seen_path = state_dir / "linkedin-seen-message-ids.json"
    seen = load_seen(seen_path)
    imported_messages = 0
    imported_connections = write_connections(source, people_dir)

    try:
        for csv_file in find_csvs(source, "message"):
            for row in read_csv(csv_file):
                record = message_record(row, csv_file)
                if not record:
                    continue
                if cutoff and datetime.fromisoformat(record["timestamp"]).timestamp() < cutoff:
                    continue
                if record["id"] in seen:
                    continue
                append_jsonl(raw_dir, record)
                seen.add(record["id"])
                imported_messages += 1
    finally:
        # Persist ids even when a later CSV fails to parse, so records that
        # were already appended are not written a second time on the rerun.
        save_seen(seen_path, seen)

    print(f"Imported {imported_messages} LinkedIn messages.")
    print(f"Indexed {imported_connections} LinkedIn connections.")
47
+
48
+
49
def write_connections(source, people_dir):
    """Write ``LinkedIn Connections.md`` from the archive's connection CSVs.

    Returns the number of connections listed; 0 when no matching CSV exists
    or no row yields a name (in which case no file is written).
    """
    csv_files = find_csvs(source, "connection")
    if not csv_files:
        return 0
    all_rows = [row for csv_file in csv_files for row in read_csv(csv_file)]

    entries = []
    for row in all_rows:
        first = first_value(row, ["First Name", "FirstName", "First name"])
        last = first_value(row, ["Last Name", "LastName", "Last name"])
        full_name = " ".join(part for part in [first, last] if part).strip()
        if not full_name:
            full_name = first_value(row, ["Name", "Full Name"])
        if not full_name:
            continue
        extras = [
            first_value(row, ["Position", "Title"]),
            first_value(row, ["Company", "Company Name"]),
            first_value(row, ["Connected On", "ConnectedOn"]),
        ]
        detail = ", ".join(part for part in extras if part)
        suffix = f" - {detail}" if detail else ""
        entries.append(f"- {full_name}{suffix}")

    if entries:
        header = ["# LinkedIn Connections", "", "Imported from LinkedIn data archive.", ""]
        note = people_dir / "LinkedIn Connections.md"
        note.write_text("\n".join(header + entries) + "\n", encoding="utf-8")
    return len(entries)
73
+
74
+
75
def message_record(row, file):
    """Build the vault record for one LinkedIn message CSV row.

    Args:
        row: Mapping of CSV column name to value.
        file: Path of the CSV the row came from (its name is part of the id).

    Returns:
        The record dict, or ``None`` when the row has no body or no
        parseable timestamp.
    """
    # Local import: hashlib is not part of this module's import block.
    import hashlib

    body = first_value(row, ["Content", "Message", "Body", "Text"])
    if not body:
        return None
    timestamp = parse_timestamp(first_value(row, ["Date", "Created At", "Timestamp", "Sent Date"]))
    if not timestamp:
        return None
    sender = first_value(row, ["From", "Sender", "Sender Name", "From Name"]) or "Unknown"
    recipients = first_value(row, ["To", "Recipient", "Recipients", "To Name"]) or "Unknown"
    explicit_conversation = first_value(row, ["Conversation Title", "Subject", "Conversation"])
    conversation = explicit_conversation or other_party(sender, recipients)
    # The built-in hash() of a str is salted per interpreter process, so ids
    # derived from it never match across runs and the seen-id dedupe would
    # re-import every message. A content digest is stable.
    body_digest = hashlib.sha256(body.encode("utf-8")).hexdigest()
    return {
        "id": f"linkedin-{file.name}-{timestamp}-{sender}-{body_digest}",
        "source": "LinkedIn data archive",
        "sourceSystem": "LinkedIn",
        "timestamp": timestamp,
        "chatName": conversation,
        "isGroup": "," in recipients,
        "fromMe": False,
        "author": sender,
        "to": recipients,
        "body": body,
    }
98
+
99
+
100
def other_party(sender, recipients):
    """Pick the conversation partner's name from sender and recipients.

    Prefers the sender unless it is "me"/"you" (case-insensitive), then the
    first comma-separated recipient, and finally whatever is non-empty,
    falling back to ``"Unknown"``.
    """
    self_aliases = {"me", "you"}
    sender_is_other = bool(sender) and sender.lower() not in self_aliases
    if sender_is_other:
        return sender
    recipient_is_other = bool(recipients) and recipients.lower() not in self_aliases
    if recipient_is_other:
        first_recipient, _, _ = recipients.partition(",")
        return first_recipient.strip()
    return sender or recipients or "Unknown"
106
+
107
+
108
def parse_timestamp(value):
    """Parse an export timestamp into an ISO-8601 UTC string.

    Known fixed formats are tried first and interpreted as UTC, including
    values with a literal `` UTC`` suffix. Anything else is handed to
    ``datetime.fromisoformat``: offset-aware values are converted to UTC and
    naive ones are taken as UTC, consistent with the fixed formats.

    Returns:
        The ISO string, or ``""`` when the value is empty or unparseable.
    """
    if not value:
        return ""
    value = value.strip()
    formats = [
        "%Y-%m-%d %H:%M:%S UTC",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%SZ",
        "%m/%d/%Y, %I:%M %p",
        "%m/%d/%Y",
        "%Y-%m-%d",
    ]
    for fmt in formats:
        try:
            return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc).isoformat()
        except ValueError:
            pass
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return ""
    if parsed.tzinfo is None:
        # astimezone() on a naive value would assume the machine's local
        # zone and make the result depend on where the import runs.
        return parsed.replace(tzinfo=timezone.utc).isoformat()
    return parsed.astimezone(timezone.utc).isoformat()
129
+
130
+
131
def append_jsonl(raw_dir, record):
    """Append ``record`` as one JSON line to ``<raw_dir>/<YYYY-MM-DD>.jsonl``.

    The date part is the first ten characters of ``record['timestamp']``.
    """
    day = record["timestamp"][:10]
    target = raw_dir / f"{day}.jsonl"
    serialized = json.dumps(record, ensure_ascii=False)
    with target.open("a", encoding="utf-8") as handle:
        handle.write(serialized + "\n")
134
+
135
+
136
def find_csvs(source, needle):
    """Return sorted ``*.csv`` paths under ``source`` whose file name contains
    ``needle`` (case-insensitive), searching recursively."""
    wanted = needle.lower()
    matches = [candidate for candidate in source.rglob("*.csv") if wanted in candidate.name.lower()]
    matches.sort()
    return matches
138
+
139
+
140
def read_csv(path):
    """Read a CSV file (UTF-8, optional BOM) into a list of row dicts keyed
    by the header line."""
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        return [row for row in reader]
143
+
144
+
145
def first_value(row, keys):
    """Return the first non-blank value found under any of ``keys``.

    Exact column names are tried first, then a case-insensitive match.
    Values are stripped; ``""`` is returned when nothing matches.

    Non-string keys and values are ignored: ``csv.DictReader`` stores the
    surplus fields of an over-long row as a list under the key ``None``,
    which previously crashed the case-insensitive pass.
    """
    for key in keys:
        value = row.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
    lower = {
        key.lower(): value
        for key, value in row.items()
        if isinstance(key, str) and isinstance(value, str)
    }
    for key in keys:
        value = lower.get(key.lower())
        if value and value.strip():
            return value.strip()
    return ""
156
+
157
+
158
def load_seen(path):
    """Load the set of already imported record ids from ``path``.

    Returns an empty set when the file is missing, unreadable, not valid
    JSON, or does not contain a JSON list. Only string entries are kept so
    that a damaged state file cannot inject odd values (a bare JSON string
    used to be exploded into single characters).
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, bad encoding or malformed JSON: start fresh.
        return set()
    if not isinstance(payload, list):
        return set()
    return {item for item in payload if isinstance(item, str)}
165
+
166
+
167
def save_seen(path, seen):
    """Write the seen-id set to ``path`` as a sorted, indented JSON list."""
    ordered_ids = sorted(seen)
    payload = json.dumps(ordered_ids, indent=2)
    path.write_text(payload, encoding="utf-8")
169
+
170
+
171
def unpack(input_path):
    """Return a context manager yielding a directory with the archive's files.

    A directory input is used as-is. Any other path is opened as a zip file
    and extracted into a temporary directory that is removed when the
    context exits.

    Raises:
        zipfile.BadZipFile / OSError: when the input cannot be read as a zip;
            the temporary directory is removed before the error propagates.
    """
    input_path = input_path.resolve()
    if input_path.is_dir():
        return NullContext(input_path)
    temp = tempfile.TemporaryDirectory()
    try:
        with zipfile.ZipFile(input_path) as archive:
            archive.extractall(temp.name)
    except BaseException:
        # Nobody will ever enter/exit the context on failure, so clean up
        # the (possibly partially extracted) directory right away.
        temp.cleanup()
        raise
    return TempContext(Path(temp.name), temp)
179
+
180
+
181
class NullContext:
    """Context manager that simply hands back the path it was given."""

    def __init__(self, path):
        self.path = path

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        # Nothing to release; returning False lets exceptions propagate.
        return False
190
+
191
+
192
class TempContext:
    """Context manager yielding ``path`` and cleaning up ``temp`` on exit."""

    def __init__(self, path, temp):
        self.path = path
        # Object whose cleanup() removes the extracted files.
        self.temp = temp

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        self.temp.cleanup()
        # Returning False lets exceptions from the with-body propagate.
        return False
203
+
204
+
205
def parse_args():
    """Parse the LinkedIn import command-line options from ``sys.argv``."""
    option_specs = (
        ("--vault", {"type": Path, "required": True}),
        ("--input", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 3650}),
    )
    cli = argparse.ArgumentParser()
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
211
+
212
+
213
+ if __name__ == "__main__":
214
+ main()
@@ -3,6 +3,7 @@ import argparse
3
3
  import json
4
4
  import math
5
5
  import re
6
+ import time
6
7
  from collections import Counter, defaultdict
7
8
  from datetime import datetime, timezone
8
9
  from pathlib import Path
@@ -17,26 +18,36 @@ SLANG = {"lol", "lmao", "haha", "hahaha", "bro", "bruh", "wtf", "omg", "ngl", "i
17
18
def main():
    """Analyse all imported sources and write the relationship outputs.

    Reads raw messages below ``08 Sources``, writes the profile and identity
    JSON files plus the relationship map into ``08 Sources/Analysis``, the
    person index into ``06 AI Memory``, mirrors the outputs to the legacy
    WhatsApp location and prints a summary.
    """
    options = parse_args()
    vault_root = options.vault.resolve()
    sources_root = vault_root / "08 Sources"
    analysis_dir = sources_root / "Analysis"
    analysis_dir.mkdir(parents=True, exist_ok=True)

    all_messages = load_messages(sources_root, options.days)
    chat_profiles = build_profiles(all_messages, options.min_messages)
    person_records = build_people(chat_profiles)

    write_json_atomic(analysis_dir / "relationship_profiles.json", chat_profiles)
    write_json_atomic(analysis_dir / "person_identity_map.json", person_records)
    write_markdown(analysis_dir / "Relationship Map.md", chat_profiles, options.days)
    person_index = vault_root / "06 AI Memory" / "Person Context Index.md"
    write_people_memory(person_index, person_records, options.days)
    write_legacy_whatsapp_outputs(vault_root, analysis_dir)

    print(f"Analyzed {len(all_messages)} messages.")
    print(f"Wrote {len(chat_profiles)} relationship profiles.")
    print(f"Wrote {len(person_records)} canonical person records.")
29
35
 
30
36
 
31
- def load_messages(raw_dir, days):
37
+ def load_messages(sources_dir, days):
32
38
  cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
33
39
  messages = []
34
- for path in sorted(raw_dir.glob("*.jsonl")):
40
+ raw_files = sorted(sources_dir.glob("*/Raw/*.jsonl"))
41
+ for path in raw_files:
35
42
  with path.open("r", encoding="utf-8") as f:
36
43
  for line in f:
37
44
  if not line.strip():
38
45
  continue
39
- record = json.loads(line)
46
+ try:
47
+ record = json.loads(line)
48
+ except json.JSONDecodeError:
49
+ print(f"Skipping corrupt JSONL line in {path}")
50
+ continue
40
51
  dt = datetime.fromisoformat(record["timestamp"].replace("Z", "+00:00"))
41
52
  if cutoff and dt.timestamp() < cutoff:
42
53
  continue
@@ -48,13 +59,15 @@ def load_messages(raw_dir, days):
48
59
def build_profiles(messages, min_messages):
    """Group messages per source system and chat and profile each group.

    Groups with fewer than ``min_messages`` messages are dropped. The result
    is ordered by message count, then last-seen value, both descending.
    """
    grouped = defaultdict(list)
    for msg in messages:
        system = msg.get("sourceSystem") or source_system(msg)
        chat = msg.get("chatName") or "Unknown Chat"
        grouped[f"{system}::{chat}"].append(msg)

    result = []
    for chat_key, chat_messages in grouped.items():
        if len(chat_messages) < min_messages:
            continue
        result.append(profile_chat(chat_key, chat_messages))
    result.sort(key=lambda entry: (entry["messageCount"], entry["lastSeen"]), reverse=True)
    return result
55
67
 
56
68
 
57
- def profile_chat(chat_name, messages):
69
+ def profile_chat(chat_key, messages):
70
+ source, chat_name = split_chat_key(chat_key)
58
71
  count = len(messages)
59
72
  outbound = sum(1 for m in messages if m.get("fromMe"))
60
73
  inbound = count - outbound
@@ -72,8 +85,15 @@ def profile_chat(chat_name, messages):
72
85
  sentiment = normalized_sentiment(positive, negative, count)
73
86
  tags = infer_tags(any(m.get("isGroup") for m in messages), count, inbound, outbound, warmth, friction, operational, work, logistics, text.count("?"))
74
87
  guess = infer_relationship(tags, count, warmth, friction, operational, work, outbound / count)
88
+ identity = infer_identity(source, chat_name, messages)
75
89
  return {
76
90
  "chatName": chat_name,
91
+ "sourceSystem": source,
92
+ "displayName": f"{chat_name} ({source})",
93
+ "identityName": identity["name"],
94
+ "canonicalPersonKey": identity["key"],
95
+ "identityConfidence": identity["confidence"],
96
+ "identityEvidence": identity["evidence"],
77
97
  "messageCount": count,
78
98
  "inbound": inbound,
79
99
  "outbound": outbound,
@@ -130,12 +150,110 @@ def infer_relationship(tags, count, warmth, friction, operational, work, balance
130
150
  return "general relationship, needs human labeling"
131
151
 
132
152
 
153
def infer_identity(source, chat_name, messages):
    """Derive a canonical identity (name, key, confidence, evidence) for a chat.

    Group chats get a source-specific ``group::`` key. Direct chats get a
    ``person::`` key built from the most frequent usable name among the
    candidates (message authors / first recipients for Slack and LinkedIn,
    plus the chat name).
    """
    if any(m.get("isGroup") for m in messages):
        group_key = f"group::{source.lower()}::{normalize_identity(chat_name)}"
        return {
            "name": chat_name,
            "key": group_key,
            "confidence": "medium",
            "evidence": "group chat kept source-specific",
        }

    names_from_platform = source in {"Slack", "LinkedIn"}
    candidates = []
    if names_from_platform:
        candidates += [(m.get("author") or "").strip() for m in messages if not m.get("fromMe")]
    if source == "LinkedIn":
        candidates += [(m.get("to") or "").split(",")[0].strip() for m in messages if m.get("fromMe")]
    candidates.append(chat_name)

    name = best_identity_name(candidates) or chat_name
    name_key = normalize_identity(name)
    matches_chat_name = name_key == normalize_identity(chat_name)
    # "medium" when the chosen name matches the chat name or when the name
    # came from a Slack/LinkedIn export; "low" otherwise.
    if matches_chat_name or names_from_platform:
        confidence = "medium"
    else:
        confidence = "low"
    return {
        "name": name,
        "key": f"person::{name_key}",
        "confidence": confidence,
        "evidence": f"{source} direct chat identity",
    }
179
+
180
+
181
def best_identity_name(candidates):
    """Return the candidate spelling whose normalised form occurs most often.

    Unusable candidates are ignored; ``""`` is returned when none remain.
    Among equally frequent forms the one encountered first wins, and the
    first original spelling of the winning form is returned.
    """
    usable = [name for name in candidates if usable_identity_name(name)]
    if not usable:
        return ""
    normalized = [normalize_identity(name) for name in usable]
    winner = Counter(normalized).most_common(1)[0][0]
    return next(
        (name for name, key in zip(usable, normalized) if key == winner),
        usable[0],
    )
191
+
192
+
193
def usable_identity_name(value):
    """Tell whether ``value`` can serve as a person's name.

    Empty values and placeholders such as "me", "you", "unknown",
    "unknown chat" or "imessage" (after normalisation) are rejected.
    """
    if not value:
        return False
    placeholders = {"me", "you", "unknown", "unknown chat", "imessage"}
    key = normalize_identity(value)
    return bool(key) and key not in placeholders
200
+
201
+
202
def normalize_identity(value):
    """Normalise a name into a comparison key.

    Lower-cases the text, drops ``<...>`` markup and URLs, replaces every run
    of characters that are not letters, digits, ``@`` or ``+`` with a single
    space and trims the result. ``None`` yields ``""``.

    Letters and digits are matched Unicode-aware. The previous ASCII-only
    class erased non-Latin names entirely, so every such contact collapsed
    into the same empty ``person::`` key. For ASCII input the result is
    unchanged.
    """
    text = str(value or "").lower()
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"https?://\S+", " ", text)
    # \w also matches "_", which the original pattern treated as a separator.
    text = re.sub(r"(?:[^\w@+]|_)+", " ", text)
    return " ".join(text.split())
208
+
209
+
210
def build_people(profiles):
    """Merge chat profiles that share a canonical person key.

    Group chats (keys starting with ``group::``) and profiles without a key
    are skipped. Each person lists its source profiles ordered by message
    count then last-seen, descending; the first one supplies the display
    name. People are ordered by number of sources, total messages and
    last-seen, all descending.
    """
    profile_fields = (
        "sourceSystem",
        "chatName",
        "displayName",
        "messageCount",
        "firstSeen",
        "lastSeen",
        "relationshipGuess",
        "typingStyle",
        "identityConfidence",
        "identityEvidence",
    )

    by_person = defaultdict(list)
    for profile in profiles:
        person_key = profile.get("canonicalPersonKey")
        if not person_key or person_key.startswith("group::"):
            continue
        by_person[person_key].append(profile)

    people = []
    for person_key, members in by_person.items():
        ranked = sorted(members, key=lambda entry: (entry["messageCount"], entry["lastSeen"]), reverse=True)
        names = [entry.get("identityName") or entry["chatName"] for entry in ranked]
        person = {
            "canonicalPersonKey": person_key,
            "displayName": names[0],
            "aliases": sorted({name for name in names if name}),
            "sources": sorted({entry["sourceSystem"] for entry in ranked}),
            "sourceProfiles": [{field: entry[field] for field in profile_fields} for entry in ranked],
            "totalMessages": sum(entry["messageCount"] for entry in ranked),
            "firstSeen": min(entry["firstSeen"] for entry in ranked),
            "lastSeen": max(entry["lastSeen"] for entry in ranked),
        }
        people.append(person)

    people.sort(
        key=lambda entry: (len(entry["sources"]), entry["totalMessages"], entry["lastSeen"]),
        reverse=True,
    )
    return people
247
+
248
+
133
249
  def write_markdown(path, profiles, days):
134
250
  lines = ["# Relationship Map", "", f"Window: last {days} days", "", "Generated signals. Treat as editable working notes.", ""]
135
251
  for profile in profiles:
136
252
  lines.extend([
137
- f"## {profile['chatName']}",
253
+ f"## {profile['displayName']}",
138
254
  "",
255
+ f"- Source: {profile['sourceSystem']}",
256
+ f"- Canonical person: {profile['identityName']} ({profile['canonicalPersonKey']})",
139
257
  f"- Guess: {profile['relationshipGuess']}",
140
258
  f"- Messages: {profile['messageCount']} ({profile['inbound']} inbound, {profile['outbound']} outbound)",
141
259
  f"- Dates: {profile['firstSeen']} to {profile['lastSeen']}",
@@ -144,7 +262,66 @@ def write_markdown(path, profiles, days):
144
262
  f"- Typing style: {typing_style_summary(profile['typingStyle'])}",
145
263
  "",
146
264
  ])
147
- path.write_text("\n".join(lines), encoding="utf-8")
265
+ write_text_atomic(path, "\n".join(lines))
266
+
267
+
268
def write_people_memory(path, people, days):
    """Render the person index as markdown and write it to ``path``.

    Creates the parent directory if needed. Each person gets a section with
    key, aliases, sources, totals and one bullet per source profile.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    out = [
        "# Person Context Index",
        "",
        f"Window: last {days} days",
        "",
        "Canonical people matched across sources. Treat matches as provisional unless manually confirmed.",
        "",
    ]
    for person in people:
        out.append(f"## {person['displayName']}")
        out.append("")
        out.append(f"- Canonical key: `{person['canonicalPersonKey']}`")
        out.append(f"- Aliases: {', '.join(person['aliases'])}")
        out.append(f"- Sources: {', '.join(person['sources'])}")
        out.append(f"- Messages: {person['totalMessages']}")
        out.append(f"- Dates: {person['firstSeen']} to {person['lastSeen']}")
        out.append("- Source-specific context:")
        for entry in person["sourceProfiles"]:
            style = typing_style_summary(entry.get("typingStyle", {}))
            out.append(f" - {entry['sourceSystem']} / {entry['chatName']}: {entry['relationshipGuess']}; {entry['messageCount']} messages; style {style}")
        out.append("")
    write_text_atomic(path, "\n".join(out) + "\n")
287
+
288
+
289
def write_legacy_whatsapp_outputs(vault, output_dir):
    """Copy the analysis outputs to ``08 Sources/WhatsApp/Analysis``.

    The target directory is created if missing; output files that do not
    exist in ``output_dir`` are skipped.
    """
    legacy_dir = vault.joinpath("08 Sources", "WhatsApp", "Analysis")
    legacy_dir.mkdir(parents=True, exist_ok=True)
    mirrored = ("relationship_profiles.json", "person_identity_map.json", "Relationship Map.md")
    for filename in mirrored:
        origin = output_dir / filename
        if not origin.exists():
            continue
        write_text_atomic(legacy_dir / filename, origin.read_text(encoding="utf-8"))
297
+
298
+
299
def write_json_atomic(path, data):
    """Serialise ``data`` as indented, non-ASCII-preserving JSON and write it
    to ``path`` via ``write_text_atomic``."""
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    write_text_atomic(path, payload)
301
+
302
+
303
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` through a temporary sibling and rename.

    The text is first written (UTF-8) to ``<name>.<time_ns>.tmp`` next to the
    target and then moved over it, so readers never see a half-written file.
    If writing or renaming fails, the temporary file is removed before the
    error is re-raised.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        # Don't leave stray *.tmp files behind in the vault on failure.
        try:
            temp.unlink()
        except OSError:
            pass
        raise
307
+
308
+
309
def source_system(message):
    """Guess the source system from a message's free-text ``source`` field.

    Returns "Slack", "LinkedIn" or "WhatsApp" when that word occurs in the
    field (checked in this order), otherwise "Unknown".
    """
    label = message.get("source") or ""
    for system in ("Slack", "LinkedIn", "WhatsApp"):
        if system in label:
            return system
    return "Unknown"
318
+
319
+
320
def split_chat_key(key):
    """Split a ``<source>::<chat name>`` key at the first ``::``.

    Keys without the separator are treated as a bare chat name from an
    "Unknown" source.
    """
    source, separator, chat_name = key.partition("::")
    if not separator:
        return "Unknown", key
    return source, chat_name
148
325
 
149
326
 
150
327
  def score(words, lexicon):