digital-brain 0.1.7 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +20 -2
  2. package/bin/digital-brain.js +137 -21
  3. package/docs/INTEGRATIONS.md +72 -0
  4. package/docs/PRIVACY.md +3 -1
  5. package/docs/SETUP.md +33 -1
  6. package/examples/sample-vault/{04 People/Interpreted Relationships/Close Friend.md → 06 AI Memory/Generated Relationship Drafts/Close Friend (WhatsApp).md } +4 -3
  7. package/examples/sample-vault/{04 People/Interpreted Relationships/Mom.md → 06 AI Memory/Generated Relationship Drafts/Mom (WhatsApp).md } +4 -3
  8. package/examples/sample-vault/{08 Sources/WhatsApp/Analysis/Interpreted/Project Team.md → 06 AI Memory/Generated Relationship Drafts/Project Team (WhatsApp).md } +4 -3
  9. package/examples/sample-vault/06 AI Memory/Interpreted Relationship Memory.md +3 -3
  10. package/examples/sample-vault/06 AI Memory/Person Context Index.md +26 -0
  11. package/examples/sample-vault/06 AI Memory/Person Reply Context.md +26 -0
  12. package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Close Friend.md → Analysis/Interpreted/Close Friend (WhatsApp).md } +4 -3
  13. package/examples/sample-vault/08 Sources/{WhatsApp/Analysis/Interpreted/Mom.md → Analysis/Interpreted/Mom (WhatsApp).md } +4 -3
  14. package/examples/sample-vault/{04 People/Interpreted Relationships/Project Team.md → 08 Sources/Analysis/Interpreted/Project Team (WhatsApp).md } +4 -3
  15. package/examples/sample-vault/08 Sources/Analysis/Relationship Map.md +38 -0
  16. package/examples/sample-vault/08 Sources/Analysis/interpreted_relationship_models.json +175 -0
  17. package/examples/sample-vault/08 Sources/Analysis/person_identity_map.json +78 -0
  18. package/examples/sample-vault/08 Sources/Analysis/relationship_profiles.json +122 -0
  19. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Close Friend (WhatsApp).md +44 -0
  20. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Mom (WhatsApp).md +45 -0
  21. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Interpreted/Project Team (WhatsApp).md +45 -0
  22. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/Relationship Map.md +9 -3
  23. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/interpreted_relationship_models.json +18 -0
  24. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/person_identity_map.json +78 -0
  25. package/examples/sample-vault/08 Sources/WhatsApp/Analysis/relationship_profiles.json +18 -0
  26. package/examples/sample-vault/08 Sources/WhatsApp/Raw/2026-01-01.jsonl +6 -6
  27. package/lib/fs.js +7 -1
  28. package/package.json +2 -1
  29. package/scripts/digital_brain_imessage_sync.py +175 -0
  30. package/scripts/digital_brain_linkedin_export_import.py +214 -0
  31. package/scripts/digital_brain_relationship_extractor.py +189 -12
  32. package/scripts/digital_brain_relationship_interpreter.py +104 -15
  33. package/scripts/digital_brain_slack_export_import.py +181 -0
  34. package/scripts/digital_brain_whatsapp_mac_sync.py +37 -8
  35. package/templates/vault/00 Home/How AI Should Use This Vault.md +1 -1
  36. package/templates/vault/00 Home/Start Here.md +2 -1
  37. package/templates/vault/04 People/Relationship Overrides.md +2 -1
  38. package/templates/vault/06 AI Memory/Generated Relationship Drafts/README.md +5 -0
  39. package/templates/vault/06 AI Memory/Interpreted Relationship Memory.md +1 -2
  40. package/templates/vault/06 AI Memory/Person Context Index.md +4 -0
  41. package/templates/vault/06 AI Memory/Person Reply Context.md +4 -0
  42. package/templates/vault/08 Sources/README.md +5 -0
  43. package/templates/vault/08 Sources/WhatsApp/Outbound/README.md +2 -2
  44. package/templates/vault/AGENTS.md +5 -1
  45. package/templates/vault/CLAUDE.md +3 -0
  46. package/templates/vault/GEMINI.md +4 -0
  47. package/whatsapp-web/send.mjs +32 -5
@@ -3,6 +3,7 @@ import argparse
3
3
  import json
4
4
  import math
5
5
  import re
6
+ import time
6
7
  from collections import Counter, defaultdict
7
8
  from datetime import datetime, timezone
8
9
  from pathlib import Path
@@ -17,26 +18,36 @@ SLANG = {"lol", "lmao", "haha", "hahaha", "bro", "bruh", "wtf", "omg", "ngl", "i
17
18
  def main():
18
19
  args = parse_args()
19
20
  vault = args.vault.resolve()
20
- whatsapp = vault / "08 Sources" / "WhatsApp"
21
- output_dir = whatsapp / "Analysis"
21
+ sources = vault / "08 Sources"
22
+ output_dir = sources / "Analysis"
22
23
  output_dir.mkdir(parents=True, exist_ok=True)
23
- messages = load_messages(whatsapp / "Raw", args.days)
24
+ messages = load_messages(sources, args.days)
24
25
  profiles = build_profiles(messages, args.min_messages)
25
- (output_dir / "relationship_profiles.json").write_text(json.dumps(profiles, indent=2, ensure_ascii=False), encoding="utf-8")
26
+ people = build_people(profiles)
27
+ write_json_atomic(output_dir / "relationship_profiles.json", profiles)
28
+ write_json_atomic(output_dir / "person_identity_map.json", people)
26
29
  write_markdown(output_dir / "Relationship Map.md", profiles, args.days)
30
+ write_people_memory(vault / "06 AI Memory" / "Person Context Index.md", people, args.days)
31
+ write_legacy_whatsapp_outputs(vault, output_dir)
27
32
  print(f"Analyzed {len(messages)} messages.")
28
33
  print(f"Wrote {len(profiles)} relationship profiles.")
34
+ print(f"Wrote {len(people)} canonical person records.")
29
35
 
30
36
 
31
- def load_messages(raw_dir, days):
37
+ def load_messages(sources_dir, days):
32
38
  cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
33
39
  messages = []
34
- for path in sorted(raw_dir.glob("*.jsonl")):
40
+ raw_files = sorted(sources_dir.glob("*/Raw/*.jsonl"))
41
+ for path in raw_files:
35
42
  with path.open("r", encoding="utf-8") as f:
36
43
  for line in f:
37
44
  if not line.strip():
38
45
  continue
39
- record = json.loads(line)
46
+ try:
47
+ record = json.loads(line)
48
+ except json.JSONDecodeError:
49
+ print(f"Skipping corrupt JSONL line in {path}")
50
+ continue
40
51
  dt = datetime.fromisoformat(record["timestamp"].replace("Z", "+00:00"))
41
52
  if cutoff and dt.timestamp() < cutoff:
42
53
  continue
@@ -48,13 +59,15 @@ def load_messages(raw_dir, days):
48
59
  def build_profiles(messages, min_messages):
49
60
  by_chat = defaultdict(list)
50
61
  for message in messages:
51
- by_chat[message.get("chatName") or "Unknown Chat"].append(message)
52
- profiles = [profile_chat(name, items) for name, items in by_chat.items() if len(items) >= min_messages]
62
+ key = f"{message.get('sourceSystem') or source_system(message)}::{message.get('chatName') or 'Unknown Chat'}"
63
+ by_chat[key].append(message)
64
+ profiles = [profile_chat(key, items) for key, items in by_chat.items() if len(items) >= min_messages]
53
65
  profiles.sort(key=lambda p: (p["messageCount"], p["lastSeen"]), reverse=True)
54
66
  return profiles
55
67
 
56
68
 
57
- def profile_chat(chat_name, messages):
69
+ def profile_chat(chat_key, messages):
70
+ source, chat_name = split_chat_key(chat_key)
58
71
  count = len(messages)
59
72
  outbound = sum(1 for m in messages if m.get("fromMe"))
60
73
  inbound = count - outbound
@@ -72,8 +85,15 @@ def profile_chat(chat_name, messages):
72
85
  sentiment = normalized_sentiment(positive, negative, count)
73
86
  tags = infer_tags(any(m.get("isGroup") for m in messages), count, inbound, outbound, warmth, friction, operational, work, logistics, text.count("?"))
74
87
  guess = infer_relationship(tags, count, warmth, friction, operational, work, outbound / count)
88
+ identity = infer_identity(source, chat_name, messages)
75
89
  return {
76
90
  "chatName": chat_name,
91
+ "sourceSystem": source,
92
+ "displayName": f"{chat_name} ({source})",
93
+ "identityName": identity["name"],
94
+ "canonicalPersonKey": identity["key"],
95
+ "identityConfidence": identity["confidence"],
96
+ "identityEvidence": identity["evidence"],
77
97
  "messageCount": count,
78
98
  "inbound": inbound,
79
99
  "outbound": outbound,
@@ -130,12 +150,110 @@ def infer_relationship(tags, count, warmth, friction, operational, work, balance
130
150
  return "general relationship, needs human labeling"
131
151
 
132
152
 
153
def infer_identity(source, chat_name, messages):
    """Derive a display name and canonical key for the party behind a chat.

    Any message flagged ``isGroup`` makes the whole chat a group, which gets
    a source-specific ``group::`` key. Otherwise candidate names are gathered
    (inbound authors for Slack/LinkedIn, first outbound recipient for
    LinkedIn, and always the chat name) and the most frequent usable one wins.

    Returns a dict with ``name``, ``key``, ``confidence`` and ``evidence``.
    """
    if any(message.get("isGroup") for message in messages):
        group_key = f"group::{source.lower()}::{normalize_identity(chat_name)}"
        return {
            "name": chat_name,
            "key": group_key,
            "confidence": "medium",
            "evidence": "group chat kept source-specific",
        }

    author_sources = {"Slack", "LinkedIn"}
    candidates = []
    if source in author_sources:
        for message in messages:
            if not message.get("fromMe"):
                candidates.append((message.get("author") or "").strip())
    if source == "LinkedIn":
        for message in messages:
            if message.get("fromMe"):
                recipients = message.get("to") or ""
                candidates.append(recipients.split(",")[0].strip())
    candidates.append(chat_name)

    name = best_identity_name(candidates) or chat_name
    same_as_chat = normalize_identity(name) == normalize_identity(chat_name)
    # A name that differs from the chat title is only trusted when it came
    # from a source that records authors/recipients.
    if same_as_chat or source in author_sources:
        confidence = "medium"
    else:
        confidence = "low"

    return {
        "name": name,
        "key": f"person::{normalize_identity(name)}",
        "confidence": confidence,
        "evidence": f"{source} direct chat identity",
    }
179
+
180
+
181
def best_identity_name(candidates):
    """Return the original spelling of the most frequent usable candidate.

    Candidates are compared by their normalized form; the first candidate
    whose normalized form is the most common one is returned. Returns ``""``
    when no candidate is usable.
    """
    usable = [
        (normalize_identity(candidate), candidate)
        for candidate in candidates
        if usable_identity_name(candidate)
    ]
    if not usable:
        return ""
    tally = Counter(normalized for normalized, _ in usable)
    winner = tally.most_common(1)[0][0]
    # The winner always comes from `usable`, so a match is guaranteed.
    return next(raw for normalized, raw in usable if normalized == winner)
191
+
192
+
193
def usable_identity_name(value):
    """Tell whether ``value`` can serve as a person's name.

    Empty values, values that normalize to nothing, and generic placeholders
    ("me", "you", "unknown", "unknown chat", "imessage") are rejected.
    """
    if not value:
        return False
    placeholders = {"me", "you", "unknown", "unknown chat", "imessage"}
    normalized = normalize_identity(value)
    return bool(normalized) and normalized not in placeholders
200
+
201
+
202
def normalize_identity(value):
    """Reduce a name or handle to a comparable, lower-cased token string.

    Markup tags and URLs are dropped, every run of characters that is not a
    letter, digit, ``@`` or ``+`` becomes a single space, and surrounding
    whitespace is trimmed. ``None`` and other falsy values yield ``""``.

    Matching is Unicode-aware: an ASCII-only character class would erase
    names written in non-Latin scripts entirely, making every such contact
    normalize to the same empty string. ASCII input is normalized exactly
    as before.
    """
    import unicodedata

    # NFKC composes decomposed accents (e + U+0301 -> é) so they survive the
    # letter/digit filter below instead of being split off as punctuation.
    text = unicodedata.normalize("NFKC", str(value or "")).lower()
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"https?://\S+", " ", text)
    # \w is Unicode-aware for str patterns; underscore is excluded explicitly
    # because it has always been treated as a separator here.
    text = re.sub(r"(?:[^\w@+]|_)+", " ", text)
    return " ".join(text.split())
208
+
209
+
210
def build_people(profiles):
    """Merge per-chat profiles into one record per canonical person.

    Profiles without a ``canonicalPersonKey`` and profiles whose key starts
    with ``group::`` are ignored. Within a person, source profiles are
    ordered by message count then last-seen (descending); the first one
    supplies the display name. The returned list is ordered by number of
    sources, total messages and last-seen, all descending.
    """
    copied_fields = (
        "sourceSystem",
        "chatName",
        "displayName",
        "messageCount",
        "firstSeen",
        "lastSeen",
        "relationshipGuess",
        "typingStyle",
        "identityConfidence",
        "identityEvidence",
    )

    by_person = defaultdict(list)
    for profile in profiles:
        person_key = profile.get("canonicalPersonKey")
        if not person_key or person_key.startswith("group::"):
            continue
        by_person[person_key].append(profile)

    people = []
    for person_key, entries in by_person.items():
        ranked = sorted(
            entries,
            key=lambda entry: (entry["messageCount"], entry["lastSeen"]),
            reverse=True,
        )
        names = [entry.get("identityName") or entry["chatName"] for entry in ranked]
        person = {
            "canonicalPersonKey": person_key,
            "displayName": names[0],
            "aliases": sorted({name for name in names if name}),
            "sources": sorted({entry["sourceSystem"] for entry in ranked}),
            "sourceProfiles": [
                {field: entry[field] for field in copied_fields} for entry in ranked
            ],
            "totalMessages": sum(entry["messageCount"] for entry in ranked),
            "firstSeen": min(entry["firstSeen"] for entry in ranked),
            "lastSeen": max(entry["lastSeen"] for entry in ranked),
        }
        people.append(person)

    return sorted(
        people,
        key=lambda person: (len(person["sources"]), person["totalMessages"], person["lastSeen"]),
        reverse=True,
    )
247
+
248
+
133
249
  def write_markdown(path, profiles, days):
134
250
  lines = ["# Relationship Map", "", f"Window: last {days} days", "", "Generated signals. Treat as editable working notes.", ""]
135
251
  for profile in profiles:
136
252
  lines.extend([
137
- f"## {profile['chatName']}",
253
+ f"## {profile['displayName']}",
138
254
  "",
255
+ f"- Source: {profile['sourceSystem']}",
256
+ f"- Canonical person: {profile['identityName']} ({profile['canonicalPersonKey']})",
139
257
  f"- Guess: {profile['relationshipGuess']}",
140
258
  f"- Messages: {profile['messageCount']} ({profile['inbound']} inbound, {profile['outbound']} outbound)",
141
259
  f"- Dates: {profile['firstSeen']} to {profile['lastSeen']}",
@@ -144,7 +262,66 @@ def write_markdown(path, profiles, days):
144
262
  f"- Typing style: {typing_style_summary(profile['typingStyle'])}",
145
263
  "",
146
264
  ])
147
- path.write_text("\n".join(lines), encoding="utf-8")
265
+ write_text_atomic(path, "\n".join(lines))
266
+
267
+
268
def write_people_memory(path, people, days):
    """Render the person index as Markdown and write it to ``path``.

    Creates the parent directory if needed. Each person gets a heading, a
    summary list, and one line per source profile.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    lines = [
        "# Person Context Index",
        "",
        f"Window: last {days} days",
        "",
        "Canonical people matched across sources. Treat matches as provisional unless manually confirmed.",
        "",
    ]
    for person in people:
        alias_list = ", ".join(person["aliases"])
        source_list = ", ".join(person["sources"])
        lines += [
            f"## {person['displayName']}",
            "",
            f"- Canonical key: `{person['canonicalPersonKey']}`",
            f"- Aliases: {alias_list}",
            f"- Sources: {source_list}",
            f"- Messages: {person['totalMessages']}",
            f"- Dates: {person['firstSeen']} to {person['lastSeen']}",
            "- Source-specific context:",
        ]
        for source in person["sourceProfiles"]:
            style = typing_style_summary(source.get("typingStyle", {}))
            lines.append(f" - {source['sourceSystem']} / {source['chatName']}: {source['relationshipGuess']}; {source['messageCount']} messages; style {style}")
        lines.append("")
    document = "\n".join(lines) + "\n"
    write_text_atomic(path, document)
287
+
288
+
289
def write_legacy_whatsapp_outputs(vault, output_dir):
    """Mirror the combined analysis files into the WhatsApp analysis folder.

    Each of the three known output files that exists in ``output_dir`` is
    copied to ``<vault>/08 Sources/WhatsApp/Analysis``, which is created if
    missing. Files that do not exist are skipped.
    """
    mirrored_names = ("relationship_profiles.json", "person_identity_map.json", "Relationship Map.md")
    legacy_dir = vault / "08 Sources" / "WhatsApp" / "Analysis"
    legacy_dir.mkdir(parents=True, exist_ok=True)
    for name in mirrored_names:
        source = output_dir / name
        if not source.exists():
            continue
        write_text_atomic(legacy_dir / name, source.read_text(encoding="utf-8"))
297
+
298
+
299
def write_json_atomic(path, data):
    """Serialize ``data`` as indented, non-ASCII-preserving JSON and write it via ``write_text_atomic``."""
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    write_text_atomic(path, serialized)
301
+
302
+
303
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` by way of a sibling temporary file.

    The text is written (UTF-8) to ``<name>.<time_ns>.tmp`` next to the
    target and then moved over it, so readers never observe a half-written
    file. If writing or moving fails, the temporary file is removed before
    the error is re-raised, so failed runs do not litter the directory.

    Raises:
        OSError: if the temporary file cannot be written or moved.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        # Clean up on any failure (including interruption), then propagate.
        try:
            temp.unlink()
        except FileNotFoundError:
            pass
        raise
307
+
308
+
309
def source_system(message):
    """Map a message's free-form ``source`` text to a known system name.

    The first of "Slack", "LinkedIn", "WhatsApp" found as a substring wins;
    anything else (including a missing source) is "Unknown".
    """
    label = message.get("source") or ""
    for system in ("Slack", "LinkedIn", "WhatsApp"):
        if system in label:
            return system
    return "Unknown"
318
+
319
+
320
def split_chat_key(key):
    """Split a ``source::chat name`` key at its first ``::``.

    Returns ``(source, chat_name)``; a key without a separator is treated as
    a bare chat name with source "Unknown".
    """
    source, separator, chat_name = key.partition("::")
    if not separator:
        return "Unknown", key
    return source, chat_name
148
325
 
149
326
 
150
327
  def score(words, lexicon):
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  import argparse
3
3
  import json
4
+ import time
4
5
  from datetime import datetime, timezone
5
6
  from pathlib import Path
6
7
 
@@ -19,26 +20,38 @@ ROLE_KEYWORDS = [
19
20
  def main():
20
21
  args = parse_args()
21
22
  vault = args.vault.resolve()
22
- whatsapp = vault / "08 Sources" / "WhatsApp"
23
- profile_path = whatsapp / "Analysis" / "relationship_profiles.json"
23
+ sources = vault / "08 Sources"
24
+ whatsapp = sources / "WhatsApp"
25
+ analysis = sources / "Analysis"
26
+ profile_path = analysis / "relationship_profiles.json"
27
+ if not profile_path.exists():
28
+ profile_path = whatsapp / "Analysis" / "relationship_profiles.json"
24
29
  profiles = json.loads(profile_path.read_text(encoding="utf-8"))
25
- overrides = load_json(whatsapp / "relationship_overrides.json", {})
26
- out_dir = whatsapp / "Analysis" / "Interpreted"
27
- people_dir = vault / "04 People" / "Interpreted Relationships"
30
+ people_path = analysis / "person_identity_map.json"
31
+ people = load_json(people_path, [])
32
+ overrides = load_overrides(sources)
33
+ out_dir = analysis / "Interpreted"
34
+ legacy_out_dir = whatsapp / "Analysis" / "Interpreted"
35
+ drafts_dir = vault / "06 AI Memory" / "Generated Relationship Drafts"
28
36
  out_dir.mkdir(parents=True, exist_ok=True)
29
- people_dir.mkdir(parents=True, exist_ok=True)
37
+ legacy_out_dir.mkdir(parents=True, exist_ok=True)
38
+ drafts_dir.mkdir(parents=True, exist_ok=True)
30
39
 
31
40
  models = []
32
41
  for profile in profiles:
33
- model = build_model(profile, overrides.get(profile["chatName"], {}))
42
+ override = overrides.get(profile_key(profile), overrides.get(profile["chatName"], {}))
43
+ model = build_model(profile, override)
34
44
  note = render_note(model)
35
- filename = safe_filename(profile["chatName"]) + ".md"
36
- (out_dir / filename).write_text(note, encoding="utf-8")
37
- (people_dir / filename).write_text(note, encoding="utf-8")
45
+ filename = safe_filename(profile.get("displayName") or profile["chatName"]) + ".md"
46
+ write_text_atomic(out_dir / filename, note)
47
+ write_text_atomic(legacy_out_dir / filename, note)
48
+ write_text_atomic(drafts_dir / filename, note)
38
49
  models.append(model)
39
50
 
40
- (whatsapp / "Analysis" / "interpreted_relationship_models.json").write_text(json.dumps(models, indent=2, ensure_ascii=False), encoding="utf-8")
51
+ write_json_atomic(analysis / "interpreted_relationship_models.json", models)
52
+ write_json_atomic(whatsapp / "Analysis" / "interpreted_relationship_models.json", models)
41
53
  write_index(vault / "06 AI Memory" / "Interpreted Relationship Memory.md", models)
54
+ write_person_reply_index(vault / "06 AI Memory" / "Person Reply Context.md", people, models)
42
55
  print(f"Wrote interpreted notes: {len(models)}")
43
56
 
44
57
 
@@ -130,16 +143,17 @@ def infer_boundaries(role, difficulty):
130
143
 
131
144
 
132
145
  def render_note(model):
133
- return f"""# {model['chatName']}
146
+ return f"""# {model.get('displayName') or model['chatName']}
134
147
 
135
148
  Generated: {datetime.now(timezone.utc).isoformat()}
149
+ Source: {model.get('sourceSystem', 'Unknown')}
136
150
  Role: {model['role']}
137
151
  Role confidence: {model['roleConfidence']}
138
152
  Closeness: {model['closeness']}
139
153
  Conversation difficulty: {model['conversationDifficulty']}
140
154
  Typing style: {model['typingStyle'].get('signature', 'unknown')}
141
155
 
142
- These are private working notes. Edit them where wrong.
156
+ Generated draft, not truth. These are private working notes. Edit them where wrong.
143
157
 
144
158
  ## Role / Relationship Label
145
159
  - {model['role']} ({model['roleConfidence']} confidence).
@@ -173,8 +187,62 @@ def write_index(path, models):
173
187
  lines = ["# Interpreted Relationship Memory", "", "Generated working notes. Treat as editable, not truth.", ""]
174
188
  for model in models:
175
189
  style = model.get("typingStyle", {}).get("signature", "unknown style")
176
- lines.append(f"- [[{safe_filename(model['chatName'])}]]: {model['role']} ({model['roleConfidence']}), closeness {model['closeness']}, difficulty {model['conversationDifficulty']}, style {style}")
177
- path.write_text("\n".join(lines) + "\n", encoding="utf-8")
190
+ display = model.get("displayName") or model["chatName"]
191
+ lines.append(f"- [[{safe_filename(display)}]]: {model['role']} ({model['roleConfidence']}), closeness {model['closeness']}, difficulty {model['conversationDifficulty']}, style {style}")
192
+ write_text_atomic(path, "\n".join(lines) + "\n")
193
+
194
+
195
def write_person_reply_index(path, people, models):
    """Write the per-person reply guidance note to ``path``.

    Models are grouped by ``canonicalPersonKey``. When ``people`` is empty,
    person records are synthesized from those groups. People with no linked
    model are left out. Within a person, models are listed by message count
    then last-seen, descending.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    models_by_key = {}
    for model in models:
        person_key = model.get("canonicalPersonKey")
        if person_key:
            models_by_key.setdefault(person_key, []).append(model)

    if not people:
        people = synthesize_people(models_by_key)

    lines = [
        "# Person Reply Context",
        "",
        "Use this first when responding to a specific person. It merges confirmed-looking matches across sources while keeping each source visible.",
        "",
    ]
    for person in people:
        key = person.get("canonicalPersonKey")
        linked_models = sorted(
            models_by_key.get(key, []),
            key=lambda model: (model["messageCount"], model["lastSeen"]),
            reverse=True,
        )
        if not linked_models:
            continue
        top = linked_models[0]
        heading = person.get("displayName") or top.get("identityName") or top["chatName"]
        aliases = ", ".join(person.get("aliases") or [])
        sources = ", ".join(sorted({model["sourceSystem"] for model in linked_models}))
        total = sum(model["messageCount"] for model in linked_models)
        lines += [
            f"## {heading}",
            "",
            f"- Canonical key: `{key}`",
            f"- Aliases: {aliases}",
            f"- Sources: {sources}",
            f"- Total messages: {total}",
            "- Source-specific guidance:",
        ]
        for model in linked_models:
            signature = model.get("typingStyle", {}).get("signature", "unknown")
            reply_hint = " ".join(model.get("replyStyle", [])[:2])
            lines += [
                f" - {model['sourceSystem']} / {model['chatName']}: {model['role']} ({model['roleConfidence']}), closeness {model['closeness']}, difficulty {model['conversationDifficulty']}.",
                f" Style: {signature}.",
                f" Reply: {reply_hint}",
            ]
        lines.append("")
    write_text_atomic(path, "\n".join(lines) + "\n")
232
+
233
+
234
def synthesize_people(models_by_key):
    """Build minimal person records from models grouped by canonical key.

    Keys starting with ``group::`` are skipped. Each record carries the key,
    the first model's name as display name, and the sorted unique names as
    aliases (a model's name is its ``identityName`` or, failing that, its
    ``chatName``).
    """
    def person_record(key, models):
        names = [model.get("identityName") or model["chatName"] for model in models]
        return {
            "canonicalPersonKey": key,
            "displayName": names[0],
            "aliases": sorted(set(names)),
        }

    return [
        person_record(key, models)
        for key, models in models_by_key.items()
        if not key.startswith("group::")
    ]
178
246
 
179
247
 
180
248
  def bullets(items):
@@ -229,6 +297,27 @@ def load_json(path, fallback):
229
297
  return json.loads(path.read_text(encoding="utf-8"))
230
298
 
231
299
 
300
def write_json_atomic(path, data):
    """Serialize ``data`` as indented, non-ASCII-preserving JSON and write it via ``write_text_atomic``."""
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    write_text_atomic(path, serialized)
302
+
303
+
304
def write_text_atomic(path, content):
    """Write ``content`` to ``path`` by way of a sibling temporary file.

    The text is written (UTF-8) to ``<name>.<time_ns>.tmp`` next to the
    target and then moved over it, so readers never observe a half-written
    file. If writing or moving fails, the temporary file is removed before
    the error is re-raised, so failed runs do not litter the directory.

    Raises:
        OSError: if the temporary file cannot be written or moved.
    """
    temp = path.with_name(f"{path.name}.{time.time_ns()}.tmp")
    try:
        temp.write_text(content, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        # Clean up on any failure (including interruption), then propagate.
        try:
            temp.unlink()
        except FileNotFoundError:
            pass
        raise
308
+
309
+
310
def load_overrides(sources):
    """Merge every ``<source>/relationship_overrides.json`` under ``sources``.

    Files are visited in sorted path order, so on a key collision the file
    that sorts last wins.
    """
    combined = {}
    override_files = sorted(sources.glob("*/relationship_overrides.json"))
    for override_file in override_files:
        entries = load_json(override_file, {})
        combined.update(entries)
    return combined
315
+
316
+
317
def profile_key(profile):
    """Build the ``source::chat name`` lookup key for a profile, substituting "Unknown" / "Unknown Chat" for missing parts."""
    source = profile.get("sourceSystem") or "Unknown"
    chat = profile.get("chatName") or "Unknown Chat"
    return f"{source}::{chat}"
319
+
320
+
232
321
  def parse_args():
233
322
  parser = argparse.ArgumentParser()
234
323
  parser.add_argument("--vault", type=Path, required=True)
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import json
4
+ import tempfile
5
+ import zipfile
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
+
10
def main():
    """Parse the command line, open the export (folder or zip) and import it into the vault."""
    options = parse_args()
    with unpack(options.input) as export_root:
        vault_root = options.vault.resolve()
        import_export(vault_root, export_root, options.days)
14
+
15
+
16
def import_export(vault, source, days):
    """Import messages from an unpacked Slack export into the vault.

    Every sub-folder of ``source`` is treated as one conversation and every
    ``*.json`` file in it as a list of messages. Messages without ``ts`` or
    text, older than ``days`` days (when ``days`` is truthy), or already
    recorded in the seen-ids state are skipped; the rest are appended to the
    raw JSONL files and the monthly Markdown transcripts.

    The seen-ids state is saved even when the import aborts part-way, so
    records that were already appended are not imported a second time on the
    next run.
    """
    slack = vault / "08 Sources" / "Slack"
    raw_dir = slack / "Raw"
    chats_dir = slack / "ChatsByMonth"
    state_dir = slack / ".sync-state"
    for directory in (raw_dir, chats_dir, state_dir):
        directory.mkdir(parents=True, exist_ok=True)

    users = load_users(source / "users.json")
    conversations = load_conversations(source)
    seen_path = state_dir / "slack-seen-message-ids.json"
    seen = load_seen(seen_path)
    cutoff = datetime.now(timezone.utc).timestamp() - days * 24 * 60 * 60 if days else None
    added = 0

    try:
        for folder in sorted(path for path in source.iterdir() if path.is_dir()):
            # Folders with no metadata entry fall back to a group chat named
            # after the folder.
            conversation = conversations.get(folder.name, {"name": folder.name, "is_group": True})
            chat_name = conversation.get("name") or folder.name
            is_group = conversation.get("is_group", True)
            for file in sorted(folder.glob("*.json")):
                for item in load_json(file, []):
                    if "ts" not in item or not item.get("text"):
                        continue
                    timestamp = float(item["ts"])
                    if cutoff and timestamp < cutoff:
                        continue
                    record = slack_record(item, users, chat_name, is_group, folder.name)
                    if record["id"] in seen:
                        continue
                    append_jsonl(raw_dir, record)
                    append_markdown(chats_dir, record)
                    seen.add(record["id"])
                    added += 1
    finally:
        # Persist progress on failure too; otherwise already-appended
        # messages would be duplicated by a retry.
        save_seen(seen_path, seen)

    print(f"Imported {added} Slack messages.")
52
+
53
+
54
def slack_record(item, users, chat_name, is_group, conversation_id):
    """Convert one Slack export message into the vault's message record.

    The author id is the message's ``user``, else ``bot_id``, else
    "unknown"; it is mapped through ``users`` when known. ``ts`` is rendered
    as a UTC ISO-8601 timestamp. ``fromMe`` is always ``False``.
    """
    raw_ts = item["ts"]
    author_id = item.get("user") or item.get("bot_id") or "unknown"
    sent_at = datetime.fromtimestamp(float(raw_ts), tz=timezone.utc)
    record = {
        "id": f"slack-{conversation_id}-{raw_ts}",
        "source": "Slack export",
        "sourceSystem": "Slack",
        "timestamp": sent_at.isoformat(),
        "chatName": chat_name,
        "chatId": conversation_id,
        "isGroup": is_group,
        "fromMe": False,
        "author": users.get(author_id, author_id),
        "authorId": author_id,
        "body": item.get("text") or "",
    }
    return record
71
+
72
+
73
def load_users(path):
    """Read the export's user list and map user id to a display name.

    The name is the profile's ``real_name``, else its ``display_name``, else
    the entry's ``name``, else the id itself. Entries without an id are
    skipped.
    """
    names_by_id = {}
    for entry in load_json(path, []):
        user_id = entry.get("id")
        if user_id:
            profile = entry.get("profile") or {}
            names_by_id[user_id] = (
                profile.get("real_name")
                or profile.get("display_name")
                or entry.get("name")
                or user_id
            )
    return names_by_id
82
+
83
+
84
def load_conversations(source):
    """Index conversation metadata from the export's listing files.

    Reads ``channels.json``, ``groups.json``, ``dms.json`` and ``mpims.json``
    and returns ``{id_or_name: {"name": ..., "is_group": ...}}``. Only
    entries from ``dms.json`` are marked as non-group. Entries with neither
    an id nor a name are skipped.
    """
    index = {}
    for filename in ("channels.json", "groups.json", "dms.json", "mpims.json"):
        is_group = filename != "dms.json"
        for entry in load_json(source / filename, []):
            conversation_id = entry.get("id") or entry.get("name")
            if conversation_id:
                label = entry.get("name") or entry.get("name_normalized") or conversation_id
                index[conversation_id] = {"name": label, "is_group": is_group}
    return index
96
+
97
+
98
def append_jsonl(raw_dir, record):
    """Append ``record`` as one JSON line to the file named after its date (first 10 characters of ``timestamp``)."""
    day_file = raw_dir / f"{record['timestamp'][:10]}.jsonl"
    with day_file.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(record, ensure_ascii=False) + "\n")
101
+
102
+
103
def append_markdown(chats_dir, record):
    """Append ``record`` as a bullet to its chat's monthly Markdown transcript.

    The transcript lives in a ``YYYY-MM`` folder (first 7 characters of the
    timestamp) and is created with a heading on first use. Whitespace in the
    body is collapsed so each message stays on one line.
    """
    month_dir = chats_dir / record["timestamp"][:7]
    month_dir.mkdir(parents=True, exist_ok=True)
    transcript = month_dir / f"{safe_filename(record['chatName'])}.md"
    if not transcript.exists():
        transcript.write_text(f"# {record['chatName']}\n\nSynced from Slack export.\n\n", encoding="utf-8")
    flattened = " ".join((record.get("body") or "").split())
    with transcript.open("a", encoding="utf-8") as handle:
        handle.write(f"- {record['timestamp']} | {record['author']}: {flattened}\n")
112
+
113
+
114
def load_json(path, fallback):
    """Parse the UTF-8 JSON file at ``path``, or return ``fallback`` when the path does not exist."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return fallback
118
+
119
+
120
def load_seen(path):
    """Load the set of already-imported message ids from ``path``.

    A missing state file yields an empty set. An unreadable or malformed
    state file also yields an empty set (best effort), but a warning is
    printed so the loss of de-duplication state does not go unnoticed.
    Only I/O, decoding and shape errors are tolerated; anything else
    propagates.
    """
    try:
        return set(json.loads(path.read_text(encoding="utf-8")))
    except FileNotFoundError:
        return set()
    except (OSError, ValueError, TypeError) as error:
        # ValueError covers invalid JSON and bad UTF-8; TypeError covers
        # JSON that is not a collection of hashable ids.
        print(f"Ignoring unreadable Slack sync state {path}: {error}")
        return set()
127
+
128
+
129
def save_seen(path, seen):
    """Persist the imported-message ids as a sorted, indented JSON list.

    The list is written to a sibling ``<name>.tmp`` file and then moved over
    ``path``. Writing in place could leave a truncated file if the process
    is interrupted, and a truncated state file loads as "nothing seen",
    which causes every message to be imported again. The temporary file is
    removed if the write or the move fails.
    """
    payload = json.dumps(sorted(seen), indent=2)
    temp = path.with_name(f"{path.name}.tmp")
    try:
        temp.write_text(payload, encoding="utf-8")
        temp.replace(path)
    except BaseException:
        try:
            temp.unlink()
        except FileNotFoundError:
            pass
        raise
131
+
132
+
133
def safe_filename(value):
    """Turn a chat name into a file-name-safe string of at most 120 characters.

    Each of ``/ : \\ ? % * " < > |`` becomes ``-``, whitespace runs collapse
    to single spaces, and an empty result becomes "Unknown Chat".
    """
    forbidden = '/:\\?%*"<>|'
    replacements = str.maketrans({char: "-" for char in forbidden})
    collapsed = " ".join(value.translate(replacements).split())
    if not collapsed:
        collapsed = "Unknown Chat"
    return collapsed[:120]
136
+
137
+
138
def unpack(input_path):
    """Return a context manager that yields the export's root directory.

    A directory is used as-is. Anything else is opened as a zip archive and
    extracted into a temporary directory that is removed when the context
    exits. If the archive cannot be opened or extracted, the temporary
    directory is removed immediately before the error propagates.

    Raises:
        zipfile.BadZipFile: if ``input_path`` is not a valid zip archive.
        OSError: if the archive cannot be read or extracted.
    """
    input_path = input_path.resolve()
    if input_path.is_dir():
        return NullContext(input_path)
    temp = tempfile.TemporaryDirectory()
    try:
        with zipfile.ZipFile(input_path) as archive:
            archive.extractall(temp.name)
    except BaseException:
        # No context manager owns the directory yet, so clean it up here.
        temp.cleanup()
        raise
    return TempContext(Path(temp.name), temp)
146
+
147
+
148
class NullContext:
    """Context manager that yields a fixed path and does nothing on exit."""

    def __init__(self, path):
        # Value handed back by ``__enter__``.
        self.path = path

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        # Returning False lets any exception from the ``with`` body propagate.
        return False
157
+
158
+
159
class TempContext:
    """Context manager that yields a path and cleans up its backing temp object on exit."""

    def __init__(self, path, temp):
        # Value handed back by ``__enter__``.
        self.path = path
        # Object whose ``cleanup()`` is called when the context exits.
        self.temp = temp

    def __enter__(self):
        return self.path

    def __exit__(self, *exc_info):
        self.temp.cleanup()
        # Returning False lets any exception from the ``with`` body propagate.
        return False
170
+
171
+
172
def parse_args():
    """Parse ``--vault`` and ``--input`` (both required paths) and ``--days`` (int, default 365) from the command line."""
    parser = argparse.ArgumentParser()
    argument_specs = (
        ("--vault", {"type": Path, "required": True}),
        ("--input", {"type": Path, "required": True}),
        ("--days", {"type": int, "default": 365}),
    )
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
178
+
179
+
180
+ if __name__ == "__main__":
181
+ main()