@forwardimpact/basecamp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +229 -0
- package/build.js +124 -0
- package/config/scheduler.json +28 -0
- package/package.json +37 -0
- package/scheduler.js +552 -0
- package/scripts/build-pkg.sh +117 -0
- package/scripts/compile.sh +26 -0
- package/scripts/install.sh +108 -0
- package/scripts/pkg-resources/conclusion.html +62 -0
- package/scripts/pkg-resources/welcome.html +64 -0
- package/scripts/postinstall +46 -0
- package/scripts/uninstall.sh +56 -0
- package/template/.claude/settings.json +40 -0
- package/template/.claude/skills/create-presentations/SKILL.md +75 -0
- package/template/.claude/skills/create-presentations/references/slide.css +35 -0
- package/template/.claude/skills/create-presentations/scripts/convert-to-pdf.js +32 -0
- package/template/.claude/skills/doc-collab/SKILL.md +112 -0
- package/template/.claude/skills/draft-emails/SKILL.md +191 -0
- package/template/.claude/skills/draft-emails/scripts/scan-emails.sh +33 -0
- package/template/.claude/skills/extract-entities/SKILL.md +466 -0
- package/template/.claude/skills/extract-entities/references/TEMPLATES.md +131 -0
- package/template/.claude/skills/extract-entities/scripts/state.py +100 -0
- package/template/.claude/skills/meeting-prep/SKILL.md +135 -0
- package/template/.claude/skills/organize-files/SKILL.md +146 -0
- package/template/.claude/skills/organize-files/scripts/organize-by-type.sh +42 -0
- package/template/.claude/skills/organize-files/scripts/summarize.sh +21 -0
- package/template/.claude/skills/sync-apple-calendar/SKILL.md +101 -0
- package/template/.claude/skills/sync-apple-calendar/references/SCHEMA.md +80 -0
- package/template/.claude/skills/sync-apple-calendar/scripts/sync.py +233 -0
- package/template/.claude/skills/sync-apple-mail/SKILL.md +131 -0
- package/template/.claude/skills/sync-apple-mail/references/SCHEMA.md +88 -0
- package/template/.claude/skills/sync-apple-mail/scripts/parse-emlx.py +104 -0
- package/template/.claude/skills/sync-apple-mail/scripts/sync.py +348 -0
- package/template/CLAUDE.md +152 -0
- package/template/USER.md +5 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Apple Calendar Database Schema
|
|
2
|
+
|
|
3
|
+
The Apple Calendar SQLite database uses Core Data. Key tables and their actual
|
|
4
|
+
column names (verified on macOS Sonoma+).
|
|
5
|
+
|
|
6
|
+
## CalendarItem (events and reminders)
|
|
7
|
+
|
|
8
|
+
| Column | Type | Notes |
|
|
9
|
+
| ---------------- | ------- | ---------------------------------------------------------- |
|
|
10
|
+
| `ROWID` | INTEGER | Primary key |
|
|
11
|
+
| `summary` | TEXT | Event title |
|
|
12
|
+
| `start_date` | REAL | Core Data timestamp (seconds since 2001-01-01 UTC) |
|
|
13
|
+
| `end_date` | REAL | Core Data timestamp (null for all-day events) |
|
|
14
|
+
| `start_tz` | TEXT | IANA timezone (e.g., `Europe/Paris`), `_float` for all-day |
|
|
15
|
+
| `end_tz` | TEXT | IANA timezone, `_float` for all-day |
|
|
16
|
+
| `all_day` | INTEGER | 1 = all-day event |
|
|
17
|
+
| `location_id` | INTEGER | FK → Location.ROWID |
|
|
18
|
+
| `description` | TEXT | Event notes/body |
|
|
19
|
+
| `organizer_id` | INTEGER | FK → Identity.ROWID |
|
|
20
|
+
| `calendar_id` | INTEGER | FK → Calendar.ROWID |
|
|
21
|
+
| `has_attendees` | INTEGER | 1 = event has attendees |
|
|
22
|
+
| `conference_url` | TEXT | Video call URL (often null — check description too) |
|
|
23
|
+
| `entity_type` | INTEGER | 2 = calendar event |
|
|
24
|
+
|
|
25
|
+
## Identity (organizer info)
|
|
26
|
+
|
|
27
|
+
| Column | Type | Notes |
|
|
28
|
+
| -------------- | ---- | ------------------------------------------------------------- |
|
|
29
|
+
| `display_name` | TEXT | Full name (e.g., `"Chen, Sarah"`) |
|
|
30
|
+
| `address` | TEXT | Email with `mailto:` prefix (e.g., `"mailto:sarah@acme.com"`) |
|
|
31
|
+
| `first_name` | TEXT | Usually null — `display_name` is the reliable field |
|
|
32
|
+
| `last_name` | TEXT | Usually null — `display_name` is the reliable field |
|
|
33
|
+
|
|
34
|
+
**IMPORTANT:** Identity does NOT have an `email` column. Use `address` and strip
|
|
35
|
+
the `mailto:` prefix. Use `display_name` for the name (not
|
|
36
|
+
`first_name`/`last_name`, which are typically null).
|
|
37
|
+
|
|
38
|
+
## Participant (attendees and organizer)
|
|
39
|
+
|
|
40
|
+
| Column | Type | Notes |
|
|
41
|
+
| ------------- | ------- | -------------------------------------------------- |
|
|
42
|
+
| `ROWID` | INTEGER | Primary key |
|
|
43
|
+
| `entity_type` | INTEGER | 7 = attendee, 8 = organizer |
|
|
44
|
+
| `owner_id` | INTEGER | FK → CalendarItem.ROWID |
|
|
45
|
+
| `identity_id` | INTEGER | FK → Identity.ROWID (for display_name lookup) |
|
|
46
|
+
| `email` | TEXT | Email address (no `mailto:` prefix) |
|
|
47
|
+
| `status` | INTEGER | EKParticipantStatus (see mapping below) |
|
|
48
|
+
| `role` | INTEGER | 0 = unknown, 1 = required, 2 = optional, 3 = chair |
|
|
49
|
+
| `is_self` | INTEGER | 1 = this is the calendar owner |
|
|
50
|
+
|
|
51
|
+
**IMPORTANT:** Participant does NOT have `display_name`, `first_name`, or
|
|
52
|
+
`last_name` columns. To get the attendee's name, JOIN with Identity via
|
|
53
|
+
`identity_id`. There is NO `Attendee` table — only use `Participant`.
|
|
54
|
+
|
|
55
|
+
### EKParticipantStatus mapping
|
|
56
|
+
|
|
57
|
+
| Value | Status |
|
|
58
|
+
| ----- | ---------- |
|
|
59
|
+
| 0 | unknown |
|
|
60
|
+
| 1 | pending |
|
|
61
|
+
| 2 | accepted |
|
|
62
|
+
| 3 | declined |
|
|
63
|
+
| 4 | tentative |
|
|
64
|
+
| 5 | delegated |
|
|
65
|
+
| 6 | completed |
|
|
66
|
+
| 7 | in-process |
|
|
67
|
+
|
|
68
|
+
## Calendar (calendar metadata)
|
|
69
|
+
|
|
70
|
+
| Column | Type | Notes |
|
|
71
|
+
| ------- | ------- | ------------- |
|
|
72
|
+
| `ROWID` | INTEGER | Primary key |
|
|
73
|
+
| `title` | TEXT | Calendar name |
|
|
74
|
+
|
|
75
|
+
## Location
|
|
76
|
+
|
|
77
|
+
| Column | Type | Notes |
|
|
78
|
+
| ------- | ------- | --------------- |
|
|
79
|
+
| `ROWID` | INTEGER | Primary key |
|
|
80
|
+
| `title` | TEXT | Location string |
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Sync Apple Calendar events to ~/.cache/fit/basecamp/apple_calendar/ as JSON.
|
|
3
|
+
|
|
4
|
+
Queries the macOS Calendar SQLite database for events from 14 days in the
|
|
5
|
+
past to 14 days in the future and writes one JSON file per event.
|
|
6
|
+
|
|
7
|
+
Usage: python3 scripts/sync.py
|
|
8
|
+
|
|
9
|
+
Requires: macOS with Calendar app configured and Full Disk Access granted.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import subprocess
|
|
15
|
+
from datetime import datetime, timezone, timedelta
|
|
16
|
+
|
|
17
|
+
# Reference instant from which Core Data timestamps are counted (UTC).
EPOCH = datetime(year=2001, month=1, day=1, tzinfo=timezone.utc)

# Directory that receives one JSON file per synced event.
OUTDIR = os.path.expanduser("~/.cache/fit/basecamp/apple_calendar")

# Candidate database locations, listed in lookup order.
DB_PATHS = [
    os.path.expanduser(candidate)
    for candidate in (
        "~/Library/Group Containers/group.com.apple.calendar/Calendar.sqlitedb",
        "~/Library/Calendars/Calendar.sqlitedb",
    )
]

# Participant.status integer -> label; the integer is the position in the tuple.
STATUS_MAP = dict(
    enumerate(
        (
            "unknown",
            "pending",
            "accepted",
            "declined",
            "tentative",
            "delegated",
            "completed",
            "in-process",
        )
    )
)

# Participant.role integer -> label; the integer is the position in the tuple.
ROLE_MAP = dict(enumerate(("unknown", "required", "optional", "chair")))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def find_db():
    """Return the path of the first Apple Calendar database that exists.

    Candidates are taken from ``DB_PATHS`` in order.

    Returns:
        The first path in ``DB_PATHS`` for which ``os.path.exists`` is true.

    Raises:
        SystemExit: with status 1, after printing an error message, when none
            of the candidate paths exists.
    """
    for candidate in DB_PATHS:
        if os.path.exists(candidate):
            return candidate
    print("Error: Apple Calendar database not found. Is Calendar configured?")
    # The bare ``exit`` builtin is injected by the ``site`` module for
    # interactive use and is absent under ``python -S`` or in frozen builds,
    # so raise SystemExit directly instead.
    raise SystemExit(1)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def query(db, sql):
    """Run ``sql`` against ``db`` with the ``sqlite3`` CLI and return the rows.

    The database is opened read-only and rows are requested as JSON. If the
    first attempt fails with "database is locked", the call waits two seconds
    and retries once.

    Args:
        db: Path of the SQLite database file.
        sql: SQL statement to execute.

    Returns:
        The decoded JSON output (a list of row dicts), or ``[]`` when the
        command produced no output or exited with a non-zero status. Callers
        cannot tell a failed query from an empty result; the error is only
        reported on stdout.
    """
    cmd = ["sqlite3", "-readonly", "-json", db, sql]

    def run_once():
        # Decode as UTF-8 explicitly: ``text=True`` would use the locale
        # encoding, which can reject non-ASCII event text under a C/POSIX
        # locale. Undecodable bytes are replaced rather than raising.
        return subprocess.run(
            cmd, capture_output=True, encoding="utf-8", errors="replace"
        )

    result = run_once()
    if result.returncode != 0 and "database is locked" in result.stderr:
        import time

        time.sleep(2)
        result = run_once()
    if result.returncode != 0:
        print(f"SQLite error: {result.stderr.strip()}")
        return []
    return json.loads(result.stdout) if result.stdout.strip() else []
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def coredata_to_iso(ts, tz_name=None):
    """Render a Core Data timestamp as an ISO 8601 string.

    ``ts`` is added, as seconds, to ``EPOCH``. When ``tz_name`` is set and is
    not the ``"_float"`` marker, the result is converted to that zone; if the
    conversion fails for any reason it is skipped and the value keeps EPOCH's
    zone. A ``None`` timestamp yields ``None``.
    """
    if ts is None:
        return None

    moment = EPOCH + timedelta(seconds=ts)
    wants_zone = bool(tz_name) and tz_name != "_float"
    if wants_zone:
        try:
            from zoneinfo import ZoneInfo

            moment = moment.astimezone(ZoneInfo(tz_name))
        except Exception:
            # Best effort: any failure to load or apply the zone leaves the
            # timestamp unconverted.
            pass
    return moment.isoformat()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _fetch_events(db, window_start_ts, window_end_ts):
    """Query titled CalendarItem rows overlapping the window, with location, calendar and organizer joined."""
    return query(
        db,
        f"""
        SELECT
            ci.ROWID AS id,
            ci.summary,
            ci.start_date,
            ci.end_date,
            ci.start_tz,
            ci.end_tz,
            ci.all_day,
            ci.description,
            ci.has_attendees,
            ci.conference_url,
            loc.title AS location,
            cal.title AS calendar_name,
            org.address AS organizer_email,
            org.display_name AS organizer_name
        FROM CalendarItem ci
        LEFT JOIN Location loc ON loc.ROWID = ci.location_id
        LEFT JOIN Calendar cal ON cal.ROWID = ci.calendar_id
        LEFT JOIN Identity org ON org.ROWID = ci.organizer_id
        WHERE ci.start_date <= {window_end_ts}
            AND COALESCE(ci.end_date, ci.start_date) >= {window_start_ts}
            AND ci.summary IS NOT NULL
            AND ci.summary != ''
        ORDER BY ci.start_date ASC
        LIMIT 1000;
    """,
    )


def _fetch_attendees_by_event(db, events):
    """Fetch attendee rows for all events in one query and group them by owning event id."""
    grouped = {}
    owner_ids = ",".join(str(ev["id"]) for ev in events)
    if not owner_ids:
        return grouped

    rows = query(
        db,
        f"""
            SELECT
                p.owner_id,
                p.email,
                p.status,
                p.role,
                p.is_self,
                p.entity_type,
                i.display_name
            FROM Participant p
            LEFT JOIN Identity i ON i.ROWID = p.identity_id
            WHERE p.owner_id IN ({owner_ids})
                AND p.entity_type = 7;
        """,
    )
    for row in rows:
        grouped.setdefault(row["owner_id"], []).append(row)
    return grouped


def _time_point(ts, tz_name):
    """Build a start/end entry; the `_float` zone marker is reported as no time zone."""
    return {
        "dateTime": coredata_to_iso(ts, tz_name),
        "timeZone": None if tz_name == "_float" else tz_name,
    }


def _event_document(ev, attendee_rows):
    """Convert one event row plus its attendee rows into the JSON document written to disk."""
    # Identity.address carries a "mailto:" prefix; drop it when present.
    organizer_email = ev.get("organizer_email") or None
    if organizer_email and organizer_email.startswith("mailto:"):
        organizer_email = organizer_email[len("mailto:"):]

    # Rows without an email address are left out.
    attendees = [
        {
            "email": row["email"],
            "name": (row.get("display_name") or "").strip() or None,
            "status": STATUS_MAP.get(row.get("status"), "unknown"),
            "role": ROLE_MAP.get(row.get("role"), "unknown"),
            "self": bool(row.get("is_self")),
        }
        for row in attendee_rows
        if row.get("email")
    ]

    organizer = None
    if organizer_email:
        organizer = {
            "email": organizer_email,
            "name": (ev.get("organizer_name") or "").strip() or None,
        }

    return {
        "id": f"apple_cal_{ev['id']}",
        "summary": ev["summary"],
        "start": _time_point(ev["start_date"], ev.get("start_tz")),
        # A missing end date falls back to the start date.
        "end": _time_point(ev["end_date"] or ev["start_date"], ev.get("end_tz")),
        "allDay": bool(ev.get("all_day")),
        "location": ev.get("location") or None,
        "description": ev.get("description") or None,
        "conferenceUrl": ev.get("conference_url") or None,
        "calendar": ev.get("calendar_name") or None,
        "organizer": organizer,
        "attendees": attendees or None,
    }


def main():
    """Sync events from 14 days back to 14 days ahead into ``OUTDIR`` as JSON files.

    Writes ``<ROWID>.json`` for every event returned by the query, deletes any
    other ``.json`` file found in ``OUTDIR``, and prints a short summary.
    """
    db = find_db()
    os.makedirs(OUTDIR, exist_ok=True)

    now = datetime.now(timezone.utc)
    half_window = timedelta(days=14)
    start = now - half_window
    end = now + half_window
    start_ts = (start - EPOCH).total_seconds()
    end_ts = (end - EPOCH).total_seconds()

    events = _fetch_events(db, start_ts, end_ts)
    # One batched attendee query instead of one query per event.
    attendees_by_event = _fetch_attendees_by_event(db, events)

    written = set()
    for ev in events:
        event_id = ev["id"]
        document = _event_document(ev, attendees_by_event.get(event_id, []))
        filename = f"{event_id}.json"
        with open(os.path.join(OUTDIR, filename), "w") as out:
            json.dump(document, out, indent=2)
        written.add(filename)

    # NOTE(review): query() returns [] on SQLite errors as well as for an empty
    # window, so after a failed query this pass removes every cached file.
    stale = [
        name
        for name in os.listdir(OUTDIR)
        if name.endswith(".json") and name not in written
    ]
    for name in stale:
        os.remove(os.path.join(OUTDIR, name))
    removed = len(stale)

    print("Apple Calendar Sync Complete")
    print(f"Events synced: {len(written)}")
    print(f"Time window: {start.date()} to {end.date()}")
    print(f"Files cleaned up: {removed} (outside window)")
    print(f"Output: {OUTDIR}")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Run the sync only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: sync-apple-mail
|
|
3
|
+
description: Sync email threads from the macOS Mail app's local SQLite database into ~/.cache/fit/basecamp/apple_mail/ as markdown files. Use on a schedule or when the user asks to sync their email. Requires macOS with Mail app configured and Full Disk Access granted.
|
|
4
|
+
compatibility: Requires macOS with Apple Mail configured and Full Disk Access granted to the terminal
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Sync Apple Mail
|
|
8
|
+
|
|
9
|
+
Sync email threads from the macOS Mail app's local SQLite database into
|
|
10
|
+
`~/.cache/fit/basecamp/apple_mail/` as markdown files. This is an automated data
|
|
11
|
+
pipeline skill — it ingests raw email data that other skills (like
|
|
12
|
+
`extract-entities`) consume downstream.
|
|
13
|
+
|
|
14
|
+
## Trigger
|
|
15
|
+
|
|
16
|
+
Run this skill on a schedule (every 5 minutes) or when the user asks to sync
|
|
17
|
+
their email.
|
|
18
|
+
|
|
19
|
+
## Prerequisites
|
|
20
|
+
|
|
21
|
+
- macOS with the built-in Mail app configured
|
|
22
|
+
- Full Disk Access granted to the terminal (System Settings → Privacy & Security
|
|
23
|
+
→ Full Disk Access)
|
|
24
|
+
|
|
25
|
+
## Inputs
|
|
26
|
+
|
|
27
|
+
- `~/.cache/fit/basecamp/state/apple_mail_last_sync` — last sync timestamp
|
|
28
|
+
(single-line text file)
|
|
29
|
+
- `~/Library/Mail/V*/MailData/Envelope Index` — Apple Mail SQLite database
|
|
30
|
+
|
|
31
|
+
## Outputs
|
|
32
|
+
|
|
33
|
+
- `~/.cache/fit/basecamp/apple_mail/{thread_id}.md` — one markdown file per
|
|
34
|
+
email thread
|
|
35
|
+
- `~/.cache/fit/basecamp/state/apple_mail_last_sync` — updated with new sync
|
|
36
|
+
timestamp
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Implementation
|
|
41
|
+
|
|
42
|
+
Run the sync as a single Python script. This avoids N+1 shell invocations and
|
|
43
|
+
handles all data transformation in one pass:
|
|
44
|
+
|
|
45
|
+
python3 scripts/sync.py
|
|
46
|
+
|
|
47
|
+
The script:
|
|
48
|
+
|
|
49
|
+
1. Finds the Mail database (`~/Library/Mail/V*/MailData/Envelope Index`)
|
|
50
|
+
2. Loads last sync timestamp (or defaults to 30 days ago for first sync)
|
|
51
|
+
3. Discovers the thread grouping column (`conversation_id` or `thread_id`)
|
|
52
|
+
4. Finds threads with new messages since last sync (up to 500)
|
|
53
|
+
5. For each thread: fetches messages, batch-fetches recipients, parses `.emlx`
|
|
54
|
+
files for full email bodies (falling back to database summaries)
|
|
55
|
+
6. Writes one markdown file per thread to `~/.cache/fit/basecamp/apple_mail/`
|
|
56
|
+
7. Updates sync state timestamp
|
|
57
|
+
8. Reports summary (threads processed, files written)
|
|
58
|
+
|
|
59
|
+
The script calls `scripts/parse-emlx.py` to extract plain text bodies from
|
|
60
|
+
`.emlx` / `.partial.emlx` files (handles HTML-only emails by stripping tags).
|
|
61
|
+
|
|
62
|
+
## Database Schema
|
|
63
|
+
|
|
64
|
+
See [references/SCHEMA.md](references/SCHEMA.md) for the complete Apple Mail
|
|
65
|
+
SQLite schema including table structures, column names, and important caveats
|
|
66
|
+
(e.g., `date_received` stores Unix timestamps rather than Core Data ones, `addresses.comment`
|
|
67
|
+
holds display names, `recipients` columns are `message`/`address` not
|
|
68
|
+
`message_id`/`address_id`).
|
|
69
|
+
|
|
70
|
+
## Output Format
|
|
71
|
+
|
|
72
|
+
Each `{thread_id}.md` file:
|
|
73
|
+
|
|
74
|
+
```markdown
|
|
75
|
+
# {Base Subject}
|
|
76
|
+
|
|
77
|
+
**Thread ID:** {thread_id}
|
|
78
|
+
**Message Count:** {count}
|
|
79
|
+
**Flags:** mailing-list, automated
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
### From: {sender_name} <{sender_email}>
|
|
84
|
+
**Date:** {YYYY-MM-DD HH:MM:SS UTC}
|
|
85
|
+
**To:** {name} <{email}>, {name2} <{email2}>
|
|
86
|
+
**Cc:** {name} <{email}>
|
|
87
|
+
|
|
88
|
+
{email_body_or_summary}
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
### From: {next_sender_name} <{next_sender_email}>
|
|
93
|
+
**Date:** {next_date}
|
|
94
|
+
**To:** ...
|
|
95
|
+
**Cc:** ...
|
|
96
|
+
|
|
97
|
+
{next_body}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Rules:
|
|
101
|
+
|
|
102
|
+
- Use the **base subject** (from `subject` column, without `subject_prefix`) as
|
|
103
|
+
the `# heading`.
|
|
104
|
+
- **Flags line** — only include when at least one flag is set:
|
|
105
|
+
- `mailing-list` if any message in the thread has `list_id_hash != 0`
|
|
106
|
+
- `automated` if any message has `automated_conversation = 1`
|
|
107
|
+
- Omit the `**Flags:**` line entirely if neither flag applies.
|
|
108
|
+
- **Sender** — format as `{sender_name} <{sender_email}>` when display name is
|
|
109
|
+
present, otherwise just `{sender_email}`.
|
|
110
|
+
- **To/Cc** — include per-message. Format each recipient as `{name} <{email}>`
|
|
111
|
+
when name exists, otherwise just `{email}`. Omit the line if that field has no
|
|
112
|
+
recipients.
|
|
113
|
+
|
|
114
|
+
## Error Handling
|
|
115
|
+
|
|
116
|
+
- Database not found → Mail not configured, report and stop
|
|
117
|
+
- Permission denied → Full Disk Access not granted, report and stop
|
|
118
|
+
- Database locked → wait 2 seconds, retry once
|
|
119
|
+
- `.emlx` / `.partial.emlx` not found → fall back to database summary field
|
|
120
|
+
- `.emlx` parse error → fall back to database summary field
|
|
121
|
+
- HTML-only email → strip tags and use as plain text body (handled by
|
|
122
|
+
parse-emlx.py)
|
|
123
|
+
- `find` timeout → skip that message's body, use summary
|
|
124
|
+
- Always update sync state, even on partial success
|
|
125
|
+
|
|
126
|
+
## Constraints
|
|
127
|
+
|
|
128
|
+
- Open database read-only (`-readonly`)
|
|
129
|
+
- Only sync Inbox and Sent folders
|
|
130
|
+
- Limit to 500 threads per run
|
|
131
|
+
- Incremental: only threads with new messages since last sync
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Apple Mail Database Schema
|
|
2
|
+
|
|
3
|
+
The Apple Mail SQLite database (`Envelope Index`) stores email metadata. Key
|
|
4
|
+
tables and their actual column names (verified on macOS Sequoia / V10).
|
|
5
|
+
|
|
6
|
+
Typical path: `~/Library/Mail/V10/MailData/Envelope Index`
|
|
7
|
+
|
|
8
|
+
## messages (email metadata)
|
|
9
|
+
|
|
10
|
+
| Column | Type | Notes |
|
|
11
|
+
| ------------------------ | ------- | -------------------------------------------------------- |
|
|
12
|
+
| `ROWID` | INTEGER | Primary key |
|
|
13
|
+
| `sender` | INTEGER | FK → addresses.ROWID |
|
|
14
|
+
| `subject` | INTEGER | FK → subjects.ROWID |
|
|
15
|
+
| `subject_prefix` | TEXT | `Re:`, `Fwd:`, etc. (directly on messages, not subjects) |
|
|
16
|
+
| `summary` | INTEGER | FK → summaries.ROWID |
|
|
17
|
+
| `date_sent` | INTEGER | Unix timestamp (seconds since 1970-01-01 UTC) |
|
|
18
|
+
| `date_received` | INTEGER | Unix timestamp (seconds since 1970-01-01 UTC) |
|
|
19
|
+
| `mailbox` | INTEGER | FK → mailboxes.ROWID |
|
|
20
|
+
| `deleted` | INTEGER | 1 = deleted |
|
|
21
|
+
| `conversation_id` | INTEGER | Thread grouping ID |
|
|
22
|
+
| `list_id_hash` | INTEGER | Non-zero = mailing list message |
|
|
23
|
+
| `automated_conversation` | INTEGER | 1 = automated/machine-generated |
|
|
24
|
+
| `read` | INTEGER | 1 = read |
|
|
25
|
+
| `flagged` | INTEGER | 1 = flagged |
|
|
26
|
+
|
|
27
|
+
**IMPORTANT:** `date_received` stores **Unix timestamps** (seconds since
|
|
28
|
+
1970-01-01 UTC), NOT Core Data timestamps (which use 2001-01-01 epoch). Do NOT
|
|
29
|
+
apply Core Data conversion.
|
|
30
|
+
|
|
31
|
+
## addresses (sender and recipient addresses)
|
|
32
|
+
|
|
33
|
+
| Column | Type | Notes |
|
|
34
|
+
| --------- | ------- | ------------------------------------- |
|
|
35
|
+
| `ROWID` | INTEGER | Primary key |
|
|
36
|
+
| `address` | TEXT | Email address |
|
|
37
|
+
| `comment` | TEXT | Display name (e.g., `"Olsson, Dick"`) |
|
|
38
|
+
|
|
39
|
+
**IMPORTANT:** The display name is in `comment`, not a `name` or `display_name`
|
|
40
|
+
column.
|
|
41
|
+
|
|
42
|
+
## subjects
|
|
43
|
+
|
|
44
|
+
| Column | Type | Notes |
|
|
45
|
+
| --------- | ------- | ----------------- |
|
|
46
|
+
| `ROWID` | INTEGER | Primary key |
|
|
47
|
+
| `subject` | TEXT | Base subject text |
|
|
48
|
+
|
|
49
|
+
Note: `subject_prefix` (Re:, Fwd:, etc.) is stored on the `messages` table
|
|
50
|
+
directly, not here.
|
|
51
|
+
|
|
52
|
+
## recipients (To/Cc/Bcc per message)
|
|
53
|
+
|
|
54
|
+
| Column | Type | Notes |
|
|
55
|
+
| ---------- | ------- | --------------------------- |
|
|
56
|
+
| `ROWID` | INTEGER | Primary key |
|
|
57
|
+
| `message` | INTEGER | FK → messages.ROWID |
|
|
58
|
+
| `address` | INTEGER | FK → addresses.ROWID |
|
|
59
|
+
| `type` | INTEGER | 0 = To, 1 = Cc, 2 = Bcc |
|
|
60
|
+
| `position` | INTEGER | Order within the type group |
|
|
61
|
+
|
|
62
|
+
**IMPORTANT:** Column names are `message` and `address` (not `message_id` or
|
|
63
|
+
`address_id`).
|
|
64
|
+
|
|
65
|
+
## summaries (Apple Intelligence email summaries)
|
|
66
|
+
|
|
67
|
+
| Column | Type | Notes |
|
|
68
|
+
| --------- | ------- | ------------ |
|
|
69
|
+
| `ROWID` | INTEGER | Primary key |
|
|
70
|
+
| `summary` | TEXT | Summary text |
|
|
71
|
+
|
|
72
|
+
## mailboxes
|
|
73
|
+
|
|
74
|
+
| Column | Type | Notes |
|
|
75
|
+
| ------- | ------- | -------------------------------- |
|
|
76
|
+
| `ROWID` | INTEGER | Primary key |
|
|
77
|
+
| `url` | TEXT | Mailbox URL (IMAP or EWS format) |
|
|
78
|
+
|
|
79
|
+
### Mailbox URL patterns
|
|
80
|
+
|
|
81
|
+
- Standard IMAP: `imap://user@host/INBOX`, `imap://user@host/Sent Messages`
|
|
82
|
+
- EWS (Exchange): `ews://UUID/Inbox`, `ews://UUID/Sent%20Items`
|
|
83
|
+
|
|
84
|
+
Use case-insensitive `LIKE` patterns to match both:
|
|
85
|
+
|
|
86
|
+
- `%/Inbox%` (catches IMAP `/INBOX` and EWS `/Inbox`)
|
|
87
|
+
- `%/INBOX%` (explicit uppercase match; redundant while `LIKE` is case-insensitive, which is SQLite's default for ASCII)
|
|
88
|
+
- `%/Sent%` (catches `Sent Messages`, `Sent Items`, `Sent%20Items`)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Parse a macOS Mail .emlx or .partial.emlx file and output the plain text body.
|
|
3
|
+
|
|
4
|
+
Usage: python3 scripts/parse-emlx.py <path-to-emlx-file>
|
|
5
|
+
|
|
6
|
+
The .emlx format is: first line = byte count, then RFC822 message, then Apple
|
|
7
|
+
plist. This script prints `From:` and `Date:` header lines, a `---` separator, and then the plain text body.
|
|
8
|
+
|
|
9
|
+
If the email has no text/plain part (HTML-only), falls back to stripping HTML
|
|
10
|
+
tags and outputting as plain text.
|
|
11
|
+
|
|
12
|
+
Exit codes:
|
|
13
|
+
0 — success (body printed to stdout)
|
|
14
|
+
1 — file not found or parse error (message on stderr)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import email
|
|
18
|
+
import html as html_mod
|
|
19
|
+
import re
|
|
20
|
+
import sys
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def html_to_text(html):
    """Strip HTML markup and return readable plain text. Uses only stdlib.

    Processing order: remove comments, remove ``<style>``/``<script>`` blocks,
    turn ``<br>`` and closing block-level tags into newlines, drop all
    remaining tags, decode entities, then normalise whitespace.

    Args:
        html: HTML source as a string.

    Returns:
        The text content with runs of spaces/tabs collapsed to one space,
        spaces around line breaks removed, at most one blank line in a row,
        and leading/trailing whitespace stripped.
    """
    # Comments go first: the generic tag pattern below stops at the first ">",
    # so a comment containing ">" (including Outlook conditional comments)
    # would otherwise leak its tail into the text.
    text = re.sub(r"<!--.*?-->", "", html, flags=re.DOTALL)
    # Remove style and script blocks together with their content.
    text = re.sub(
        r"<(style|script)[^>]*>.*?</\1>", "", text, flags=re.DOTALL | re.IGNORECASE
    )
    # Line breaks and the end of block-level elements become newlines so that
    # adjacent blocks are not fused into a single word.
    text = re.sub(r"<br\s*/?\s*>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"</(p|div|li|tr|h[1-6])\s*>", "\n", text, flags=re.IGNORECASE)
    # Strip remaining tags.
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities only after tag stripping so escaped markup such as
    # "&lt;b&gt;" survives as literal text.
    text = html_mod.unescape(text)
    # Collapse whitespace; spaces next to a newline are dropped so that
    # whitespace-only lines count as blank when limiting blank lines.
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r" ?\n ?", "\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _decode_part(part):
    """Return the decoded text payload of one message part, or None if it has no payload."""
    payload = part.get_payload(decode=True)
    if not payload:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset is not a codec Python knows (e.g. a bogus or
        # vendor-specific label); fall back to UTF-8 rather than failing.
        return payload.decode("utf-8", errors="replace")


def extract_body(msg):
    """Extract plain text body from an email message, with HTML fallback.

    For multipart messages the first non-attachment ``text/plain`` part with a
    payload is used; if there is none, the first non-attachment ``text/html``
    part is converted with ``html_to_text``. Single-part messages are handled
    the same way based on their own content type.

    Args:
        msg: A parsed ``email.message.Message``.

    Returns:
        The body text, or ``None`` when no usable text or HTML part exists.
    """
    body = None
    html_body = None

    multipart = msg.is_multipart()
    for part in msg.walk() if multipart else [msg]:
        # Parts explicitly marked as attachments are files, not the body.
        if multipart and part.get_content_disposition() == "attachment":
            continue
        content_type = part.get_content_type()
        if content_type == "text/plain" and body is None:
            body = _decode_part(part)
        elif content_type == "text/html" and html_body is None:
            html_body = _decode_part(part)

    if body:
        return body
    if html_body:
        return html_to_text(html_body)
    return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parse_emlx(path):
    """Print the sender, date and text body of the .emlx file at ``path``.

    The first line of the file is read as the byte length of the message that
    follows, and exactly that many bytes are parsed. Output on stdout is a
    ``From:`` line, a ``Date:`` line, a ``---`` separator and then the body
    (omitted when no text could be extracted).

    Exits with status 1, after writing a message to stderr, when the file does
    not exist or any other error occurs while reading, parsing or printing.
    """
    try:
        with open(path, "rb") as handle:
            message_length = int(handle.readline())
            message = email.message_from_bytes(handle.read(message_length))

        print(f"From: {message.get('From', 'Unknown')}")
        print(f"Date: {message.get('Date', '')}")
        print("---")

        text = extract_body(message)
        if text:
            print(text)
    except FileNotFoundError:
        print(f"Error: File not found: {path}", file=sys.stderr)
        sys.exit(1)
    except Exception as err:
        print(f"Error parsing {path}: {err}", file=sys.stderr)
        sys.exit(1)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the file to parse.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python3 scripts/parse-emlx.py <path>", file=sys.stderr)
        sys.exit(1)
    parse_emlx(cli_args[0])
|