chatwire 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chat_db.py ADDED
@@ -0,0 +1,393 @@
1
+ """Read new iMessage rows from ~/Library/Messages/chat.db.
2
+
3
+ Requires Full Disk Access granted to the python binary running this code.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import logging
9
+ import sqlite3
10
+ import subprocess
11
+ import time
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
# Seconds separating the Unix epoch from the Apple epoch (2001-01-01 UTC).
# chat.db stores message dates as nanoseconds since the Apple epoch, so this
# is subtracted from Unix time before scaling to nanoseconds.
APPLE_EPOCH_OFFSET = 978_307_200

log = logging.getLogger("chat_db")

# Location of the Messages database inside the current user's home directory.
CHAT_DB = Path.home().joinpath("Library", "Messages", "chat.db")

# attachment.transfer_state value for a successfully downloaded attachment.
ATTACHMENT_READY = 5

# Every message row newer than a given ROWID (bound as the single parameter),
# oldest first, joined with its sender handle, the message it replies to (via
# thread_originator_guid) and its chat.
#
# A message belongs to exactly one chat via chat_message_join (INSERT/DELETE
# triggers maintain this invariant). For 1:1 chats style=45 and
# chat_identifier is the peer handle; for groups style=43 and chat_identifier
# looks like "chat629180424750381661". display_name is the user-set group
# name (may be empty). chat.guid is the full AppleScript-addressable form,
# e.g. "iMessage;+;chat629180424750381661" — use it for `chat id` sends.
NEW_MESSAGES_SQL = """
SELECT
    m.ROWID                        AS rowid,
    COALESCE(h.id, '')             AS handle,
    m.is_from_me                   AS is_from_me,
    COALESCE(m.text, '')           AS text,
    m.cache_has_attachments        AS has_attachments,
    COALESCE(parent.text, '')      AS parent_text,
    COALESCE(parent_h.id, '')      AS parent_handle,
    COALESCE(parent.is_from_me, 0) AS parent_is_from_me,
    COALESCE(c.guid, '')           AS chat_guid,
    COALESCE(c.chat_identifier, '') AS chat_identifier,
    COALESCE(c.display_name, '')   AS chat_display_name,
    COALESCE(c.style, 0)           AS chat_style
FROM message m
LEFT JOIN handle h ON m.handle_id = h.ROWID
LEFT JOIN message parent ON parent.guid = m.thread_originator_guid
LEFT JOIN handle parent_h ON parent.handle_id = parent_h.ROWID
LEFT JOIN chat_message_join cmj ON cmj.message_id = m.ROWID
LEFT JOIN chat c ON c.ROWID = cmj.chat_id
WHERE m.ROWID > ?
ORDER BY m.ROWID ASC
"""

# Attachment rows linked to one message (message ROWID bound as the parameter).
ATTACHMENTS_SQL = """
SELECT
    a.filename       AS filename,
    a.mime_type      AS mime_type,
    a.transfer_state AS transfer_state
FROM message_attachment_join maj
JOIN attachment a ON a.ROWID = maj.attachment_id
WHERE maj.message_id = ?
"""


# chat.style values we care about. Apple has other values for abandoned
# chats etc., but the only useful distinction is group vs 1:1.
CHAT_STYLE_GROUP = 43
CHAT_STYLE_DIRECT = 45
70
+
71
+
72
@dataclass
class InboundMessage:
    """A single chat.db message row together with its reply and chat context.

    Attributes:
        rowid: ROWID of the row in the `message` table.
        handle: Sender/peer handle, e.g. '+15551234567' or 'foo@example.com'.
        text: Message text ('' when the row has none).
        attachments: Attachments linked to this message.
        is_from_me: True when this device (or an iCloud-synced device of the
            same account) sent the message.
        parent_text: Text of the message this one replies to, if any.
        parent_handle: Handle attached to that parent message.
        parent_is_from_me: Whether the parent message was sent by this account.
        chat_guid: AppleScript-addressable chat ID ("iMessage;+;chat…").
        chat_identifier: Short chat identifier (the peer handle for 1:1
            chats, "chat…" for groups).
        chat_name: The group's display_name; empty for 1:1 chats and for
            unnamed groups.
        is_group: True when the chat's style is 43 (group chat).
    """

    rowid: int
    handle: str
    text: str
    attachments: list["InboundAttachment"]
    is_from_me: bool

    # Reply context; left at the defaults when the message is not a reply.
    parent_text: str = ""
    parent_handle: str = ""
    parent_is_from_me: bool = False

    # Source chat context.
    chat_guid: str = ""
    chat_identifier: str = ""
    chat_name: str = ""
    is_group: bool = False
90
+
91
+
92
@dataclass
class InboundAttachment:
    """A file attached to an inbound message.

    Attributes:
        path: Local filesystem path of the attachment (a HEIC original may
            have been swapped for its converted copy).
        mime_type: MIME type reported for the attachment.
        ready: False if Apple hasn't finished downloading the file from
            iCloud yet.
    """

    path: Path
    mime_type: str
    ready: bool
97
+
98
+
99
def _expand(p: str | None) -> Path | None:
    """Turn a stored filename into a Path with a leading ``~`` expanded.

    Returns None for a missing or empty filename.
    """
    return Path(p).expanduser() if p else None
103
+
104
+
105
def _maybe_convert_heic(src: Path) -> Path:
    """Convert a HEIC/HEIF image to JPEG via macOS `sips`.

    The JPEG is written next to the source with a ``.jpg`` suffix and is
    reused on later calls as long as it is at least as new as the source.

    Args:
        src: Path of the attachment on disk.

    Returns:
        The JPEG path when a fresh conversion exists or was just produced;
        otherwise ``src`` unchanged (not HEIC/HEIF, `sips` unavailable, or the
        conversion failed or timed out). This function never raises for a
        failed conversion.
    """
    if src.suffix.lower() not in {".heic", ".heif"}:
        return src
    dst = src.with_suffix(".jpg")

    # Reuse an earlier conversion if it is not older than the source. A
    # missing dst (or a source that vanished meanwhile) raises OSError here;
    # in that case fall through and let the conversion attempt decide.
    try:
        if dst.stat().st_mtime >= src.stat().st_mtime:
            return dst
    except OSError:
        pass

    try:
        subprocess.run(
            ["sips", "-s", "format", "jpeg", str(src), "--out", str(dst)],
            check=True, capture_output=True, timeout=30,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
        log.warning("sips HEIC convert failed for %s: %s", src, e)
        # A failed or killed sips may leave a truncated output whose fresh
        # mtime would make the cache check above return it next time.
        try:
            dst.unlink(missing_ok=True)
        except OSError:
            pass
        return src
    except OSError as e:
        # sips could not be started at all (e.g. not installed / not on PATH).
        log.warning("sips HEIC convert failed for %s: %s", src, e)
        return src
    return dst
121
+
122
+
123
class ChatDBReader:
    """Read chat.db through a stable TCC-warm connection, then snapshot-via-backup
    for fresh data on every call.

    Observed constraints on macOS 12 / launchd user agent:
      - FRESH `sqlite3.connect(chat.db)` calls succeed for the first ~4 minutes
        after process start, then TCC starts denying with "unable to open
        database file". Reason unclear — suspected responsibility-process
        cache TTL.
      - A persistent read-only connection does NOT see WAL frames Messages.app
        appends after open (likely because the `chat.db-shm` index isn't
        accessible the same way), so `MAX(ROWID)` stays stale.
      - `conn.backup(target)` uses the already-open source connection and
        correctly copies the latest committed state including WAL frames.

    Strategy: open ONCE at startup (TCC is hot), then for every query
    `backup()` the current state into a fresh in-memory db and query that.

    The highest message ROWID already handed out by `poll()` is kept in
    `last_seen` and persisted as JSON at `state_path`.
    """

    def __init__(self, state_path: Path):
        """Load the persisted high-water mark; chat.db itself is opened lazily."""
        self.state_path = state_path
        self.last_seen = self._load_state()
        self._src: sqlite3.Connection | None = None

    def _load_state(self) -> int:
        """Return the persisted `last_seen_rowid`, or 0 if absent or unusable.

        Any malformed state file — invalid JSON, a non-object top level, or a
        value that cannot be converted to int — is logged and treated as 0
        rather than raised.
        """
        try:
            raw = self.state_path.read_text()
        except FileNotFoundError:
            return 0
        try:
            # json.JSONDecodeError is a ValueError; AttributeError covers a
            # non-dict top level, TypeError covers e.g. a null value.
            return int(json.loads(raw).get("last_seen_rowid", 0))
        except (ValueError, TypeError, AttributeError):
            log.exception("state file corrupt; resetting last_seen=0")
            return 0

    def _save_state(self) -> None:
        """Persist `last_seen` by writing a sibling .tmp file and renaming it."""
        tmp = self.state_path.with_suffix(".tmp")
        tmp.write_text(json.dumps({"last_seen_rowid": self.last_seen}, indent=2))
        tmp.replace(self.state_path)

    def _src_conn(self) -> sqlite3.Connection:
        """Long-lived read-only source connection, opened once while TCC is hot."""
        if self._src is None:
            self._src = sqlite3.connect(f"file:{CHAT_DB}?mode=ro", uri=True)
        return self._src

    def close(self) -> None:
        """Close the long-lived source connection, if one was opened.

        A later call would reopen chat.db, which (per the class notes) may be
        refused once the process has been running for a while — call this
        only at shutdown.
        """
        if self._src is not None:
            self._src.close()
            self._src = None

    def _fresh_snapshot(self) -> sqlite3.Connection:
        """Copy the live chat.db into an in-memory db via sqlite backup.

        Uses the warm source connection (no new chat.db open, so TCC stays
        happy) and produces an in-memory db that reflects the latest committed
        state — including WAL frames the persistent source cursor would miss
        if we queried it directly.

        Returns:
            A new in-memory connection with `sqlite3.Row` rows. The caller
            owns it and must close it.
        """
        src = self._src_conn()
        mem = sqlite3.connect(":memory:")
        try:
            mem.row_factory = sqlite3.Row
            src.backup(mem)
        except Exception:
            # Don't leak the half-filled in-memory database on failure.
            mem.close()
            raise
        return mem

    def initialize_to_now(self) -> None:
        """Open chat.db once (TCC is hot) and seed last_seen on first run.

        When no state was loaded (`last_seen == 0`) the current MAX(ROWID) is
        stored and persisted so existing history is skipped; otherwise the
        loaded value is kept and only logged.
        """
        snap = self._fresh_snapshot()
        try:
            row = snap.execute("SELECT COALESCE(MAX(ROWID), 0) FROM message").fetchone()
        finally:
            snap.close()
        current_max = int(row[0])
        if self.last_seen == 0:
            self.last_seen = current_max
            self._save_state()
            log.info("first run: seeded last_seen_rowid=%d (skipping history)", self.last_seen)
        else:
            log.info("resuming from last_seen_rowid=%d (current max=%d)",
                     self.last_seen, current_max)

    def list_groups(self) -> list[dict]:
        """Return group chats visible in chat.db, most-recently-active first.

        Each dict: {guid, chat_identifier, name, last_rowid, participants}.

        Only chats with style = CHAT_STYLE_GROUP that have at least one row in
        chat_message_join are returned, so abandoned/empty chats are skipped.
        `name` is the chat's display_name and is '' for unnamed groups; no
        synthetic name is built here — callers can derive one from
        `participants` (the handle ids joined to the chat).
        """
        groups_sql = """
            SELECT c.guid AS guid,
                   c.chat_identifier AS chat_identifier,
                   COALESCE(c.display_name, '') AS name,
                   MAX(cmj.message_id) AS last_rowid
            FROM chat c
            JOIN chat_message_join cmj ON cmj.chat_id = c.ROWID
            WHERE c.style = ?
            GROUP BY c.ROWID
            ORDER BY last_rowid DESC
        """
        participants_sql = """
            SELECT h.id AS id FROM chat_handle_join chj
            JOIN handle h ON h.ROWID = chj.handle_id
            JOIN chat c ON c.ROWID = chj.chat_id
            WHERE c.guid = ?
        """
        conn = self._fresh_snapshot()
        try:
            out: list[dict] = []
            for r in conn.execute(groups_sql, (CHAT_STYLE_GROUP,)).fetchall():
                members = conn.execute(participants_sql, (r["guid"],)).fetchall()
                out.append({
                    "guid": r["guid"],
                    "chat_identifier": r["chat_identifier"],
                    "name": r["name"],
                    "last_rowid": int(r["last_rowid"] or 0),
                    "participants": [m["id"] for m in members],
                })
            return out
        finally:
            conn.close()

    def services_for(self, handles: list[str]) -> dict[str, list[str]]:
        """Return {handle_lc: [services]} — e.g. {"+15551234567": ["SMS", "iMessage"]}.

        Apple stores one handle row per (id, service) pair. A phone number
        that has an iMessage identity AND has been SMS'd will appear twice;
        a number you've only ever iMessaged will appear once; a number never
        messaged won't appear at all (empty list in the result).

        Used to tell the user BEFORE whitelisting whether a candidate is
        iMessage-capable from this Mac.

        Args:
            handles: Handles to look up; matching is case-insensitive and the
                result is keyed by the lowercased handle.
        """
        if not handles:
            return {}
        # De-duplicate (order-preserving) so repeated handles don't inflate
        # the number of bound SQL parameters.
        lows = list(dict.fromkeys(h.lower() for h in handles))
        placeholders = ",".join("?" * len(lows))
        out: dict[str, list[str]] = {h: [] for h in lows}
        conn = self._fresh_snapshot()
        try:
            rows = conn.execute(
                f"SELECT LOWER(id) AS id, service FROM handle WHERE LOWER(id) IN ({placeholders})",
                lows,
            ).fetchall()
            for r in rows:
                out.setdefault(r["id"], []).append(r["service"])
        finally:
            conn.close()
        return out

    def outcomes_for(
        self, handles: list[str], window_days: int = 30
    ) -> dict[str, dict[str, dict]]:
        """Per-handle-per-service outgoing delivery stats.

        A handle row in chat.db tells you what's *configured* (e.g. an iMessage
        identity exists) but not whether iMessage still works — Apple leaves
        stale handle rows around after the recipient deregisters, which is the
        whole reason `error=22` exists. These aggregates surface actual
        recent-reachability so /check can say "deregistered → SMS" instead of
        "iMessage" when the last real attempt failed.

        Args:
            handles: Handles to look up (case-insensitive).
            window_days: Size of the look-back window for the counters.

        Returns:
            `{handle_lc: {service: stats}}` where stats is:
              total            — outgoing message rows in the window
              delivered        — rows with is_delivered=1
              err22            — rows with error=22 ("not registered on iMessage")
              latest_error     — error code of the *most recent ever* outgoing row
              latest_delivered — is_delivered of the most recent ever row
              latest_rowid     — ROWID of the most recent ever row

            Latest-* fields ignore the window so stale err=22 signals still
            surface even if the last iMessage attempt was months ago. A
            service absent from the inner dict means we've never sent via it
            to that handle.
        """
        if not handles:
            return {}
        lows = list(dict.fromkeys(h.lower() for h in handles))
        placeholders = ",".join("?" * len(lows))
        # Window start expressed in chat.db's unit: ns since the Apple epoch.
        cutoff_apple_ns = int(
            (time.time() - window_days * 86400 - APPLE_EPOCH_OFFSET) * 1_000_000_000
        )
        out: dict[str, dict[str, dict]] = {h: {} for h in lows}
        agg_sql = f"""
            SELECT LOWER(h.id) AS handle, h.service AS service,
                   COUNT(*) AS total,
                   COALESCE(SUM(m.is_delivered), 0) AS delivered,
                   COALESCE(SUM(CASE WHEN m.error = 22 THEN 1 ELSE 0 END), 0) AS err22
            FROM message m JOIN handle h ON m.handle_id = h.ROWID
            WHERE m.is_from_me = 1
              AND LOWER(h.id) IN ({placeholders})
              AND m.date >= ?
            GROUP BY LOWER(h.id), h.service
        """
        # Latest row per (handle, service) across all time — so a stale
        # err=22 still tells the truth even if nothing was sent recently.
        latest_sql = f"""
            SELECT LOWER(h.id) AS handle, h.service AS service,
                   m.error AS error, m.is_delivered AS is_delivered, m.ROWID AS rowid
            FROM message m JOIN handle h ON m.handle_id = h.ROWID
            WHERE m.is_from_me = 1
              AND LOWER(h.id) IN ({placeholders})
              AND m.ROWID IN (
                  SELECT MAX(m2.ROWID) FROM message m2
                  JOIN handle h2 ON m2.handle_id = h2.ROWID
                  WHERE m2.is_from_me = 1
                    AND LOWER(h2.id) IN ({placeholders})
                  GROUP BY LOWER(h2.id), h2.service
              )
        """
        conn = self._fresh_snapshot()
        try:
            for r in conn.execute(agg_sql, [*lows, cutoff_apple_ns]).fetchall():
                out.setdefault(r["handle"], {})[r["service"]] = {
                    "total": int(r["total"]),
                    "delivered": int(r["delivered"]),
                    "err22": int(r["err22"]),
                }
            for r in conn.execute(latest_sql, [*lows, *lows]).fetchall():
                stats = out.setdefault(r["handle"], {}).setdefault(
                    r["service"], {"total": 0, "delivered": 0, "err22": 0}
                )
                stats["latest_error"] = int(r["error"] or 0)
                stats["latest_delivered"] = bool(r["is_delivered"])
                stats["latest_rowid"] = int(r["rowid"])
        finally:
            conn.close()
        return out

    def _attachments_for(self, conn: sqlite3.Connection, rowid: int) -> list[InboundAttachment]:
        """Build the attachment list for one message row of the snapshot."""
        found: list[InboundAttachment] = []
        for a in conn.execute(ATTACHMENTS_SQL, (rowid,)).fetchall():
            path = _expand(a["filename"])
            if not path:
                continue
            mime_type = a["mime_type"] or "application/octet-stream"
            # Trust the filesystem: if the file is there, we can send it.
            # transfer_state lags behind iCloud sync.
            ready = path.exists()
            if ready:
                try:
                    converted = _maybe_convert_heic(path)
                except OSError:
                    # One unconvertible image must not abort the whole batch.
                    log.exception("HEIC conversion failed for %s; using original", path)
                    converted = path
                if converted != path:
                    # The file handed on is now the JPEG copy, so describe it
                    # as such instead of keeping the HEIC MIME type.
                    path = converted
                    mime_type = "image/jpeg"
            found.append(InboundAttachment(path=path, mime_type=mime_type, ready=ready))
        return found

    def poll(self) -> list[InboundMessage]:
        """Return every message row added since the previous poll, oldest first.

        Rows are NOT filtered by direction: the query selects on ROWID only,
        so messages with `is_from_me` set are included and callers that only
        want incoming traffic must skip them.

        `last_seen` is advanced (and persisted) only after the whole batch has
        been built; if anything raises, the high-water mark is left untouched
        so the same rows are returned by the next poll instead of being lost.
        """
        conn = self._fresh_snapshot()
        try:
            rows = conn.execute(NEW_MESSAGES_SQL, (self.last_seen,)).fetchall()
            out = [
                InboundMessage(
                    rowid=int(r["rowid"]),
                    handle=r["handle"],
                    text=r["text"],
                    attachments=(
                        self._attachments_for(conn, r["rowid"])
                        if r["has_attachments"] else []
                    ),
                    is_from_me=bool(r["is_from_me"]),
                    parent_text=r["parent_text"],
                    parent_handle=r["parent_handle"],
                    parent_is_from_me=bool(r["parent_is_from_me"]),
                    chat_guid=r["chat_guid"],
                    chat_identifier=r["chat_identifier"],
                    chat_name=r["chat_display_name"],
                    is_group=(int(r["chat_style"]) == CHAT_STYLE_GROUP),
                )
                for r in rows
            ]
            # Rows come back in ascending ROWID order, so the last is newest.
            newest = int(rows[-1]["rowid"]) if rows else None
        finally:
            conn.close()
        if newest is not None:
            previous = self.last_seen
            self.last_seen = newest
            try:
                self._save_state()
            except Exception:
                # Keep memory and disk in agreement; the batch is retried.
                self.last_seen = previous
                raise
        return out