jxa-mail-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jxa-mail-mcp might be problematic. See the details below for more information.

@@ -0,0 +1,305 @@
1
+ """Incremental sync via JXA for new emails.
2
+
3
+ This module syncs new emails that arrived since the last index build.
4
+ Uses JXA (slower) because it works without Full Disk Access.
5
+
6
+ The sync process:
7
+ 1. Get set of already-indexed message IDs
8
+ 2. Fetch message IDs from Mail.app via JXA
9
+ 3. Find IDs that aren't in the index yet
10
+ 4. Fetch content for new emails via JXA
11
+ 5. Insert into index
12
+
13
+ SECURITY NOTE: All strings passed to JXA are serialized via json.dumps()
14
+ to prevent injection attacks.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import sqlite3
22
+ from datetime import datetime
23
+ from typing import TYPE_CHECKING
24
+
25
+ from ..config import get_index_max_emails
26
+ from ..executor import build_mailbox_setup_js
27
+ from .schema import INSERT_EMAIL_SQL, email_to_row
28
+ from .search import add_account_mailbox_filter
29
+
30
+ if TYPE_CHECKING:
31
+ from collections.abc import Callable
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
def get_indexed_message_ids(
    conn: sqlite3.Connection,
    account: str | None = None,
    mailbox: str | None = None,
) -> set[int]:
    """
    Return every message ID currently stored in the index.

    Args:
        conn: Database connection
        account: Optional account filter
        mailbox: Optional mailbox filter

    Returns:
        Set of message IDs (note: only unique within account/mailbox)
    """
    # The "WHERE 1=1" base lets optional filters be appended uniformly.
    query = "SELECT message_id FROM emails e WHERE 1=1"
    bind_values: list = []
    query = add_account_mailbox_filter(query, bind_values, account, mailbox)

    rows = conn.execute(query, bind_values)
    return {message_id for (message_id,) in rows}
59
+
60
+
61
def fetch_mailbox_ids_jxa(account: str | None, mailbox: str) -> list[int]:
    """
    Fetch all message IDs from a mailbox via JXA.

    Args:
        account: Account name (None for first account)
        mailbox: Mailbox name

    Returns:
        List of message IDs
    """
    from ..executor import execute_with_core

    # The helper JSON-serializes both names before splicing them into JXA,
    # which prevents script injection.
    setup_js = build_mailbox_setup_js(account, mailbox)

    jxa_script = f"""
    {setup_js}
    if (!mailbox) {{
        JSON.stringify([]);
    }} else {{
        const ids = mailbox.messages.id();
        JSON.stringify(ids || []);
    }}
    """

    try:
        payload = execute_with_core(jxa_script)
    except Exception as exc:
        logger.warning("Failed to fetch IDs for %s/%s: %s", account, mailbox, exc)
        return []
    return payload if isinstance(payload, list) else []
93
+
94
+
95
def fetch_emails_by_ids_jxa(
    account: str | None, mailbox: str, message_ids: list[int]
) -> list[dict]:
    """
    Fetch email content for specific IDs via JXA.

    Args:
        account: Account name
        mailbox: Mailbox name
        message_ids: List of message IDs to fetch

    Returns:
        List of email dicts with id, subject, sender, content, date_received
    """
    # Nothing requested -> nothing to fetch.
    if not message_ids:
        return []

    from ..executor import execute_with_core

    # Both interpolated values are JSON-serialized, so the generated JXA
    # script cannot be broken out of by crafted names or IDs.
    setup_js = build_mailbox_setup_js(account, mailbox)
    wanted_ids = json.dumps(message_ids)

    jxa_script = f"""
    {setup_js}
    if (!mailbox) {{
        JSON.stringify([]);
    }} else {{
        const targetIds = new Set({wanted_ids});

        // Batch fetch IDs to find indices
        const allIds = mailbox.messages.id() || [];
        const indices = [];
        for (let i = 0; i < allIds.length; i++) {{
            if (targetIds.has(allIds[i])) {{
                indices.push(i);
            }}
        }}

        // Fetch properties for matching messages only
        const results = [];
        for (const idx of indices) {{
            try {{
                const msg = mailbox.messages[idx];
                results.push({{
                    id: msg.id(),
                    subject: msg.subject() || '',
                    sender: msg.sender() || '',
                    content: msg.content() || '',
                    date_received: MailCore.formatDate(msg.dateReceived())
                }});
            }} catch (e) {{
                // Skip messages that can't be read
            }}
        }}

        JSON.stringify(results);
    }}
    """

    try:
        payload = execute_with_core(jxa_script)
    except Exception as exc:
        logger.warning(
            "Failed to fetch emails for %s/%s: %s", account, mailbox, exc
        )
        return []
    return payload if isinstance(payload, list) else []
163
+
164
+
165
def get_all_mailboxes_jxa() -> list[tuple[str, str]]:
    """
    Get all account/mailbox pairs via JXA.

    Returns:
        List of (account_name, mailbox_name) tuples
    """
    from ..executor import execute_with_core

    script = """
    const results = [];
    const accounts = Mail.accounts();
    const accountNames = Mail.accounts.name();

    for (let i = 0; i < accounts.length; i++) {
        const account = accounts[i];
        const accountName = accountNames[i];
        const mailboxNames = account.mailboxes.name() || [];

        for (const mbName of mailboxNames) {
            results.push([accountName, mbName]);
        }
    }

    JSON.stringify(results);
    """

    try:
        result = execute_with_core(script)
    except Exception as e:
        logger.warning("Failed to get mailboxes: %s", e)
        return []

    if not isinstance(result, list):
        return []

    # Fix: the JSON round-trip from JXA yields 2-element lists, not the
    # tuples the signature promises. Convert (and skip malformed entries)
    # so the return value matches the declared type and is hashable.
    pairs: list[tuple[str, str]] = []
    for item in result:
        if isinstance(item, list) and len(item) == 2:
            pairs.append((item[0], item[1]))
    return pairs
198
+
199
+
200
def sync_incremental(
    conn: sqlite3.Connection,
    progress_callback: Callable[[int, int | None, str], None] | None = None,
) -> int:
    """
    Sync new emails via JXA.

    Compares indexed IDs with Mail.app and fetches only new emails.
    Much faster than rebuild for startup sync.

    Args:
        conn: Database connection
        progress_callback: Optional callback(current, total, message)

    Returns:
        Number of new emails synced (actual successful inserts)
    """
    max_per_mailbox = get_index_max_emails()
    total_synced = 0

    # Get all mailboxes
    if progress_callback:
        progress_callback(0, None, "Discovering mailboxes...")

    mailboxes = get_all_mailboxes_jxa()
    if not mailboxes:
        logger.info("No mailboxes found to sync")
        return 0

    logger.info("Syncing %d mailboxes", len(mailboxes))

    for i, (account, mailbox) in enumerate(mailboxes):
        if progress_callback:
            progress_callback(
                i, len(mailboxes), f"Syncing {account}/{mailbox}..."
            )

        # Get already indexed IDs for this mailbox
        indexed_ids = get_indexed_message_ids(conn, account, mailbox)

        # Check if we're at the limit
        if len(indexed_ids) >= max_per_mailbox:
            logger.debug("Mailbox %s/%s at limit, skipping", account, mailbox)
            continue

        # Get current IDs from Mail.app
        current_ids = fetch_mailbox_ids_jxa(account, mailbox)
        if not current_ids:
            continue

        # Find new IDs
        new_ids = [mid for mid in current_ids if mid not in indexed_ids]
        if not new_ids:
            continue

        # Limit to stay under max
        remaining_capacity = max_per_mailbox - len(indexed_ids)
        new_ids = new_ids[:remaining_capacity]

        logger.debug(
            "Fetching %d new emails from %s/%s", len(new_ids), account, mailbox
        )

        # Fetch content for new emails (in batches). Count only rows that
        # actually land in the database — fixes overcounting when a fetch
        # comes back short or an insert is rejected.
        inserted = 0
        batch_size = 50
        for batch_start in range(0, len(new_ids), batch_size):
            batch_ids = new_ids[batch_start : batch_start + batch_size]
            emails = fetch_emails_by_ids_jxa(account, mailbox, batch_ids)

            if not emails:
                continue

            # Insert into database using centralized SQL and tuple converter
            for email in emails:
                try:
                    row = email_to_row(email, account, mailbox)
                    conn.execute(INSERT_EMAIL_SQL, row)
                except sqlite3.IntegrityError:
                    logger.debug(
                        "Duplicate email ID %s in %s/%s",
                        email["id"],
                        account,
                        mailbox,
                    )
                except sqlite3.Error as e:
                    logger.error("Database error: %s", e)
                else:
                    inserted += 1

        total_synced += inserted

        # Update sync state with the count we actually hold, not the count
        # we hoped to fetch.
        now = datetime.now().isoformat()
        conn.execute(
            """INSERT OR REPLACE INTO sync_state
            (account, mailbox, last_sync, message_count)
            VALUES (?, ?, ?, ?)""",
            (account, mailbox, now, len(indexed_ids) + inserted),
        )

        conn.commit()

    if progress_callback:
        progress_callback(len(mailboxes), len(mailboxes), "Sync complete")

    logger.info("Synced %d new emails", total_synced)
    return total_synced
@@ -0,0 +1,341 @@
1
+ """File watcher for real-time index updates.
2
+
3
+ Watches ~/Library/Mail/V10/ for .emlx file changes and updates the index.
4
+
5
+ Uses watchfiles (Rust-based, efficient) to monitor:
6
+ - New emails → parse and add to index
7
+ - Deleted emails → remove from index
8
+
9
+ The watcher runs in a background thread and batches updates to avoid
10
+ overwhelming the database with rapid changes.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import re
17
+ import sqlite3
18
+ import threading
19
+ import time
20
+ from pathlib import Path
21
+ from typing import TYPE_CHECKING
22
+
23
+ from .disk import find_mail_directory, parse_emlx
24
+ from .schema import INSERT_EMAIL_SQL, create_connection
25
+
26
+ if TYPE_CHECKING:
27
+ from collections.abc import Callable
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Regex to extract account/mailbox from path
32
+ # ~/Library/Mail/V10/[AccountUUID]/[Mailbox].mbox/.../*.emlx
33
+ PATH_PATTERN = re.compile(r"/V\d+/([^/]+)/([^/]+)\.mbox/.*?/(\d+)\.emlx$")
34
+
35
+ # Constants for safety limits
36
+ MAX_PENDING_CHANGES = 10000 # Prevent unbounded memory growth
37
+ DELETE_BATCH_SIZE = 500 # SQLite variable limit safety
38
+ FILE_RETRY_DELAY_MS = 200 # Wait for Mail.app to finish writing
39
+ MAX_FILE_RETRIES = 3
40
+
41
+
42
class IndexWatcher:
    """
    Watches Mail directory for changes and updates the index.

    Events from watchfiles are accumulated under a lock and applied in
    batches from a single background thread.

    Usage:
        watcher = IndexWatcher(db_path, on_update=callback)
        watcher.start()
        # ... later ...
        watcher.stop()
    """

    def __init__(
        self,
        db_path: Path,
        on_update: Callable[[int, int], None] | None = None,
        debounce_ms: int = 500,
    ):
        """
        Initialize the watcher.

        Args:
            db_path: Path to the index database
            on_update: Optional callback(added, removed) after processing
            debounce_ms: Milliseconds to wait before processing changes
        """
        self.db_path = db_path
        self.on_update = on_update
        self.debounce_ms = debounce_ms

        self._mail_dir: Path | None = None
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

        # Pending changes (debounced), keyed by (account, mailbox, id)
        self._pending_adds: dict[tuple[str, str, int], Path] = {}
        self._pending_deletes: set[tuple[str, str, int]] = set()
        self._pending_lock = threading.Lock()

        # Persistent connection, created lazily on the watcher thread
        self._conn: sqlite3.Connection | None = None

    def start(self) -> bool:
        """
        Start watching for changes.

        Returns:
            True if started successfully, False if mail dir not found
        """
        try:
            self._mail_dir = find_mail_directory()
        except FileNotFoundError:
            logger.warning("Mail directory not found, watcher not started")
            return False

        self._stop_event.clear()
        self._thread = threading.Thread(
            target=self._watch_loop,
            name="IndexWatcher",
            daemon=True,
        )
        self._thread.start()
        logger.info("File watcher started for %s", self._mail_dir)
        return True

    def stop(self, timeout: float = 5.0) -> None:
        """Stop watching and wait for thread to finish."""
        self._stop_event.set()
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=timeout)
        self._thread = None

        # Close persistent connection.
        # NOTE(review): if join() timed out above, the watcher thread may
        # still be using this connection — closing here assumes it exited
        # within the timeout. Confirm acceptable for this use.
        if self._conn:
            try:
                self._conn.close()
            except Exception:
                pass
            self._conn = None

        logger.info("File watcher stopped")

    @property
    def is_running(self) -> bool:
        """Check if watcher is running."""
        return self._thread is not None and self._thread.is_alive()

    def _get_conn(self) -> sqlite3.Connection:
        """Get or create a persistent connection for this thread."""
        if self._conn is None:
            self._conn = create_connection(self.db_path)
        return self._conn

    def _watch_loop(self) -> None:
        """Main watch loop (runs in background thread)."""
        try:
            from watchfiles import Change, watch
        except ImportError:
            logger.error("watchfiles not installed, file watcher unavailable")
            return

        if not self._mail_dir:
            return

        logger.debug("Starting watch loop on %s", self._mail_dir)

        # Hoisted out of the loop: the containment root never changes
        # while we are watching.
        mail_root = self._mail_dir.resolve()

        for changes in watch(
            self._mail_dir,
            stop_event=self._stop_event,
            debounce=self.debounce_ms,
            recursive=True,
        ):
            if self._stop_event.is_set():
                break

            # Collect changes
            for change_type, path_str in changes:
                if not path_str.endswith(".emlx"):
                    continue

                # Security: validate path is within mail directory.
                # Fix: compare path components with is_relative_to();
                # the previous str.startswith() check also accepted
                # sibling directories sharing the prefix
                # (e.g. ".../Mail" vs ".../Mail backup").
                try:
                    path = Path(path_str).resolve()
                    if not path.is_relative_to(mail_root):
                        logger.warning(
                            "Ignoring path outside mail dir: %s", path
                        )
                        continue
                except (OSError, ValueError) as e:
                    logger.warning("Invalid path %s: %s", path_str, e)
                    continue

                parsed = self._parse_path(path)
                if not parsed:
                    continue

                account, mailbox, message_id = parsed
                key = (account, mailbox, message_id)

                with self._pending_lock:
                    # Prevent unbounded memory growth
                    total_pending = len(self._pending_adds) + len(
                        self._pending_deletes
                    )
                    if total_pending >= MAX_PENDING_CHANGES:
                        logger.warning(
                            "Pending limit (%d) reached, clearing",
                            MAX_PENDING_CHANGES,
                        )
                        # Drop ALL pending adds to relieve pressure
                        # (deletes are kept so removals are never lost);
                        # any missed adds are picked up by the next sync.
                        self._pending_adds.clear()

                    if change_type == Change.added:
                        self._pending_adds[key] = path
                        self._pending_deletes.discard(key)
                    elif change_type == Change.deleted:
                        self._pending_deletes.add(key)
                        self._pending_adds.pop(key, None)
                    elif change_type == Change.modified:
                        # Treat as add (re-index)
                        self._pending_adds[key] = path

            # Process after debounce period
            self._process_pending()

    def _parse_path(self, path: Path) -> tuple[str, str, int] | None:
        """
        Extract account, mailbox, and message ID from path.

        Returns:
            (account_name, mailbox_name, message_id) or None if invalid
        """
        match = PATH_PATTERN.search(str(path))
        if not match:
            return None

        account_uuid, mailbox_dir, message_id_str = match.groups()

        try:
            message_id = int(message_id_str)
        except ValueError:
            return None

        # Use UUID as account name (more reliable than trying to map)
        return account_uuid, mailbox_dir, message_id

    def _process_pending(self) -> None:
        """Process pending adds and deletes."""
        # Snapshot and clear under the lock so the watch loop can keep
        # accumulating while we touch the database.
        with self._pending_lock:
            adds = dict(self._pending_adds)
            deletes = set(self._pending_deletes)
            self._pending_adds.clear()
            self._pending_deletes.clear()

        if not adds and not deletes:
            return

        added_count = 0
        deleted_count = 0

        try:
            conn = self._get_conn()

            # Process deletes in batches to avoid SQLite variable limit
            if deletes:
                delete_list = list(deletes)
                for i in range(0, len(delete_list), DELETE_BATCH_SIZE):
                    batch = delete_list[i : i + DELETE_BATCH_SIZE]
                    # Use composite key for deletion
                    for account, mailbox, msg_id in batch:
                        sql = """DELETE FROM emails WHERE account = ?
                            AND mailbox = ? AND message_id = ?"""
                        conn.execute(sql, (account, mailbox, msg_id))
                    deleted_count += len(batch)

            # Process adds with retry for files still being written
            for key, path in adds.items():
                account, mailbox, message_id = key
                email = None

                # Retry logic for race condition with Mail.app writing
                for attempt in range(MAX_FILE_RETRIES):
                    try:
                        email = parse_emlx(path)
                        if email:
                            break
                    except OSError as e:
                        if attempt < MAX_FILE_RETRIES - 1:
                            logger.debug(
                                "Retry %d for %s: %s", attempt + 1, path, e
                            )
                            time.sleep(FILE_RETRY_DELAY_MS / 1000)
                        else:
                            logger.warning(
                                "Failed to parse %s after retries: %s", path, e
                            )
                    except Exception as e:
                        logger.warning("Error parsing %s: %s", path, e)
                        break

                if email:
                    try:
                        # Use centralized SQL; indexed_at uses DEFAULT
                        conn.execute(
                            INSERT_EMAIL_SQL,
                            (
                                message_id,
                                account,
                                mailbox,
                                email.get("subject", ""),
                                email.get("sender", ""),
                                email.get("content", ""),
                                email.get("date_received", ""),
                            ),
                        )
                        added_count += 1
                    except sqlite3.IntegrityError as e:
                        logger.debug("Duplicate email %s: %s", key, e)
                    except sqlite3.Error as e:
                        logger.error("Database error for %s: %s", key, e)

            conn.commit()

        except sqlite3.Error as e:
            logger.error("Database error in watcher: %s", e)

        # Notify callback
        if self.on_update and (added_count or deleted_count):
            try:
                self.on_update(added_count, deleted_count)
            except Exception as e:
                logger.warning("Error in on_update callback: %s", e)

        if added_count or deleted_count:
            logger.debug(
                "Processed: +%d -%d emails", added_count, deleted_count
            )
325
+
326
+
327
def create_watcher(
    db_path: Path,
    on_update: Callable[[int, int], None] | None = None,
) -> IndexWatcher:
    """
    Build a new, not-yet-started IndexWatcher.

    Args:
        db_path: Path to the index database
        on_update: Optional callback(added, removed) after changes

    Returns:
        Configured IndexWatcher (call .start() to begin watching)
    """
    watcher = IndexWatcher(db_path, on_update=on_update)
    return watcher