jxa-mail-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jxa-mail-mcp might be problematic. Click here for more details.

@@ -0,0 +1,458 @@
1
+ """IndexManager - Central interface for the FTS5 search index.
2
+
3
+ Provides:
4
+ - build_from_disk(): Pre-index emails by reading .emlx files directly
5
+ - sync_updates(): Incremental sync via JXA for new emails
6
+ - search(): Fast FTS5 search with BM25 ranking
7
+ - get_stats(): Index statistics for status reporting
8
+
9
+ Thread Safety:
10
+ - Uses threading.Lock for connection management
11
+ - Database connections use check_same_thread=False
12
+ - File watcher runs in separate thread with its own connection
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import sqlite3
19
+ import threading
20
+ from dataclasses import dataclass
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING
24
+
25
+ from ..config import (
26
+ get_index_max_emails,
27
+ get_index_path,
28
+ get_index_staleness_hours,
29
+ )
30
+ from .schema import (
31
+ INSERT_EMAIL_SQL,
32
+ init_database,
33
+ optimize_fts_index,
34
+ rebuild_fts_index,
35
+ )
36
+ from .search import SearchResult # Re-use, don't duplicate
37
+
38
+ if TYPE_CHECKING:
39
+ from collections.abc import Callable
40
+
41
+ from .watcher import IndexWatcher
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
@dataclass
class IndexStats:
    """
    Snapshot of the search index, as reported by IndexManager.get_stats().

    Attributes:
        email_count: Number of rows in the emails table.
        mailbox_count: Number of distinct account/mailbox pairs indexed.
        last_sync: Most recent sync timestamp, or None if never synced.
        db_size_mb: Size of the database file in mebibytes.
        staleness_hours: Hours elapsed since last_sync, or None if
            there is no recorded sync.
    """

    email_count: int
    mailbox_count: int
    last_sync: datetime | None
    db_size_mb: float
    staleness_hours: float | None
55
+
56
+
57
+ # SearchResult is imported from .search to avoid duplication
58
+
59
+
60
class IndexManager:
    """
    Manages the FTS5 search index for email body search.

    The index is stored at ~/.jxa-mail-mcp/index.db by default.
    Use environment variables to customize:
    - JXA_MAIL_INDEX_PATH: Database location
    - JXA_MAIL_INDEX_MAX_EMAILS: Max emails per mailbox (5000)
    - JXA_MAIL_INDEX_STALENESS_HOURS: Hours before stale (24)

    Thread Safety:
    - get_instance() uses class-level lock
    - _get_conn() and close() use instance-level lock
    - Watcher runs in separate thread with its own connection
    """

    _instance: IndexManager | None = None
    _instance_lock = threading.Lock()

    # Triggers that mirror changes on the emails table into emails_fts
    # (keyed by rowid, not message_id). They are dropped for bulk inserts
    # and re-created from this script afterwards.
    _FTS_TRIGGERS_SQL = """
        CREATE TRIGGER IF NOT EXISTS emails_ai
        AFTER INSERT ON emails BEGIN
            INSERT INTO emails_fts(rowid, subject, sender, content)
            VALUES (new.rowid, new.subject, new.sender, new.content);
        END;

        CREATE TRIGGER IF NOT EXISTS emails_ad
        AFTER DELETE ON emails BEGIN
            INSERT INTO emails_fts(
                emails_fts, rowid, subject, sender, content
            ) VALUES(
                'delete', old.rowid, old.subject,
                old.sender, old.content
            );
        END;

        CREATE TRIGGER IF NOT EXISTS emails_au
        AFTER UPDATE ON emails BEGIN
            INSERT INTO emails_fts(
                emails_fts, rowid, subject, sender, content
            ) VALUES(
                'delete', old.rowid, old.subject,
                old.sender, old.content
            );
            INSERT INTO emails_fts(rowid, subject, sender, content)
            VALUES (new.rowid, new.subject, new.sender, new.content);
        END;
    """

    def __init__(self, db_path: Path | None = None):
        """
        Initialize the IndexManager.

        The database connection is opened lazily on first use.

        Args:
            db_path: Custom database path (uses config default if None)
        """
        self._db_path = db_path or get_index_path()
        self._conn: sqlite3.Connection | None = None
        self._conn_lock = threading.Lock()
        self._watcher: IndexWatcher | None = None
        self._watcher_callback: Callable | None = None

    @classmethod
    def get_instance(cls) -> IndexManager:
        """Get the singleton IndexManager instance (thread-safe)."""
        with cls._instance_lock:
            if cls._instance is None:
                cls._instance = cls()
            return cls._instance

    @property
    def db_path(self) -> Path:
        """Get the database file path."""
        return self._db_path

    def _get_conn(self) -> sqlite3.Connection:
        """Get or create the database connection (thread-safe)."""
        with self._conn_lock:
            if self._conn is None:
                self._conn = init_database(self._db_path)
            return self._conn

    def close(self) -> None:
        """
        Close the database connection, if one is open.

        Safe to call repeatedly. Takes the same lock as _get_conn() so a
        concurrent caller cannot be handed a connection that is in the
        middle of being closed.
        """
        with self._conn_lock:
            if self._conn is not None:
                self._conn.close()
                self._conn = None

    def has_index(self) -> bool:
        """Check if an index database exists."""
        return self._db_path.exists()

    def get_stats(self) -> IndexStats:
        """
        Get index statistics.

        Returns:
            IndexStats with counts, size, and staleness info
        """
        conn = self._get_conn()

        email_count = conn.execute(
            "SELECT COUNT(*) FROM emails"
        ).fetchone()[0]

        mailbox_count = conn.execute(
            "SELECT COUNT(DISTINCT account || '/' || mailbox) FROM emails"
        ).fetchone()[0]

        # Most recent sync across all mailboxes; NULL when never synced.
        row = conn.execute(
            "SELECT MAX(last_sync) FROM sync_state"
        ).fetchone()
        last_sync = None
        staleness_hours = None
        if row and row[0]:
            last_sync = datetime.fromisoformat(row[0])
            # Compare like with like: now(None) is naive local time,
            # now(tz) is aware. Mixing the two raises TypeError.
            now = datetime.now(last_sync.tzinfo)
            staleness_hours = (now - last_sync).total_seconds() / 3600

        db_size_mb = 0.0
        if self._db_path.exists():
            db_size_mb = self._db_path.stat().st_size / (1024 * 1024)

        return IndexStats(
            email_count=email_count,
            mailbox_count=mailbox_count,
            last_sync=last_sync,
            db_size_mb=db_size_mb,
            staleness_hours=staleness_hours,
        )

    def is_stale(self) -> bool:
        """
        Check if the index needs a sync.

        Returns:
            True when no sync has been recorded, or when the last sync is
            older than the configured staleness threshold.
        """
        hours = self.get_stats().staleness_hours
        if hours is None:
            return True
        return hours > get_index_staleness_hours()

    @staticmethod
    def _scope_filter(
        account: str | None, mailbox: str | None
    ) -> tuple[str, tuple[str, ...]]:
        """
        Build a parameterized WHERE clause for an account/mailbox scope.

        Falsy values mean "no filter" for that column.

        Returns:
            (where_sql, params): where_sql is "" or starts with " WHERE ".
        """
        clauses: list[str] = []
        params: list[str] = []
        if account:
            clauses.append("account = ?")
            params.append(account)
        if mailbox:
            clauses.append("mailbox = ?")
            params.append(mailbox)
        where = f" WHERE {' AND '.join(clauses)}" if clauses else ""
        return where, tuple(params)

    def _index_from_disk(
        self,
        progress_callback: (
            Callable[[int, int | None, str], None] | None
        ) = None,
        account: str | None = None,
        mailbox: str | None = None,
    ) -> int:
        """
        Re-index emails from .emlx files, optionally limited to a scope.

        Existing rows (and sync_state entries) inside the scope are
        deleted first; rows outside the scope are left untouched.

        Args:
            progress_callback: Optional callback(current, total, message)
            account: Only re-index this account (all if None/empty)
            mailbox: Only re-index this mailbox (all if None/empty)

        Returns:
            Number of emails indexed
        """
        from .disk import find_mail_directory, scan_all_emails

        # Verify we can access the mail directory before touching the DB
        mail_dir = find_mail_directory()

        conn = self._get_conn()
        max_per_mailbox = get_index_max_emails()
        where, params = self._scope_filter(account, mailbox)

        # Track counts per mailbox to enforce limits
        mailbox_counts: dict[tuple[str, str], int] = {}
        total_indexed = 0
        batch: list[tuple] = []
        batch_size = 500

        try:
            # Clear existing data for the rebuild scope
            conn.execute(f"DELETE FROM emails{where}", params)
            conn.execute(f"DELETE FROM sync_state{where}", params)

            # Disable triggers during bulk insert for performance
            for trigger in ("emails_ai", "emails_ad", "emails_au"):
                conn.execute(f"DROP TRIGGER IF EXISTS {trigger}")

            for email in scan_all_emails(mail_dir):
                if account and email["account"] != account:
                    continue
                if mailbox and email["mailbox"] != mailbox:
                    continue

                key = (email["account"], email["mailbox"])
                count = mailbox_counts.get(key, 0)
                if count >= max_per_mailbox:
                    continue
                mailbox_counts[key] = count + 1

                batch.append(
                    (
                        email["id"],  # message_id from .emlx filename
                        email["account"],
                        email["mailbox"],
                        email.get("subject", ""),
                        email.get("sender", ""),
                        email.get("content", ""),
                        email.get("date_received", ""),
                    )
                )

                if len(batch) >= batch_size:
                    conn.executemany(INSERT_EMAIL_SQL, batch)
                    conn.commit()
                    total_indexed += len(batch)
                    batch = []

                    if progress_callback:
                        msg = f"Indexed {total_indexed} emails..."
                        progress_callback(total_indexed, None, msg)

            # Insert remaining batch
            if batch:
                conn.executemany(INSERT_EMAIL_SQL, batch)
                total_indexed += len(batch)

            # Update sync state
            now = datetime.now().isoformat()
            for (acct, mbox), count in mailbox_counts.items():
                conn.execute(
                    """INSERT OR REPLACE INTO sync_state
                    (account, mailbox, last_sync, message_count)
                    VALUES (?, ?, ?, ?)""",
                    (acct, mbox, now, count),
                )

            conn.commit()

            # Rebuild FTS index
            if progress_callback:
                msg = "Building search index..."
                progress_callback(total_indexed, total_indexed, msg)

            rebuild_fts_index(conn)
            optimize_fts_index(conn)

        except Exception:
            # executescript() below issues an implicit COMMIT; discard
            # any uncommitted work first so a failed run is not
            # half-persisted (e.g. a cleared index when the scan fails
            # before the first batch).
            conn.rollback()
            raise

        finally:
            # Re-enable triggers (use rowid, not message_id)
            conn.executescript(self._FTS_TRIGGERS_SQL)

        return total_indexed

    def build_from_disk(
        self,
        progress_callback: (
            Callable[[int, int | None, str], None] | None
        ) = None,
    ) -> int:
        """
        Build the index by reading .emlx files directly from disk.

        This requires Full Disk Access permission for the terminal.
        Much faster than fetching via JXA (~30x faster).

        All existing index rows and sync state are replaced.

        Args:
            progress_callback: Optional callback(current, total, message)

        Returns:
            Number of emails indexed

        Raises:
            PermissionError: If Full Disk Access is not granted
            FileNotFoundError: If Mail directory not found
        """
        return self._index_from_disk(progress_callback)

    def sync_updates(
        self,
        progress_callback: (
            Callable[[int, int | None, str], None] | None
        ) = None,
    ) -> int:
        """
        Sync new emails via JXA.

        This is much faster than build_from_disk() but only fetches
        emails not already in the index. Use at server startup.

        Args:
            progress_callback: Optional callback(current, total, message)

        Returns:
            Number of new emails synced
        """
        from .sync import sync_incremental

        return sync_incremental(self._get_conn(), progress_callback)

    def search(
        self,
        query: str,
        account: str | None = None,
        mailbox: str | None = None,
        limit: int = 20,
    ) -> list[SearchResult]:
        """
        Search indexed emails using FTS5.

        Args:
            query: Search query (supports FTS5 syntax)
            account: Optional account filter
            mailbox: Optional mailbox filter
            limit: Maximum results (default: 20)

        Returns:
            List of SearchResult ordered by relevance (BM25 score)
        """
        from .search import search_fts

        return search_fts(
            self._get_conn(),
            query,
            account=account,
            mailbox=mailbox,
            limit=limit,
        )

    def rebuild(
        self,
        account: str | None = None,
        mailbox: str | None = None,
        progress_callback: (
            Callable[[int, int | None, str], None] | None
        ) = None,
    ) -> int:
        """
        Force rebuild of the index.

        Only rows inside the requested scope are deleted and re-indexed
        from disk; with no scope this is a full rebuild.

        Args:
            account: Optional account to rebuild (all if None)
            mailbox: Optional mailbox to rebuild (all in account if None)
            progress_callback: Optional progress callback

        Returns:
            Number of emails re-indexed
        """
        return self._index_from_disk(
            progress_callback, account=account, mailbox=mailbox
        )

    def get_indexed_message_ids(
        self, account: str | None = None, mailbox: str | None = None
    ) -> set[int]:
        """
        Get all message IDs currently in the index.

        Note: Message IDs are only unique within (account, mailbox).

        Args:
            account: Optional account filter
            mailbox: Optional mailbox filter (may be used without account)

        Returns:
            Set of message IDs
        """
        where, params = self._scope_filter(account, mailbox)
        cursor = self._get_conn().execute(
            f"SELECT message_id FROM emails{where}", params
        )
        return {row[0] for row in cursor}

    # ─────────────────────────────────────────────────────────────────
    # File Watcher Methods
    # ─────────────────────────────────────────────────────────────────

    def start_watcher(
        self,
        on_update: Callable[[int, int], None] | None = None,
    ) -> bool:
        """
        Start the file watcher for real-time index updates.

        Watches ~/Library/Mail/V10/ for .emlx changes and automatically
        updates the index when emails are added or deleted.

        Args:
            on_update: Optional callback(added_count, removed_count)
                called after each batch of changes

        Returns:
            True if watcher started, False if already running or failed
        """
        if self.watcher_running:
            return False

        from .watcher import IndexWatcher

        self._watcher_callback = on_update
        # The watcher is given the path, not our connection, so it can
        # open its own connection.
        self._watcher = IndexWatcher(
            db_path=self._db_path,
            on_update=on_update,
        )

        return self._watcher.start()

    def stop_watcher(self) -> None:
        """Stop the file watcher if running."""
        if self._watcher is not None:
            self._watcher.stop()
            self._watcher = None

    @property
    def watcher_running(self) -> bool:
        """Check if the file watcher is running."""
        return self._watcher is not None and self._watcher.is_running