jxa-mail-mcp 0.2.0-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jxa-mail-mcp might be problematic.
- jxa_mail_mcp/__init__.py +14 -6
- jxa_mail_mcp/cli.py +358 -0
- jxa_mail_mcp/config.py +52 -0
- jxa_mail_mcp/executor.py +178 -4
- jxa_mail_mcp/index/__init__.py +14 -0
- jxa_mail_mcp/index/disk.py +485 -0
- jxa_mail_mcp/index/manager.py +458 -0
- jxa_mail_mcp/index/schema.py +277 -0
- jxa_mail_mcp/index/search.py +331 -0
- jxa_mail_mcp/index/sync.py +305 -0
- jxa_mail_mcp/index/watcher.py +341 -0
- jxa_mail_mcp/server.py +450 -201
- jxa_mail_mcp-0.3.0.dist-info/METADATA +355 -0
- jxa_mail_mcp-0.3.0.dist-info/RECORD +20 -0
- jxa_mail_mcp-0.2.0.dist-info/METADATA +0 -264
- jxa_mail_mcp-0.2.0.dist-info/RECORD +0 -12
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/WHEEL +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/entry_points.txt +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
jxa_mail_mcp/index/manager.py
@@ -0,0 +1,458 @@
+"""IndexManager - Central interface for the FTS5 search index.
+
+Provides:
+- build_from_disk(): Pre-index emails by reading .emlx files directly
+- sync_updates(): Incremental sync via JXA for new emails
+- search(): Fast FTS5 search with BM25 ranking
+- get_stats(): Index statistics for status reporting
+
+Thread Safety:
+- Uses threading.Lock for connection management
+- Database connections use check_same_thread=False
+- File watcher runs in separate thread with its own connection
+"""
+
+from __future__ import annotations
+
+import logging
+import sqlite3
+import threading
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from ..config import (
+    get_index_max_emails,
+    get_index_path,
+    get_index_staleness_hours,
+)
+from .schema import (
+    INSERT_EMAIL_SQL,
+    init_database,
+    optimize_fts_index,
+    rebuild_fts_index,
+)
+from .search import SearchResult  # Re-use, don't duplicate
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from .watcher import IndexWatcher
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class IndexStats:
+    """Statistics about the search index."""
+
+    email_count: int
+    mailbox_count: int
+    last_sync: datetime | None
+    db_size_mb: float
+    staleness_hours: float | None
+
+
+# SearchResult is imported from .search to avoid duplication
+
+
+class IndexManager:
+    """
+    Manages the FTS5 search index for email body search.
+
+    The index is stored at ~/.jxa-mail-mcp/index.db by default.
+    Use environment variables to customize:
+    - JXA_MAIL_INDEX_PATH: Database location
+    - JXA_MAIL_INDEX_MAX_EMAILS: Max emails per mailbox (5000)
+    - JXA_MAIL_INDEX_STALENESS_HOURS: Hours before stale (24)
+
+    Thread Safety:
+    - get_instance() uses class-level lock
+    - _get_conn() uses instance-level lock
+    - Watcher runs in separate thread with its own connection
+    """
+
+    _instance: IndexManager | None = None
+    _instance_lock = threading.Lock()
+
+    def __init__(self, db_path: Path | None = None):
+        """
+        Initialize the IndexManager.
+
+        Args:
+            db_path: Custom database path (uses config default if None)
+        """
+        self._db_path = db_path or get_index_path()
+        self._conn: sqlite3.Connection | None = None
+        self._conn_lock = threading.Lock()
+        self._watcher: IndexWatcher | None = None
+        self._watcher_callback: Callable | None = None
+
+    @classmethod
+    def get_instance(cls) -> IndexManager:
+        """Get the singleton IndexManager instance (thread-safe)."""
+        with cls._instance_lock:
+            if cls._instance is None:
+                cls._instance = IndexManager()
+            return cls._instance
+
+    @property
+    def db_path(self) -> Path:
+        """Get the database file path."""
+        return self._db_path
+
+    def _get_conn(self) -> sqlite3.Connection:
+        """Get or create the database connection (thread-safe)."""
+        with self._conn_lock:
+            if self._conn is None:
+                self._conn = init_database(self._db_path)
+            return self._conn
+
+    def close(self) -> None:
+        """Close the database connection."""
+        if self._conn is not None:
+            self._conn.close()
+            self._conn = None
+
+    def has_index(self) -> bool:
+        """Check if an index database exists."""
+        return self._db_path.exists()
+
+    def get_stats(self) -> IndexStats:
+        """
+        Get index statistics.
+
+        Returns:
+            IndexStats with counts, size, and staleness info
+        """
+        conn = self._get_conn()
+
+        # Email count
+        cursor = conn.execute("SELECT COUNT(*) FROM emails")
+        email_count = cursor.fetchone()[0]
+
+        # Mailbox count
+        cursor = conn.execute(
+            "SELECT COUNT(DISTINCT account || '/' || mailbox) FROM emails"
+        )
+        mailbox_count = cursor.fetchone()[0]
+
+        # Last sync time
+        cursor = conn.execute("SELECT MAX(last_sync) FROM sync_state")
+        row = cursor.fetchone()
+        last_sync = None
+        staleness_hours = None
+        if row and row[0]:
+            last_sync = datetime.fromisoformat(row[0])
+            delta = (datetime.now() - last_sync).total_seconds()
+            staleness_hours = delta / 3600
+
+        # Database file size
+        db_size_mb = 0.0
+        if self._db_path.exists():
+            db_size_mb = self._db_path.stat().st_size / (1024 * 1024)
+
+        return IndexStats(
+            email_count=email_count,
+            mailbox_count=mailbox_count,
+            last_sync=last_sync,
+            db_size_mb=db_size_mb,
+            staleness_hours=staleness_hours,
+        )
+
+    def is_stale(self) -> bool:
+        """Check if the index needs a sync."""
+        stats = self.get_stats()
+        if stats.staleness_hours is None:
+            return True
+        return stats.staleness_hours > get_index_staleness_hours()
+
+    def build_from_disk(self, progress_callback: callable | None = None) -> int:
+        """
+        Build the index by reading .emlx files directly from disk.
+
+        This requires Full Disk Access permission for the terminal.
+        Much faster than fetching via JXA (~30x faster).
+
+        Args:
+            progress_callback: Optional callback(current, total, message)
+
+        Returns:
+            Number of emails indexed
+
+        Raises:
+            PermissionError: If Full Disk Access is not granted
+            FileNotFoundError: If Mail directory not found
+        """
+        from .disk import find_mail_directory, scan_all_emails
+
+        # Verify we can access the mail directory
+        mail_dir = find_mail_directory()
+
+        conn = self._get_conn()
+        max_per_mailbox = get_index_max_emails()
+
+        # Track counts per mailbox to enforce limits
+        mailbox_counts: dict[tuple[str, str], int] = {}
+        total_indexed = 0
+
+        # Clear existing data for rebuild
+        conn.execute("DELETE FROM emails")
+        conn.execute("DELETE FROM sync_state")
+
+        # Disable triggers during bulk insert for performance
+        conn.execute("DROP TRIGGER IF EXISTS emails_ai")
+        conn.execute("DROP TRIGGER IF EXISTS emails_ad")
+        conn.execute("DROP TRIGGER IF EXISTS emails_au")
+
+        try:
+            batch: list[tuple] = []
+            batch_size = 500
+
+            for email in scan_all_emails(mail_dir):
+                key = (email["account"], email["mailbox"])
+                count = mailbox_counts.get(key, 0)
+
+                if count >= max_per_mailbox:
+                    continue
+
+                mailbox_counts[key] = count + 1
+
+                batch.append(
+                    (
+                        email["id"],  # message_id from .emlx filename
+                        email["account"],
+                        email["mailbox"],
+                        email.get("subject", ""),
+                        email.get("sender", ""),
+                        email.get("content", ""),
+                        email.get("date_received", ""),
+                    )
+                )
+
+                if len(batch) >= batch_size:
+                    conn.executemany(INSERT_EMAIL_SQL, batch)
+                    conn.commit()
+                    total_indexed += len(batch)
+
+                    if progress_callback:
+                        msg = f"Indexed {total_indexed} emails..."
+                        progress_callback(total_indexed, None, msg)
+
+                    batch = []
+
+            # Insert remaining batch
+            if batch:
+                conn.executemany(INSERT_EMAIL_SQL, batch)
+                total_indexed += len(batch)
+
+            # Update sync state
+            now = datetime.now().isoformat()
+            for (account, mailbox), count in mailbox_counts.items():
+                conn.execute(
+                    """INSERT OR REPLACE INTO sync_state
+                       (account, mailbox, last_sync, message_count)
+                       VALUES (?, ?, ?, ?)""",
+                    (account, mailbox, now, count),
+                )
+
+            conn.commit()
+
+            # Rebuild FTS index
+            if progress_callback:
+                msg = "Building search index..."
+                progress_callback(total_indexed, total_indexed, msg)
+
+            rebuild_fts_index(conn)
+            optimize_fts_index(conn)
+
+        finally:
+            # Re-enable triggers (use rowid, not message_id)
+            conn.executescript("""
+                CREATE TRIGGER IF NOT EXISTS emails_ai
+                AFTER INSERT ON emails BEGIN
+                    INSERT INTO emails_fts(rowid, subject, sender, content)
+                    VALUES (new.rowid, new.subject, new.sender, new.content);
+                END;
+
+                CREATE TRIGGER IF NOT EXISTS emails_ad
+                AFTER DELETE ON emails BEGIN
+                    INSERT INTO emails_fts(
+                        emails_fts, rowid, subject, sender, content
+                    ) VALUES(
+                        'delete', old.rowid, old.subject,
+                        old.sender, old.content
+                    );
+                END;
+
+                CREATE TRIGGER IF NOT EXISTS emails_au
+                AFTER UPDATE ON emails BEGIN
+                    INSERT INTO emails_fts(
+                        emails_fts, rowid, subject, sender, content
+                    ) VALUES(
+                        'delete', old.rowid, old.subject,
+                        old.sender, old.content
+                    );
+                    INSERT INTO emails_fts(rowid, subject, sender, content)
+                    VALUES (new.rowid, new.subject, new.sender, new.content);
+                END;
+            """)
+
+        return total_indexed
+
+    def sync_updates(self, progress_callback: callable | None = None) -> int:
+        """
+        Sync new emails via JXA.
+
+        This is much faster than build_from_disk() but only fetches
+        emails not already in the index. Use at server startup.
+
+        Args:
+            progress_callback: Optional callback(current, total, message)
+
+        Returns:
+            Number of new emails synced
+        """
+        from .sync import sync_incremental
+
+        return sync_incremental(self._get_conn(), progress_callback)
+
+    def search(
+        self,
+        query: str,
+        account: str | None = None,
+        mailbox: str | None = None,
+        limit: int = 20,
+    ) -> list[SearchResult]:
+        """
+        Search indexed emails using FTS5.
+
+        Args:
+            query: Search query (supports FTS5 syntax)
+            account: Optional account filter
+            mailbox: Optional mailbox filter
+            limit: Maximum results (default: 20)
+
+        Returns:
+            List of SearchResult ordered by relevance (BM25 score)
+        """
+        from .search import search_fts
+
+        return search_fts(
+            self._get_conn(),
+            query,
+            account=account,
+            mailbox=mailbox,
+            limit=limit,
+        )
+
+    def rebuild(
+        self,
+        account: str | None = None,
+        mailbox: str | None = None,
+        progress_callback: callable | None = None,
+    ) -> int:
+        """
+        Force rebuild of the index.
+
+        Args:
+            account: Optional account to rebuild (all if None)
+            mailbox: Optional mailbox to rebuild (all in account if None)
+            progress_callback: Optional progress callback
+
+        Returns:
+            Number of emails re-indexed
+        """
+        conn = self._get_conn()
+
+        # Delete existing entries for rebuild scope
+        if account and mailbox:
+            conn.execute(
+                "DELETE FROM emails WHERE account = ? AND mailbox = ?",
+                (account, mailbox),
+            )
+        elif account:
+            conn.execute("DELETE FROM emails WHERE account = ?", (account,))
+        else:
+            conn.execute("DELETE FROM emails")
+
+        conn.commit()
+
+        # Rebuild from disk
+        return self.build_from_disk(progress_callback)
+
+    def get_indexed_message_ids(
+        self, account: str | None = None, mailbox: str | None = None
+    ) -> set[int]:
+        """
+        Get all message IDs currently in the index.
+
+        Note: Message IDs are only unique within (account, mailbox).
+
+        Args:
+            account: Optional account filter
+            mailbox: Optional mailbox filter
+
+        Returns:
+            Set of message IDs
+        """
+        conn = self._get_conn()
+
+        if account and mailbox:
+            sql = """SELECT message_id FROM emails
+                     WHERE account = ? AND mailbox = ?"""
+            cursor = conn.execute(sql, (account, mailbox))
+        elif account:
+            cursor = conn.execute(
+                "SELECT message_id FROM emails WHERE account = ?", (account,)
+            )
+        else:
+            cursor = conn.execute("SELECT message_id FROM emails")
+
+        return {row[0] for row in cursor}
+
+    # ─────────────────────────────────────────────────────────────────
+    # File Watcher Methods
+    # ─────────────────────────────────────────────────────────────────
+
+    def start_watcher(
+        self,
+        on_update: Callable[[int, int], None] | None = None,
+    ) -> bool:
+        """
+        Start the file watcher for real-time index updates.
+
+        Watches ~/Library/Mail/V10/ for .emlx changes and automatically
+        updates the index when emails are added or deleted.
+
+        Args:
+            on_update: Optional callback(added_count, removed_count)
+                called after each batch of changes
+
+        Returns:
+            True if watcher started, False if already running or failed
+        """
+        if self._watcher is not None and self._watcher.is_running:
+            return False
+
+        from .watcher import IndexWatcher
+
+        self._watcher_callback = on_update
+        self._watcher = IndexWatcher(
+            db_path=self._db_path,
+            on_update=on_update,
+        )
+
+        return self._watcher.start()
+
+    def stop_watcher(self) -> None:
+        """Stop the file watcher if running."""
+        if self._watcher is not None:
+            self._watcher.stop()
+            self._watcher = None
+
+    @property
+    def watcher_running(self) -> bool:
+        """Check if the file watcher is running."""
+        return self._watcher is not None and self._watcher.is_running