jxa-mail-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
Potentially problematic release: this version of jxa-mail-mcp has been flagged as potentially problematic.
- jxa_mail_mcp/__init__.py +14 -6
- jxa_mail_mcp/cli.py +358 -0
- jxa_mail_mcp/config.py +52 -0
- jxa_mail_mcp/executor.py +178 -4
- jxa_mail_mcp/index/__init__.py +14 -0
- jxa_mail_mcp/index/disk.py +485 -0
- jxa_mail_mcp/index/manager.py +458 -0
- jxa_mail_mcp/index/schema.py +277 -0
- jxa_mail_mcp/index/search.py +331 -0
- jxa_mail_mcp/index/sync.py +305 -0
- jxa_mail_mcp/index/watcher.py +341 -0
- jxa_mail_mcp/server.py +450 -201
- jxa_mail_mcp-0.3.0.dist-info/METADATA +355 -0
- jxa_mail_mcp-0.3.0.dist-info/RECORD +20 -0
- jxa_mail_mcp-0.2.0.dist-info/METADATA +0 -264
- jxa_mail_mcp-0.2.0.dist-info/RECORD +0 -12
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/WHEEL +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/entry_points.txt +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
jxa_mail_mcp/index/sync.py
@@ -0,0 +1,305 @@
+"""Incremental sync via JXA for new emails.
+
+This module syncs new emails that arrived since the last index build.
+Uses JXA (slower) because it works without Full Disk Access.
+
+The sync process:
+1. Get set of already-indexed message IDs
+2. Fetch message IDs from Mail.app via JXA
+3. Find IDs that aren't in the index yet
+4. Fetch content for new emails via JXA
+5. Insert into index
+
+SECURITY NOTE: All strings passed to JXA are serialized via json.dumps()
+to prevent injection attacks.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import sqlite3
+from datetime import datetime
+from typing import TYPE_CHECKING
+
+from ..config import get_index_max_emails
+from ..executor import build_mailbox_setup_js
+from .schema import INSERT_EMAIL_SQL, email_to_row
+from .search import add_account_mailbox_filter
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+logger = logging.getLogger(__name__)
+
+
+def get_indexed_message_ids(
+    conn: sqlite3.Connection,
+    account: str | None = None,
+    mailbox: str | None = None,
+) -> set[int]:
+    """
+    Get all message IDs currently in the index.
+
+    Args:
+        conn: Database connection
+        account: Optional account filter
+        mailbox: Optional mailbox filter
+
+    Returns:
+        Set of message IDs (note: only unique within account/mailbox)
+    """
+    # Use "WHERE 1=1" pattern to allow consistent filter appending
+    sql = "SELECT message_id FROM emails e WHERE 1=1"
+    params: list = []
+    sql = add_account_mailbox_filter(sql, params, account, mailbox)
+
+    cursor = conn.execute(sql, params)
+    return {row[0] for row in cursor}
+
+
+def fetch_mailbox_ids_jxa(account: str | None, mailbox: str) -> list[int]:
+    """
+    Fetch all message IDs from a mailbox via JXA.
+
+    Args:
+        account: Account name (None for first account)
+        mailbox: Mailbox name
+
+    Returns:
+        List of message IDs
+    """
+    from ..executor import execute_with_core
+
+    # Use shared helper for safe serialization
+    mailbox_setup = build_mailbox_setup_js(account, mailbox)
+
+    script = f"""
+    {mailbox_setup}
+    if (!mailbox) {{
+        JSON.stringify([]);
+    }} else {{
+        const ids = mailbox.messages.id();
+        JSON.stringify(ids || []);
+    }}
+    """
+
+    try:
+        result = execute_with_core(script)
+        return result if isinstance(result, list) else []
+    except Exception as e:
+        logger.warning("Failed to fetch IDs for %s/%s: %s", account, mailbox, e)
+        return []
+
+
+def fetch_emails_by_ids_jxa(
+    account: str | None, mailbox: str, message_ids: list[int]
+) -> list[dict]:
+    """
+    Fetch email content for specific IDs via JXA.
+
+    Args:
+        account: Account name
+        mailbox: Mailbox name
+        message_ids: List of message IDs to fetch
+
+    Returns:
+        List of email dicts with id, subject, sender, content, date_received
+    """
+    if not message_ids:
+        return []
+
+    from ..executor import execute_with_core
+
+    # Use shared helper for safe serialization
+    mailbox_setup = build_mailbox_setup_js(account, mailbox)
+    ids_json = json.dumps(message_ids)
+
+    script = f"""
+    {mailbox_setup}
+    if (!mailbox) {{
+        JSON.stringify([]);
+    }} else {{
+        const targetIds = new Set({ids_json});
+
+        // Batch fetch IDs to find indices
+        const allIds = mailbox.messages.id() || [];
+        const indices = [];
+        for (let i = 0; i < allIds.length; i++) {{
+            if (targetIds.has(allIds[i])) {{
+                indices.push(i);
+            }}
+        }}
+
+        // Fetch properties for matching messages only
+        const results = [];
+        for (const idx of indices) {{
+            try {{
+                const msg = mailbox.messages[idx];
+                results.push({{
+                    id: msg.id(),
+                    subject: msg.subject() || '',
+                    sender: msg.sender() || '',
+                    content: msg.content() || '',
+                    date_received: MailCore.formatDate(msg.dateReceived())
+                }});
+            }} catch (e) {{
+                // Skip messages that can't be read
+            }}
+        }}
+
+        JSON.stringify(results);
+    }}
+    """
+
+    try:
+        result = execute_with_core(script)
+        return result if isinstance(result, list) else []
+    except Exception as e:
+        logger.warning(
+            "Failed to fetch emails for %s/%s: %s", account, mailbox, e
+        )
+        return []
+
+
+def get_all_mailboxes_jxa() -> list[tuple[str, str]]:
+    """
+    Get all account/mailbox pairs via JXA.
+
+    Returns:
+        List of (account_name, mailbox_name) tuples
+    """
+    from ..executor import execute_with_core
+
+    script = """
+    const results = [];
+    const accounts = Mail.accounts();
+    const accountNames = Mail.accounts.name();
+
+    for (let i = 0; i < accounts.length; i++) {
+        const account = accounts[i];
+        const accountName = accountNames[i];
+        const mailboxNames = account.mailboxes.name() || [];
+
+        for (const mbName of mailboxNames) {
+            results.push([accountName, mbName]);
+        }
+    }
+
+    JSON.stringify(results);
+    """
+
+    try:
+        result = execute_with_core(script)
+        return result if isinstance(result, list) else []
+    except Exception as e:
+        logger.warning("Failed to get mailboxes: %s", e)
+        return []
+
+
+def sync_incremental(
+    conn: sqlite3.Connection,
+    progress_callback: Callable[[int, int | None, str], None] | None = None,
+) -> int:
+    """
+    Sync new emails via JXA.
+
+    Compares indexed IDs with Mail.app and fetches only new emails.
+    Much faster than rebuild for startup sync.
+
+    Args:
+        conn: Database connection
+        progress_callback: Optional callback(current, total, message)
+
+    Returns:
+        Number of new emails synced
+    """
+    max_per_mailbox = get_index_max_emails()
+    total_synced = 0
+
+    # Get all mailboxes
+    if progress_callback:
+        progress_callback(0, None, "Discovering mailboxes...")
+
+    mailboxes = get_all_mailboxes_jxa()
+    if not mailboxes:
+        logger.info("No mailboxes found to sync")
+        return 0
+
+    logger.info("Syncing %d mailboxes", len(mailboxes))
+
+    for i, (account, mailbox) in enumerate(mailboxes):
+        if progress_callback:
+            progress_callback(
+                i, len(mailboxes), f"Syncing {account}/{mailbox}..."
+            )
+
+        # Get already indexed IDs for this mailbox
+        indexed_ids = get_indexed_message_ids(conn, account, mailbox)
+
+        # Check if we're at the limit
+        if len(indexed_ids) >= max_per_mailbox:
+            logger.debug("Mailbox %s/%s at limit, skipping", account, mailbox)
+            continue
+
+        # Get current IDs from Mail.app
+        current_ids = fetch_mailbox_ids_jxa(account, mailbox)
+        if not current_ids:
+            continue
+
+        # Find new IDs
+        new_ids = [mid for mid in current_ids if mid not in indexed_ids]
+
+        if not new_ids:
+            continue
+
+        # Limit to stay under max
+        remaining_capacity = max_per_mailbox - len(indexed_ids)
+        new_ids = new_ids[:remaining_capacity]
+
+        logger.debug(
+            "Fetching %d new emails from %s/%s", len(new_ids), account, mailbox
+        )
+
+        # Fetch content for new emails (in batches)
+        batch_size = 50
+        for batch_start in range(0, len(new_ids), batch_size):
+            batch_ids = new_ids[batch_start : batch_start + batch_size]
+            emails = fetch_emails_by_ids_jxa(account, mailbox, batch_ids)
+
+            if not emails:
+                continue
+
+            # Insert into database using centralized SQL and tuple converter
+            for email in emails:
+                try:
+                    row = email_to_row(email, account, mailbox)
+                    conn.execute(INSERT_EMAIL_SQL, row)
+                except sqlite3.IntegrityError:
+                    logger.debug(
+                        "Duplicate email ID %s in %s/%s",
+                        email["id"],
+                        account,
+                        mailbox,
+                    )
+                except sqlite3.Error as e:
+                    logger.error("Database error: %s", e)
+
+            total_synced += len(emails)
+
+        # Update sync state
+        now = datetime.now().isoformat()
+        conn.execute(
+            """INSERT OR REPLACE INTO sync_state
+            (account, mailbox, last_sync, message_count)
+            VALUES (?, ?, ?, ?)""",
+            (account, mailbox, now, len(indexed_ids) + len(new_ids)),
+        )

+    conn.commit()
+
+    if progress_callback:
+        progress_callback(len(mailboxes), len(mailboxes), "Sync complete")
+
+    logger.info("Synced %d new emails", total_synced)
+    return total_synced
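For orientation, the new sync entry point can be driven directly. A minimal sketch, not part of the package: the index path here is a hypothetical placeholder, and create_connection is assumed to be the schema helper that watcher.py (next hunk) imports from jxa_mail_mcp.index.schema:

from pathlib import Path

from jxa_mail_mcp.index.schema import create_connection
from jxa_mail_mcp.index.sync import sync_incremental

def report(current: int, total: int | None, message: str) -> None:
    # total is None while mailboxes are still being discovered
    print(f"[{current}/{total if total is not None else '?'}] {message}")

# Hypothetical index location; the package's real default may differ.
conn = create_connection(Path("~/.jxa-mail-mcp/index.db").expanduser())
synced = sync_incremental(conn, progress_callback=report)
print(f"Synced {synced} new emails")
conn.close()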
jxa_mail_mcp/index/watcher.py
@@ -0,0 +1,341 @@
+"""File watcher for real-time index updates.
+
+Watches ~/Library/Mail/V10/ for .emlx file changes and updates the index.
+
+Uses watchfiles (Rust-based, efficient) to monitor:
+- New emails → parse and add to index
+- Deleted emails → remove from index
+
+The watcher runs in a background thread and batches updates to avoid
+overwhelming the database with rapid changes.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import sqlite3
+import threading
+import time
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from .disk import find_mail_directory, parse_emlx
+from .schema import INSERT_EMAIL_SQL, create_connection
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+logger = logging.getLogger(__name__)
+
+# Regex to extract account/mailbox from path
+# ~/Library/Mail/V10/[AccountUUID]/[Mailbox].mbox/.../*.emlx
+PATH_PATTERN = re.compile(r"/V\d+/([^/]+)/([^/]+)\.mbox/.*?/(\d+)\.emlx$")
+
+# Constants for safety limits
+MAX_PENDING_CHANGES = 10000  # Prevent unbounded memory growth
+DELETE_BATCH_SIZE = 500  # SQLite variable limit safety
+FILE_RETRY_DELAY_MS = 200  # Wait for Mail.app to finish writing
+MAX_FILE_RETRIES = 3
+
+
+class IndexWatcher:
+    """
+    Watches Mail directory for changes and updates the index.
+
+    Usage:
+        watcher = IndexWatcher(db_path, on_update=callback)
+        watcher.start()
+        # ... later ...
+        watcher.stop()
+    """
+
+    def __init__(
+        self,
+        db_path: Path,
+        on_update: Callable[[int, int], None] | None = None,
+        debounce_ms: int = 500,
+    ):
+        """
+        Initialize the watcher.
+
+        Args:
+            db_path: Path to the index database
+            on_update: Optional callback(added, removed) after processing
+            debounce_ms: Milliseconds to wait before processing changes
+        """
+        self.db_path = db_path
+        self.on_update = on_update
+        self.debounce_ms = debounce_ms
+
+        self._mail_dir: Path | None = None
+        self._stop_event = threading.Event()
+        self._thread: threading.Thread | None = None
+
+        # Pending changes (debounced)
+        self._pending_adds: dict[
+            tuple[str, str, int], Path
+        ] = {}  # (acc, mb, id) -> path
+        self._pending_deletes: set[tuple[str, str, int]] = (
+            set()
+        )  # (acc, mb, id)
+        self._pending_lock = threading.Lock()
+
+        # Persistent connection for the watcher thread
+        self._conn: sqlite3.Connection | None = None
+
+    def start(self) -> bool:
+        """
+        Start watching for changes.
+
+        Returns:
+            True if started successfully, False if mail dir not found
+        """
+        try:
+            self._mail_dir = find_mail_directory()
+        except FileNotFoundError:
+            logger.warning("Mail directory not found, watcher not started")
+            return False
+
+        self._stop_event.clear()
+        self._thread = threading.Thread(
+            target=self._watch_loop,
+            name="IndexWatcher",
+            daemon=True,
+        )
+        self._thread.start()
+        logger.info("File watcher started for %s", self._mail_dir)
+        return True
+
+    def stop(self, timeout: float = 5.0) -> None:
+        """Stop watching and wait for thread to finish."""
+        self._stop_event.set()
+        if self._thread and self._thread.is_alive():
+            self._thread.join(timeout=timeout)
+        self._thread = None
+
+        # Close persistent connection
+        if self._conn:
+            try:
+                self._conn.close()
+            except Exception:
+                pass
+            self._conn = None
+
+        logger.info("File watcher stopped")
+
+    @property
+    def is_running(self) -> bool:
+        """Check if watcher is running."""
+        return self._thread is not None and self._thread.is_alive()
+
+    def _get_conn(self) -> sqlite3.Connection:
+        """Get or create a persistent connection for this thread."""
+        if self._conn is None:
+            self._conn = create_connection(self.db_path)
+        return self._conn
+
+    def _watch_loop(self) -> None:
+        """Main watch loop (runs in background thread)."""
+        try:
+            from watchfiles import Change, watch
+        except ImportError:
+            logger.error("watchfiles not installed, file watcher unavailable")
+            return
+
+        if not self._mail_dir:
+            return
+
+        logger.debug("Starting watch loop on %s", self._mail_dir)
+
+        for changes in watch(
+            self._mail_dir,
+            stop_event=self._stop_event,
+            debounce=self.debounce_ms,
+            recursive=True,
+        ):
+            if self._stop_event.is_set():
+                break
+
+            # Collect changes
+            for change_type, path_str in changes:
+                if not path_str.endswith(".emlx"):
+                    continue
+
+                # Security: Validate path is within mail directory
+                try:
+                    path = Path(path_str).resolve()
+                    if not str(path).startswith(str(self._mail_dir.resolve())):
+                        logger.warning(
+                            "Ignoring path outside mail dir: %s", path
+                        )
+                        continue
+                except (OSError, ValueError) as e:
+                    logger.warning("Invalid path %s: %s", path_str, e)
+                    continue
+
+                parsed = self._parse_path(path)
+                if not parsed:
+                    continue
+
+                account, mailbox, message_id = parsed
+                key = (account, mailbox, message_id)
+
+                with self._pending_lock:
+                    # Prevent unbounded memory growth
+                    total_pending = len(self._pending_adds) + len(
+                        self._pending_deletes
+                    )
+                    if total_pending >= MAX_PENDING_CHANGES:
+                        logger.warning(
+                            "Pending limit (%d) reached, clearing",
+                            MAX_PENDING_CHANGES,
+                        )
+                        # Clear half to make room
+                        self._pending_adds.clear()
+
+                    if change_type == Change.added:
+                        self._pending_adds[key] = path
+                        self._pending_deletes.discard(key)
+                    elif change_type == Change.deleted:
+                        self._pending_deletes.add(key)
+                        self._pending_adds.pop(key, None)
+                    elif change_type == Change.modified:
+                        # Treat as add (re-index)
+                        self._pending_adds[key] = path
+
+            # Process after debounce period
+            self._process_pending()
+
+    def _parse_path(self, path: Path) -> tuple[str, str, int] | None:
+        """
+        Extract account, mailbox, and message ID from path.
+
+        Returns:
+            (account_name, mailbox_name, message_id) or None if invalid
+        """
+        match = PATH_PATTERN.search(str(path))
+        if not match:
+            return None
+
+        account_uuid, mailbox_dir, message_id_str = match.groups()
+
+        try:
+            message_id = int(message_id_str)
+        except ValueError:
+            return None
+
+        # Use UUID as account name (more reliable than trying to map)
+        account_name = account_uuid
+        mailbox_name = mailbox_dir
+
+        return account_name, mailbox_name, message_id
+
+    def _process_pending(self) -> None:
+        """Process pending adds and deletes."""
+        with self._pending_lock:
+            adds = dict(self._pending_adds)
+            deletes = set(self._pending_deletes)
+            self._pending_adds.clear()
+            self._pending_deletes.clear()
+
+        if not adds and not deletes:
+            return
+
+        added_count = 0
+        deleted_count = 0
+
+        try:
+            conn = self._get_conn()
+
+            # Process deletes in batches to avoid SQLite variable limit
+            if deletes:
+                delete_list = list(deletes)
+                for i in range(0, len(delete_list), DELETE_BATCH_SIZE):
+                    batch = delete_list[i : i + DELETE_BATCH_SIZE]
+                    # Use composite key for deletion
+                    for account, mailbox, msg_id in batch:
+                        sql = """DELETE FROM emails WHERE account = ?
+                        AND mailbox = ? AND message_id = ?"""
+                        conn.execute(sql, (account, mailbox, msg_id))
+                    deleted_count += len(batch)
+
+            # Process adds with retry for files still being written
+            for key, path in adds.items():
+                account, mailbox, message_id = key
+                email = None
+
+                # Retry logic for race condition with Mail.app writing
+                for attempt in range(MAX_FILE_RETRIES):
+                    try:
+                        email = parse_emlx(path)
+                        if email:
+                            break
+                    except OSError as e:
+                        if attempt < MAX_FILE_RETRIES - 1:
+                            logger.debug(
+                                "Retry %d for %s: %s", attempt + 1, path, e
+                            )
+                            time.sleep(FILE_RETRY_DELAY_MS / 1000)
+                        else:
+                            logger.warning(
+                                "Failed to parse %s after retries: %s", path, e
+                            )
+                    except Exception as e:
+                        logger.warning("Error parsing %s: %s", path, e)
+                        break
+
+                if email:
+                    try:
+                        # Use centralized SQL; indexed_at uses DEFAULT
+                        conn.execute(
+                            INSERT_EMAIL_SQL,
+                            (
+                                message_id,
+                                account,
+                                mailbox,
+                                email.get("subject", ""),
+                                email.get("sender", ""),
+                                email.get("content", ""),
+                                email.get("date_received", ""),
+                            ),
+                        )
+                        added_count += 1
+                    except sqlite3.IntegrityError as e:
+                        logger.debug("Duplicate email %s: %s", key, e)
+                    except sqlite3.Error as e:
+                        logger.error("Database error for %s: %s", key, e)
+
+            conn.commit()
+
+        except sqlite3.Error as e:
+            logger.error("Database error in watcher: %s", e)
+
+        # Notify callback
+        if self.on_update and (added_count or deleted_count):
+            try:
+                self.on_update(added_count, deleted_count)
+            except Exception as e:
+                logger.warning("Error in on_update callback: %s", e)
+
+        if added_count or deleted_count:
+            logger.debug(
+                "Processed: +%d -%d emails", added_count, deleted_count
+            )
+
+
+def create_watcher(
+    db_path: Path,
+    on_update: Callable[[int, int], None] | None = None,
+) -> IndexWatcher:
+    """
+    Create and return a new IndexWatcher.
+
+    Args:
+        db_path: Path to the index database
+        on_update: Optional callback(added, removed) after changes
+
+    Returns:
+        Configured IndexWatcher (call .start() to begin watching)
+    """
+    return IndexWatcher(db_path, on_update=on_update)
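A minimal usage sketch for the watcher above, not part of the package, following the Usage snippet in the IndexWatcher docstring; the index path is again a hypothetical placeholder:

from pathlib import Path

from jxa_mail_mcp.index.watcher import create_watcher

def on_update(added: int, removed: int) -> None:
    # Invoked from the watcher thread after each debounced batch
    print(f"Index updated: +{added} -{removed}")

# Hypothetical index location; the package's real default may differ.
watcher = create_watcher(Path("~/.jxa-mail-mcp/index.db").expanduser(), on_update=on_update)
if watcher.start():
    try:
        input("Watching for .emlx changes; press Enter to stop\n")
    finally:
        watcher.stop()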