jxa-mail-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jxa-mail-mcp might be problematic. Click here for more details.

@@ -0,0 +1,277 @@
1
+ """SQLite schema for FTS5 email search index.
2
+
3
+ The schema uses:
4
+ - emails: Base table storing email content and metadata
5
+ - emails_fts: FTS5 virtual table for full-text search with external content
6
+ - sync_state: Tracks sync progress per mailbox
7
+
8
+ IMPORTANT: Message IDs from .emlx filenames are only unique within a mailbox,
9
+ NOT globally. We use (account, mailbox, message_id) as the unique constraint.
10
+ """
11
+
12
+ import logging
13
+ import os
14
+ import sqlite3
15
+ from pathlib import Path
16
+
17
logger = logging.getLogger(__name__)

# Current schema version for migrations
SCHEMA_VERSION = 2  # Bumped for composite key fix

# Default PRAGMAs for all connections (centralized to avoid drift)
DEFAULT_PRAGMAS = {
    "journal_mode": "WAL",  # Better concurrent read performance
    "synchronous": "NORMAL",  # Good balance of safety and speed
    "busy_timeout": 5000,  # Wait up to 5s for locks
    # INSERT OR REPLACE resolves a UNIQUE conflict by deleting the old row,
    # and SQLite only fires DELETE triggers for that implicit delete when
    # recursive triggers are enabled. Without this, the trigger that removes
    # the old row from the FTS index never runs and stale entries pile up.
    "recursive_triggers": "ON",
}

# Centralized SQL for email insertion (used by manager, sync, watcher)
# Uses INSERT OR REPLACE for idempotent upserts on composite key
INSERT_EMAIL_SQL = """INSERT OR REPLACE INTO emails
    (message_id, account, mailbox, subject, sender, content, date_received)
    VALUES (?, ?, ?, ?, ?, ?, ?)"""
34
+
35
+
36
def email_to_row(
    email: dict, account: str, mailbox: str
) -> tuple[int, str, str, str, str, str, str]:
    """
    Build the parameter tuple for INSERT_EMAIL_SQL from an email dict.

    Keeping the field extraction in one place means every writer
    (manager.py disk indexing, sync.py JXA sync, watcher.py file watching)
    produces rows in the same column order.

    Args:
        email: Email dict; "id" is required, while "subject", "sender",
            "content" and "date_received" fall back to "" when absent
        account: Account name/identifier
        mailbox: Mailbox name

    Returns:
        (id, account, mailbox, subject, sender, content, date_received)

    Raises:
        KeyError: If the email dict has no "id" key
    """
    # Look up the mandatory key first so a missing id fails immediately
    message_id = email["id"]
    optional_columns = ("subject", "sender", "content", "date_received")
    text_fields = tuple(email.get(column, "") for column in optional_columns)
    return (message_id, account, mailbox, *text_fields)
64
+
65
+
66
def create_connection(db_path: Path) -> sqlite3.Connection:
    """
    Create a database connection with standard configuration.

    This factory ensures consistent PRAGMA settings across all connection
    points (IndexManager, file watcher, etc.) to prevent configuration drift.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        Connection opened with check_same_thread=False, sqlite3.Row as the
        row factory, and every PRAGMA from DEFAULT_PRAGMAS applied

    Raises:
        sqlite3.Error: If the database cannot be opened or a PRAGMA fails.
            The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(db_path, check_same_thread=False)
    try:
        conn.row_factory = sqlite3.Row

        # Apply standard PRAGMAs
        for pragma, value in DEFAULT_PRAGMAS.items():
            conn.execute(f"PRAGMA {pragma}={value}")
    except Exception:
        # Don't leak a half-configured connection (e.g. the file is not a
        # database, or it is locked past the busy timeout)
        conn.close()
        raise

    return conn
87
+
88
+
89
def get_schema_sql() -> str:
    """
    Return the SQL script that creates the full index schema.

    The script defines the schema_version table, the emails table with its
    indexes, the emails_fts FTS5 table, the triggers that mirror changes
    from emails into emails_fts, and the sync_state table. Every statement
    uses IF NOT EXISTS.
    """
    schema_script = """
    -- Schema version tracking
    CREATE TABLE IF NOT EXISTS schema_version (
        version INTEGER PRIMARY KEY
    );

    -- Email content cache
    -- Note: rowid is auto-generated for FTS5 content_rowid compatibility
    -- message_id is the Mail.app ID (from .emlx filename), unique per mailbox only
    CREATE TABLE IF NOT EXISTS emails (
        rowid INTEGER PRIMARY KEY AUTOINCREMENT,
        message_id INTEGER NOT NULL, -- Mail.app ID (per-mailbox only)
        account TEXT NOT NULL,
        mailbox TEXT NOT NULL,
        subject TEXT,
        sender TEXT,
        content TEXT, -- Body text
        date_received TEXT,
        indexed_at TEXT DEFAULT (datetime('now')),
        UNIQUE(account, mailbox, message_id) -- Composite uniqueness
    );

    -- Indexes for efficient queries
    CREATE INDEX IF NOT EXISTS idx_emails_account_mailbox
        ON emails(account, mailbox);
    CREATE INDEX IF NOT EXISTS idx_emails_date
        ON emails(date_received DESC);
    CREATE INDEX IF NOT EXISTS idx_emails_message_id
        ON emails(message_id);

    -- FTS5 index (external content - shares storage with emails table)
    -- Uses porter stemmer for English + unicode61 for international text
    CREATE VIRTUAL TABLE IF NOT EXISTS emails_fts USING fts5(
        subject,
        sender,
        content,
        content='emails',
        content_rowid='rowid',
        tokenize='porter unicode61'
    );

    -- Triggers to keep FTS index in sync with emails table
    CREATE TRIGGER IF NOT EXISTS emails_ai AFTER INSERT ON emails BEGIN
        INSERT INTO emails_fts(rowid, subject, sender, content)
        VALUES (new.rowid, new.subject, new.sender, new.content);
    END;

    CREATE TRIGGER IF NOT EXISTS emails_ad AFTER DELETE ON emails BEGIN
        INSERT INTO emails_fts(emails_fts, rowid, subject, sender, content)
        VALUES('delete', old.rowid, old.subject, old.sender, old.content);
    END;

    CREATE TRIGGER IF NOT EXISTS emails_au AFTER UPDATE ON emails BEGIN
        INSERT INTO emails_fts(emails_fts, rowid, subject, sender, content)
        VALUES('delete', old.rowid, old.subject, old.sender, old.content);
        INSERT INTO emails_fts(rowid, subject, sender, content)
        VALUES (new.rowid, new.subject, new.sender, new.content);
    END;

    -- Sync state tracking per mailbox
    CREATE TABLE IF NOT EXISTS sync_state (
        account TEXT NOT NULL,
        mailbox TEXT NOT NULL,
        last_sync TEXT,
        message_count INTEGER DEFAULT 0,
        PRIMARY KEY(account, mailbox)
    );
    """
    return schema_script
159
+
160
+
161
def init_database(db_path: Path) -> sqlite3.Connection:
    """
    Initialize the database with schema, creating parent directories if needed.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        Open database connection as configured by create_connection()

    Raises:
        sqlite3.Error: If opening the database or creating/migrating the
            schema fails. The connection is closed before the error
            propagates.

    Security:
        A new database file is created with mode 0600 (owner read/write
        only) *before* SQLite opens it, so sensitive email content is never
        briefly readable by other users on shared systems. A newly created
        parent directory gets mode 0700.
    """
    # Ensure parent directory exists. The mode only applies to the leaf
    # directory when it is newly created; existing directories and any
    # missing intermediate parents keep their default permissions.
    db_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)

    # Atomically create the file with restrictive permissions. O_EXCL makes
    # this double as the "is this a new database?" check without a race
    # between exists() and connect(). SQLite treats an empty file as a
    # fresh database.
    try:
        fd = os.open(db_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        is_new_db = False
    except OSError as e:
        # Best effort: fall back to letting SQLite create the file and
        # tightening permissions afterwards
        is_new_db = not db_path.exists()
        logger.warning(
            "Could not pre-create %s with secure permissions: %s", db_path, e
        )
    else:
        os.close(fd)
        is_new_db = True

    # Create connection with standard configuration
    conn = create_connection(db_path)

    try:
        # Re-assert 0600 on new databases in case the file was created by
        # SQLite (fallback path above) rather than by os.open()
        if is_new_db:
            try:
                os.chmod(db_path, 0o600)
                logger.debug("Set secure permissions (0600) on %s", db_path)
            except OSError as e:
                logger.warning(
                    "Could not set secure permissions on %s: %s", db_path, e
                )

        # Check current schema version
        sql = "SELECT name FROM sqlite_master "
        sql += "WHERE type='table' AND name='schema_version'"
        cursor = conn.execute(sql)
        if cursor.fetchone() is None:
            # Fresh database - create schema
            logger.info(
                "Creating fresh database schema (version %d)", SCHEMA_VERSION
            )
            conn.executescript(get_schema_sql())
            conn.execute(
                "INSERT INTO schema_version (version) VALUES (?)",
                (SCHEMA_VERSION,),
            )
            conn.commit()
        else:
            # Check for migrations
            cursor = conn.execute("SELECT version FROM schema_version LIMIT 1")
            row = cursor.fetchone()
            # A missing row is treated as version 0 (older than any schema)
            current_version = row[0] if row else 0

            if current_version < SCHEMA_VERSION:
                logger.info(
                    "Migrating database from version %d to %d",
                    current_version,
                    SCHEMA_VERSION,
                )
                _run_migrations(conn, current_version, SCHEMA_VERSION)
            elif current_version > SCHEMA_VERSION:
                # Written by a newer release; leave the data untouched
                logger.warning(
                    "Database schema version %d is newer than supported "
                    "version %d",
                    current_version,
                    SCHEMA_VERSION,
                )
    except Exception:
        # Don't leak the connection when schema setup fails
        conn.close()
        raise

    return conn
224
+
225
+
226
def _run_migrations(
    conn: sqlite3.Connection, from_version: int, to_version: int
) -> None:
    """
    Run schema migrations and record the new schema version.

    Args:
        conn: Database connection
        from_version: Current schema version (0 when no version is recorded)
        to_version: Target schema version
    """
    if from_version < 2:
        # Migration from v1 to v2: Change from id-as-primary-key to composite
        # This requires rebuilding the table since SQLite doesn't support
        # changing primary keys
        logger.warning(
            "Schema migration v1→v2 requires rebuild. "
            "Run 'jxa-mail-mcp rebuild' to re-index."
        )

        # Drop old tables and recreate
        conn.executescript("""
            DROP TABLE IF EXISTS emails_fts;
            DROP TABLE IF EXISTS emails;
            DROP TABLE IF EXISTS sync_state;
        """)

        # Recreate with new schema
        conn.executescript(get_schema_sql())

    # Replace the version row instead of UPDATE-ing it: an UPDATE touches
    # nothing when schema_version is empty, which would leave the version
    # unrecorded and make every later start-up drop the tables again.
    conn.execute("DELETE FROM schema_version")
    conn.execute(
        "INSERT INTO schema_version (version) VALUES (?)", (to_version,)
    )
    conn.commit()
258
+
259
+
260
def rebuild_fts_index(conn: sqlite3.Connection) -> None:
    """
    Regenerate the emails_fts index from its content table.

    Issues the FTS5 'rebuild' command and commits. Intended for use after
    bulk inserts that bypassed the sync triggers, or to repair a corrupted
    index.

    Args:
        conn: Database connection holding the emails_fts table
    """
    rebuild_command = "INSERT INTO emails_fts(emails_fts) VALUES('rebuild')"
    conn.execute(rebuild_command)
    conn.commit()
268
+
269
+
270
def optimize_fts_index(conn: sqlite3.Connection) -> None:
    """
    Run the FTS5 'optimize' command on emails_fts and commit.

    Worth calling from time to time after a large number of insertions to
    keep queries fast.

    Args:
        conn: Database connection holding the emails_fts table
    """
    optimize_command = "INSERT INTO emails_fts(emails_fts) VALUES('optimize')"
    conn.execute(optimize_command)
    conn.commit()
@@ -0,0 +1,331 @@
1
+ """FTS5 full-text search for indexed emails.
2
+
3
+ Provides:
4
+ - search_fts(): Search indexed emails with BM25 ranking
5
+ - sanitize_fts_query(): Escape special FTS5 syntax characters
6
+
7
+ FTS5 query syntax supported:
8
+ - Simple terms: "meeting notes"
9
+ - Phrases: '"exact phrase"'
10
+ - Boolean: "meeting OR notes"
11
+ - Prefix: "meet*"
12
+ - Column filter: "subject:urgent"
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ import sqlite3
19
+ from dataclasses import dataclass
20
+
21
# Single characters that carry special meaning in FTS5 query syntax:
#   "  '  -  *  (  )  :  ^
# The capture group lets callers refer to the matched character as \1.
FTS5_SPECIAL_CHARS = re.compile(r'(["\'\-\*\(\)\:\^])')
23
+
24
+
25
def add_account_mailbox_filter(
    sql: str,
    params: list,
    account: str | None,
    mailbox: str | None,
    table_alias: str = "e",
) -> str:
    """
    Append optional account/mailbox conditions to a SQL query.

    For each filter that is truthy, an " AND <alias>.<column> = ?" clause
    is appended to the SQL and the value is appended to params (account
    first, then mailbox). The base query must therefore already contain a
    WHERE clause.

    Args:
        sql: Base SQL query string
        params: List of query parameters (modified in-place)
        account: Optional account filter; skipped when None or empty
        mailbox: Optional mailbox filter; skipped when None or empty
        table_alias: Table alias prefix (default: "e")

    Returns:
        The SQL string with any filter clauses appended

    Example:
        >>> params = []
        >>> add_account_mailbox_filter(
        ...     "SELECT * FROM emails e WHERE 1=1", params, "Work", "INBOX"
        ... )
        'SELECT * FROM emails e WHERE 1=1 AND e.account = ? AND e.mailbox = ?'
        >>> params
        ['Work', 'INBOX']
    """
    filters = (("account", account), ("mailbox", mailbox))
    for column, value in filters:
        if not value:
            continue
        sql += f" AND {table_alias}.{column} = ?"
        params.append(value)
    return sql
64
+
65
+
66
@dataclass
class SearchResult:
    """One email returned by a search, together with its relevance score."""

    # Populated from emails.message_id by the search functions
    id: int
    account: str
    mailbox: str
    subject: str
    sender: str
    # Short excerpt of the body for display
    content_snippet: str
    date_received: str
    # Relevance score; the search functions order results by it, descending
    score: float
78
+
79
+
80
# Splits a query into balanced "double-quoted phrases" and bare tokens
_FTS5_QUERY_TOKEN = re.compile(r'"[^"]*"|\S+')

# Characters FTS5 accepts in an unquoted bareword: ASCII letters and digits,
# underscore, the substitute character (0x1A) and anything non-ASCII
_FTS5_BAREWORD = re.compile(r"[A-Za-z0-9_\x1a\u0080-\U0010ffff]+")


def sanitize_fts_query(query: str) -> str:
    """
    Sanitize a query string for safe FTS5 use.

    FTS5 has no backslash escape: the only way to pass special characters
    (" ' - * ( ) : ^ and so on) literally is to put the text inside a
    double-quoted string, doubling any embedded double quote. This function
    therefore:

    - keeps plain barewords as they are, which also preserves the boolean
      operators OR, AND and NOT,
    - keeps balanced "double-quoted phrases" as phrase searches,
    - wraps every other token in double quotes so its special characters
      are treated as text rather than query syntax.

    Whitespace between tokens is normalized to single spaces.

    Args:
        query: Raw user query

    Returns:
        Sanitized query safe for FTS5, or "" when nothing searchable remains
    """
    if not query:
        return ""

    parts = []
    for token in _FTS5_QUERY_TOKEN.findall(query):
        is_phrase = (
            len(token) >= 2
            and token[0] == '"'
            and token[-1] == '"'
            and '"' not in token[1:-1]
        )
        if is_phrase:
            # Already a well-formed FTS5 string; drop it if it is blank
            if token[1:-1].strip():
                parts.append(token)
        elif _FTS5_BAREWORD.fullmatch(token):
            parts.append(token)
        else:
            # Quote SQL-style: embedded double quotes are doubled
            parts.append('"' + token.replace('"', '""') + '"')

    return " ".join(parts)
106
+
107
+
108
def _extract_snippet(content: str, max_length: int = 150) -> str:
    """
    Produce a short single-line excerpt of content for display.

    Runs of whitespace are collapsed to single spaces. Text longer than
    max_length is cut at max_length characters, trimmed back to the last
    space inside that window (if there is one), and suffixed with "...".

    Args:
        content: Text to excerpt; None or empty yields ""
        max_length: Maximum length of the excerpt before the ellipsis

    Returns:
        The collapsed text, truncated with "..." when it was too long
    """
    if not content:
        return ""

    collapsed = " ".join(content.split())
    if len(collapsed) > max_length:
        # Drop the partial word at the end of the window
        head, *_ = collapsed[:max_length].rsplit(" ", 1)
        return head + "..."

    return collapsed
121
+
122
+
123
def search_fts(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
    limit: int = 20,
    *,
    _is_retry: bool = False,
) -> list[SearchResult]:
    """
    Run a BM25-ranked FTS5 search over the indexed emails.

    The query is passed through sanitize_fts_query() first. If FTS5 still
    reports a syntax error, the search is retried once with the raw query
    wrapped in double quotes as a single phrase.

    Args:
        conn: Database connection
        query: Search query (supports FTS5 syntax)
        account: Optional account filter
        mailbox: Optional mailbox filter
        limit: Maximum results (default: 20)
        _is_retry: Internal flag set on the phrase retry; the query is then
            used verbatim to avoid double-escaping

    Returns:
        List of SearchResult, best match first; empty for a blank query

    Raises:
        sqlite3.OperationalError: For any database error other than the
            first FTS5 syntax error
    """
    if not (query and query.strip()):
        return []

    match_expr = query if _is_retry else sanitize_fts_query(query)
    if not match_expr:
        return []

    # bm25() yields negative numbers (more negative = better match), so it
    # is negated to give intuitive positive scores.
    # emails_fts.rowid maps to emails.rowid via content_rowid='rowid'.
    base_sql = """
        SELECT
            e.message_id,
            e.account,
            e.mailbox,
            e.subject,
            e.sender,
            e.content,
            e.date_received,
            -bm25(emails_fts, 1.0, 0.5, 2.0) as score
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    params: list = [match_expr]
    sql = add_account_mailbox_filter(base_sql, params, account, mailbox)
    sql += " ORDER BY score DESC LIMIT ?"
    params.append(limit)

    try:
        return [
            SearchResult(
                id=message_id,
                account=row_account,
                mailbox=row_mailbox,
                subject=subject or "",
                sender=sender or "",
                content_snippet=_extract_snippet(content),
                date_received=received or "",
                score=round(score, 3),
            )
            for (
                message_id,
                row_account,
                row_mailbox,
                subject,
                sender,
                content,
                received,
                score,
            ) in conn.execute(sql, params)
        ]
    except sqlite3.OperationalError as e:
        can_retry = not _is_retry and "fts5: syntax error" in str(e).lower()
        if not can_retry:
            raise
        # FTS5 syntax error - fall back to treating the whole raw query as
        # one quoted phrase
        phrase_query = '"' + query.replace('"', '""') + '"'
        return search_fts(
            conn,
            phrase_query,
            account=account,
            mailbox=mailbox,
            limit=limit,
            _is_retry=True,
        )
212
+
213
+
214
def search_fts_highlight(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
    limit: int = 20,
) -> list[SearchResult]:
    """
    Search like search_fts(), but with matched terms marked in the output.

    The subject comes from FTS5 highlight() and the content snippet from
    FTS5 snippet(), both wrapping matches in ** markers. If the query
    raises sqlite3.OperationalError, the plain search_fts() is used instead.

    Args:
        conn: Database connection
        query: Search query
        account: Optional account filter
        mailbox: Optional mailbox filter
        limit: Maximum results

    Returns:
        List of SearchResult with highlighted subject and content_snippet
    """
    if not (query and query.strip()):
        return []

    match_expr = sanitize_fts_query(query)
    if not match_expr:
        return []

    # highlight()/snippet() mark matches with ** on both sides
    base_sql = """
        SELECT
            e.message_id,
            e.account,
            e.mailbox,
            highlight(emails_fts, 0, '**', '**') as subject_hl,
            e.sender,
            snippet(emails_fts, 2, '**', '**', '...', 32) as content_snippet,
            e.date_received,
            -bm25(emails_fts, 1.0, 0.5, 2.0) as score
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    params: list = [match_expr]
    sql = add_account_mailbox_filter(base_sql, params, account, mailbox)
    sql += " ORDER BY score DESC LIMIT ?"
    params.append(limit)

    try:
        return [
            SearchResult(
                id=message_id,
                account=row_account,
                mailbox=row_mailbox,
                subject=subject_hl or "",
                sender=sender or "",
                content_snippet=snippet_hl or "",
                date_received=received or "",
                score=round(score, 3),
            )
            for (
                message_id,
                row_account,
                row_mailbox,
                subject_hl,
                sender,
                snippet_hl,
                received,
                score,
            ) in conn.execute(sql, params)
        ]
    except sqlite3.OperationalError:
        # Fall back to basic search
        return search_fts(conn, query, account, mailbox, limit)
288
+
289
+
290
def count_matches(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
) -> int:
    """
    Count total matches for a query without returning results.

    Useful for pagination or showing "X results found".

    To stay consistent with search_fts(), a query that FTS5 rejects is
    retried once as a single quoted phrase, so the count agrees with the
    results that search_fts() would return for the same input.

    Args:
        conn: Database connection
        query: Search query
        account: Optional account filter
        mailbox: Optional mailbox filter

    Returns:
        Total number of matching emails; 0 for a blank query or when the
        count cannot be computed (best effort, never raises
        sqlite3.OperationalError)
    """
    if not query or not query.strip():
        return 0

    safe_query = sanitize_fts_query(query)
    if not safe_query:
        return 0

    base_sql = """
        SELECT COUNT(*)
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    # Same fallback as search_fts(): the raw query as one quoted phrase
    phrase_query = '"' + query.replace('"', '""') + '"'

    for candidate in (safe_query, phrase_query):
        params: list = [candidate]
        sql = add_account_mailbox_filter(base_sql, params, account, mailbox)
        try:
            return conn.execute(sql, params).fetchone()[0]
        except sqlite3.OperationalError:
            # Try the next candidate; give up with 0 when none succeeds
            continue

    return 0