jxa-mail-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jxa-mail-mcp might be problematic. Click here for more details.
- jxa_mail_mcp/__init__.py +14 -6
- jxa_mail_mcp/cli.py +358 -0
- jxa_mail_mcp/config.py +52 -0
- jxa_mail_mcp/executor.py +178 -4
- jxa_mail_mcp/index/__init__.py +14 -0
- jxa_mail_mcp/index/disk.py +485 -0
- jxa_mail_mcp/index/manager.py +458 -0
- jxa_mail_mcp/index/schema.py +277 -0
- jxa_mail_mcp/index/search.py +331 -0
- jxa_mail_mcp/index/sync.py +305 -0
- jxa_mail_mcp/index/watcher.py +341 -0
- jxa_mail_mcp/server.py +450 -201
- jxa_mail_mcp-0.3.0.dist-info/METADATA +355 -0
- jxa_mail_mcp-0.3.0.dist-info/RECORD +20 -0
- jxa_mail_mcp-0.2.0.dist-info/METADATA +0 -264
- jxa_mail_mcp-0.2.0.dist-info/RECORD +0 -12
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/WHEEL +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/entry_points.txt +0 -0
- {jxa_mail_mcp-0.2.0.dist-info → jxa_mail_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""SQLite schema for FTS5 email search index.
|
|
2
|
+
|
|
3
|
+
The schema uses:
|
|
4
|
+
- emails: Base table storing email content and metadata
|
|
5
|
+
- emails_fts: FTS5 virtual table for full-text search with external content
|
|
6
|
+
- sync_state: Tracks sync progress per mailbox
|
|
7
|
+
|
|
8
|
+
IMPORTANT: Message IDs from .emlx filenames are only unique within a mailbox,
|
|
9
|
+
NOT globally. We use (account, mailbox, message_id) as the unique constraint.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import sqlite3
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Current schema version for migrations
SCHEMA_VERSION = 2  # Bumped for composite key fix

# Default PRAGMAs for all connections (centralized to avoid drift)
DEFAULT_PRAGMAS = {
    "journal_mode": "WAL",  # Better concurrent read performance
    "synchronous": "NORMAL",  # Good balance of safety and speed
    "busy_timeout": 5000,  # Wait up to 5s for locks
    # INSERT OR REPLACE resolves a UNIQUE conflict by deleting the old row,
    # and SQLite only fires DELETE triggers for that implicit delete when
    # recursive triggers are enabled. Without this the emails_ad trigger is
    # skipped and the external-content FTS index keeps entries for rows that
    # no longer exist in the emails table.
    "recursive_triggers": "ON",
}

# Centralized SQL for email insertion (used by manager, sync, watcher)
# Uses INSERT OR REPLACE for idempotent upserts on the composite key
# (account, mailbox, message_id). A replaced row gets a new rowid.
INSERT_EMAIL_SQL = """INSERT OR REPLACE INTO emails
    (message_id, account, mailbox, subject, sender, content, date_received)
    VALUES (?, ?, ?, ?, ?, ?, ?)"""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def email_to_row(
    email: dict, account: str, mailbox: str
) -> tuple[int, str, str, str, str, str, str]:
    """
    Build the parameter tuple for INSERT_EMAIL_SQL from an email dict.

    Keeping the field extraction in one place means every writer
    (manager.py, sync.py, watcher.py) produces rows in the same shape.

    Args:
        email: Email dict; must contain "id", may contain "subject",
            "sender", "content" and "date_received"
        account: Account name/identifier
        mailbox: Mailbox name

    Returns:
        (message_id, account, mailbox, subject, sender, content,
        date_received), matching the INSERT_EMAIL_SQL placeholder order

    Raises:
        KeyError: If the email dict has no "id" key
    """
    # The id is mandatory; a missing key must fail loudly.
    message_id = email["id"]

    # Optional text fields fall back to the empty string when absent.
    subject, sender, content, date_received = (
        email.get(field, "")
        for field in ("subject", "sender", "content", "date_received")
    )

    return (
        message_id,
        account,
        mailbox,
        subject,
        sender,
        content,
        date_received,
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def create_connection(db_path: Path) -> sqlite3.Connection:
    """
    Open a SQLite connection configured with the project defaults.

    All connection points should go through this factory so that every
    connection gets the same PRAGMA settings from DEFAULT_PRAGMAS.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        Connection opened with check_same_thread=False, using sqlite3.Row
        as row factory and with every DEFAULT_PRAGMAS entry applied
    """
    connection = sqlite3.connect(db_path, check_same_thread=False)
    connection.row_factory = sqlite3.Row

    # PRAGMA names and values come from the module constant, not from
    # user input, so interpolating them into the statement is safe.
    for name in DEFAULT_PRAGMAS:
        setting = DEFAULT_PRAGMAS[name]
        connection.execute(f"PRAGMA {name}={setting}")

    return connection
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_schema_sql() -> str:
    """
    Return the SQL script that creates the full index schema.

    The script is idempotent (every statement uses IF NOT EXISTS) and
    defines the schema_version table, the emails table with its indexes,
    the emails_fts FTS5 table with its sync triggers, and sync_state.
    """
    schema_sql = """
    -- Schema version tracking
    CREATE TABLE IF NOT EXISTS schema_version (
        version INTEGER PRIMARY KEY
    );

    -- Email content cache
    -- Note: rowid is auto-generated for FTS5 content_rowid compatibility
    -- message_id is the Mail.app ID (from .emlx filename), unique per mailbox only
    CREATE TABLE IF NOT EXISTS emails (
        rowid INTEGER PRIMARY KEY AUTOINCREMENT,
        message_id INTEGER NOT NULL,  -- Mail.app ID (per-mailbox only)
        account TEXT NOT NULL,
        mailbox TEXT NOT NULL,
        subject TEXT,
        sender TEXT,
        content TEXT,  -- Body text
        date_received TEXT,
        indexed_at TEXT DEFAULT (datetime('now')),
        UNIQUE(account, mailbox, message_id)  -- Composite uniqueness
    );

    -- Indexes for efficient queries
    CREATE INDEX IF NOT EXISTS idx_emails_account_mailbox
        ON emails(account, mailbox);
    CREATE INDEX IF NOT EXISTS idx_emails_date
        ON emails(date_received DESC);
    CREATE INDEX IF NOT EXISTS idx_emails_message_id
        ON emails(message_id);

    -- FTS5 index (external content - shares storage with emails table)
    -- Uses porter stemmer for English + unicode61 for international text
    CREATE VIRTUAL TABLE IF NOT EXISTS emails_fts USING fts5(
        subject,
        sender,
        content,
        content='emails',
        content_rowid='rowid',
        tokenize='porter unicode61'
    );

    -- Triggers to keep FTS index in sync with emails table
    CREATE TRIGGER IF NOT EXISTS emails_ai AFTER INSERT ON emails BEGIN
        INSERT INTO emails_fts(rowid, subject, sender, content)
        VALUES (new.rowid, new.subject, new.sender, new.content);
    END;

    CREATE TRIGGER IF NOT EXISTS emails_ad AFTER DELETE ON emails BEGIN
        INSERT INTO emails_fts(emails_fts, rowid, subject, sender, content)
        VALUES('delete', old.rowid, old.subject, old.sender, old.content);
    END;

    CREATE TRIGGER IF NOT EXISTS emails_au AFTER UPDATE ON emails BEGIN
        INSERT INTO emails_fts(emails_fts, rowid, subject, sender, content)
        VALUES('delete', old.rowid, old.subject, old.sender, old.content);
        INSERT INTO emails_fts(rowid, subject, sender, content)
        VALUES (new.rowid, new.subject, new.sender, new.content);
    END;

    -- Sync state tracking per mailbox
    CREATE TABLE IF NOT EXISTS sync_state (
        account TEXT NOT NULL,
        mailbox TEXT NOT NULL,
        last_sync TEXT,
        message_count INTEGER DEFAULT 0,
        PRIMARY KEY(account, mailbox)
    );
    """
    return schema_sql
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def init_database(db_path: Path) -> sqlite3.Connection:
    """
    Initialize the database with schema, creating parent directories if needed.

    A database without a schema_version table is treated as fresh and gets
    the full schema. Otherwise the stored version is compared against
    SCHEMA_VERSION and migrations are run when it is older.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        Open database connection as configured by create_connection()
        (check_same_thread=False, Row factory, default PRAGMAs)

    Raises:
        sqlite3.Error: If opening or initializing the database fails. The
            connection is closed before the error propagates.

    Security:
        A new database file is created with mode 0600 (owner read/write
        only) *before* SQLite opens it, so it never exists with looser
        permissions. A newly created parent directory gets mode 0700.
        Existing files and directories are left untouched.
    """
    # Only a directory created here gets the restrictive mode; mkdir does
    # not change the permissions of a directory that already exists.
    db_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)

    # Track if this is a new database for permission setting
    is_new_db = not db_path.exists()

    if is_new_db:
        # Pre-create the (empty) file owner-only. SQLite treats a zero-byte
        # file as a new database. Creating it after connect() would leave a
        # window with umask-default permissions, during which WAL mode is
        # already being enabled.
        try:
            db_path.touch(mode=0o600, exist_ok=True)
        except OSError as e:
            logger.warning(
                "Could not pre-create %s with secure permissions: %s",
                db_path,
                e,
            )

    # Create connection with standard configuration
    conn = create_connection(db_path)

    try:
        if is_new_db:
            # Belt and braces: enforce 0600 even if pre-creation failed or
            # the file was created by connect() instead.
            try:
                os.chmod(db_path, 0o600)
                logger.debug("Set secure permissions (0600) on %s", db_path)
            except OSError as e:
                logger.warning(
                    "Could not set secure permissions on %s: %s", db_path, e
                )

        # Check current schema version
        sql = "SELECT name FROM sqlite_master "
        sql += "WHERE type='table' AND name='schema_version'"
        cursor = conn.execute(sql)
        if cursor.fetchone() is None:
            # Fresh database - create schema
            logger.info(
                "Creating fresh database schema (version %d)", SCHEMA_VERSION
            )
            conn.executescript(get_schema_sql())
            conn.execute(
                "INSERT INTO schema_version (version) VALUES (?)",
                (SCHEMA_VERSION,),
            )
            conn.commit()
        else:
            # Check for migrations. An empty table counts as version 0.
            cursor = conn.execute("SELECT version FROM schema_version LIMIT 1")
            row = cursor.fetchone()
            current_version = row[0] if row else 0

            if current_version < SCHEMA_VERSION:
                logger.info(
                    "Migrating database from version %d to %d",
                    current_version,
                    SCHEMA_VERSION,
                )
                _run_migrations(conn, current_version, SCHEMA_VERSION)
    except Exception:
        # Do not leak the open connection when initialization fails.
        conn.close()
        raise

    return conn
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _run_migrations(
    conn: sqlite3.Connection, from_version: int, to_version: int
) -> None:
    """
    Run schema migrations and record the resulting schema version.

    Migrating from a version below 2 is destructive: the emails,
    emails_fts and sync_state tables are dropped and recreated empty, so
    the index has to be rebuilt afterwards.

    Args:
        conn: Database connection
        from_version: Current schema version (0 if none is recorded)
        to_version: Target schema version
    """
    if from_version < 2:
        # Migration from v1 to v2: Change from id-as-primary-key to composite
        # This requires rebuilding the table since SQLite doesn't support
        # changing primary keys
        logger.warning(
            "Schema migration v1→v2 requires rebuild. "
            "Run 'jxa-mail-mcp rebuild' to re-index."
        )

        # Drop old tables and recreate
        conn.executescript("""
            DROP TABLE IF EXISTS emails_fts;
            DROP TABLE IF EXISTS emails;
            DROP TABLE IF EXISTS sync_state;
        """)

        # Recreate with new schema
        conn.executescript(get_schema_sql())

    # Replace the version row instead of UPDATE-ing it. An UPDATE touches
    # nothing when schema_version is empty, so the version would never be
    # recorded and every start-up would run the destructive migration again.
    conn.execute("DELETE FROM schema_version")
    conn.execute(
        "INSERT INTO schema_version (version) VALUES (?)", (to_version,)
    )
    conn.commit()
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def rebuild_fts_index(conn: sqlite3.Connection) -> None:
    """
    Regenerate the emails_fts index from the emails table.

    Issues the FTS5 'rebuild' command and commits. Use it after bulk
    inserts that bypassed the triggers, or to repair a corrupted index.

    Args:
        conn: Database connection holding the emails_fts table
    """
    rebuild_command = "INSERT INTO emails_fts(emails_fts) VALUES('rebuild')"
    conn.execute(rebuild_command)
    conn.commit()
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def optimize_fts_index(conn: sqlite3.Connection) -> None:
    """
    Run the FTS5 'optimize' command on emails_fts and commit.

    Intended to be called periodically after many insertions to improve
    query performance.

    Args:
        conn: Database connection holding the emails_fts table
    """
    optimize_command = "INSERT INTO emails_fts(emails_fts) VALUES('optimize')"
    conn.execute(optimize_command)
    conn.commit()
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""FTS5 full-text search for indexed emails.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- search_fts(): Search indexed emails with BM25 ranking
|
|
5
|
+
- sanitize_fts_query(): Escape special FTS5 syntax characters
|
|
6
|
+
|
|
7
|
+
FTS5 query syntax (special characters are escaped by sanitize_fts_query):
|
|
8
|
+
- Simple terms: "meeting notes"
|
|
9
|
+
- Phrases: '"exact phrase"'
|
|
10
|
+
- Boolean: "meeting OR notes"
|
|
11
|
+
- Prefix: "meet*"
|
|
12
|
+
- Column filter: "subject:urgent"
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
import sqlite3
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
|
|
21
|
+
# Matches (and captures) one character that is special in FTS5 query
# syntax: double quote, single quote, hyphen, asterisk, parentheses,
# colon and caret.
FTS5_SPECIAL_CHARS = re.compile(r'(["\'\-\*\(\)\:\^])')
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def add_account_mailbox_filter(
    sql: str,
    params: list,
    account: str | None,
    mailbox: str | None,
    table_alias: str = "e",
) -> str:
    """
    Append optional account/mailbox conditions to a SQL query.

    For each filter that is set (truthy), an ``AND <alias>.<column> = ?``
    clause is appended to the query and the value is appended to params,
    account first and mailbox second. The query is expected to already
    contain a WHERE clause.

    Args:
        sql: Base SQL query string
        params: List of query parameters (modified in-place)
        account: Optional account filter
        mailbox: Optional mailbox filter
        table_alias: Table alias prefix (default: "e")

    Returns:
        The SQL string with the extra conditions appended

    Example:
        >>> sql = "SELECT * FROM emails e WHERE 1=1"
        >>> params = []
        >>> sql = add_account_mailbox_filter(sql, params, "Work", "INBOX")
        >>> sql
        'SELECT * FROM emails e WHERE 1=1 AND e.account = ? AND e.mailbox = ?'
        >>> params
        ['Work', 'INBOX']
    """
    for column, wanted in (("account", account), ("mailbox", mailbox)):
        if not wanted:
            # None or an empty string means "do not filter on this column".
            continue
        sql = f"{sql} AND {table_alias}.{column} = ?"
        params.append(wanted)
    return sql
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class SearchResult:
    """One email returned by a search, together with its relevance score."""

    # Value of emails.message_id for the matched row
    id: int
    # Account and mailbox the email is stored under
    account: str
    mailbox: str
    # Subject line (may carry highlight markers in highlighted searches)
    subject: str
    sender: str
    # Short excerpt of the body for display
    content_snippet: str
    date_received: str
    # Negated BM25 rank rounded to 3 decimals; higher means more relevant
    score: float
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# A token made only of FTS5 "bareword" characters (ASCII letters, digits,
# underscore, the substitute character, or any non-ASCII character) can be
# used in a MATCH expression without quoting.
_FTS5_BAREWORD = re.compile(r"(?:[0-9A-Za-z_\x1a]|[^\x00-\x7f])+")


def sanitize_fts_query(query: str) -> str:
    """
    Sanitize a query string for safe FTS5 use.

    FTS5 has no backslash escape: a backslash outside a quoted string is
    itself a syntax error. The only way to neutralise special characters
    is to put them inside a double-quoted FTS5 string, with embedded
    double quotes doubled. So the query is split on whitespace and every
    token that is not a plain bareword is emitted as a quoted string.

    Plain words and the boolean operators (OR, AND, NOT) are barewords and
    pass through unchanged. Runs of whitespace collapse to single spaces.

    Args:
        query: Raw user query

    Returns:
        Sanitized query for use with MATCH, or "" if the query is empty
        or only whitespace. A dangling operator (e.g. a query that is just
        "AND") is passed through as-is and can still be rejected by FTS5.
    """
    if not query:
        return ""

    safe_tokens = []
    for token in query.split():
        if _FTS5_BAREWORD.fullmatch(token):
            safe_tokens.append(token)
        else:
            # Quote as an FTS5 string literal; "" is an escaped quote.
            safe_tokens.append('"' + token.replace('"', '""') + '"')

    return " ".join(safe_tokens)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _extract_snippet(content: str, max_length: int = 150) -> str:
|
|
109
|
+
"""Extract a snippet from content for display."""
|
|
110
|
+
if not content:
|
|
111
|
+
return ""
|
|
112
|
+
|
|
113
|
+
# Remove excessive whitespace
|
|
114
|
+
text = " ".join(content.split())
|
|
115
|
+
|
|
116
|
+
if len(text) <= max_length:
|
|
117
|
+
return text
|
|
118
|
+
|
|
119
|
+
# Truncate and add ellipsis
|
|
120
|
+
return text[:max_length].rsplit(" ", 1)[0] + "..."
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def search_fts(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
    limit: int = 20,
    *,
    _is_retry: bool = False,
) -> list[SearchResult]:
    """
    Run a BM25-ranked FTS5 search over the indexed emails.

    The query is passed through sanitize_fts_query() first. If FTS5 still
    rejects it with a syntax error, the search is retried once with the
    original query wrapped in double quotes as a single phrase.

    Args:
        conn: Database connection
        query: Search query
        account: Optional account filter
        mailbox: Optional mailbox filter
        limit: Maximum results (default: 20)
        _is_retry: Internal flag for the phrase-search retry; the query is
            then used verbatim and no further retry is attempted

    Returns:
        List of SearchResult, best match first; empty for a blank query

    Raises:
        sqlite3.OperationalError: For any database error other than the
            first FTS5 syntax error
    """
    if not query or not query.strip():
        return []

    # On the retry the query is already a quoted phrase; sanitizing it
    # again would escape it twice.
    match_expr = query if _is_retry else sanitize_fts_query(query)
    if not match_expr:
        return []

    # bm25() is negative, with more negative meaning a better match, so
    # it is negated to get a score where higher is better. The FTS table
    # is joined to emails through its rowid.
    sql = """
        SELECT
            e.message_id,
            e.account,
            e.mailbox,
            e.subject,
            e.sender,
            e.content,
            e.date_received,
            -bm25(emails_fts, 1.0, 0.5, 2.0) as score
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    params: list = [match_expr]
    sql = add_account_mailbox_filter(sql, params, account, mailbox)
    sql += " ORDER BY score DESC LIMIT ?"
    params.append(limit)

    try:
        return [
            SearchResult(
                id=message_id,
                account=row_account,
                mailbox=row_mailbox,
                subject=subject or "",
                sender=sender or "",
                content_snippet=_extract_snippet(body),
                date_received=received or "",
                score=round(score, 3),
            )
            for (
                message_id,
                row_account,
                row_mailbox,
                subject,
                sender,
                body,
                received,
                score,
            ) in conn.execute(sql, params)
        ]
    except sqlite3.OperationalError as e:
        is_syntax_error = "fts5: syntax error" in str(e).lower()
        if _is_retry or not is_syntax_error:
            raise

        # Fall back to a phrase search over the whole raw query.
        phrase_query = '"' + query.replace('"', '""') + '"'
        return search_fts(
            conn,
            phrase_query,
            account=account,
            mailbox=mailbox,
            limit=limit,
            _is_retry=True,
        )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def search_fts_highlight(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
    limit: int = 20,
) -> list[SearchResult]:
    """
    Search like search_fts(), but mark the matched terms in the output.

    The subject is produced by FTS5 highlight() and the content snippet by
    FTS5 snippet(); both wrap matches in ``**`` markers. If the query
    fails with an OperationalError, the plain search_fts() result is
    returned instead.

    Args:
        conn: Database connection
        query: Search query
        account: Optional account filter
        mailbox: Optional mailbox filter
        limit: Maximum results

    Returns:
        List of SearchResult with highlighted subject and content_snippet,
        best match first; empty for a blank query
    """
    if not query or not query.strip():
        return []

    match_expr = sanitize_fts_query(query)
    if not match_expr:
        return []

    # Column 0 of emails_fts is the subject, column 2 the content.
    sql = """
        SELECT
            e.message_id,
            e.account,
            e.mailbox,
            highlight(emails_fts, 0, '**', '**') as subject_hl,
            e.sender,
            snippet(emails_fts, 2, '**', '**', '...', 32) as content_snippet,
            e.date_received,
            -bm25(emails_fts, 1.0, 0.5, 2.0) as score
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    params: list = [match_expr]
    sql = add_account_mailbox_filter(sql, params, account, mailbox)
    sql += " ORDER BY score DESC LIMIT ?"
    params.append(limit)

    try:
        return [
            SearchResult(
                id=row[0],
                account=row[1],
                mailbox=row[2],
                subject=row[3] or "",
                sender=row[4] or "",
                content_snippet=row[5] or "",
                date_received=row[6] or "",
                score=round(row[7], 3),
            )
            for row in conn.execute(sql, params)
        ]
    except sqlite3.OperationalError:
        # Fall back to basic search
        return search_fts(conn, query, account, mailbox, limit)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def count_matches(
    conn: sqlite3.Connection,
    query: str,
    account: str | None = None,
    mailbox: str | None = None,
) -> int:
    """
    Count total matches for a query without returning results.

    Useful for pagination or showing "X results found". To stay consistent
    with search_fts(), a query that FTS5 rejects with a syntax error is
    retried once as a quoted phrase, so the count matches the results that
    search_fts() would return instead of silently being 0.

    Args:
        conn: Database connection
        query: Search query
        account: Optional account filter
        mailbox: Optional mailbox filter

    Returns:
        Total number of matching emails; 0 for a blank query or when the
        database reports an error that the phrase retry cannot resolve
    """
    if not query or not query.strip():
        return 0

    safe_query = sanitize_fts_query(query)
    if not safe_query:
        return 0

    sql = """
        SELECT COUNT(*)
        FROM emails_fts
        JOIN emails e ON emails_fts.rowid = e.rowid
        WHERE emails_fts MATCH ?
    """

    params: list = [safe_query]
    sql = add_account_mailbox_filter(sql, params, account, mailbox)

    try:
        return conn.execute(sql, params).fetchone()[0]
    except sqlite3.OperationalError as e:
        if "fts5: syntax error" not in str(e).lower():
            # Best effort: other database errors still yield a count of 0.
            return 0

    # Same fallback as search_fts(): treat the raw query as one phrase.
    params[0] = '"' + query.replace('"', '""') + '"'
    try:
        return conn.execute(sql, params).fetchone()[0]
    except sqlite3.OperationalError:
        return 0
|