openmail 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openmail/__init__.py +6 -0
- openmail/assistants/__init__.py +35 -0
- openmail/assistants/classify_emails.py +83 -0
- openmail/assistants/compose_email.py +43 -0
- openmail/assistants/detect_phishing_for_email.py +61 -0
- openmail/assistants/evaluate_sender_trust_for_email.py +59 -0
- openmail/assistants/extract_tasks_from_emails.py +126 -0
- openmail/assistants/generate_follow_up_for_email.py +54 -0
- openmail/assistants/natural_language_query.py +699 -0
- openmail/assistants/prioritize_emails.py +89 -0
- openmail/assistants/reply.py +58 -0
- openmail/assistants/reply_suggestions.py +46 -0
- openmail/assistants/rewrite_email.py +50 -0
- openmail/assistants/summarize_attachments_for_email.py +101 -0
- openmail/assistants/summarize_thread_emails.py +55 -0
- openmail/assistants/summary.py +44 -0
- openmail/assistants/summary_multi.py +57 -0
- openmail/assistants/translate_email.py +54 -0
- openmail/auth/__init__.py +6 -0
- openmail/auth/base.py +34 -0
- openmail/auth/no_auth.py +19 -0
- openmail/auth/oauth2.py +58 -0
- openmail/auth/password.py +26 -0
- openmail/config.py +26 -0
- openmail/email_assistant.py +418 -0
- openmail/email_manager.py +777 -0
- openmail/email_query.py +279 -0
- openmail/errors.py +16 -0
- openmail/imap/__init__.py +5 -0
- openmail/imap/attachment_parts.py +55 -0
- openmail/imap/bodystructure.py +296 -0
- openmail/imap/client.py +806 -0
- openmail/imap/fetch_response.py +115 -0
- openmail/imap/inline_cid.py +106 -0
- openmail/imap/pagination.py +16 -0
- openmail/imap/parser.py +298 -0
- openmail/imap/query.py +233 -0
- openmail/llm/__init__.py +3 -0
- openmail/llm/claude.py +35 -0
- openmail/llm/costs.py +108 -0
- openmail/llm/gemini.py +34 -0
- openmail/llm/gpt.py +33 -0
- openmail/llm/groq.py +36 -0
- openmail/llm/model.py +126 -0
- openmail/llm/xai.py +35 -0
- openmail/logger.py +20 -0
- openmail/models/__init__.py +20 -0
- openmail/models/attachment.py +128 -0
- openmail/models/message.py +113 -0
- openmail/models/subscription.py +45 -0
- openmail/models/task.py +24 -0
- openmail/py.typed +0 -0
- openmail/smtp/__init__.py +7 -0
- openmail/smtp/builder.py +41 -0
- openmail/smtp/client.py +218 -0
- openmail/smtp/templates.py +16 -0
- openmail/subscription/__init__.py +7 -0
- openmail/subscription/detector.py +58 -0
- openmail/subscription/parser.py +32 -0
- openmail/subscription/service.py +237 -0
- openmail/types.py +30 -0
- openmail/utils/__init__.py +39 -0
- openmail/utils/utils.py +295 -0
- openmail-0.1.5.dist-info/METADATA +180 -0
- openmail-0.1.5.dist-info/RECORD +67 -0
- openmail-0.1.5.dist-info/WHEEL +4 -0
- openmail-0.1.5.dist-info/licenses/LICENSE +21 -0
openmail/email_query.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, List, Optional, Sequence
|
|
4
|
+
|
|
5
|
+
from openmail.imap import IMAPQuery, PagedSearchResult
|
|
6
|
+
from openmail.models import EmailMessage, EmailOverview
|
|
7
|
+
from openmail.utils import iso_days_ago
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from openmail.email_manager import EmailManager
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EmailQuery:
    """
    Fluent builder that composes IMAP search filters.

    Filter methods only mutate the wrapped ``IMAPQuery`` and return ``self``
    so calls can be chained. The manager's IMAP client is only used by
    ``search()``, ``fetch()`` and ``fetch_overview()``.
    """

    def __init__(self, manager: Optional[EmailManager], mailbox: str = "INBOX"):
        # ``manager`` may be None: the builder can still compose filters, but
        # the search/fetch methods will raise until a manager is available.
        self._m = manager
        self._mailbox = mailbox
        self._q = IMAPQuery()
        self._limit: int = 50

    # ------------------------------------------------------------------ #
    # Private helpers
    # ------------------------------------------------------------------ #

    def _require_manager(self) -> EmailManager:
        """Return the attached manager or raise a clear error if there is none."""
        if self._m is None:
            raise RuntimeError(
                "EmailQuery has no EmailManager attached; cannot run IMAP operations"
            )
        return self._m

    def _or_clauses(self, clauses: List[IMAPQuery]) -> EmailQuery:
        """
        Add "any of these clauses" to the current query.

        - no clauses: nothing is added
        - one clause: its parts are appended directly (no OR needed)
        - several clauses: they are combined through ``IMAPQuery.or_``
        """
        if not clauses:
            return self
        if len(clauses) == 1:
            self._q.parts += clauses[0].parts
            return self
        self._q.or_(*clauses)
        return self

    # ------------------------------------------------------------------ #
    # Basic configuration
    # ------------------------------------------------------------------ #

    def mailbox(self, mailbox: str) -> EmailQuery:
        """Select the mailbox that search()/fetch() will run against."""
        self._mailbox = mailbox
        return self

    def limit(self, n: int) -> EmailQuery:
        """Set the page size passed to the IMAP client on search()."""
        self._limit = n
        return self

    @property
    def query(self) -> IMAPQuery:
        """
        The underlying IMAPQuery.

        This is a LIVE object:
        mutating it will affect this EmailQuery.

        Example:
            easy = EmailQuery(mgr)

            # mutate existing IMAPQuery
            easy.query.unseen().from_("alerts@example.com")

            # later:
            refs = easy.search()
        """
        return self._q

    @query.setter
    def query(self, value: IMAPQuery) -> None:
        """
        Replace the underlying IMAPQuery.

        Raises TypeError if ``value`` is not an IMAPQuery.

        Example:
            q = IMAPQuery().unseen().subject("invoice")
            easy.query = q
        """
        if not isinstance(value, IMAPQuery):
            raise TypeError("query must be an IMAPQuery")
        self._q = value

    # ------------------------------------------------------------------ #
    # Filters
    # ------------------------------------------------------------------ #

    def last_days(self, days: int) -> EmailQuery:
        """Convenience: messages since N days ago (UTC)."""
        if days < 0:
            raise ValueError("days must be >= 0")
        self._q.since(iso_days_ago(days))
        return self

    def from_any(self, *senders: str) -> EmailQuery:
        """
        FROM any of the senders (nested OR). Equivalent to:
            OR FROM a OR FROM b FROM c ...

        Empty values are ignored; with no usable sender nothing is added.
        """
        return self._or_clauses([IMAPQuery().from_(s) for s in senders if s])

    def to_any(self, *recipients: str) -> EmailQuery:
        """TO any of the recipients; empty values are ignored."""
        return self._or_clauses([IMAPQuery().to(s) for s in recipients if s])

    def subject_any(self, *needles: str) -> EmailQuery:
        """SUBJECT matches any of the needles; empty values are ignored."""
        return self._or_clauses([IMAPQuery().subject(s) for s in needles if s])

    def text_any(self, *needles: str) -> EmailQuery:
        """TEXT matches any of the needles; empty values are ignored."""
        return self._or_clauses([IMAPQuery().text(s) for s in needles if s])

    def recent_unread(self, days: int = 7) -> EmailQuery:
        """UNSEEN AND SINCE (days ago)."""
        self._q.unseen()
        return self.last_days(days)

    def inbox_triage(self, days: int = 14) -> EmailQuery:
        """
        A very common triage filter:
        - not deleted
        - not drafts
        - recent window
        - and either unseen OR flagged
        """
        # Build the OR on a scratch query and splice its built form in as raw
        # tokens, after the undeleted/undraft/since criteria.
        triage_or = IMAPQuery().or_(
            IMAPQuery().unseen(),
            IMAPQuery().flagged(),
        )
        self._q.undeleted().undraft()
        self.last_days(days)
        self._q.raw(triage_or.build())
        return self

    def header_contains(self, name: str, needle: str) -> EmailQuery:
        """Add a HEADER criterion; a no-op when either argument is empty."""
        if name and needle:
            self._q.header(name, needle)
        return self

    def for_thread_root(self, root: EmailMessage) -> EmailQuery:
        """
        Narrow this query to messages that look like they belong to the same
        thread as `root`, based on its Message-ID.

        Matches messages whose References OR In-Reply-To header contains the
        root's Message-ID. A root without a Message-ID leaves the query
        unchanged.
        """
        if not root.message_id:
            return self

        mid = root.message_id

        self._q.or_(
            IMAPQuery().header("References", mid),
            IMAPQuery().header("In-Reply-To", mid),
        )
        return self

    def thread_like(
        self, *, subject: Optional[str] = None, participants: Sequence[str] = ()
    ) -> EmailQuery:
        """
        Approximate "thread" matching:
        - optional SUBJECT contains `subject`
        - AND (FROM any participants OR TO any participants OR CC any participants)

        Empty participant entries are ignored.
        """
        if subject:
            self._q.subject(subject)

        p = [x for x in participants if x]
        if not p:
            return self

        q_from = [IMAPQuery().from_(x) for x in p]
        q_to = [IMAPQuery().to(x) for x in p]
        q_cc = [IMAPQuery().cc(x) for x in p]

        self._q.or_(*(q_from + q_to + q_cc))
        return self

    def newsletters(self) -> EmailQuery:
        """
        Common newsletter identification:
        - has List-Unsubscribe header
        """
        # NOTE(review): the empty needle is presumably meant to match any
        # message that carries the header at all; confirm how
        # IMAPQuery.header() encodes an empty string.
        self._q.header("List-Unsubscribe", "")
        return self

    def from_domain(self, domain: str) -> EmailQuery:
        """
        Practical: FROM contains '@domain'.
        (IMAP has no dedicated "domain" operator.)

        A leading "@" in `domain` is accepted and not duplicated; an empty
        `domain` leaves the query unchanged.
        """
        if not domain:
            return self
        needle = domain if domain.startswith("@") else f"@{domain}"
        self._q.from_(needle)
        return self

    def invoices_or_receipts(self) -> EmailQuery:
        """Common finance mailbox query."""
        return self.subject_any("invoice", "receipt", "payment", "order confirmation")

    def security_alerts(self) -> EmailQuery:
        """Common security / auth notifications."""
        return self.subject_any(
            "security alert",
            "new sign-in",
            "new login",
            "password",
            "verification code",
            "one-time",
            "2fa",
        )

    def with_attachments_hint(self) -> EmailQuery:
        """
        IMAP SEARCH cannot reliably filter 'has attachment' across servers.

        As a best-effort hint this ORs three HEADER criteria
        (Content-Disposition contains "attachment", Content-Type contains
        "name=", Content-Type contains "filename=") and appends the built
        expression as raw tokens.
        """
        hint = IMAPQuery().or_(
            IMAPQuery().header("Content-Disposition", "attachment"),
            IMAPQuery().header("Content-Type", "name="),
            IMAPQuery().header("Content-Type", "filename="),
        )

        self._q.raw(hint.build())
        return self

    def raw(self, *tokens: str) -> EmailQuery:
        """Append raw search tokens to the underlying IMAPQuery."""
        self._q.raw(*tokens)
        return self

    # ------------------------------------------------------------------ #
    # Execution
    # ------------------------------------------------------------------ #

    def search(
        self,
        *,
        before_uid: Optional[int] = None,
        after_uid: Optional[int] = None,
        refresh: bool = False,
    ) -> PagedSearchResult:
        """
        Run the composed query and return one page of results.

        The mailbox, query and page size configured on this builder are passed
        to the manager's ``imap.search_page_cached`` together with the paging
        arguments.

        Raises:
            RuntimeError: if this EmailQuery was created without a manager.
        """
        return self._require_manager().imap.search_page_cached(
            mailbox=self._mailbox,
            query=self._q,
            page_size=self._limit,
            before_uid=before_uid,
            after_uid=after_uid,
            refresh=refresh,
        )

    def fetch(
        self,
        *,
        before_uid: Optional[int] = None,
        after_uid: Optional[int] = None,
        refresh: bool = False,
        include_attachment_meta: bool = False,
    ) -> tuple[PagedSearchResult, List[EmailMessage]]:
        """
        Fetch a page of full EmailMessage objects plus its paging metadata.

        Returns ``(page, [])`` without a second IMAP call when the page has no
        refs.

        Raises:
            RuntimeError: if this EmailQuery was created without a manager.
        """
        page = self.search(before_uid=before_uid, after_uid=after_uid, refresh=refresh)
        if not page.refs:
            return page, []
        messages = self._require_manager().imap.fetch(
            page.refs, include_attachment_meta=include_attachment_meta
        )
        return page, messages

    def fetch_overview(
        self,
        *,
        before_uid: Optional[int] = None,
        after_uid: Optional[int] = None,
        refresh: bool = False,
    ) -> tuple[PagedSearchResult, List[EmailOverview]]:
        """
        Fetch a page of EmailOverview objects plus its paging metadata.

        Returns ``(page, [])`` without a second IMAP call when the page has no
        refs.

        Raises:
            RuntimeError: if this EmailQuery was created without a manager.
        """
        page = self.search(before_uid=before_uid, after_uid=after_uid, refresh=refresh)
        if not page.refs:
            return page, []
        overviews = self._require_manager().imap.fetch_overview(page.refs)
        return page, overviews
|
openmail/errors.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class EmailManagerError(Exception):
    """Root of the openmail exception hierarchy; the other errors in this module derive from it."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ConfigError(EmailManagerError):
    """Configuration-related error (category taken from the name; raise sites live outside this module)."""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AuthError(EmailManagerError):
    """Authentication-related error (category taken from the name; raise sites live outside this module)."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SMTPError(EmailManagerError):
    """SMTP-related error (category taken from the name; raise sites live outside this module)."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class IMAPError(EmailManagerError):
    """IMAP-related error, e.g. a FETCH that fails or returns no usable payload."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ParseError(EmailManagerError):
    """Parsing-related error (category taken from the name; raise sites live outside this module)."""
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# openmail/imap/attachment_parts.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import imaplib
|
|
5
|
+
from email.parser import BytesParser
|
|
6
|
+
from email.policy import default as default_policy
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from openmail.errors import IMAPError
|
|
10
|
+
from openmail.imap.parser import decode_transfer
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def fetch_part_bytes(
    conn: imaplib.IMAP4,
    *,
    uid: int,
    part: str,
) -> bytes:
    """
    Download one BODY section of a message and return its decoded bytes.

    Two UID FETCH round-trips are made, both with BODY.PEEK:
    1. ``<part>.MIME`` to read the part's Content-Transfer-Encoding header
    2. ``<part>`` itself for the raw payload

    The payload is then passed to ``decode_transfer`` together with that
    encoding (``None`` when no MIME header bytes came back).

    This is used for:
    - downloading attachments
    - fetching inline CID images for HTML rewriting

    Raises:
        IMAPError: if either FETCH does not answer "OK" or returns no data,
            or if the body response carries no bytes payload.
    """

    def first_payload(response) -> Optional[bytes]:
        # The data we want is the second element of the first tuple entry
        # in the response list; non-tuple entries are skipped.
        for entry in response:
            if (
                isinstance(entry, tuple)
                and len(entry) > 1
                and isinstance(entry[1], (bytes, bytearray))
            ):
                return bytes(entry[1])
        return None

    uid_arg = str(uid)

    # Step 1: MIME headers of the part, to learn how the payload is encoded.
    status, mime_resp = conn.uid("FETCH", uid_arg, f"(UID BODY.PEEK[{part}.MIME])")
    if status != "OK" or not mime_resp:
        raise IMAPError(f"FETCH attachment MIME failed uid={uid} part={part}: {mime_resp}")

    encoding = None
    header_bytes = first_payload(mime_resp)
    if header_bytes:
        headers = BytesParser(policy=default_policy).parsebytes(header_bytes)
        encoding = headers.get("Content-Transfer-Encoding")

    # Step 2: the part body itself.
    status, body_resp = conn.uid("FETCH", uid_arg, f"(UID BODY.PEEK[{part}])")
    if status != "OK" or not body_resp:
        raise IMAPError(f"FETCH attachment failed uid={uid} part={part}: {body_resp}")

    raw = first_payload(body_resp)
    if raw is None:
        raise IMAPError(f"Attachment payload not found uid={uid} part={part}")

    return decode_transfer(raw, encoding)
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from openmail.models import AttachmentMeta
|
|
8
|
+
|
|
9
|
+
# Locates the BODYSTRUCTURE item inside a FETCH metadata string.
# The capture is greedy and DOTALL: it starts at the first "(" after the
# keyword and runs to the LAST ")" in the string, so it can include text that
# follows the structure itself (e.g. the closing paren of the FETCH response).
BODYSTRUCTURE_RE = re.compile(r"BODYSTRUCTURE\s+(\(.*\))", re.IGNORECASE | re.DOTALL)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _tokenize(s: str) -> List[str]:
|
|
13
|
+
out: List[str] = []
|
|
14
|
+
i = 0
|
|
15
|
+
n = len(s)
|
|
16
|
+
while i < n:
|
|
17
|
+
c = s[i]
|
|
18
|
+
if c.isspace():
|
|
19
|
+
i += 1
|
|
20
|
+
continue
|
|
21
|
+
if c in ("(", ")"):
|
|
22
|
+
out.append(c)
|
|
23
|
+
i += 1
|
|
24
|
+
continue
|
|
25
|
+
if c == '"':
|
|
26
|
+
i += 1
|
|
27
|
+
buf = []
|
|
28
|
+
while i < n:
|
|
29
|
+
if s[i] == '"' and s[i - 1] != "\\":
|
|
30
|
+
break
|
|
31
|
+
buf.append(s[i])
|
|
32
|
+
i += 1
|
|
33
|
+
out.append("".join(buf))
|
|
34
|
+
i += 1
|
|
35
|
+
continue
|
|
36
|
+
|
|
37
|
+
j = i
|
|
38
|
+
while j < n and (not s[j].isspace()) and s[j] not in ("(", ")"):
|
|
39
|
+
j += 1
|
|
40
|
+
out.append(s[i:j])
|
|
41
|
+
i = j
|
|
42
|
+
|
|
43
|
+
return out
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_tokens(tokens: List[str], idx: int = 0) -> Tuple[Any, int]:
|
|
47
|
+
if tokens[idx] != "(":
|
|
48
|
+
return tokens[idx], idx + 1
|
|
49
|
+
|
|
50
|
+
idx += 1
|
|
51
|
+
lst: List[Any] = []
|
|
52
|
+
while idx < len(tokens) and tokens[idx] != ")":
|
|
53
|
+
tok = tokens[idx]
|
|
54
|
+
if tok == "(":
|
|
55
|
+
node, idx = _parse_tokens(tokens, idx)
|
|
56
|
+
lst.append(node)
|
|
57
|
+
else:
|
|
58
|
+
lst.append(tok)
|
|
59
|
+
idx += 1
|
|
60
|
+
|
|
61
|
+
if idx >= len(tokens) or tokens[idx] != ")":
|
|
62
|
+
raise ValueError("Unbalanced parentheses in BODYSTRUCTURE")
|
|
63
|
+
|
|
64
|
+
idx += 1
|
|
65
|
+
return lst, idx
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def parse_bodystructure(bodystructure_str: str) -> Any:
    """
    Turn a BODYSTRUCTURE string into a nested list/str tree.

    The string is tokenized and the first value is parsed starting at token 0;
    any tokens after that first value are ignored.
    """
    parsed, _next_idx = _parse_tokens(_tokenize(bodystructure_str), 0)
    return parsed
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True)
class TextPartRef:
    """Immutable reference to one text/plain or text/html leaf found in a BODYSTRUCTURE tree."""

    # Dotted part number of the leaf (e.g. "1" or "1.2").
    part: str
    # Lower-cased "type/subtype" of the leaf.
    content_type: str
    # Value of the leaf's "charset" parameter, or None when it has none.
    charset: Optional[str]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _parse_param_list(x: Any) -> Dict[str, str]:
|
|
82
|
+
if not isinstance(x, list):
|
|
83
|
+
return {}
|
|
84
|
+
out: Dict[str, str] = {}
|
|
85
|
+
i = 0
|
|
86
|
+
while i + 1 < len(x):
|
|
87
|
+
out[str(x[i]).lower()] = str(x[i + 1])
|
|
88
|
+
i += 2
|
|
89
|
+
return out
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _find_disposition_filename(dispo: Any) -> Optional[str]:
|
|
93
|
+
if not isinstance(dispo, list) or not dispo:
|
|
94
|
+
return None
|
|
95
|
+
params = _parse_param_list(dispo[1]) if len(dispo) > 1 else {}
|
|
96
|
+
return params.get("filename") or params.get("name")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _leaf_size(node: list) -> int:
|
|
100
|
+
try:
|
|
101
|
+
return int(node[6])
|
|
102
|
+
except Exception:
|
|
103
|
+
return 0
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _leaf_content_type(node: list) -> str:
|
|
107
|
+
try:
|
|
108
|
+
return f"{str(node[0]).lower()}/{str(node[1]).lower()}"
|
|
109
|
+
except Exception:
|
|
110
|
+
return "application/octet-stream"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _leaf_charset(node: list) -> Optional[str]:
|
|
114
|
+
params = _parse_param_list(node[2]) if len(node) > 2 else {}
|
|
115
|
+
return params.get("charset")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _leaf_filename(node: list) -> str:
    """
    Pick a file name for a leaf part.

    Preference order: the disposition's filename/name, then the "name" entry
    of the leaf's parameter list (index 2), then the literal "attachment".
    """
    type_params = _parse_param_list(node[2]) if len(node) > 2 else {}
    from_disposition = _find_disposition_filename(_leaf_disposition(node))
    if from_disposition:
        return from_disposition
    return type_params.get("name") or "attachment"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _leaf_content_id(node: list) -> Optional[str]:
|
|
126
|
+
"""
|
|
127
|
+
BODYSTRUCTURE body-fld-id is typically at index 3 for leaf parts:
|
|
128
|
+
... SP body-fld-id SP body-fld-desc ...
|
|
129
|
+
Servers often return NIL if absent.
|
|
130
|
+
"""
|
|
131
|
+
try:
|
|
132
|
+
cid = node[3]
|
|
133
|
+
if cid is None:
|
|
134
|
+
return None
|
|
135
|
+
cid_str = str(cid)
|
|
136
|
+
if cid_str.upper() == "NIL":
|
|
137
|
+
return None
|
|
138
|
+
cid_str = cid_str.strip().strip("<>").strip()
|
|
139
|
+
return cid_str or None
|
|
140
|
+
except Exception:
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _leaf_content_location(node: list) -> Optional[str]:
    """
    Best-effort lookup of a Content-Location value for a leaf part.

    Some servers include body-fld-md5 at node[5], but Content-Location is not
    a standard BODYSTRUCTURE field. However, some servers stash it in
    "body-ext-1part" params (rare). We try to find it in any param list.

    Lookup order: the leaf's own parameter list (index 2), then the
    disposition's parameter list. In each list the keys "content-location",
    "content_location" and "location" are tried in that order; the first key
    present decides the result (its stripped value, or None if that is empty).
    If not found, return None.
    """
    candidate_keys = ("content-location", "content_location", "location")

    # Leaf params first. Not standard, but some providers may place it here.
    try:
        leaf_params = _parse_param_list(node[2]) if len(node) > 2 else {}
        hit = next((key for key in candidate_keys if key in leaf_params), None)
        if hit is not None:
            return str(leaf_params[hit]).strip() or None
    except Exception:
        # Unexpected node shape: fall through to the disposition lookup.
        pass

    # Then the disposition params, if the leaf has a disposition with params.
    dispo = _leaf_disposition(node)
    if isinstance(dispo, list) and len(dispo) > 1:
        dispo_params = _parse_param_list(dispo[1])
        hit = next((key for key in candidate_keys if key in dispo_params), None)
        if hit is not None:
            return str(dispo_params[hit]).strip() or None

    return None
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _leaf_disposition(node: list) -> Optional[list]:
|
|
173
|
+
"""
|
|
174
|
+
Disposition lives in body-ext-1part and its index differs between
|
|
175
|
+
text vs non-text leafs. Find it structurally instead.
|
|
176
|
+
Expected form: ("INLINE" params) or ("ATTACHMENT" params)
|
|
177
|
+
"""
|
|
178
|
+
if not isinstance(node, list):
|
|
179
|
+
return None
|
|
180
|
+
for el in node:
|
|
181
|
+
if isinstance(el, list) and el:
|
|
182
|
+
head = str(el[0]).upper()
|
|
183
|
+
if head in ("INLINE", "ATTACHMENT"):
|
|
184
|
+
return el
|
|
185
|
+
return None
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _leaf_disposition_kind(node: list) -> Optional[str]:
    """
    Return the leaf's disposition kind, stripped and lower-cased.

    Returns None when the leaf has no disposition element or the kind is
    empty after stripping.
    """
    dispo = _leaf_disposition(node)
    if not isinstance(dispo, list) or not dispo:
        return None
    return str(dispo[0]).strip().lower() or None
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _leaf_is_inline_image(node: list) -> bool:
    """
    Tell whether a leaf is an inline image.

    True for image/* leaves that either carry a Content-ID or have an
    "inline" disposition.
    """
    if not _leaf_content_type(node).startswith("image/"):
        return False
    kind = _leaf_disposition_kind(node) or ""
    # Many inline images have no filename; CID is the key signal.
    if _leaf_content_id(node):
        return True
    return kind == "inline"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _leaf_is_attachment(node: list) -> bool:
|
|
208
|
+
"""
|
|
209
|
+
"Attachment" for our purposes includes:
|
|
210
|
+
- traditional attachments (filename/name)
|
|
211
|
+
- inline images that we must fetch to resolve cid: links
|
|
212
|
+
"""
|
|
213
|
+
if not isinstance(node, list) or len(node) < 7:
|
|
214
|
+
return False
|
|
215
|
+
|
|
216
|
+
params = _parse_param_list(node[2]) if len(node) > 2 else {}
|
|
217
|
+
dispo = _leaf_disposition(node)
|
|
218
|
+
disp_filename = _find_disposition_filename(dispo)
|
|
219
|
+
|
|
220
|
+
if disp_filename or ("name" in params):
|
|
221
|
+
return True
|
|
222
|
+
|
|
223
|
+
# Important: include CID inline images as "attachments" so HTML can resolve them.
|
|
224
|
+
if _leaf_is_inline_image(node):
|
|
225
|
+
return True
|
|
226
|
+
|
|
227
|
+
return False
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def extract_text_and_attachments(
    bodystructure: Any,
) -> Tuple[List[TextPartRef], List[AttachmentMeta]]:
    """
    Walk a parsed BODYSTRUCTURE tree and collect its text parts and attachments.

    Part numbering: children of a multipart node are numbered from 1 and
    joined to the parent's number with dots ("1", "1.2", ...). A single-part
    message (leaf at the root) is reported as part "1".

    Classification of a leaf:
    - text/plain and text/html leaves are body text, UNLESS the leaf is
      explicitly marked with an "attachment" disposition and qualifies as an
      attachment (e.g. an attached .txt or .html file). Those are reported as
      attachments so they stay downloadable and are not mistaken for the body.
    - any other leaf accepted by ``_leaf_is_attachment`` is an attachment
    - everything else is ignored

    Returns:
        ``(text_parts, attachments)`` in tree order; each attachment's ``idx``
        is its position in the returned attachment list.
    """
    text_parts: List[TextPartRef] = []
    atts: List[AttachmentMeta] = []

    def walk(node: Any, prefix: str) -> None:
        if not (isinstance(node, list) and node):
            return

        # Multipart: consecutive list children, followed by subtype atom(s)
        if isinstance(node[0], list):
            child_index = 1
            for child in node:
                if not isinstance(child, list):
                    # First non-list element ends the run of child parts.
                    break
                part_no = f"{prefix}.{child_index}" if prefix else str(child_index)
                walk(child, part_no)
                child_index += 1
            return

        # Leaf
        ctype = _leaf_content_type(node)
        dispo_kind = _leaf_disposition_kind(node)  # "inline"/"attachment"/None
        is_attachment = _leaf_is_attachment(node)

        # A text leaf is body text unless it was explicitly sent as an attachment.
        attached_text_file = dispo_kind == "attachment" and is_attachment
        if ctype in ("text/plain", "text/html") and not attached_text_file:
            text_parts.append(
                TextPartRef(
                    part=prefix or "1",
                    content_type=ctype,
                    charset=_leaf_charset(node),
                )
            )
            return

        if is_attachment:
            cid = _leaf_content_id(node)
            is_inline = _leaf_is_inline_image(node) or (dispo_kind == "inline")

            atts.append(
                AttachmentMeta(
                    idx=len(atts),
                    part=prefix or "1",
                    filename=_leaf_filename(node),
                    content_type=ctype,
                    size=_leaf_size(node),
                    content_id=cid,
                    disposition=dispo_kind,
                    is_inline=is_inline,
                    content_location=_leaf_content_location(node),
                )
            )
            return

    walk(bodystructure, "")
    return text_parts, atts
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def pick_best_text_parts(
    parts: List[TextPartRef],
) -> Tuple[Optional[TextPartRef], Optional[TextPartRef]]:
    """
    Pick the first text/plain and the first text/html part from ``parts``.

    Returns:
        ``(plain, html)``; either element is None when no part of that
        content type is present.
    """
    first_plain: Optional[TextPartRef] = None
    first_html: Optional[TextPartRef] = None

    for ref in parts:
        if first_plain is None and ref.content_type == "text/plain":
            first_plain = ref
        elif first_html is None and ref.content_type == "text/html":
            first_html = ref
        if first_plain is not None and first_html is not None:
            # Both found; later parts cannot change the result.
            break

    return first_plain, first_html
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def extract_bodystructure_from_fetch_meta(meta_str: str) -> Optional[str]:
    """
    Return the parenthesised text following the BODYSTRUCTURE keyword in ``meta_str``.

    The text is whatever ``BODYSTRUCTURE_RE`` captures; None is returned when
    the pattern does not match.
    """
    match = BODYSTRUCTURE_RE.search(meta_str)
    if match is None:
        return None
    return match.group(1)
|