openmail 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openmail/__init__.py +6 -0
- openmail/assistants/__init__.py +35 -0
- openmail/assistants/classify_emails.py +83 -0
- openmail/assistants/compose_email.py +43 -0
- openmail/assistants/detect_phishing_for_email.py +61 -0
- openmail/assistants/evaluate_sender_trust_for_email.py +59 -0
- openmail/assistants/extract_tasks_from_emails.py +126 -0
- openmail/assistants/generate_follow_up_for_email.py +54 -0
- openmail/assistants/natural_language_query.py +699 -0
- openmail/assistants/prioritize_emails.py +89 -0
- openmail/assistants/reply.py +58 -0
- openmail/assistants/reply_suggestions.py +46 -0
- openmail/assistants/rewrite_email.py +50 -0
- openmail/assistants/summarize_attachments_for_email.py +101 -0
- openmail/assistants/summarize_thread_emails.py +55 -0
- openmail/assistants/summary.py +44 -0
- openmail/assistants/summary_multi.py +57 -0
- openmail/assistants/translate_email.py +54 -0
- openmail/auth/__init__.py +6 -0
- openmail/auth/base.py +34 -0
- openmail/auth/no_auth.py +19 -0
- openmail/auth/oauth2.py +58 -0
- openmail/auth/password.py +26 -0
- openmail/config.py +26 -0
- openmail/email_assistant.py +418 -0
- openmail/email_manager.py +777 -0
- openmail/email_query.py +279 -0
- openmail/errors.py +16 -0
- openmail/imap/__init__.py +5 -0
- openmail/imap/attachment_parts.py +55 -0
- openmail/imap/bodystructure.py +296 -0
- openmail/imap/client.py +806 -0
- openmail/imap/fetch_response.py +115 -0
- openmail/imap/inline_cid.py +106 -0
- openmail/imap/pagination.py +16 -0
- openmail/imap/parser.py +298 -0
- openmail/imap/query.py +233 -0
- openmail/llm/__init__.py +3 -0
- openmail/llm/claude.py +35 -0
- openmail/llm/costs.py +108 -0
- openmail/llm/gemini.py +34 -0
- openmail/llm/gpt.py +33 -0
- openmail/llm/groq.py +36 -0
- openmail/llm/model.py +126 -0
- openmail/llm/xai.py +35 -0
- openmail/logger.py +20 -0
- openmail/models/__init__.py +20 -0
- openmail/models/attachment.py +128 -0
- openmail/models/message.py +113 -0
- openmail/models/subscription.py +45 -0
- openmail/models/task.py +24 -0
- openmail/py.typed +0 -0
- openmail/smtp/__init__.py +7 -0
- openmail/smtp/builder.py +41 -0
- openmail/smtp/client.py +218 -0
- openmail/smtp/templates.py +16 -0
- openmail/subscription/__init__.py +7 -0
- openmail/subscription/detector.py +58 -0
- openmail/subscription/parser.py +32 -0
- openmail/subscription/service.py +237 -0
- openmail/types.py +30 -0
- openmail/utils/__init__.py +39 -0
- openmail/utils/utils.py +295 -0
- openmail-0.1.5.dist-info/METADATA +180 -0
- openmail-0.1.5.dist-info/RECORD +67 -0
- openmail-0.1.5.dist-info/WHEEL +4 -0
- openmail-0.1.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# openmail/imap/fetch_response.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Iterator, Optional, Sequence, Tuple
|
|
7
|
+
|
|
8
|
+
# Metadata tokens that appear inline in the text part of a FETCH response.
# Group 1 is the UID digits / the quoted date text / the raw flag list.
UID_RE = re.compile(r"UID\s+(\d+)", flags=re.IGNORECASE)
INTERNALDATE_RE = re.compile(r'INTERNALDATE\s+"([^"]+)"', flags=re.IGNORECASE)
FLAGS_RE = re.compile(r"FLAGS\s*\(([^)]*)\)", flags=re.IGNORECASE)

# Used for parsing FETCH section results.
# Group 1 of the MIME/BODY tokens is the dotted part number (e.g. "1.2").
MIME_TOKEN_RE = re.compile(r"BODY\[(\d+(?:\.\d+)*)\.MIME\]", flags=re.IGNORECASE)
BODY_TOKEN_RE = re.compile(r"BODY\[(\d+(?:\.\d+)*)\]", flags=re.IGNORECASE)
HEADER_PEEK_RE = re.compile(r"BODY\[HEADER\]", flags=re.IGNORECASE)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class FetchPiece:
    """
    One normalized element of a FETCH response.

    Instances are immutable (frozen dataclass).
    """

    # Decoded string metadata from the FETCH tuple element.
    meta: str
    # Payload bytes when present, otherwise None.
    payload: Optional[bytes]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _extract_payload_from_fetch_item(
    item: tuple, data: Sequence[object], i: int
) -> Tuple[Optional[bytes], bool]:
    """
    Locate the payload that belongs to the FETCH tuple ``item`` at ``data[i]``.

    Returns (payload_bytes, used_next_element).

    Two layouts are handled:
      - (meta, payload): the payload is the tuple's second member
      - (meta, <non-bytes>) followed by a bytes element at ``data[i + 1]``

    A bytearray payload is converted to immutable bytes. When neither layout
    applies the result is (None, False).
    """
    binary_types = (bytes, bytearray)

    # Layout 1: the tuple carries its own payload.
    if len(item) > 1 and isinstance(item[1], binary_types):
        return bytes(item[1]), False

    # Layout 2: the payload is the element right after the tuple.
    following = i + 1
    if following < len(data) and isinstance(data[following], binary_types):
        return bytes(data[following]), True

    return None, False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def iter_fetch_pieces(data: Sequence[object]) -> Iterator[FetchPiece]:
    """
    Walk imaplib FETCH response data and yield one FetchPiece per tuple element.

    Only non-empty tuples whose first member is bytes/bytearray produce a
    piece; bare bytes elements (such as the b")" terminators) and anything
    else are skipped. The metadata is decoded to str with undecodable bytes
    dropped.
    """
    position = 0
    total = len(data)
    while position < total:
        element = data[position]
        step = 1

        carries_meta = (
            isinstance(element, tuple)
            and bool(element)
            and isinstance(element[0], (bytes, bytearray))
        )
        if carries_meta:
            meta_text = element[0].decode(errors="ignore")
            payload, consumed_next = _extract_payload_from_fetch_item(element, data, position)
            yield FetchPiece(meta=meta_text, payload=payload)
            # The helper may have taken the following element as the payload.
            if consumed_next:
                step = 2

        position += step
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def parse_uid(meta: str) -> Optional[int]:
    """Return the number following the first UID token in *meta*, or None if there is none."""
    found = UID_RE.search(meta)
    if found is None:
        return None
    return int(found.group(1))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def parse_internaldate(meta: str) -> Optional[str]:
    """Return the quoted text following INTERNALDATE in *meta* (without the quotes), or None."""
    found = INTERNALDATE_RE.search(meta)
    if found is None:
        return None
    return found.group(1)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_flags(meta: str) -> set[str]:
    """Return the whitespace-separated flags inside the first ``FLAGS (...)`` group of *meta*.

    An absent or empty flag list gives an empty set.
    """
    found = FLAGS_RE.search(meta)
    if found is None:
        return set()
    # str.split() without arguments never produces empty tokens, so an empty
    # or whitespace-only list naturally becomes an empty set.
    return set(found.group(1).split())
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def has_header_peek(meta: str) -> bool:
    """Tell whether *meta* contains a ``BODY[HEADER]`` token (case-insensitive)."""
    return HEADER_PEEK_RE.search(meta) is not None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def match_section_mime(meta: str) -> Optional[str]:
    """Return the section id of the first ``BODY[<section>.MIME]`` token in *meta*, or None."""
    found = MIME_TOKEN_RE.search(meta)
    if found is None:
        return None
    return found.group(1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def match_section_body(meta: str) -> Optional[str]:
    """
    Return the section id of a ``BODY[<section>]`` token in *meta*.

    Gives None when *meta* contains a ``BODY[<section>.MIME]`` token anywhere,
    or when no plain body token is present.
    """
    # A MIME token anywhere in the metadata disqualifies it as a body section.
    if MIME_TOKEN_RE.search(meta) is not None:
        return None
    found = BODY_TOKEN_RE.search(meta)
    return None if found is None else found.group(1)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import imaplib
|
|
5
|
+
import re
|
|
6
|
+
from typing import Dict, Iterable, Optional
|
|
7
|
+
from urllib.parse import unquote
|
|
8
|
+
|
|
9
|
+
from openmail.models import AttachmentMeta
|
|
10
|
+
|
|
11
|
+
# Matches the src attribute of an <img> tag in three groups:
#   1. everything from "<img" up to and including the opening quote
#   2. the src value itself
#   3. the closing quote
_IMG_SRC_RE = re.compile(
    r'(<img\b[^>]*\bsrc=["\'])([^"\']+)(["\'])',
    flags=re.IGNORECASE,
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _cid_variants(cid_src: str) -> list[str]:
    """
    Build the lookup candidates for a content-id reference.

    For 'cid:image001.png@01DC....' the candidates are, in order:
      - image001.png@01DC...   (as written, then lower-cased)
      - image001.png           (text before the first '@', then lower-cased)

    A leading 'cid:' is removed, the value is URL-unquoted and surrounding
    <...> and whitespace are stripped. Empty candidates and duplicates are
    dropped while the order is preserved. An empty reference gives [].
    """
    token = cid_src.strip()
    if token.lower().startswith("cid:"):
        token = token[4:].strip()

    token = unquote(token).strip().strip("<>").strip()
    if not token:
        return []

    candidates = [token, token.lower()]
    if "@" in token:
        local_part, _, _ = token.partition("@")
        candidates += [local_part, local_part.lower()]

    # dict.fromkeys keeps the first occurrence of each key in insertion order.
    return list(dict.fromkeys(c for c in candidates if c))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def build_inline_index(atts: Iterable[AttachmentMeta]) -> Dict[str, AttachmentMeta]:
    """
    Map content-id variants to the image attachment that carries them.

    Only attachments whose content type starts with "image/" and that have a
    content_id are indexed. When several attachments share a variant, the
    first one encountered wins.
    """
    index: Dict[str, AttachmentMeta] = {}
    for meta in atts:
        is_image = meta.content_type.lower().startswith("image/")
        # Either signal (is_inline flag or a content_id) marks a candidate,
        # but only a content_id gives something to index by.
        if is_image and (meta.is_inline or meta.content_id) and meta.content_id:
            cleaned = meta.content_id.strip().strip("<>").strip()
            for variant in _cid_variants(cleaned):
                if variant not in index:
                    index[variant] = meta
    return index
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def inline_cids_as_data_uris(
    *,
    conn: imaplib.IMAP4,
    uid: int,
    html: str,
    attachment_metas: list[AttachmentMeta],
    fetch_part_bytes,  # callable(conn, *, uid, part) -> bytes
) -> str:
    """
    Rewrite <img src="cid:..."> to data: URIs by fetching the bytes via IMAP.

    Args:
        conn: IMAP connection, passed through to ``fetch_part_bytes``.
        uid: UID of the message, passed through to ``fetch_part_bytes``.
        html: HTML body to rewrite.
        attachment_metas: attachment metadata used to resolve content ids.
        fetch_part_bytes: called as ``fetch_part_bytes(conn, uid=uid, part=...)``
            and expected to return the part's bytes.

    Returns:
        The HTML with every resolvable ``cid:`` image source replaced by a
        base64 data URI. An <img> tag is left untouched when its source is
        not a cid reference, no attachment matches, the fetch raises, or the
        fetch returns no data. Empty ``html`` or ``attachment_metas`` returns
        ``html`` unchanged.
    """
    if not html or not attachment_metas:
        return html

    idx = build_inline_index(attachment_metas)

    # Data URIs built during this call, keyed by (part, content type), so an
    # image referenced several times in the HTML is fetched only once.
    # NOTE(review): assumes AttachmentMeta.part is hashable (the original
    # comment describes it as a str) - confirm.
    uri_cache: Dict[tuple, str] = {}

    def repl(m: re.Match) -> str:
        prefix, src, suffix = m.group(1), m.group(2), m.group(3)
        if not src.lower().startswith("cid:"):
            return m.group(0)

        hit: Optional[AttachmentMeta] = None
        for k in _cid_variants(src):
            hit = idx.get(k)
            if hit:
                break
        if not hit:
            return m.group(0)

        ctype = (hit.content_type or "application/octet-stream").lower()
        cache_key = (hit.part, ctype)
        data_uri = uri_cache.get(cache_key)
        if data_uri is None:
            try:
                data = fetch_part_bytes(conn, uid=uid, part=hit.part)
            except Exception:
                # Best effort: a failed fetch keeps the original tag and is
                # not cached, so a later reference may try again.
                return m.group(0)

            if not data:
                return m.group(0)

            b64 = base64.b64encode(data).decode("ascii")
            data_uri = f"data:{ctype};base64,{b64}"
            uri_cache[cache_key] = data_uri

        return f"{prefix}{data_uri}{suffix}"

    return _IMG_SRC_RE.sub(repl, html)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from openmail.types import EmailRef
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class PagedSearchResult:
    """One page of search results plus the cursor fields used for paging.

    Only ``refs`` is required; every other field defaults to None / False.
    """

    # References to the emails on this page.
    refs: List["EmailRef"]

    # UID cursors and bounds (optional).
    next_before_uid: Optional[int] = None
    prev_after_uid: Optional[int] = None
    newest_uid: Optional[int] = None
    oldest_uid: Optional[int] = None

    # Total count, when known.
    total: Optional[int] = None

    # Whether a following / preceding page exists.
    has_next: bool = False
    has_prev: bool = False
|
openmail/imap/parser.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import email
|
|
5
|
+
import quopri
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from email import policy
|
|
8
|
+
from email.header import decode_header, make_header
|
|
9
|
+
from email.message import Message as PyMessage
|
|
10
|
+
from email.parser import BytesParser
|
|
11
|
+
from email.policy import default as default_policy
|
|
12
|
+
from email.utils import getaddresses
|
|
13
|
+
from typing import Dict, List, Optional, Tuple
|
|
14
|
+
|
|
15
|
+
from openmail.errors import ParseError
|
|
16
|
+
from openmail.models import Attachment, EmailAddress, EmailMessage, EmailOverview
|
|
17
|
+
from openmail.types import EmailRef
|
|
18
|
+
from openmail.utils import best_effort_date
|
|
19
|
+
|
|
20
|
+
# strptime formats tried, in order, when parsing an INTERNALDATE value.
_INTERNALDATE_FMTS = [
    "%d-%b-%Y %H:%M:%S %z",  # standard INTERNALDATE
]


def parse_internaldate(internaldate_raw: Optional[str]) -> Optional[datetime]:
    """
    Parse an IMAP INTERNALDATE string into a datetime.

    Surrounding whitespace and double quotes are removed before parsing.

    Returns:
        The parsed datetime (timezone-aware, since the format includes %z),
        or None when the input is empty or matches none of the known formats.
    """
    if not internaldate_raw:
        return None
    s = internaldate_raw.strip().strip('"')
    for fmt in _INTERNALDATE_FMTS:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            # strptime signals a format mismatch with ValueError; try the
            # next format instead of hiding unrelated errors.
            continue
    return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _decode_header_value(value: Optional[str]) -> str:
    """
    Decode encoded-words in a header value into a plain string.

    An empty or missing value gives "". If decoding fails for any reason the
    value is returned unchanged.
    """
    if value:
        try:
            decoded = make_header(decode_header(value))
            return str(decoded)
        except Exception:
            # Best effort: hand back the undecoded text rather than failing.
            return value
    return ""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def decode_transfer(payload: bytes, cte: str | None) -> bytes:
|
|
47
|
+
if not cte:
|
|
48
|
+
return payload
|
|
49
|
+
cte = cte.strip().lower()
|
|
50
|
+
|
|
51
|
+
if cte == "base64":
|
|
52
|
+
return base64.b64decode(payload, validate=False)
|
|
53
|
+
if cte in ("quoted-printable", "quopri"):
|
|
54
|
+
return quopri.decodestring(payload)
|
|
55
|
+
return payload
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def decode_body_chunk(chunk: bytes, msg: PyMessage) -> str:
    """
    Decode a body chunk using Content-Transfer-Encoding and charset
    from the given (headers-only) message.

    The transfer encoding is matched after stripping whitespace and
    lower-casing, so a padded value such as " Base64 " is still recognised.
    "base64" and "quoted-printable" (alias "quotedprintable") are decoded;
    any other encoding leaves the bytes as they are. If transfer decoding
    fails, the raw chunk is used instead.

    The bytes are then decoded with the message charset (default "utf-8"),
    replacing undecodable bytes. If that fails (for example an unknown
    charset name), UTF-8 is used.
    """
    charset = msg.get_content_charset() or "utf-8"
    cte = str(msg.get("Content-Transfer-Encoding") or "").strip().lower()

    raw = chunk
    try:
        if cte == "base64":
            raw = base64.b64decode(raw, validate=False)
        elif cte in ("quoted-printable", "quotedprintable"):
            raw = quopri.decodestring(raw)
    except Exception:
        # Best effort: fall back to the undecoded chunk.
        raw = chunk

    try:
        return raw.decode(charset, errors="replace")
    except Exception:
        # Typically an unknown charset label; UTF-8 with replacement is the
        # last resort.
        return raw.decode("utf-8", errors="replace")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _parse_addr_list(header_val: Optional[str]) -> List[EmailAddress]:
    """
    Split an address header into EmailAddress entries.

    Display names are decoded and stripped; an empty name becomes None.
    Entries with neither an address nor a name are dropped. A missing or
    empty header gives an empty list.
    """
    parsed: List[EmailAddress] = []
    if header_val:
        for raw_name, raw_addr in getaddresses([header_val]):
            display = _decode_header_value(raw_name).strip()
            mailbox = (raw_addr or "").strip()
            if mailbox or display:
                parsed.append(EmailAddress(email=mailbox, name=display or None))
    return parsed
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _parse_single_addr(header_val: Optional[str]) -> EmailAddress:
    """Return the first address parsed from *header_val*, or an empty EmailAddress if there is none."""
    parsed = _parse_addr_list(header_val)
    if parsed:
        return parsed[0]
    return EmailAddress(email="", name=None)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _decode_text_payload(payload: bytes, charset: Optional[str]) -> str:
    """Decode *payload* with *charset* (default UTF-8), using UTF-8 when the charset is unknown."""
    try:
        return payload.decode(charset or "utf-8", errors="replace")
    except (LookupError, ValueError):
        # Unknown or malformed charset label: do not fail the whole message.
        return payload.decode("utf-8", errors="replace")


def _extract_parts(msg: PyMessage) -> Tuple[Optional[str], Optional[str], List[Attachment]]:
    """
    Pull the text body, HTML body and attachments out of a parsed message.

    Multipart messages are walked leaf by leaf:
      - a part with a filename or an "attachment" disposition becomes an
        Attachment (numbered from 0 in walk order); image parts that are
        disposed inline or carry a Content-ID are marked inline
      - the first text/plain part becomes the text body and the first
        text/html part becomes the HTML body

    A non-multipart message is decoded as a single body: HTML when its
    content type is text/html, otherwise text.

    Bodies are decoded with the part's charset, falling back to UTF-8 when
    the charset is missing or unknown.

    Returns:
        (text, html, attachments); text and html are None when absent.
    """
    text: Optional[str] = None
    html: Optional[str] = None
    atts: List[Attachment] = []
    attachment_idx = 0

    if msg.is_multipart():
        for part in msg.walk():
            # Containers hold no content themselves; only leaves matter.
            if part.is_multipart():
                continue

            ctype = part.get_content_type()
            disp = (part.get("Content-Disposition") or "").lower()

            filename = part.get_filename()
            if filename:
                filename = _decode_header_value(filename)

            payload = part.get_payload(decode=True) or b""

            content_id = part.get("Content-ID")
            if content_id:
                content_id = content_id.strip().strip("<>").strip() or None

            is_inline_image = ctype.startswith("image/") and (
                ("inline" in disp) or bool(content_id)
            )

            # Attachment (explicit disposition or filename)
            if filename or "attachment" in disp:
                atts.append(
                    Attachment(
                        idx=attachment_idx,
                        filename=filename or "attachment",
                        content_type=ctype,
                        data=payload,
                        size=len(payload),
                        content_id=content_id,
                        disposition=(
                            "inline"
                            if is_inline_image
                            else ("attachment" if "attachment" in disp else None)
                        ),
                        is_inline=is_inline_image,
                    )
                )
                attachment_idx += 1
                continue

            if ctype in ("text/plain", "text/html"):
                body = _decode_text_payload(payload, part.get_content_charset())

                # Only the first body of each kind is kept.
                if ctype == "text/plain" and text is None:
                    text = body
                elif ctype == "text/html" and html is None:
                    html = body
    else:
        payload = msg.get_payload(decode=True) or b""
        body = _decode_text_payload(payload, msg.get_content_charset())
        if msg.get_content_type() == "text/html":
            html = body
        else:
            text = body

    return text, html, atts
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def decode_section(mime_bytes: Optional[bytes], body_bytes: Optional[bytes]) -> str:
    """
    Decode one fetched body section into text.

    Args:
        mime_bytes: the section's MIME header bytes, if available.
        body_bytes: the section's body bytes.

    Returns:
        "" when there is no body. Without MIME headers the body is decoded
        as UTF-8 with undecodable bytes replaced. Otherwise the headers are
        parsed and the body is decoded by ``decode_body_chunk`` according to
        their transfer encoding and charset.
    """
    if not body_bytes:
        return ""
    if not mime_bytes:
        # errors="replace" cannot raise for UTF-8, so no further fallback
        # is needed here.
        return body_bytes.decode("utf-8", errors="replace")

    msg = BytesParser(policy=default_policy).parsebytes(mime_bytes)
    return decode_body_chunk(body_bytes, msg)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def parse_rfc822(
    ref: EmailRef,
    raw: bytes,
    *,
    include_attachments: bool = False,
    internaldate_raw: Optional[str] = None,
) -> EmailMessage:
    """
    Build an EmailMessage from the raw bytes of a complete message.

    Args:
        ref: reference stored on the resulting message.
        raw: full message bytes.
        include_attachments: when False the attachment list is left empty.
        internaldate_raw: INTERNALDATE text used for ``received_at``.

    Raises:
        ParseError: if any step of the parsing fails.
    """
    try:
        parsed: PyMessage = email.message_from_bytes(raw, policy=policy.default)

        body_text, body_html, found_attachments = _extract_parts(parsed)
        kept_attachments = found_attachments if include_attachments else []

        decoded_headers: Dict[str, str] = {}
        for key, value in parsed.items():
            decoded_headers[key] = _decode_header_value(str(value))

        # received_at comes from the server's INTERNALDATE, sent_at from the
        # message's own Date header.
        received = parse_internaldate(internaldate_raw)
        sent = best_effort_date(parsed.get("Date"), None)

        subject = _decode_header_value(parsed.get("Subject"))
        sender = _parse_single_addr(parsed.get("From"))
        to_list = _parse_addr_list(parsed.get("To"))
        cc_list = _parse_addr_list(parsed.get("Cc"))
        bcc_list = _parse_addr_list(parsed.get("Bcc"))
        message_id = _decode_header_value(parsed.get("Message-ID"))

        return EmailMessage(
            ref=ref,
            subject=subject,
            from_email=sender,
            to=to_list,
            cc=cc_list,
            bcc=bcc_list,
            text=body_text,
            html=body_html,
            attachments=kept_attachments,
            received_at=received,
            sent_at=sent,
            message_id=message_id,
            headers=decoded_headers,
        )
    except Exception as e:
        raise ParseError(f"Failed to parse RFC822: {e}") from e
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def parse_headers_and_bodies(
    ref: EmailRef,
    header_bytes: bytes,
    *,
    text: str,
    html: str,
    attachments,
    internaldate_raw: Optional[str] = None,
) -> EmailMessage:
    """
    Build an EmailMessage from header bytes plus already-decoded bodies.

    Args:
        ref: reference stored on the resulting message.
        header_bytes: raw header block; empty/None is parsed as no headers.
        text: decoded plain-text body; an empty value is stored as None.
        html: decoded HTML body; an empty value is stored as None.
        attachments: stored on the message as given.
        internaldate_raw: INTERNALDATE text used for ``received_at``.

    Raises:
        ParseError: if any step of the parsing fails.
    """
    try:
        parsed = BytesParser(policy=default_policy).parsebytes(header_bytes or b"")

        decoded_headers: Dict[str, str] = {}
        for key, value in parsed.items():
            decoded_headers[key] = _decode_header_value(str(value))

        # received_at comes from the server's INTERNALDATE, sent_at from the
        # Date header.
        received = parse_internaldate(internaldate_raw)
        sent = best_effort_date(parsed.get("Date"), None)

        subject = _decode_header_value(parsed.get("Subject"))
        sender = _parse_single_addr(parsed.get("From"))
        to_list = _parse_addr_list(parsed.get("To"))
        cc_list = _parse_addr_list(parsed.get("Cc"))
        bcc_list = _parse_addr_list(parsed.get("Bcc"))
        message_id = _decode_header_value(parsed.get("Message-ID"))

        return EmailMessage(
            ref=ref,
            subject=subject,
            from_email=sender,
            to=to_list,
            cc=cc_list,
            bcc=bcc_list,
            text=text or None,
            html=html or None,
            attachments=attachments,
            received_at=received,
            sent_at=sent,
            message_id=message_id,
            headers=decoded_headers,
        )
    except Exception as e:
        raise ParseError(f"Failed to parse headers/bodies: {e}") from e
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def parse_overview(
    ref: EmailRef,
    flags: set,
    header_bytes: bytes | bytearray,
    *,
    internaldate_raw: Optional[str] = None,
) -> EmailOverview:
    """
    Build an EmailOverview from a message's header block.

    Args:
        ref: reference stored on the overview.
        flags: stored on the overview as given.
        header_bytes: raw header block; when it is not bytes/bytearray the
            overview is built with empty subject, sender, recipients and
            headers.
        internaldate_raw: INTERNALDATE text used for ``received_at``.

    Raises:
        ParseError: if any step of the parsing fails.
    """
    try:
        # Defaults used when no header bytes are available.
        subject = ""
        sender = EmailAddress(email="", name=None)
        recipients: List[EmailAddress] = []
        decoded_headers: Dict[str, str] = {}
        date_text: Optional[str] = None

        if isinstance(header_bytes, (bytes, bytearray)):
            parsed = BytesParser(policy=default_policy).parsebytes(bytes(header_bytes))

            subject = _decode_header_value(parsed.get("Subject"))
            sender = _parse_single_addr(parsed.get("From"))
            date_text = parsed.get("Date")

            # Several To headers are merged into one comma-separated list.
            to_values = parsed.get_all("To", [])
            if to_values:
                recipients = _parse_addr_list(", ".join(to_values))

            decoded_headers.update(
                (key, _decode_header_value(str(value))) for key, value in parsed.items()
            )

        received = parse_internaldate(internaldate_raw)
        sent = best_effort_date(date_text, None)

        return EmailOverview(
            ref=ref,
            subject=subject or "",
            from_email=sender or EmailAddress(email="", name=None),
            to=recipients,
            flags=flags,
            received_at=received,
            sent_at=sent,
            headers=decoded_headers,
        )
    except Exception as e:
        raise ParseError(f"Failed to parse Email Overview: {e}") from e
|