mail-parser 4.2.0__tar.gz → 4.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mail_parser-4.2.0 → mail_parser-4.2.1}/PKG-INFO +1 -1
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/core.py +13 -5
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/utils.py +86 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/version.py +1 -1
- mail_parser-4.2.1/tests/mails/mail_test_19 +12 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/test_mail_parser.py +161 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/test_utils.py +13 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/FUNDING.yml +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/copilot-instructions.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/instructions/containerization-docker-best-practices.instructions.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/instructions/github-actions-ci-cd-best-practices.instructions.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/instructions/markdown.instructions.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/instructions/python.instructions.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.github/workflows/main.yml +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.gitignore +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.markdownlint.json +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/.pre-commit-config.yaml +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/Dockerfile +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/LICENSE.txt +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/Makefile +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/NOTICE.txt +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/README.md +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/docker-compose.yml +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/docs/images/Bitcoin SpamScope.jpg +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/pyproject.toml +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/__init__.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/__main__.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/const.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/src/mailparser/exceptions.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_malformed_1 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_malformed_2 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_malformed_3 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_outlook_1 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_1 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_10 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_11 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_12 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_13 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_14 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_15 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_16 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_17 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_18 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_2 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_3 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_4 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_5 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_6 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_7 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_8 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/mails/mail_test_9 +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/test_improved_received_patterns.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/test_main.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/tests/test_received_corpus.py +0 -0
- {mail_parser-4.2.0 → mail_parser-4.2.1}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mail-parser
|
|
3
|
-
Version: 4.2.0
|
|
3
|
+
Version: 4.2.1
|
|
4
4
|
Summary: A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object.
|
|
5
5
|
Author-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
6
6
|
Maintainer-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
@@ -18,7 +18,6 @@ limitations under the License.
|
|
|
18
18
|
|
|
19
19
|
import base64
|
|
20
20
|
import email
|
|
21
|
-
import email.utils
|
|
22
21
|
import ipaddress
|
|
23
22
|
import json
|
|
24
23
|
import logging
|
|
@@ -29,6 +28,7 @@ from mailparser.utils import (
|
|
|
29
28
|
convert_mail_date,
|
|
30
29
|
decode_header_part,
|
|
31
30
|
find_between,
|
|
31
|
+
get_addresses,
|
|
32
32
|
get_header,
|
|
33
33
|
get_mail_keys,
|
|
34
34
|
get_to_domains,
|
|
@@ -569,10 +569,17 @@ class MailParser:
|
|
|
569
569
|
# object headers
|
|
570
570
|
elif name_header in ADDRESSES_HEADERS:
|
|
571
571
|
raw_header = self.message.get(name_header, "") if self.message else ""
|
|
572
|
-
#
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
#
|
|
572
|
+
# Parse addresses. RFC 5322 §3.4 does not allow unquoted "@" in
|
|
573
|
+
# display names, so a strict parser correctly rejects headers like
|
|
574
|
+
# From: alice@example.com <bob@example.com>
|
|
575
|
+
# and returns ('', ''). mail-parser is a security/forensics tool,
|
|
576
|
+
# not an MTA: hiding addresses from analysts is worse than accepting
|
|
577
|
+
# non-conforming input. get_addresses() applies a regex fallback
|
|
578
|
+
# when strict parsing yields only empty results — see its docstring
|
|
579
|
+
# in utils.py for the full rationale.
|
|
580
|
+
parsed_addresses = get_addresses(raw_header)
|
|
581
|
+
|
|
582
|
+
# decoded addresses — skip entries with no address (absent header)
|
|
576
583
|
return [
|
|
577
584
|
(
|
|
578
585
|
(
|
|
@@ -583,6 +590,7 @@ class MailParser:
|
|
|
583
590
|
email_addr,
|
|
584
591
|
)
|
|
585
592
|
for name, email_addr in parsed_addresses
|
|
593
|
+
if email_addr
|
|
586
594
|
]
|
|
587
595
|
|
|
588
596
|
# others headers
|
|
@@ -19,6 +19,7 @@ limitations under the License.
|
|
|
19
19
|
import base64
|
|
20
20
|
import datetime
|
|
21
21
|
import email
|
|
22
|
+
import email.header
|
|
22
23
|
import email.utils
|
|
23
24
|
import functools
|
|
24
25
|
import hashlib
|
|
@@ -49,6 +50,88 @@ from mailparser.exceptions import MailParserOSError, MailParserReceivedParsingEr
|
|
|
49
50
|
|
|
50
51
|
log = logging.getLogger(__name__)
|
|
51
52
|
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# RFC 5322 address parsing — fallback for non-compliant display names
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# RFC 5322 §3.4 defines the display-name as a "phrase", which must not contain
|
|
57
|
+
# unquoted special characters such as "@". A header like
|
|
58
|
+
#
|
|
59
|
+
# From: alice@example.com <bob@example.com>
|
|
60
|
+
#
|
|
61
|
+
# is therefore *technically non-conforming*: the display name contains an
|
|
62
|
+
# unquoted "@". Python's ``email.utils.getaddresses`` with ``strict=True``
|
|
63
|
+
# (hardened against CVE-2023-27043) correctly rejects this and returns
|
|
64
|
+
# ``[('', '')]``, leaving the real address invisible.
|
|
65
|
+
#
|
|
66
|
+
# mail-parser is a security / forensics tool, not an MTA. Silently hiding an
|
|
67
|
+
# address because its display-name looks like an e-mail address defeats the
|
|
68
|
+
# purpose of the tool — analysts *need* to see those values. We therefore
|
|
69
|
+
# bypass strict compliance with a regex fallback whenever strict parsing yields
|
|
70
|
+
# an empty address, always surfacing the value that is actually in the header.
|
|
71
|
+
_ADDR_FALLBACK_RE = re.compile(
|
|
72
|
+
r'"([^"]*?)"\s*<([^>]+)>' # "Quoted Name" <email@addr>
|
|
73
|
+
r"|([^<,]*?)\s*<([^>]+)>" # Any Name <email@addr> (incl. email-as-name)
|
|
74
|
+
r"|([^\s,<>]+@[^\s,<>]+)" # bare email@addr
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_addresses(raw_header):
|
|
79
|
+
"""
|
|
80
|
+
Parse email addresses from a raw address header with a fallback for
|
|
81
|
+
RFC-non-compliant but real-world-common formats.
|
|
82
|
+
|
|
83
|
+
RFC 5322 §3.4 requires the display name (phrase) before an angle-bracket
|
|
84
|
+
address to consist only of printable ASCII characters that are *not*
|
|
85
|
+
special. The ``@`` character is special, so a header such as::
|
|
86
|
+
|
|
87
|
+
From: alice@example.com <bob@example.com>
|
|
88
|
+
|
|
89
|
+
is technically non-conforming because the display name contains an
|
|
90
|
+
unquoted ``@``. Python's ``email.utils.getaddresses`` with
|
|
91
|
+
``strict=True`` (hardened against CVE-2023-27043) correctly returns
|
|
92
|
+
``[('', '')]`` for this input, making the real sender invisible.
|
|
93
|
+
|
|
94
|
+
mail-parser is a *security / forensics* tool, not an MTA. Silently
|
|
95
|
+
discarding an address because its display name happens to look like an
|
|
96
|
+
e-mail address would hide relevant forensic information from analysts —
|
|
97
|
+
the very opposite of what the tool is for. We therefore bypass strict
|
|
98
|
+
RFC compliance by applying a regex-based fallback whenever the strict
|
|
99
|
+
parser yields only empty addresses, so that analysts always see the value
|
|
100
|
+
that was actually present in the header.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
raw_header (str): raw value of an address header
|
|
104
|
+
(e.g. ``From``, ``To``, ``CC`` …)
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
list[tuple[str, str]]: list of ``(display_name, email_addr)`` tuples.
|
|
108
|
+
``display_name`` is an empty string when absent.
|
|
109
|
+
"""
|
|
110
|
+
parsed = email.utils.getaddresses([raw_header], strict=True)
|
|
111
|
+
|
|
112
|
+
# If every result from the strict parser has an empty address — while the
|
|
113
|
+
# raw header is non-empty — fall back to regex extraction so that the
|
|
114
|
+
# actual address values are not silently lost.
|
|
115
|
+
if raw_header.strip() and all(not addr for _, addr in parsed):
|
|
116
|
+
results = []
|
|
117
|
+
for m in _ADDR_FALLBACK_RE.finditer(raw_header):
|
|
118
|
+
if m.group(2): # "Quoted Name" <email>
|
|
119
|
+
results.append((m.group(1).strip(), m.group(2).strip()))
|
|
120
|
+
elif m.group(4): # Any Name <email> (incl. email-as-display-name)
|
|
121
|
+
results.append((m.group(3).strip(), m.group(4).strip()))
|
|
122
|
+
elif m.group(5): # bare email
|
|
123
|
+
results.append(("", m.group(5).strip()))
|
|
124
|
+
if results:
|
|
125
|
+
log.debug(
|
|
126
|
+
"Strict address parsing yielded empty results for %r; "
|
|
127
|
+
"regex fallback recovered %d address(es)",
|
|
128
|
+
raw_header,
|
|
129
|
+
len(results),
|
|
130
|
+
)
|
|
131
|
+
return results
|
|
132
|
+
|
|
133
|
+
return parsed
|
|
134
|
+
|
|
52
135
|
|
|
53
136
|
def custom_log(level="WARNING", name=None): # pragma: no cover
|
|
54
137
|
"""
|
|
@@ -107,6 +190,9 @@ def ported_string(raw_data, encoding="utf-8", errors="ignore"):
|
|
|
107
190
|
if not raw_data:
|
|
108
191
|
return str()
|
|
109
192
|
|
|
193
|
+
if isinstance(raw_data, email.header.Header):
|
|
194
|
+
return str(raw_data)
|
|
195
|
+
|
|
110
196
|
if isinstance(raw_data, str):
|
|
111
197
|
return raw_data
|
|
112
198
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
From: alice@example.com <bob@example.com>
|
|
2
|
+
To: "Charlie Brown" <charlie@example.com>, dave@example.com
|
|
3
|
+
CC: eve@example.com <frank@example.com>
|
|
4
|
+
Reply-To: henry@example.com <ivan@example.com>
|
|
5
|
+
Subject: Test email with email address as display name
|
|
6
|
+
Message-ID: <test-email-as-name@example.com>
|
|
7
|
+
Date: Mon, 01 Jan 2024 12:00:00 +0000
|
|
8
|
+
MIME-Version: 1.0
|
|
9
|
+
Content-Type: text/plain; charset=utf-8
|
|
10
|
+
|
|
11
|
+
This email tests parsing of address headers where the display name is itself
|
|
12
|
+
an email address (RFC non-compliant but common in real-world mail).
|
|
@@ -29,6 +29,7 @@ import mailparser
|
|
|
29
29
|
from mailparser.utils import (
|
|
30
30
|
convert_mail_date,
|
|
31
31
|
fingerprints,
|
|
32
|
+
get_addresses,
|
|
32
33
|
get_header,
|
|
33
34
|
get_mail_keys,
|
|
34
35
|
get_to_domains,
|
|
@@ -62,6 +63,7 @@ mail_test_15 = os.path.join(base_path, "mails", "mail_test_15")
|
|
|
62
63
|
mail_test_16 = os.path.join(base_path, "mails", "mail_test_16")
|
|
63
64
|
mail_test_17 = os.path.join(base_path, "mails", "mail_test_17")
|
|
64
65
|
mail_test_18 = os.path.join(base_path, "mails", "mail_test_18")
|
|
66
|
+
mail_test_19 = os.path.join(base_path, "mails", "mail_test_19")
|
|
65
67
|
mail_malformed_1 = os.path.join(base_path, "mails", "mail_malformed_1")
|
|
66
68
|
mail_malformed_2 = os.path.join(base_path, "mails", "mail_malformed_2")
|
|
67
69
|
mail_malformed_3 = os.path.join(base_path, "mails", "mail_malformed_3")
|
|
@@ -1084,3 +1086,162 @@ This is plain text with 8bit encoding."""
|
|
|
1084
1086
|
mail = mailparser.parse_from_string(raw_mail)
|
|
1085
1087
|
# Should have parsed successfully and body contains the text
|
|
1086
1088
|
self.assertIn("hello", mail.body)
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
class TestEmailAsDisplayName(unittest.TestCase):
|
|
1092
|
+
"""
|
|
1093
|
+
Tests for address parsing when the display name is itself an email address.
|
|
1094
|
+
|
|
1095
|
+
RFC 5322 §3.4 forbids unquoted "@" in the display-name phrase, so a header
|
|
1096
|
+
like ``From: alice@example.com <bob@example.com>`` is technically
|
|
1097
|
+
non-conforming. Python's strict parser (CVE-2023-27043 hardening) returns
|
|
1098
|
+
``[('', '')]`` for such input, which would silently hide the real sender.
|
|
1099
|
+
|
|
1100
|
+
mail-parser is a security/forensics tool: it intentionally bypasses this
|
|
1101
|
+
strict compliance and applies a regex fallback so that analysts always see
|
|
1102
|
+
the address values that are actually present in the header.
|
|
1103
|
+
"""
|
|
1104
|
+
|
|
1105
|
+
def test_from_email_as_display_name(self):
|
|
1106
|
+
"""From header with an email address as display name is parsed correctly."""
|
|
1107
|
+
mail = mailparser.parse_from_file(mail_test_19)
|
|
1108
|
+
result = mail.from_
|
|
1109
|
+
self.assertIsInstance(result, list)
|
|
1110
|
+
self.assertEqual(len(result), 1)
|
|
1111
|
+
name, addr = result[0]
|
|
1112
|
+
self.assertEqual(addr, "bob@example.com")
|
|
1113
|
+
self.assertEqual(name, "alice@example.com")
|
|
1114
|
+
|
|
1115
|
+
def test_cc_email_as_display_name(self):
|
|
1116
|
+
"""CC header with an email address as display name is parsed correctly."""
|
|
1117
|
+
mail = mailparser.parse_from_file(mail_test_19)
|
|
1118
|
+
result = mail.cc
|
|
1119
|
+
self.assertIsInstance(result, list)
|
|
1120
|
+
self.assertEqual(len(result), 1)
|
|
1121
|
+
name, addr = result[0]
|
|
1122
|
+
self.assertEqual(addr, "frank@example.com")
|
|
1123
|
+
self.assertEqual(name, "eve@example.com")
|
|
1124
|
+
|
|
1125
|
+
def test_reply_to_email_as_display_name(self):
|
|
1126
|
+
"""Reply-To header with an email address as display name is parsed correctly."""
|
|
1127
|
+
mail = mailparser.parse_from_file(mail_test_19)
|
|
1128
|
+
result = mail.reply_to
|
|
1129
|
+
self.assertIsInstance(result, list)
|
|
1130
|
+
self.assertEqual(len(result), 1)
|
|
1131
|
+
name, addr = result[0]
|
|
1132
|
+
self.assertEqual(addr, "ivan@example.com")
|
|
1133
|
+
self.assertEqual(name, "henry@example.com")
|
|
1134
|
+
|
|
1135
|
+
def test_to_mixed_addresses(self):
|
|
1136
|
+
"""To header with a mix of quoted name and bare address is parsed correctly."""
|
|
1137
|
+
mail = mailparser.parse_from_file(mail_test_19)
|
|
1138
|
+
result = mail.to
|
|
1139
|
+
self.assertIsInstance(result, list)
|
|
1140
|
+
self.assertEqual(len(result), 2)
|
|
1141
|
+
# "Charlie Brown" <charlie@example.com>
|
|
1142
|
+
name0, addr0 = result[0]
|
|
1143
|
+
self.assertEqual(addr0, "charlie@example.com")
|
|
1144
|
+
self.assertEqual(name0, "Charlie Brown")
|
|
1145
|
+
# dave@example.com (bare address, no display name)
|
|
1146
|
+
name1, addr1 = result[1]
|
|
1147
|
+
self.assertEqual(addr1, "dave@example.com")
|
|
1148
|
+
self.assertEqual(name1, "")
|
|
1149
|
+
|
|
1150
|
+
# ------------------------------------------------------------------
|
|
1151
|
+
# Edge-case tests via parse_from_string (no additional mail files needed)
|
|
1152
|
+
# ------------------------------------------------------------------
|
|
1153
|
+
|
|
1154
|
+
def test_same_email_as_name_and_address_suppresses_name(self):
|
|
1155
|
+
"""When display name == address, name is suppressed to empty string.
|
|
1156
|
+
|
|
1157
|
+
This covers the case ``From: bob@example.com <bob@example.com>`` which
|
|
1158
|
+
is both RFC non-compliant (unquoted @) AND redundant. After the regex
|
|
1159
|
+
fallback recovers the address, the existing name-suppression logic
|
|
1160
|
+
(decoded_name == email_addr → "") must still fire correctly.
|
|
1161
|
+
"""
|
|
1162
|
+
mail = mailparser.parse_from_string(
|
|
1163
|
+
"From: bob@example.com <bob@example.com>\nSubject: x\n\nBody"
|
|
1164
|
+
)
|
|
1165
|
+
result = mail.from_
|
|
1166
|
+
self.assertEqual(len(result), 1)
|
|
1167
|
+
name, addr = result[0]
|
|
1168
|
+
self.assertEqual(addr, "bob@example.com")
|
|
1169
|
+
self.assertEqual(name, "")
|
|
1170
|
+
|
|
1171
|
+
def test_quoted_email_as_display_name(self):
|
|
1172
|
+
"""Properly quoted email-as-name (RFC-compliant) is parsed by strict parser."""
|
|
1173
|
+
mail = mailparser.parse_from_string(
|
|
1174
|
+
'From: "alice@example.com" <bob@example.com>\nSubject: x\n\nBody'
|
|
1175
|
+
)
|
|
1176
|
+
result = mail.from_
|
|
1177
|
+
self.assertEqual(len(result), 1)
|
|
1178
|
+
name, addr = result[0]
|
|
1179
|
+
self.assertEqual(addr, "bob@example.com")
|
|
1180
|
+
self.assertEqual(name, "alice@example.com")
|
|
1181
|
+
|
|
1182
|
+
def test_standard_display_name_unchanged(self):
|
|
1183
|
+
"""Standard ``Name <email>`` format still works correctly (no regression)."""
|
|
1184
|
+
mail = mailparser.parse_from_string(
|
|
1185
|
+
"From: Alice Smith <alice@example.com>\nSubject: x\n\nBody"
|
|
1186
|
+
)
|
|
1187
|
+
result = mail.from_
|
|
1188
|
+
self.assertEqual(len(result), 1)
|
|
1189
|
+
name, addr = result[0]
|
|
1190
|
+
self.assertEqual(addr, "alice@example.com")
|
|
1191
|
+
self.assertEqual(name, "Alice Smith")
|
|
1192
|
+
|
|
1193
|
+
def test_bare_address_no_display_name(self):
|
|
1194
|
+
"""Bare address with no display name returns empty name (no regression)."""
|
|
1195
|
+
mail = mailparser.parse_from_string(
|
|
1196
|
+
"From: alice@example.com\nSubject: x\n\nBody"
|
|
1197
|
+
)
|
|
1198
|
+
result = mail.from_
|
|
1199
|
+
self.assertEqual(len(result), 1)
|
|
1200
|
+
name, addr = result[0]
|
|
1201
|
+
self.assertEqual(addr, "alice@example.com")
|
|
1202
|
+
self.assertEqual(name, "")
|
|
1203
|
+
|
|
1204
|
+
def test_empty_header_returns_empty_list(self):
|
|
1205
|
+
"""A missing address header returns [] — absent headers must not appear."""
|
|
1206
|
+
mail = mailparser.parse_from_string("Subject: x\n\nBody")
|
|
1207
|
+
# Python's getaddresses("") yields [('', '')], but we filter out entries
|
|
1208
|
+
# with an empty address so that absent headers are not included in the
|
|
1209
|
+
# parsed mail object.
|
|
1210
|
+
self.assertEqual(mail.from_, [])
|
|
1211
|
+
|
|
1212
|
+
# ------------------------------------------------------------------
|
|
1213
|
+
# Unit tests for get_addresses() helper directly
|
|
1214
|
+
# ------------------------------------------------------------------
|
|
1215
|
+
|
|
1216
|
+
def test_get_addresses_email_as_name(self):
|
|
1217
|
+
"""get_addresses() fallback recovers address when display name is an email."""
|
|
1218
|
+
result = get_addresses("alice@example.com <bob@example.com>")
|
|
1219
|
+
self.assertEqual(result, [("alice@example.com", "bob@example.com")])
|
|
1220
|
+
|
|
1221
|
+
def test_get_addresses_standard_format(self):
|
|
1222
|
+
"""get_addresses() strict path handles normal ``Name <email>`` correctly."""
|
|
1223
|
+
result = get_addresses("Alice Smith <alice@example.com>")
|
|
1224
|
+
self.assertEqual(result, [("Alice Smith", "alice@example.com")])
|
|
1225
|
+
|
|
1226
|
+
def test_get_addresses_bare_email(self):
|
|
1227
|
+
"""get_addresses() handles bare email address with no display name."""
|
|
1228
|
+
result = get_addresses("alice@example.com")
|
|
1229
|
+
self.assertEqual(result, [("", "alice@example.com")])
|
|
1230
|
+
|
|
1231
|
+
def test_get_addresses_empty_header(self):
|
|
1232
|
+
"""get_addresses() on empty string returns [('', '')] — raw Python lib result.
|
|
1233
|
+
|
|
1234
|
+
The ('', '') entry is filtered out in __getattr__ (core.py) so that
|
|
1235
|
+
absent headers do not appear in the parsed mail output.
|
|
1236
|
+
"""
|
|
1237
|
+
result = get_addresses("")
|
|
1238
|
+
self.assertEqual(result, [("", "")])
|
|
1239
|
+
|
|
1240
|
+
def test_get_addresses_multiple_with_email_as_name(self):
|
|
1241
|
+
"""get_addresses() fallback handles multiple addresses when all fail strict."""
|
|
1242
|
+
result = get_addresses(
|
|
1243
|
+
"alice@example.com <bob@example.com>, eve@example.com <frank@example.com>"
|
|
1244
|
+
)
|
|
1245
|
+
self.assertEqual(len(result), 2)
|
|
1246
|
+
self.assertEqual(result[0], ("alice@example.com", "bob@example.com"))
|
|
1247
|
+
self.assertEqual(result[1], ("eve@example.com", "frank@example.com"))
|
|
@@ -587,6 +587,19 @@ class TestUtilsEdgeCases(unittest.TestCase):
|
|
|
587
587
|
# But should have a valid date itself
|
|
588
588
|
self.assertIsNotNone(result[1].get("date_utc"))
|
|
589
589
|
|
|
590
|
+
def test_ported_string_handles_header_object(self):
|
|
591
|
+
"""
|
|
592
|
+
Test that ported_string can accept an email.header.Header object
|
|
593
|
+
and return a decoded string without crashing.
|
|
594
|
+
"""
|
|
595
|
+
from email.header import Header
|
|
596
|
+
|
|
597
|
+
raw_val = 'attachment;\r\nfilename="Just a text – 2026.pdf'
|
|
598
|
+
header_obj = Header(raw_val, charset="utf-8")
|
|
599
|
+
result = ported_string(header_obj)
|
|
600
|
+
self.assertIsInstance(result, str)
|
|
601
|
+
self.assertEqual(result, raw_val)
|
|
602
|
+
|
|
590
603
|
def test_parse_received_envelope_from_with_angle_brackets(self):
|
|
591
604
|
"""Test utils.py:294-296 — envelope-from clause with angle-bracket match"""
|
|
592
605
|
# When envelope-from keyword is present AND its value has angle
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|