mail-parser 4.3.0__tar.gz → 4.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/workflows/main.yml +2 -1
- {mail_parser-4.3.0 → mail_parser-4.4.0}/CLAUDE.md +0 -6
- mail_parser-4.3.0/README.md → mail_parser-4.4.0/PKG-INFO +61 -10
- mail_parser-4.3.0/PKG-INFO → mail_parser-4.4.0/README.md +34 -35
- {mail_parser-4.3.0 → mail_parser-4.4.0}/pyproject.toml +3 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/core.py +21 -1
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/utils.py +71 -5
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/version.py +1 -1
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_mail_parser.py +92 -1
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_main.py +11 -4
- mail_parser-4.4.0/uv.lock +2529 -0
- mail_parser-4.3.0/uv.lock +0 -1322
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/FUNDING.yml +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/copilot-instructions.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/containerization-docker-best-practices.instructions.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/github-actions-ci-cd-best-practices.instructions.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/markdown.instructions.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/python.instructions.md +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.gitignore +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.markdownlint.json +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/.pre-commit-config.yaml +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/Dockerfile +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/LICENSE.txt +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/Makefile +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/NOTICE.txt +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/docker-compose.yml +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/docs/images/Bitcoin SpamScope.jpg +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/__init__.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/__main__.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/const.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/exceptions.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_1 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_2 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_3 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_outlook_1 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_1 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_10 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_11 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_12 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_13 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_14 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_15 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_16 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_17 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_18 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_19 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_2 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_3 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_4 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_5 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_6 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_7 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_8 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_9 +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_improved_received_patterns.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_received_corpus.py +0 -0
- {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_utils.py +0 -0
|
@@ -29,7 +29,7 @@ jobs:
|
|
|
29
29
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
30
30
|
sudo apt-get -qq update
|
|
31
31
|
sudo apt-get install -y libemail-outlook-message-perl
|
|
32
|
-
uv sync
|
|
32
|
+
uv sync --all-extras
|
|
33
33
|
export PERL_MM_USE_DEFAULT=1
|
|
34
34
|
sudo cpan -f -i Email::Outlook::Message
|
|
35
35
|
|
|
@@ -41,6 +41,7 @@ jobs:
|
|
|
41
41
|
uv run mail-parser -v
|
|
42
42
|
uv run mail-parser -h
|
|
43
43
|
uv run mail-parser -f tests/mails/mail_malformed_3 -j
|
|
44
|
+
uv run mail-parser -f tests/mails/mail_outlook_1 -o -j
|
|
44
45
|
cat tests/mails/mail_malformed_3 | uv run mail-parser -k -j
|
|
45
46
|
|
|
46
47
|
- name: Run pre-commit
|
|
@@ -95,9 +95,3 @@ After every change:
|
|
|
95
95
|
1. Update README.md if the change affects usage, API, or setup.
|
|
96
96
|
1. Stage changes and run pre-commit; fix all reported issues before proceeding.
|
|
97
97
|
1. Run full test suite; fix all failures before reporting done.
|
|
98
|
-
|
|
99
|
-
### Test fixtures
|
|
100
|
-
|
|
101
|
-
Raw email files in `tests/mails/` are the fixtures. `mail_malformed_*` files exercise defect
|
|
102
|
-
detection; `mail_outlook_*` require `msgconvert` installed. Tests that need the Outlook tool should
|
|
103
|
-
be marked `@pytest.mark.integration`.
|
|
@@ -1,3 +1,30 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mail-parser
|
|
3
|
+
Version: 4.4.0
|
|
4
|
+
Summary: A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object.
|
|
5
|
+
Author-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
6
|
+
Maintainer-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
License-File: LICENSE.txt
|
|
9
|
+
License-File: NOTICE.txt
|
|
10
|
+
Keywords: email,forensics,mail,malware,parser,phishing,security,spam,threat detection
|
|
11
|
+
Classifier: Natural Language :: English
|
|
12
|
+
Classifier: Operating System :: MacOS
|
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
+
Classifier: Operating System :: Unix
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Requires-Python: <3.15,>=3.9
|
|
24
|
+
Provides-Extra: outlook
|
|
25
|
+
Requires-Dist: extract-msg>=0.54; extra == 'outlook'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
1
28
|
[](https://pypi.org/project/mail-parser/)
|
|
2
29
|
[](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
|
|
3
30
|
[](https://pypistats.org/packages/mail-parser)
|
|
@@ -36,20 +63,44 @@ formats, making it versatile for diverse email ecosystems.
|
|
|
36
63
|
**⚡ Production-Ready**: Trusted by security professionals and developers worldwide, with extensive
|
|
37
64
|
test coverage and proven reliability in high-stakes environments.
|
|
38
65
|
|
|
39
|
-
|
|
40
|
-
this functionality on Debian-based systems, simply install the required system package:
|
|
66
|
+
mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
|
|
41
67
|
|
|
42
|
-
|
|
43
|
-
apt-get install libemail-outlook-message-perl
|
|
44
|
-
```
|
|
68
|
+
## Parsing Outlook `.msg` files
|
|
45
69
|
|
|
46
|
-
|
|
70
|
+
mail-parser converts Outlook `.msg` files to standard `.eml` before parsing.
|
|
71
|
+
Two conversion backends are supported:
|
|
47
72
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
```
|
|
73
|
+
1. **`extract-msg` (recommended, pure Python).** No external tools required.
|
|
74
|
+
Install the optional extra:
|
|
51
75
|
|
|
52
|
-
|
|
76
|
+
```bash
|
|
77
|
+
pip install mail-parser[outlook]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
1. **`msgconvert` (deprecated, external Perl tool).** Requires the
|
|
81
|
+
`libemail-outlook-message-perl` system package:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
apt-get install libemail-outlook-message-perl # Debian-based systems
|
|
85
|
+
apt-cache show libemail-outlook-message-perl # package details
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**Backend precedence:** when `extract-msg` is installed it is used first.
|
|
89
|
+
Only when it is *not* available does mail-parser fall back to the `msgconvert`
|
|
90
|
+
external tool, logging a deprecation warning. If neither backend is available,
|
|
91
|
+
`parse_from_file_msg()` raises `MailParserOSError` telling you to install
|
|
92
|
+
one of the two backends.
|
|
93
|
+
|
|
94
|
+
> **⚠️ Deprecated:** the `msgconvert` external-tool backend is deprecated and
|
|
95
|
+
> will be removed in a future release. Migrate to the pure-Python backend with
|
|
96
|
+
> `pip install mail-parser[outlook]`.
|
|
97
|
+
|
|
98
|
+
**💥 BREAKING CHANGE:** the default `.msg` conversion backend changed.
|
|
99
|
+
When `extract-msg` is installed it is now preferred over `msgconvert`. The two
|
|
100
|
+
converters produce different intermediate `.eml` output, so some parsed fields
|
|
101
|
+
(header ordering, encoding edge cases, attachment naming) can differ from the
|
|
102
|
+
previous `msgconvert`-only behavior. Downstream code asserting on exact
|
|
103
|
+
`.msg`-derived output may need updating.
|
|
53
104
|
|
|
54
105
|
# Apache 2 Open Source License
|
|
55
106
|
|
|
@@ -1,28 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: mail-parser
|
|
3
|
-
Version: 4.3.0
|
|
4
|
-
Summary: A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object.
|
|
5
|
-
Author-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
6
|
-
Maintainer-email: Fedele Mantuano <mantuano.fedele@gmail.com>
|
|
7
|
-
License-Expression: Apache-2.0
|
|
8
|
-
License-File: LICENSE.txt
|
|
9
|
-
License-File: NOTICE.txt
|
|
10
|
-
Keywords: email,forensics,mail,malware,parser,phishing,security,spam,threat detection
|
|
11
|
-
Classifier: Natural Language :: English
|
|
12
|
-
Classifier: Operating System :: MacOS
|
|
13
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
14
|
-
Classifier: Operating System :: Unix
|
|
15
|
-
Classifier: Programming Language :: Python
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
-
Requires-Python: <3.15,>=3.9
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
|
|
26
1
|
[](https://pypi.org/project/mail-parser/)
|
|
27
2
|
[](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
|
|
28
3
|
[](https://pypistats.org/packages/mail-parser)
|
|
@@ -61,20 +36,44 @@ formats, making it versatile for diverse email ecosystems.
|
|
|
61
36
|
**⚡ Production-Ready**: Trusted by security professionals and developers worldwide, with extensive
|
|
62
37
|
test coverage and proven reliability in high-stakes environments.
|
|
63
38
|
|
|
64
|
-
|
|
65
|
-
this functionality on Debian-based systems, simply install the required system package:
|
|
39
|
+
mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
|
|
66
40
|
|
|
67
|
-
|
|
68
|
-
apt-get install libemail-outlook-message-perl
|
|
69
|
-
```
|
|
41
|
+
## Parsing Outlook `.msg` files
|
|
70
42
|
|
|
71
|
-
|
|
43
|
+
mail-parser converts Outlook `.msg` files to standard `.eml` before parsing.
|
|
44
|
+
Two conversion backends are supported:
|
|
72
45
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
```
|
|
46
|
+
1. **`extract-msg` (recommended, pure Python).** No external tools required.
|
|
47
|
+
Install the optional extra:
|
|
76
48
|
|
|
77
|
-
|
|
49
|
+
```bash
|
|
50
|
+
pip install mail-parser[outlook]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
1. **`msgconvert` (deprecated, external Perl tool).** Requires the
|
|
54
|
+
`libemail-outlook-message-perl` system package:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
apt-get install libemail-outlook-message-perl # Debian-based systems
|
|
58
|
+
apt-cache show libemail-outlook-message-perl # package details
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Backend precedence:** when `extract-msg` is installed it is used first.
|
|
62
|
+
Only when it is *not* available does mail-parser fall back to the `msgconvert`
|
|
63
|
+
external tool, logging a deprecation warning. If neither backend is available,
|
|
64
|
+
`parse_from_file_msg()` raises `MailParserOSError` telling you to install
|
|
65
|
+
one of the two backends.
|
|
66
|
+
|
|
67
|
+
> **⚠️ Deprecated:** the `msgconvert` external-tool backend is deprecated and
|
|
68
|
+
> will be removed in a future release. Migrate to the pure-Python backend with
|
|
69
|
+
> `pip install mail-parser[outlook]`.
|
|
70
|
+
|
|
71
|
+
**💥 BREAKING CHANGE:** the default `.msg` conversion backend changed.
|
|
72
|
+
When `extract-msg` is installed it is now preferred over `msgconvert`. The two
|
|
73
|
+
converters produce different intermediate `.eml` output, so some parsed fields
|
|
74
|
+
(header ordering, encoding edge cases, attachment naming) can differ from the
|
|
75
|
+
previous `msgconvert`-only behavior. Downstream code asserting on exact
|
|
76
|
+
`.msg`-derived output may need updating.
|
|
78
77
|
|
|
79
78
|
# Apache 2 Open Source License
|
|
80
79
|
|
|
@@ -18,6 +18,7 @@ limitations under the License.
|
|
|
18
18
|
|
|
19
19
|
import base64
|
|
20
20
|
import email
|
|
21
|
+
import importlib.util
|
|
21
22
|
import ipaddress
|
|
22
23
|
import json
|
|
23
24
|
import logging
|
|
@@ -27,6 +28,7 @@ from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP, REGXIP
|
|
|
27
28
|
from mailparser.utils import (
|
|
28
29
|
convert_mail_date,
|
|
29
30
|
decode_header_part,
|
|
31
|
+
extract_msg_convert,
|
|
30
32
|
find_between,
|
|
31
33
|
get_addresses,
|
|
32
34
|
get_header,
|
|
@@ -186,14 +188,32 @@ class MailParser:
|
|
|
186
188
|
Init a new object from an Outlook message file,
|
|
187
189
|
mime type: application/vnd.ms-outlook
|
|
188
190
|
|
|
191
|
+
Conversion backend precedence:
|
|
192
|
+
1. ``extract-msg`` (pure Python, optional ``outlook`` extra).
|
|
193
|
+
2. ``msgconvert`` external Perl tool — **deprecated** fallback,
|
|
194
|
+
used only when ``extract-msg`` is not installed.
|
|
195
|
+
|
|
189
196
|
Args:
|
|
190
197
|
fp (string): file path of raw Outlook email
|
|
191
198
|
|
|
192
199
|
Returns:
|
|
193
200
|
Instance of MailParser
|
|
201
|
+
|
|
202
|
+
Raises:
|
|
203
|
+
MailParserOSError: if no conversion backend is available
|
|
194
204
|
"""
|
|
195
205
|
log.debug("Parsing email from file Outlook")
|
|
196
|
-
|
|
206
|
+
|
|
207
|
+
if importlib.util.find_spec("extract_msg") is not None:
|
|
208
|
+
f, _ = extract_msg_convert(fp)
|
|
209
|
+
else:
|
|
210
|
+
log.warning(
|
|
211
|
+
"msgconvert backend is deprecated and will be removed "
|
|
212
|
+
"in a future release. Install the pure-Python Outlook "
|
|
213
|
+
"support with 'pip install mail-parser[outlook]'."
|
|
214
|
+
)
|
|
215
|
+
f, _ = msgconvert(fp)
|
|
216
|
+
|
|
197
217
|
return cls.from_file(f, True)
|
|
198
218
|
|
|
199
219
|
@classmethod
|
|
@@ -318,6 +318,66 @@ def fingerprints(data):
|
|
|
318
318
|
return hashes(md5, sha1, sha256, sha512)
|
|
319
319
|
|
|
320
320
|
|
|
321
|
+
def _new_outlook_tempfile():
|
|
322
|
+
"""
|
|
323
|
+
Create an empty temporary file to hold a converted Outlook email.
|
|
324
|
+
|
|
325
|
+
The OS-level file handle is closed immediately; callers write to the
|
|
326
|
+
returned path with their own handle (a subprocess ``--outfile`` for
|
|
327
|
+
``msgconvert`` or a plain ``open`` for the pure-Python backend).
|
|
328
|
+
|
|
329
|
+
Returns:
|
|
330
|
+
str: path of the new temporary ``.eml`` file
|
|
331
|
+
"""
|
|
332
|
+
handle, path = tempfile.mkstemp(prefix="outlook_")
|
|
333
|
+
os.close(handle)
|
|
334
|
+
return path
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def extract_msg_convert(fp):
|
|
338
|
+
"""
|
|
339
|
+
Convert an Outlook ``.msg`` file to ``.eml`` using the pure-Python
|
|
340
|
+
``extract-msg`` library (no external Perl tool required).
|
|
341
|
+
|
|
342
|
+
The ``extract_msg`` import is performed lazily inside this function so
|
|
343
|
+
that the package keeps importing with zero runtime dependencies when
|
|
344
|
+
the optional ``outlook`` extra is not installed.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
fp (string): file path of the Outlook ``.msg`` mail
|
|
348
|
+
|
|
349
|
+
Returns:
|
|
350
|
+
tuple: ``(eml_path, info)`` where ``eml_path`` is the path of the
|
|
351
|
+
converted ``.eml`` file and ``info`` is a short descriptive string
|
|
352
|
+
|
|
353
|
+
Raises:
|
|
354
|
+
ImportError: if the ``extract-msg`` library is not installed
|
|
355
|
+
MailParserOSError: if the ``.msg`` is not a convertible email
|
|
356
|
+
message (e.g. a contact or calendar item)
|
|
357
|
+
"""
|
|
358
|
+
import extract_msg # lazy: keep package import stdlib-only
|
|
359
|
+
|
|
360
|
+
log.debug("Started converting Outlook email with extract-msg")
|
|
361
|
+
msg = extract_msg.openMsg(fp)
|
|
362
|
+
try:
|
|
363
|
+
# openMsg() may return a non-email MSGFile (contact, calendar,
|
|
364
|
+
# task...) which cannot be rendered as an email message.
|
|
365
|
+
as_email_message = getattr(msg, "asEmailMessage", None)
|
|
366
|
+
if as_email_message is None:
|
|
367
|
+
raise MailParserOSError(
|
|
368
|
+
f"Outlook file {fp!r} is not a convertible email "
|
|
369
|
+
f"message (type {type(msg).__name__})"
|
|
370
|
+
)
|
|
371
|
+
eml = as_email_message()
|
|
372
|
+
info = f"{eml.get('From', '')} | {eml.get('Subject', '')}".strip()
|
|
373
|
+
temp = _new_outlook_tempfile()
|
|
374
|
+
with open(temp, "wb") as f:
|
|
375
|
+
f.write(eml.as_bytes())
|
|
376
|
+
return temp, info
|
|
377
|
+
finally:
|
|
378
|
+
msg.close()
|
|
379
|
+
|
|
380
|
+
|
|
321
381
|
def msgconvert(email):
|
|
322
382
|
"""
|
|
323
383
|
Exec msgconvert tool, to convert msg Outlook
|
|
@@ -329,9 +389,12 @@ def msgconvert(email):
|
|
|
329
389
|
Returns:
|
|
330
390
|
tuple with file path of mail converted and
|
|
331
391
|
standard output data (str)
|
|
392
|
+
|
|
393
|
+
Raises:
|
|
394
|
+
MailParserOSError: if the ``msgconvert`` tool is not installed
|
|
332
395
|
"""
|
|
333
396
|
log.debug("Started converting Outlook email")
|
|
334
|
-
|
|
397
|
+
temp = _new_outlook_tempfile()
|
|
335
398
|
command = ["msgconvert", "--outfile", temp, email]
|
|
336
399
|
|
|
337
400
|
try:
|
|
@@ -343,7 +406,13 @@ def msgconvert(email):
|
|
|
343
406
|
)
|
|
344
407
|
|
|
345
408
|
except OSError as e:
|
|
346
|
-
message =
|
|
409
|
+
message = (
|
|
410
|
+
"Cannot convert Outlook .msg: no conversion backend "
|
|
411
|
+
"available. Install pure-Python support with "
|
|
412
|
+
"'pip install mail-parser[outlook]', or install the "
|
|
413
|
+
"'msgconvert' Perl tool "
|
|
414
|
+
f"(libemail-outlook-message-perl). {e!r}"
|
|
415
|
+
)
|
|
347
416
|
log.exception(message)
|
|
348
417
|
raise MailParserOSError(message)
|
|
349
418
|
|
|
@@ -351,9 +420,6 @@ def msgconvert(email):
|
|
|
351
420
|
stdoutdata, _ = out.communicate()
|
|
352
421
|
return temp, stdoutdata.decode("utf-8").strip()
|
|
353
422
|
|
|
354
|
-
finally:
|
|
355
|
-
os.close(temph)
|
|
356
|
-
|
|
357
423
|
|
|
358
424
|
def parse_received(received):
|
|
359
425
|
"""
|
|
@@ -18,6 +18,7 @@ limitations under the License.
|
|
|
18
18
|
|
|
19
19
|
import datetime
|
|
20
20
|
import hashlib
|
|
21
|
+
import logging
|
|
21
22
|
import os
|
|
22
23
|
import shutil
|
|
23
24
|
import sys
|
|
@@ -25,9 +26,13 @@ import tempfile
|
|
|
25
26
|
import unittest
|
|
26
27
|
from unittest.mock import patch
|
|
27
28
|
|
|
29
|
+
import pytest
|
|
30
|
+
|
|
28
31
|
import mailparser
|
|
32
|
+
from mailparser.exceptions import MailParserOSError
|
|
29
33
|
from mailparser.utils import (
|
|
30
34
|
convert_mail_date,
|
|
35
|
+
extract_msg_convert,
|
|
31
36
|
fingerprints,
|
|
32
37
|
get_addresses,
|
|
33
38
|
get_header,
|
|
@@ -433,9 +438,10 @@ class TestMailParser(unittest.TestCase):
|
|
|
433
438
|
self.assertIsInstance(m.mail, dict)
|
|
434
439
|
self.assertIsInstance(m.mail_json, str)
|
|
435
440
|
|
|
441
|
+
@patch("mailparser.core.importlib.util.find_spec", return_value=None)
|
|
436
442
|
@patch("mailparser.core.os.remove")
|
|
437
443
|
@patch("mailparser.core.msgconvert")
|
|
438
|
-
def test_parse_from_file_msg(self, mock_msgconvert, mock_remove):
|
|
444
|
+
def test_parse_from_file_msg(self, mock_msgconvert, mock_remove, mock_find_spec):
|
|
439
445
|
"""
|
|
440
446
|
Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0
|
|
441
447
|
|
|
@@ -1301,3 +1307,88 @@ class TestEmailAsDisplayName(unittest.TestCase):
|
|
|
1301
1307
|
self.assertEqual(len(result), 2)
|
|
1302
1308
|
self.assertEqual(result[0], ("alice@example.com", "bob@example.com"))
|
|
1303
1309
|
self.assertEqual(result[1], ("eve@example.com", "frank@example.com"))
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
# ---------------------------------------------------------------------------
|
|
1313
|
+
# Outlook .msg conversion backends (extract-msg vs deprecated msgconvert)
|
|
1314
|
+
# ---------------------------------------------------------------------------
|
|
1315
|
+
|
|
1316
|
+
|
|
1317
|
+
def test_from_file_msg_prefers_extract_msg(mocker):
|
|
1318
|
+
"""extract-msg is preferred and msgconvert is NOT called when available."""
|
|
1319
|
+
mocker.patch("importlib.util.find_spec", return_value=object())
|
|
1320
|
+
extract = mocker.patch(
|
|
1321
|
+
"mailparser.core.extract_msg_convert",
|
|
1322
|
+
return_value=(mail_test_2, "info"),
|
|
1323
|
+
)
|
|
1324
|
+
msgconv = mocker.patch("mailparser.core.msgconvert")
|
|
1325
|
+
remove = mocker.patch("mailparser.core.os.remove")
|
|
1326
|
+
|
|
1327
|
+
mailparser.parse_from_file_msg(mail_outlook_1)
|
|
1328
|
+
|
|
1329
|
+
extract.assert_called_once_with(mail_outlook_1)
|
|
1330
|
+
msgconv.assert_not_called()
|
|
1331
|
+
remove.assert_called_once_with(mail_test_2)
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
def test_from_file_msg_fallback_warns(mocker, caplog):
|
|
1335
|
+
"""When extract-msg is absent, msgconvert runs and a deprecation warns."""
|
|
1336
|
+
mocker.patch("importlib.util.find_spec", return_value=None)
|
|
1337
|
+
msgconv = mocker.patch(
|
|
1338
|
+
"mailparser.core.msgconvert",
|
|
1339
|
+
return_value=(mail_test_2, None),
|
|
1340
|
+
)
|
|
1341
|
+
mocker.patch("mailparser.core.os.remove")
|
|
1342
|
+
|
|
1343
|
+
with caplog.at_level(logging.WARNING, logger="mailparser.core"):
|
|
1344
|
+
mailparser.parse_from_file_msg(mail_outlook_1)
|
|
1345
|
+
|
|
1346
|
+
msgconv.assert_called_once_with(mail_outlook_1)
|
|
1347
|
+
messages = [r.message for r in caplog.records]
|
|
1348
|
+
assert any("deprecated" in m for m in messages)
|
|
1349
|
+
assert any("mail-parser[outlook]" in m for m in messages)
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
def test_from_file_msg_no_backend_raises(mocker):
|
|
1353
|
+
"""No backend at all → MailParserOSError mentioning both install paths."""
|
|
1354
|
+
mocker.patch("importlib.util.find_spec", return_value=None)
|
|
1355
|
+
mocker.patch(
|
|
1356
|
+
"mailparser.utils.subprocess.Popen",
|
|
1357
|
+
side_effect=OSError("no msgconvert"),
|
|
1358
|
+
)
|
|
1359
|
+
|
|
1360
|
+
with pytest.raises(MailParserOSError) as exc:
|
|
1361
|
+
mailparser.parse_from_file_msg(mail_outlook_1)
|
|
1362
|
+
|
|
1363
|
+
assert "mail-parser[outlook]" in str(exc.value)
|
|
1364
|
+
assert "msgconvert" in str(exc.value)
|
|
1365
|
+
|
|
1366
|
+
|
|
1367
|
+
@pytest.mark.integration
|
|
1368
|
+
def test_outlook_backend_parity():
|
|
1369
|
+
"""mail_outlook_1 parses to the same result under both backends.
|
|
1370
|
+
|
|
1371
|
+
Requires both the optional ``extract-msg`` dependency and the
|
|
1372
|
+
``msgconvert`` Perl tool; skips otherwise. The two converters do not
|
|
1373
|
+
emit byte-identical ``.eml`` files, so only the meaningful parsed
|
|
1374
|
+
result is compared (key headers, attachment names/count). The raw
|
|
1375
|
+
body is intentionally not compared: msgconvert and extract-msg differ
|
|
1376
|
+
in line endings, MIME structure and RTF/HTML reconstruction.
|
|
1377
|
+
"""
|
|
1378
|
+
|
|
1379
|
+
# Force each backend explicitly via its util. from_file(..., True)
|
|
1380
|
+
# removes the temporary converted .eml after parsing.
|
|
1381
|
+
f_extract, _ = extract_msg_convert(mail_outlook_1)
|
|
1382
|
+
parsed_extract = mailparser.MailParser.from_file(f_extract, True)
|
|
1383
|
+
|
|
1384
|
+
# Parsing from the original .msg Outlook file
|
|
1385
|
+
parsed_msgconv = mailparser.MailParser.from_file_msg(mail_outlook_1)
|
|
1386
|
+
|
|
1387
|
+
for key in ("from", "to", "subject"):
|
|
1388
|
+
assert parsed_extract.mail.get(key) == parsed_msgconv.mail.get(key)
|
|
1389
|
+
|
|
1390
|
+
assert parsed_extract.date == parsed_msgconv.date
|
|
1391
|
+
|
|
1392
|
+
extract_names = sorted(a["filename"] for a in parsed_extract.attachments)
|
|
1393
|
+
msgconv_names = sorted(a["filename"] for a in parsed_msgconv.attachments)
|
|
1394
|
+
assert extract_names == msgconv_names
|
|
@@ -245,10 +245,17 @@ class TestMain:
|
|
|
245
245
|
non_existent_file = str(tmp_path / "non_existent.msg")
|
|
246
246
|
args = parser.parse_args(["--file", non_existent_file, "--outlook"])
|
|
247
247
|
|
|
248
|
-
#
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
248
|
+
# Force the deprecated msgconvert fallback (extract-msg absent) and
|
|
249
|
+
# mock msgconvert to raise OSError (simulating msgconvert unavailable)
|
|
250
|
+
with (
|
|
251
|
+
patch(
|
|
252
|
+
"mailparser.core.importlib.util.find_spec",
|
|
253
|
+
return_value=None,
|
|
254
|
+
),
|
|
255
|
+
patch(
|
|
256
|
+
"mailparser.utils.subprocess.Popen",
|
|
257
|
+
side_effect=OSError("msgconvert not found"),
|
|
258
|
+
),
|
|
252
259
|
):
|
|
253
260
|
with pytest.raises(MailParserOSError, match="msgconvert"):
|
|
254
261
|
parse_file(args)
|