mail-parser 4.3.0__tar.gz → 4.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/workflows/main.yml +2 -1
  2. {mail_parser-4.3.0 → mail_parser-4.4.0}/CLAUDE.md +0 -6
  3. mail_parser-4.3.0/README.md → mail_parser-4.4.0/PKG-INFO +61 -10
  4. mail_parser-4.3.0/PKG-INFO → mail_parser-4.4.0/README.md +34 -35
  5. {mail_parser-4.3.0 → mail_parser-4.4.0}/pyproject.toml +3 -0
  6. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/core.py +21 -1
  7. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/utils.py +71 -5
  8. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/version.py +1 -1
  9. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_mail_parser.py +92 -1
  10. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_main.py +11 -4
  11. mail_parser-4.4.0/uv.lock +2529 -0
  12. mail_parser-4.3.0/uv.lock +0 -1322
  13. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/FUNDING.yml +0 -0
  14. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  15. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  16. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/copilot-instructions.md +0 -0
  17. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/containerization-docker-best-practices.instructions.md +0 -0
  18. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/github-actions-ci-cd-best-practices.instructions.md +0 -0
  19. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/markdown.instructions.md +0 -0
  20. {mail_parser-4.3.0 → mail_parser-4.4.0}/.github/instructions/python.instructions.md +0 -0
  21. {mail_parser-4.3.0 → mail_parser-4.4.0}/.gitignore +0 -0
  22. {mail_parser-4.3.0 → mail_parser-4.4.0}/.markdownlint.json +0 -0
  23. {mail_parser-4.3.0 → mail_parser-4.4.0}/.pre-commit-config.yaml +0 -0
  24. {mail_parser-4.3.0 → mail_parser-4.4.0}/Dockerfile +0 -0
  25. {mail_parser-4.3.0 → mail_parser-4.4.0}/LICENSE.txt +0 -0
  26. {mail_parser-4.3.0 → mail_parser-4.4.0}/Makefile +0 -0
  27. {mail_parser-4.3.0 → mail_parser-4.4.0}/NOTICE.txt +0 -0
  28. {mail_parser-4.3.0 → mail_parser-4.4.0}/docker-compose.yml +0 -0
  29. {mail_parser-4.3.0 → mail_parser-4.4.0}/docs/images/Bitcoin SpamScope.jpg +0 -0
  30. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/__init__.py +0 -0
  31. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/__main__.py +0 -0
  32. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/const.py +0 -0
  33. {mail_parser-4.3.0 → mail_parser-4.4.0}/src/mailparser/exceptions.py +0 -0
  34. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_1 +0 -0
  35. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_2 +0 -0
  36. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_malformed_3 +0 -0
  37. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_outlook_1 +0 -0
  38. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_1 +0 -0
  39. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_10 +0 -0
  40. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_11 +0 -0
  41. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_12 +0 -0
  42. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_13 +0 -0
  43. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_14 +0 -0
  44. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_15 +0 -0
  45. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_16 +0 -0
  46. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_17 +0 -0
  47. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_18 +0 -0
  48. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_19 +0 -0
  49. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_2 +0 -0
  50. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_3 +0 -0
  51. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_4 +0 -0
  52. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_5 +0 -0
  53. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_6 +0 -0
  54. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_7 +0 -0
  55. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_8 +0 -0
  56. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/mails/mail_test_9 +0 -0
  57. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_improved_received_patterns.py +0 -0
  58. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_received_corpus.py +0 -0
  59. {mail_parser-4.3.0 → mail_parser-4.4.0}/tests/test_utils.py +0 -0
@@ -29,7 +29,7 @@ jobs:
29
29
  curl -LsSf https://astral.sh/uv/install.sh | sh
30
30
  sudo apt-get -qq update
31
31
  sudo apt-get install -y libemail-outlook-message-perl
32
- uv sync
32
+ uv sync --all-extras
33
33
  export PERL_MM_USE_DEFAULT=1
34
34
  sudo cpan -f -i Email::Outlook::Message
35
35
 
@@ -41,6 +41,7 @@ jobs:
41
41
  uv run mail-parser -v
42
42
  uv run mail-parser -h
43
43
  uv run mail-parser -f tests/mails/mail_malformed_3 -j
44
+ uv run mail-parser -f tests/mails/mail_outlook_1 -o -j
44
45
  cat tests/mails/mail_malformed_3 | uv run mail-parser -k -j
45
46
 
46
47
  - name: Run pre-commit
@@ -95,9 +95,3 @@ After every change:
95
95
  1. Update README.md if the change affects usage, API, or setup.
96
96
  1. Stage changes and run pre-commit; fix all reported issues before proceeding.
97
97
  1. Run full test suite; fix all failures before reporting done.
98
-
99
- ### Test fixtures
100
-
101
- Raw email files in `tests/mails/` are the fixtures. `mail_malformed_*` files exercise defect
102
- detection; `mail_outlook_*` require `msgconvert` installed. Tests that need the Outlook tool should
103
- be marked `@pytest.mark.integration`.
@@ -1,3 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: mail-parser
3
+ Version: 4.4.0
4
+ Summary: A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object.
5
+ Author-email: Fedele Mantuano <mantuano.fedele@gmail.com>
6
+ Maintainer-email: Fedele Mantuano <mantuano.fedele@gmail.com>
7
+ License-Expression: Apache-2.0
8
+ License-File: LICENSE.txt
9
+ License-File: NOTICE.txt
10
+ Keywords: email,forensics,mail,malware,parser,phishing,security,spam,threat detection
11
+ Classifier: Natural Language :: English
12
+ Classifier: Operating System :: MacOS
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Classifier: Operating System :: Unix
15
+ Classifier: Programming Language :: Python
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Requires-Python: <3.15,>=3.9
24
+ Provides-Extra: outlook
25
+ Requires-Dist: extract-msg>=0.54; extra == 'outlook'
26
+ Description-Content-Type: text/markdown
27
+
1
28
  [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
2
29
  [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
3
30
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)
@@ -36,20 +63,44 @@ formats, making it versatile for diverse email ecosystems.
36
63
  **⚡ Production-Ready**: Trusted by security professionals and developers worldwide, with extensive
37
64
  test coverage and proven reliability in high-stakes environments.
38
65
 
39
- Additionally, mail-parser provides full support for parsing Outlook email formats (.msg). To enable
40
- this functionality on Debian-based systems, simply install the required system package:
66
+ mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
41
67
 
42
- ```bash
43
- apt-get install libemail-outlook-message-perl
44
- ```
68
+ ## Parsing Outlook `.msg` files
45
69
 
46
- For further details about the package, you can run:
70
+ mail-parser converts Outlook `.msg` files to standard `.eml` before parsing.
71
+ Two conversion backends are supported:
47
72
 
48
- ```bash
49
- apt-cache show libemail-outlook-message-perl
50
- ```
73
+ 1. **`extract-msg` (recommended, pure Python).** No external tools required.
74
+ Install the optional extra:
51
75
 
52
- mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
76
+ ```bash
77
+ pip install mail-parser[outlook]
78
+ ```
79
+
80
+ 1. **`msgconvert` (deprecated, external Perl tool).** Requires the
81
+ `libemail-outlook-message-perl` system package:
82
+
83
+ ```bash
84
+ apt-get install libemail-outlook-message-perl # Debian-based systems
85
+ apt-cache show libemail-outlook-message-perl # package details
86
+ ```
87
+
88
+ **Backend precedence:** when `extract-msg` is installed it is used first.
89
+ Only when it is *not* available does mail-parser fall back to the `msgconvert`
90
+ external tool, logging a deprecation warning. If neither backend is available,
91
+ `parse_from_file_msg()` raises `MailParserOSError` telling you to install
92
+ either backend.
93
+
94
+ > **⚠️ Deprecated:** the `msgconvert` external-tool backend is deprecated and
95
+ > will be removed in a future release. Migrate to the pure-Python backend with
96
+ > `pip install mail-parser[outlook]`.
97
+
98
+ **💥 BREAKING CHANGE:** the default `.msg` conversion backend changed.
99
+ When `extract-msg` is installed it is now preferred over `msgconvert`. The two
100
+ converters produce different intermediate `.eml` output, so some parsed fields
101
+ (header ordering, encoding edge cases, attachment naming) can differ from the
102
+ previous `msgconvert`-only behavior. Downstream code asserting on exact
103
+ `.msg`-derived output may need updating.
53
104
 
54
105
  # Apache 2 Open Source License
55
106
 
@@ -1,28 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: mail-parser
3
- Version: 4.3.0
4
- Summary: A tool that parses emails by enhancing the Python standard library, extracting all details into a comprehensive object.
5
- Author-email: Fedele Mantuano <mantuano.fedele@gmail.com>
6
- Maintainer-email: Fedele Mantuano <mantuano.fedele@gmail.com>
7
- License-Expression: Apache-2.0
8
- License-File: LICENSE.txt
9
- License-File: NOTICE.txt
10
- Keywords: email,forensics,mail,malware,parser,phishing,security,spam,threat detection
11
- Classifier: Natural Language :: English
12
- Classifier: Operating System :: MacOS
13
- Classifier: Operating System :: Microsoft :: Windows
14
- Classifier: Operating System :: Unix
15
- Classifier: Programming Language :: Python
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Programming Language :: Python :: 3.13
22
- Classifier: Programming Language :: Python :: 3.14
23
- Requires-Python: <3.15,>=3.9
24
- Description-Content-Type: text/markdown
25
-
26
1
  [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
27
2
  [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
28
3
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)
@@ -61,20 +36,44 @@ formats, making it versatile for diverse email ecosystems.
61
36
  **⚡ Production-Ready**: Trusted by security professionals and developers worldwide, with extensive
62
37
  test coverage and proven reliability in high-stakes environments.
63
38
 
64
- Additionally, mail-parser provides full support for parsing Outlook email formats (.msg). To enable
65
- this functionality on Debian-based systems, simply install the required system package:
39
+ mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
66
40
 
67
- ```bash
68
- apt-get install libemail-outlook-message-perl
69
- ```
41
+ ## Parsing Outlook `.msg` files
70
42
 
71
- For further details about the package, you can run:
43
+ mail-parser converts Outlook `.msg` files to standard `.eml` before parsing.
44
+ Two conversion backends are supported:
72
45
 
73
- ```bash
74
- apt-cache show libemail-outlook-message-perl
75
- ```
46
+ 1. **`extract-msg` (recommended, pure Python).** No external tools required.
47
+ Install the optional extra:
76
48
 
77
- mail-parser is fully compatible with Python 3, ensuring modern performance and reliability.
49
+ ```bash
50
+ pip install mail-parser[outlook]
51
+ ```
52
+
53
+ 1. **`msgconvert` (deprecated, external Perl tool).** Requires the
54
+ `libemail-outlook-message-perl` system package:
55
+
56
+ ```bash
57
+ apt-get install libemail-outlook-message-perl # Debian-based systems
58
+ apt-cache show libemail-outlook-message-perl # package details
59
+ ```
60
+
61
+ **Backend precedence:** when `extract-msg` is installed it is used first.
62
+ Only when it is *not* available does mail-parser fall back to the `msgconvert`
63
+ external tool, logging a deprecation warning. If neither backend is available,
64
+ `parse_from_file_msg()` raises `MailParserOSError` telling you to install
65
+ either backend.
66
+
67
+ > **⚠️ Deprecated:** the `msgconvert` external-tool backend is deprecated and
68
+ > will be removed in a future release. Migrate to the pure-Python backend with
69
+ > `pip install mail-parser[outlook]`.
70
+
71
+ **💥 BREAKING CHANGE:** the default `.msg` conversion backend changed.
72
+ When `extract-msg` is installed it is now preferred over `msgconvert`. The two
73
+ converters produce different intermediate `.eml` output, so some parsed fields
74
+ (header ordering, encoding edge cases, attachment naming) can differ from the
75
+ previous `msgconvert`-only behavior. Downstream code asserting on exact
76
+ `.msg`-derived output may need updating.
78
77
 
79
78
  # Apache 2 Open Source License
80
79
 
@@ -28,6 +28,9 @@ maintainers = [
28
28
  ]
29
29
  dependencies = []
30
30
 
31
+ [project.optional-dependencies]
32
+ outlook = ["extract-msg>=0.54"]
33
+
31
34
  [dependency-groups]
32
35
  dev = [
33
36
  "build>=1.2.2.post1",
@@ -18,6 +18,7 @@ limitations under the License.
18
18
 
19
19
  import base64
20
20
  import email
21
+ import importlib.util
21
22
  import ipaddress
22
23
  import json
23
24
  import logging
@@ -27,6 +28,7 @@ from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP, REGXIP
27
28
  from mailparser.utils import (
28
29
  convert_mail_date,
29
30
  decode_header_part,
31
+ extract_msg_convert,
30
32
  find_between,
31
33
  get_addresses,
32
34
  get_header,
@@ -186,14 +188,32 @@ class MailParser:
186
188
  Init a new object from a Outlook message file,
187
189
  mime type: application/vnd.ms-outlook
188
190
 
191
+ Conversion backend precedence:
192
+ 1. ``extract-msg`` (pure Python, optional ``outlook`` extra).
193
+ 2. ``msgconvert`` external Perl tool — **deprecated** fallback,
194
+ used only when ``extract-msg`` is not installed.
195
+
189
196
  Args:
190
197
  fp (string): file path of raw Outlook email
191
198
 
192
199
  Returns:
193
200
  Instance of MailParser
201
+
202
+ Raises:
203
+ MailParserOSError: if no conversion backend is available
194
204
  """
195
205
  log.debug("Parsing email from file Outlook")
196
- f, _ = msgconvert(fp)
206
+
207
+ if importlib.util.find_spec("extract_msg") is not None:
208
+ f, _ = extract_msg_convert(fp)
209
+ else:
210
+ log.warning(
211
+ "msgconvert backend is deprecated and will be removed "
212
+ "in a future release. Install the pure-Python Outlook "
213
+ "support with 'pip install mail-parser[outlook]'."
214
+ )
215
+ f, _ = msgconvert(fp)
216
+
197
217
  return cls.from_file(f, True)
198
218
 
199
219
  @classmethod
@@ -318,6 +318,66 @@ def fingerprints(data):
318
318
  return hashes(md5, sha1, sha256, sha512)
319
319
 
320
320
 
321
def _new_outlook_tempfile():
    """
    Create an empty temporary file to hold a converted Outlook email.

    The file is created with ``tempfile.mkstemp`` using the ``outlook_``
    prefix and no suffix, so the returned path does not end in ``.eml``
    even though callers store converted ``.eml`` content in it.

    The OS-level file descriptor is closed immediately; callers write to
    the returned path with their own handle (a subprocess ``--outfile``
    for ``msgconvert`` or a plain ``open`` for the pure-Python backend).
    The file is not deleted here.

    Returns:
        str: path of the new, empty temporary file
    """
    descriptor, path = tempfile.mkstemp(prefix="outlook_")
    # Only the path is needed; release the descriptor right away so it
    # is never leaked, whatever the caller does with the path.
    os.close(descriptor)
    return path
335
+
336
+
337
def extract_msg_convert(fp):
    """
    Convert an Outlook ``.msg`` file to ``.eml`` using the pure-Python
    ``extract-msg`` library (no external Perl tool required).

    The ``extract_msg`` import is performed lazily inside this function so
    that the package keeps importing with zero runtime dependencies when
    the optional ``outlook`` extra is not installed.

    Args:
        fp (string): file path of the Outlook ``.msg`` mail

    Returns:
        tuple: ``(eml_path, info)`` where ``eml_path`` is the path of the
            temporary file holding the converted message and ``info`` is
            a short ``"<From> | <Subject>"`` descriptive string

    Raises:
        ImportError: if the ``extract-msg`` library is not installed
        MailParserOSError: if the ``.msg`` is not a convertible email
            message (e.g. a contact or calendar item)
    """
    import extract_msg  # lazy: keep package import stdlib-only

    log.debug("Started converting Outlook email with extract-msg")
    msg = extract_msg.openMsg(fp)
    try:
        # openMsg() may return a non-email MSGFile (contact, calendar,
        # task...) which cannot be rendered as an email message.
        as_email_message = getattr(msg, "asEmailMessage", None)
        if as_email_message is None:
            raise MailParserOSError(
                f"Outlook file {fp!r} is not a convertible email "
                f"message (type {type(msg).__name__})"
            )
        eml = as_email_message()
        info = f"{eml.get('From', '')} | {eml.get('Subject', '')}".strip()

        # Serialize before touching the filesystem: if rendering fails,
        # no temporary file has been created yet.
        payload = eml.as_bytes()

        temp = _new_outlook_tempfile()
        try:
            with open(temp, "wb") as f:
                f.write(payload)
        except Exception:
            # Do not leave an orphaned temp file behind on a failed write.
            try:
                os.remove(temp)
            except OSError:
                # Best-effort cleanup; the original error is re-raised.
                pass
            raise
        return temp, info
    finally:
        # Always release the handle extract-msg holds on the .msg file.
        msg.close()
379
+
380
+
321
381
  def msgconvert(email):
322
382
  """
323
383
  Exec msgconvert tool, to convert msg Outlook
@@ -329,9 +389,12 @@ def msgconvert(email):
329
389
  Returns:
330
390
  tuple with file path of mail converted and
331
391
  standard output data (str)
392
+
393
+ Raises:
394
+ MailParserOSError: if the ``msgconvert`` tool is not installed
332
395
  """
333
396
  log.debug("Started converting Outlook email")
334
- temph, temp = tempfile.mkstemp(prefix="outlook_")
397
+ temp = _new_outlook_tempfile()
335
398
  command = ["msgconvert", "--outfile", temp, email]
336
399
 
337
400
  try:
@@ -343,7 +406,13 @@ def msgconvert(email):
343
406
  )
344
407
 
345
408
  except OSError as e:
346
- message = f"Check if 'msgconvert' tool is installed / {e!r}"
409
+ message = (
410
+ "Cannot convert Outlook .msg: no conversion backend "
411
+ "available. Install pure-Python support with "
412
+ "'pip install mail-parser[outlook]', or install the "
413
+ "'msgconvert' Perl tool "
414
+ f"(libemail-outlook-message-perl). {e!r}"
415
+ )
347
416
  log.exception(message)
348
417
  raise MailParserOSError(message)
349
418
 
@@ -351,9 +420,6 @@ def msgconvert(email):
351
420
  stdoutdata, _ = out.communicate()
352
421
  return temp, stdoutdata.decode("utf-8").strip()
353
422
 
354
- finally:
355
- os.close(temph)
356
-
357
423
 
358
424
  def parse_received(received):
359
425
  """
@@ -16,4 +16,4 @@ See the License for the specific language governing permissions and
16
16
  limitations under the License.
17
17
  """
18
18
 
19
- __version__ = "4.3.0"
19
+ __version__ = "4.4.0"
@@ -18,6 +18,7 @@ limitations under the License.
18
18
 
19
19
  import datetime
20
20
  import hashlib
21
+ import logging
21
22
  import os
22
23
  import shutil
23
24
  import sys
@@ -25,9 +26,13 @@ import tempfile
25
26
  import unittest
26
27
  from unittest.mock import patch
27
28
 
29
+ import pytest
30
+
28
31
  import mailparser
32
+ from mailparser.exceptions import MailParserOSError
29
33
  from mailparser.utils import (
30
34
  convert_mail_date,
35
+ extract_msg_convert,
31
36
  fingerprints,
32
37
  get_addresses,
33
38
  get_header,
@@ -433,9 +438,10 @@ class TestMailParser(unittest.TestCase):
433
438
  self.assertIsInstance(m.mail, dict)
434
439
  self.assertIsInstance(m.mail_json, str)
435
440
 
441
+ @patch("mailparser.core.importlib.util.find_spec", return_value=None)
436
442
  @patch("mailparser.core.os.remove")
437
443
  @patch("mailparser.core.msgconvert")
438
- def test_parse_from_file_msg(self, mock_msgconvert, mock_remove):
444
+ def test_parse_from_file_msg(self, mock_msgconvert, mock_remove, mock_find_spec):
439
445
  """
440
446
  Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0
441
447
 
@@ -1301,3 +1307,88 @@ class TestEmailAsDisplayName(unittest.TestCase):
1301
1307
  self.assertEqual(len(result), 2)
1302
1308
  self.assertEqual(result[0], ("alice@example.com", "bob@example.com"))
1303
1309
  self.assertEqual(result[1], ("eve@example.com", "frank@example.com"))
1310
+
1311
+
1312
+ # ---------------------------------------------------------------------------
1313
+ # Outlook .msg conversion backends (extract-msg vs deprecated msgconvert)
1314
+ # ---------------------------------------------------------------------------
1315
+
1316
+
1317
def test_from_file_msg_prefers_extract_msg(mocker):
    """With extract-msg importable, it converts and msgconvert stays unused."""
    # Stub out cleanup and the deprecated backend first, then the
    # preferred converter, then make the extract_msg lookup succeed.
    remove_stub = mocker.patch("mailparser.core.os.remove")
    legacy_stub = mocker.patch("mailparser.core.msgconvert")
    converter_stub = mocker.patch(
        "mailparser.core.extract_msg_convert",
        return_value=(mail_test_2, "info"),
    )
    mocker.patch("importlib.util.find_spec", return_value=object())

    mailparser.parse_from_file_msg(mail_outlook_1)

    converter_stub.assert_called_once_with(mail_outlook_1)
    legacy_stub.assert_not_called()
    # The converted temporary file must be removed after parsing.
    remove_stub.assert_called_once_with(mail_test_2)
1332
+
1333
+
1334
def test_from_file_msg_fallback_warns(mocker, caplog):
    """Without extract-msg, msgconvert runs and a deprecation warning is logged."""
    mocker.patch("mailparser.core.os.remove")
    legacy_stub = mocker.patch(
        "mailparser.core.msgconvert",
        return_value=(mail_test_2, None),
    )
    # Simulate the optional dependency being absent.
    mocker.patch("importlib.util.find_spec", return_value=None)

    with caplog.at_level(logging.WARNING, logger="mailparser.core"):
        mailparser.parse_from_file_msg(mail_outlook_1)

    legacy_stub.assert_called_once_with(mail_outlook_1)

    logged = [record.message for record in caplog.records]
    # The warning must flag the deprecation and name the replacement extra.
    for fragment in ("deprecated", "mail-parser[outlook]"):
        assert any(fragment in text for text in logged)
1350
+
1351
+
1352
def test_from_file_msg_no_backend_raises(mocker):
    """With neither backend usable, the error names both install options."""
    # extract-msg is reported as missing ...
    mocker.patch("importlib.util.find_spec", return_value=None)
    # ... and spawning the msgconvert tool fails as well.
    mocker.patch(
        "mailparser.utils.subprocess.Popen",
        side_effect=OSError("no msgconvert"),
    )

    with pytest.raises(MailParserOSError) as raised:
        mailparser.parse_from_file_msg(mail_outlook_1)

    error_text = str(raised.value)
    assert "mail-parser[outlook]" in error_text
    assert "msgconvert" in error_text
1365
+
1366
+
1367
@pytest.mark.integration
def test_outlook_backend_parity():
    """mail_outlook_1 parses to the same result under both backends.

    Requires both the optional ``extract-msg`` dependency and the
    ``msgconvert`` Perl tool; the test is skipped when either is missing.
    The two converters do not emit byte-identical ``.eml`` files, so only
    the meaningful parsed result is compared (key headers, date,
    attachment names/count). The raw body is intentionally not compared:
    msgconvert and extract-msg differ in line endings, MIME structure and
    RTF/HTML reconstruction.
    """
    pytest.importorskip("extract_msg")
    if shutil.which("msgconvert") is None:
        pytest.skip("msgconvert Perl tool is not installed")

    # Imported locally so this test does not depend on the file-level
    # import list also exposing the deprecated converter.
    from mailparser.utils import msgconvert

    # Force each backend explicitly via its util. Going through
    # MailParser.from_file_msg() would pick extract-msg for both sides,
    # because that entry point prefers extract-msg whenever it is
    # installed. from_file(..., True) removes the temporary converted
    # .eml after parsing.
    f_extract, _ = extract_msg_convert(mail_outlook_1)
    parsed_extract = mailparser.MailParser.from_file(f_extract, True)

    f_msgconv, _ = msgconvert(mail_outlook_1)
    parsed_msgconv = mailparser.MailParser.from_file(f_msgconv, True)

    for key in ("from", "to", "subject"):
        assert parsed_extract.mail.get(key) == parsed_msgconv.mail.get(key)

    assert parsed_extract.date == parsed_msgconv.date

    extract_names = sorted(a["filename"] for a in parsed_extract.attachments)
    msgconv_names = sorted(a["filename"] for a in parsed_msgconv.attachments)
    assert extract_names == msgconv_names
@@ -245,10 +245,17 @@ class TestMain:
245
245
  non_existent_file = str(tmp_path / "non_existent.msg")
246
246
  args = parser.parse_args(["--file", non_existent_file, "--outlook"])
247
247
 
248
- # Mock msgconvert to raise OSError (simulating msgconvert unavailable)
249
- with patch(
250
- "mailparser.utils.subprocess.Popen",
251
- side_effect=OSError("msgconvert not found"),
248
+ # Force the deprecated msgconvert fallback (extract-msg absent) and
249
+ # mock msgconvert to raise OSError (simulating msgconvert unavailable)
250
+ with (
251
+ patch(
252
+ "mailparser.core.importlib.util.find_spec",
253
+ return_value=None,
254
+ ),
255
+ patch(
256
+ "mailparser.utils.subprocess.Popen",
257
+ side_effect=OSError("msgconvert not found"),
258
+ ),
252
259
  ):
253
260
  with pytest.raises(MailParserOSError, match="msgconvert"):
254
261
  parse_file(args)