mail-parser 4.0.0__tar.gz → 4.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {mail_parser-4.0.0/src/mail_parser.egg-info → mail_parser-4.1.2}/PKG-INFO +12 -3
  2. {mail_parser-4.0.0 → mail_parser-4.1.2}/README.md +12 -3
  3. {mail_parser-4.0.0 → mail_parser-4.1.2}/pyproject.toml +1 -1
  4. {mail_parser-4.0.0 → mail_parser-4.1.2}/setup.py +1 -1
  5. {mail_parser-4.0.0 → mail_parser-4.1.2/src/mail_parser.egg-info}/PKG-INFO +12 -3
  6. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mail_parser.egg-info/SOURCES.txt +1 -2
  7. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mailparser/__init__.py +14 -4
  8. mail_parser-4.1.2/src/mailparser/__main__.py +348 -0
  9. mail_parser-4.1.2/src/mailparser/const.py +98 -0
  10. mail_parser-4.0.0/src/mailparser/mailparser.py → mail_parser-4.1.2/src/mailparser/core.py +137 -96
  11. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mailparser/exceptions.py +6 -2
  12. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mailparser/utils.py +63 -50
  13. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mailparser/version.py +1 -4
  14. {mail_parser-4.0.0 → mail_parser-4.1.2}/tests/test_mail_parser.py +95 -123
  15. mail_parser-4.1.2/tests/test_main.py +172 -0
  16. mail_parser-4.0.0/MANIFEST.in +0 -1
  17. mail_parser-4.0.0/src/mailparser/__main__.py +0 -264
  18. mail_parser-4.0.0/src/mailparser/const.py +0 -108
  19. mail_parser-4.0.0/tests/test_main.py +0 -99
  20. {mail_parser-4.0.0 → mail_parser-4.1.2}/LICENSE.txt +0 -0
  21. {mail_parser-4.0.0 → mail_parser-4.1.2}/NOTICE.txt +0 -0
  22. {mail_parser-4.0.0 → mail_parser-4.1.2}/setup.cfg +0 -0
  23. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mail_parser.egg-info/dependency_links.txt +0 -0
  24. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mail_parser.egg-info/entry_points.txt +0 -0
  25. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mail_parser.egg-info/requires.txt +0 -0
  26. {mail_parser-4.0.0 → mail_parser-4.1.2}/src/mail_parser.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mail-parser
3
- Version: 4.0.0
3
+ Version: 4.1.2
4
4
  Summary: Improved wrapper for email standard library
5
5
  Home-page: https://github.com/SpamScope/mail-parser
6
6
  Author: Fedele Mantuano
@@ -37,8 +37,10 @@ Requires-Dist: pytest-cov; extra == "test"
37
37
  Requires-Dist: pytest-mock; extra == "test"
38
38
  Requires-Dist: pytest-ordering; extra == "test"
39
39
 
40
- [![PyPI version](https://badge.fury.io/py/mail-parser.svg)](https://badge.fury.io/py/mail-parser)
40
+ [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
41
41
  [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
42
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)
43
+
42
44
 
43
45
  ![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png)
44
46
 
@@ -67,9 +69,16 @@ mail-parser supports Python 3.
67
69
  mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license.
68
70
 
69
71
  ## Support the project
72
+ If you find this project useful, you can support it by donating any amount you want. All donations are greatly appreciated and help maintain and develop the project.
70
73
 
71
74
  [![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2)
72
75
 
76
+ <a href="bitcoin:bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32">
77
+ <img src="https://github.com/SpamScope/mail-parser/blob/develop/docs/images/Bitcoin%20SpamScope.jpg?raw=true" alt="Bitcoin" width="200">
78
+ </a>
79
+
80
+ Bitcoin Address: `bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32`
81
+
73
82
  # mail-parser on Web
74
83
  - [Splunk app](https://splunkbase.splunk.com/app/4129/)
75
84
  - [FreeBSD port](https://www.freshports.org/mail/py-mail-parser/)
@@ -318,7 +327,7 @@ $ pip install -e ".[dev, test]"
318
327
  The second step is to run the tests:
319
328
 
320
329
  ```
321
- $ make test
330
+ $ make unittest
322
331
  ```
323
332
 
324
333
  Then you can try to run the command line tool:
@@ -1,5 +1,7 @@
1
- [![PyPI version](https://badge.fury.io/py/mail-parser.svg)](https://badge.fury.io/py/mail-parser)
1
+ [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
2
2
  [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
3
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)
4
+
3
5
 
4
6
  ![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png)
5
7
 
@@ -28,9 +30,16 @@ mail-parser supports Python 3.
28
30
  mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license.
29
31
 
30
32
  ## Support the project
33
+ If you find this project useful, you can support it by donating any amount you want. All donations are greatly appreciated and help maintain and develop the project.
31
34
 
32
35
  [![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2)
33
36
 
37
+ <a href="bitcoin:bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32">
38
+ <img src="https://github.com/SpamScope/mail-parser/blob/develop/docs/images/Bitcoin%20SpamScope.jpg?raw=true" alt="Bitcoin" width="200">
39
+ </a>
40
+
41
+ Bitcoin Address: `bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32`
42
+
34
43
  # mail-parser on Web
35
44
  - [Splunk app](https://splunkbase.splunk.com/app/4129/)
36
45
  - [FreeBSD port](https://www.freshports.org/mail/py-mail-parser/)
@@ -279,7 +288,7 @@ $ pip install -e ".[dev, test]"
279
288
  The second step is to run the tests:
280
289
 
281
290
  ```
282
- $ make test
291
+ $ make unittest
283
292
  ```
284
293
 
285
294
  Then you can try to run the command line tool:
@@ -288,4 +297,4 @@ Then you can try to run the command line tool:
288
297
  $ mail-parser -f tests/mails/mail_malformed_3 -j
289
298
  ```
290
299
 
291
- If all is ok, you can start to develop.
300
+ If all is ok, you can start to develop.
@@ -1,3 +1,3 @@
1
1
  [build-system]
2
2
  requires = ["setuptools >= 40.6.0", "wheel"]
3
- build-backend = "setuptools.build_meta"
3
+ build-backend = "setuptools.build_meta"
@@ -17,4 +17,4 @@ limitations under the License.
17
17
  import setuptools
18
18
 
19
19
  if __name__ == "__main__":
20
- setuptools.setup()
20
+ setuptools.setup()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mail-parser
3
- Version: 4.0.0
3
+ Version: 4.1.2
4
4
  Summary: Improved wrapper for email standard library
5
5
  Home-page: https://github.com/SpamScope/mail-parser
6
6
  Author: Fedele Mantuano
@@ -37,8 +37,10 @@ Requires-Dist: pytest-cov; extra == "test"
37
37
  Requires-Dist: pytest-mock; extra == "test"
38
38
  Requires-Dist: pytest-ordering; extra == "test"
39
39
 
40
- [![PyPI version](https://badge.fury.io/py/mail-parser.svg)](https://badge.fury.io/py/mail-parser)
40
+ [![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
41
41
  [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
42
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)
43
+
42
44
 
43
45
  ![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png)
44
46
 
@@ -67,9 +69,16 @@ mail-parser supports Python 3.
67
69
  mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license.
68
70
 
69
71
  ## Support the project
72
+ If you find this project useful, you can support it by donating any amount you want. All donations are greatly appreciated and help maintain and develop the project.
70
73
 
71
74
  [![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2)
72
75
 
76
+ <a href="bitcoin:bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32">
77
+ <img src="https://github.com/SpamScope/mail-parser/blob/develop/docs/images/Bitcoin%20SpamScope.jpg?raw=true" alt="Bitcoin" width="200">
78
+ </a>
79
+
80
+ Bitcoin Address: `bc1qxhz3tghztpjqdt7atey68s344wvmugtl55tm32`
81
+
73
82
  # mail-parser on Web
74
83
  - [Splunk app](https://splunkbase.splunk.com/app/4129/)
75
84
  - [FreeBSD port](https://www.freshports.org/mail/py-mail-parser/)
@@ -318,7 +327,7 @@ $ pip install -e ".[dev, test]"
318
327
  The second step is to run the tests:
319
328
 
320
329
  ```
321
- $ make test
330
+ $ make unittest
322
331
  ```
323
332
 
324
333
  Then you can try to run the command line tool:
@@ -1,5 +1,4 @@
1
1
  LICENSE.txt
2
- MANIFEST.in
3
2
  NOTICE.txt
4
3
  README.md
5
4
  pyproject.toml
@@ -14,8 +13,8 @@ src/mail_parser.egg-info/top_level.txt
14
13
  src/mailparser/__init__.py
15
14
  src/mailparser/__main__.py
16
15
  src/mailparser/const.py
16
+ src/mailparser/core.py
17
17
  src/mailparser/exceptions.py
18
- src/mailparser/mailparser.py
19
18
  src/mailparser/utils.py
20
19
  src/mailparser/version.py
21
20
  tests/test_mail_parser.py
@@ -17,13 +17,23 @@ See the License for the specific language governing permissions and
17
17
  limitations under the License.
18
18
  """
19
19
 
20
-
21
- from .mailparser import (
20
+ from mailparser.core import (
22
21
  MailParser,
23
22
  parse_from_bytes,
24
23
  parse_from_file,
25
24
  parse_from_file_msg,
26
25
  parse_from_file_obj,
27
- parse_from_string)
26
+ parse_from_string,
27
+ )
28
+
29
+ from mailparser.utils import get_header
28
30
 
29
- from .utils import get_header
31
+ __all__ = [
32
+ "MailParser",
33
+ "parse_from_bytes",
34
+ "parse_from_file",
35
+ "parse_from_file_msg",
36
+ "parse_from_file_obj",
37
+ "parse_from_string",
38
+ "get_header",
39
+ ]
@@ -0,0 +1,348 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Copyright 2016 Fedele Mantuano (https://twitter.com/fedelemantuano)
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
18
+ """
19
+
20
+ import argparse
21
+ import logging
22
+ import sys
23
+
24
+ import mailparser
25
+ from mailparser.exceptions import MailParserOutlookError
26
+ from mailparser.utils import (
27
+ custom_log,
28
+ print_attachments,
29
+ print_mail_fingerprints,
30
+ safe_print,
31
+ write_attachments,
32
+ )
33
+ from mailparser.version import __version__
34
+
35
+
36
+ log = logging.getLogger("mailparser")
37
+
38
+
39
+ def get_args():
40
+ """
41
+ Get arguments from command line.
42
+ :return: argparse.ArgumentParser
43
+ :rtype: argparse.ArgumentParser
44
+ """
45
+ parser = argparse.ArgumentParser(
46
+ description="Wrapper for email Python Standard Library",
47
+ epilog="It takes as input a raw mail and generates a parsed object.",
48
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
49
+ )
50
+
51
+ parsing_group = parser.add_mutually_exclusive_group(required=True)
52
+ parsing_group.add_argument("-f", "--file", dest="file", help="Raw email file")
53
+ parsing_group.add_argument("-s", "--string", dest="string", help="Raw email string")
54
+ parsing_group.add_argument(
55
+ "-k",
56
+ "--stdin",
57
+ dest="stdin",
58
+ action="store_true",
59
+ help="Enable parsing from stdin",
60
+ )
61
+
62
+ parser.add_argument(
63
+ "-l",
64
+ "--log-level",
65
+ dest="log_level",
66
+ default="WARNING",
67
+ choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
68
+ help="Set log level",
69
+ )
70
+
71
+ parser.add_argument(
72
+ "-j",
73
+ "--json",
74
+ dest="json",
75
+ action="store_true",
76
+ help="Show the JSON of parsed mail",
77
+ )
78
+
79
+ parser.add_argument(
80
+ "-b", "--body", dest="body", action="store_true", help="Print the body of mail"
81
+ )
82
+
83
+ parser.add_argument(
84
+ "-a",
85
+ "--attachments",
86
+ dest="attachments",
87
+ action="store_true",
88
+ help="Print the attachments of mail",
89
+ )
90
+
91
+ parser.add_argument(
92
+ "-r",
93
+ "--headers",
94
+ dest="headers",
95
+ action="store_true",
96
+ help="Print the headers of mail",
97
+ )
98
+
99
+ parser.add_argument(
100
+ "-t", "--to", dest="to", action="store_true", help="Print the to of mail"
101
+ )
102
+
103
+ parser.add_argument(
104
+ "-dt",
105
+ "--delivered-to",
106
+ dest="delivered_to",
107
+ action="store_true",
108
+ help="Print the delivered-to of mail",
109
+ )
110
+
111
+ parser.add_argument(
112
+ "-m", "--from", dest="from_", action="store_true", help="Print the from of mail"
113
+ )
114
+
115
+ parser.add_argument(
116
+ "-u",
117
+ "--subject",
118
+ dest="subject",
119
+ action="store_true",
120
+ help="Print the subject of mail",
121
+ )
122
+
123
+ parser.add_argument(
124
+ "-c",
125
+ "--receiveds",
126
+ dest="receiveds",
127
+ action="store_true",
128
+ help="Print all receiveds of mail",
129
+ )
130
+
131
+ parser.add_argument(
132
+ "-d",
133
+ "--defects",
134
+ dest="defects",
135
+ action="store_true",
136
+ help="Print the defects of mail",
137
+ )
138
+
139
+ parser.add_argument(
140
+ "-o",
141
+ "--outlook",
142
+ dest="outlook",
143
+ action="store_true",
144
+ help="Analyze Outlook msg",
145
+ )
146
+
147
+ parser.add_argument(
148
+ "-i",
149
+ "--senderip",
150
+ dest="senderip",
151
+ metavar="Trust mail server string",
152
+ help="Extract a reliable sender IP address heuristically",
153
+ )
154
+
155
+ parser.add_argument(
156
+ "-p",
157
+ "--mail-hash",
158
+ dest="mail_hash",
159
+ action="store_true",
160
+ help="Print mail fingerprints without headers",
161
+ )
162
+
163
+ parser.add_argument(
164
+ "-z",
165
+ "--attachments-hash",
166
+ dest="attachments_hash",
167
+ action="store_true",
168
+ help="Print attachments with fingerprints",
169
+ )
170
+
171
+ parser.add_argument(
172
+ "-sa",
173
+ "--store-attachments",
174
+ dest="store_attachments",
175
+ action="store_true",
176
+ help="Store attachments on disk",
177
+ )
178
+
179
+ parser.add_argument(
180
+ "-ap",
181
+ "--attachments-path",
182
+ dest="attachments_path",
183
+ default="/tmp",
184
+ help="Path where store attachments",
185
+ )
186
+
187
+ parser.add_argument(
188
+ "-v", "--version", action="version", version="%(prog)s {}".format(__version__)
189
+ )
190
+
191
+ return parser
192
+
193
+
194
+ def main():
195
+ """
196
+ Main function.
197
+ """
198
+ args = get_args().parse_args()
199
+ log = custom_log(level=args.log_level, name="mailparser")
200
+
201
+ try:
202
+ parser = get_parser(args)
203
+ process_output(args, parser)
204
+ except Exception as e:
205
+ log.error(f"An error occurred: {e}")
206
+ sys.exit(1)
207
+
208
+
209
+ def get_parser(args):
210
+ """
211
+ Get the correct parser based on the input source.
212
+ :param args: argparse.Namespace
213
+ :type args: argparse.Namespace
214
+ :return: MailParser
215
+ :rtype: mailparser.core.MailParser
216
+ """
217
+ if args.file:
218
+ return parse_file(args)
219
+ elif args.string:
220
+ log.debug("Start analysis by string mail")
221
+ return mailparser.parse_from_string(args.string)
222
+ elif args.stdin:
223
+ return parse_stdin(args)
224
+ else:
225
+ raise ValueError("No input source provided")
226
+
227
+
228
+ def parse_file(args):
229
+ """
230
+ Parse the file based on the arguments provided.
231
+ :param args: argparse.Namespace
232
+ :type args: argparse.Namespace
233
+ :return: MailParser
234
+ :rtype: mailparser.core.MailParser
235
+ """
236
+ log.debug("Start analysis by file mail")
237
+ if args.outlook:
238
+ log.debug("Start analysis by Outlook msg")
239
+ return mailparser.parse_from_file_msg(args.file)
240
+ else:
241
+ log.debug("Start analysis by raw mail")
242
+ return mailparser.parse_from_file(args.file)
243
+
244
+
245
+ def parse_stdin(args):
246
+ """
247
+ Parse the stdin based on the arguments provided.
248
+ :param args: argparse.Namespace
249
+ :type args: argparse.Namespace
250
+ :return: MailParser
251
+ :rtype: mailparser.core.MailParser
252
+ """
253
+ log.debug("Start analysis by stdin mail")
254
+ if args.outlook:
255
+ raise MailParserOutlookError("You can't use stdin with msg Outlook")
256
+ return mailparser.parse_from_file_obj(sys.stdin)
257
+
258
+
259
+ def process_output(args, parser):
260
+ """
261
+ Process the output based on the arguments provided.
262
+ :param args: argparse.Namespace
263
+ :type args: argparse.Namespace
264
+ :param parser: MailParser
265
+ :type parser: mailparser.core.MailParser
266
+ :param log: logger
267
+ :type log: logging.Logger
268
+ """
269
+ if args.json:
270
+ safe_print(parser.mail_json)
271
+
272
+ if args.body:
273
+ safe_print(parser.body)
274
+
275
+ if args.headers:
276
+ safe_print(parser.headers_json)
277
+
278
+ if args.to:
279
+ safe_print(parser.to_json)
280
+
281
+ if args.delivered_to:
282
+ safe_print(parser.delivered_to_json)
283
+
284
+ if args.from_:
285
+ safe_print(parser.from_json)
286
+
287
+ if args.subject:
288
+ safe_print(parser.subject)
289
+
290
+ if args.receiveds:
291
+ safe_print(parser.received_json)
292
+
293
+ if args.defects:
294
+ print_defects(parser)
295
+
296
+ if args.senderip:
297
+ print_sender_ip(parser, args)
298
+
299
+ if args.attachments or args.attachments_hash:
300
+ print_attachments_details(parser, args)
301
+
302
+ if args.mail_hash:
303
+ log.debug("Printing also mail fingerprints")
304
+ print_mail_fingerprints(parser.body.encode("utf-8"))
305
+
306
+ if args.store_attachments:
307
+ log.debug("Store attachments on disk")
308
+ write_attachments(parser.attachments, args.attachments_path)
309
+
310
+
311
+ def print_defects(parser):
312
+ """
313
+ Print email defects.
314
+ :param parser: MailParser
315
+ :type parser: mailparser.core.MailParser
316
+ """
317
+ log.debug("Printing defects")
318
+ for defect in parser.defects_categories:
319
+ safe_print(defect)
320
+
321
+
322
+ def print_sender_ip(parser, args):
323
+ """
324
+ Print sender IP address.
325
+ :param parser: MailParser
326
+ :type parser: mailparser.core.MailParser
327
+ :param args: argparse.Namespace
328
+ :type args: argparse.Namespace
329
+ """
330
+ log.debug("Printing sender IP")
331
+ sender_ip = parser.get_server_ipaddress(args.senderip)
332
+ safe_print(sender_ip if sender_ip else "Not Found")
333
+
334
+
335
+ def print_attachments_details(parser, args):
336
+ """
337
+ Print attachments details.
338
+ :param parser: MailParser
339
+ :type parser: mailparser.core.MailParser
340
+ :param args: argparse.Namespace
341
+ :type args: argparse.Namespace
342
+ """
343
+ log.debug("Printing attachments details")
344
+ print_attachments(parser.attachments, args.attachments_hash)
345
+
346
+
347
+ if __name__ == "__main__": # pragma: no cover
348
+ main()
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Copyright 2018 Fedele Mantuano (https://twitter.com/fedelemantuano)
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
18
+ """
19
+
20
+ import re
21
+
22
+
23
+ REGXIP = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
24
+
25
+ JUNK_PATTERN = r"[ \(\)\[\]\t\n]+"
26
+
27
+ # Patterns for receiveds
28
+ RECEIVED_PATTERNS = [
29
+ # each pattern handles matching a single clause
30
+ # need to exclude withs followed by cipher (e.g., google); (?! cipher)
31
+ # TODO: ideally would do negative matching for with in parens
32
+ # need the beginning or space to differentiate from envelope-from
33
+ (
34
+ r"(?:(?:^|\s)from\s+(?P<from>.+?)(?:\s*[(]?"
35
+ r"envelope-from|\s*[(]?envelope-sender|\s+"
36
+ r"by|\s+with(?! cipher)|\s+id|\s+for|\s+via|;))"
37
+ ),
38
+ # need to make sure envelope-from comes before from to prevent mismatches
39
+ # envelope-from and -sender seem to optionally have space and/or
40
+ # ( before them other clauses must have whitespace before
41
+ (
42
+ r"(?:[^-]by\s+(?P<by>.+?)(?:\s*[(]?envelope-from|\s*"
43
+ r"[(]?envelope-sender|\s+from|\s+with"
44
+ r"(?! cipher)|\s+id|\s+for|\s+via|;))"
45
+ ),
46
+ (
47
+ r"(?:with(?! cipher)\s+(?P<with>.+?)(?:\s*[(]?envelope-from|\s*[(]?"
48
+ r"envelope-sender|\s+from|\s+by|\s+id|\s+for|\s+via|;))"
49
+ ),
50
+ (
51
+ r"[^\w](?:id\s+(?P<id>.+?)(?:\s*[(]?envelope-from|\s*"
52
+ r"[(]?envelope-sender|\s+from|\s+by|\s+with"
53
+ r"(?! cipher)|\s+for|\s+via|;))"
54
+ ),
55
+ (
56
+ r"(?:for\s+(?P<for>.+?)(?:\s*[(]?envelope-from|\s*[(]?"
57
+ r"envelope-sender|\s+from|\s+by|\s+with"
58
+ r"(?! cipher)|\s+id|\s+via|;))"
59
+ ),
60
+ (
61
+ r"(?:via\s+(?P<via>.+?)(?:\s*[(]?"
62
+ r"envelope-from|\s*[(]?envelope-sender|\s+"
63
+ r"from|\s+by|\s+id|\s+for|\s+with(?! cipher)|;))"
64
+ ),
65
+ # assumes emails are always inside <>
66
+ r"(?:envelope-from\s+<(?P<envelope_from>.+?)>)",
67
+ r"(?:envelope-sender\s+<(?P<envelope_sender>.+?)>)",
68
+ # datetime comes after ; at the end
69
+ r";\s*(?P<date>.*)",
70
+ # sendgrid datetime
71
+ (
72
+ r"(?P<date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:"
73
+ r"\d{2}\.\d{9} \+0000 UTC) m=\+\d+\.\d+"
74
+ ),
75
+ ]
76
+
77
+ RECEIVED_COMPILED_LIST = [re.compile(i, re.I | re.DOTALL) for i in RECEIVED_PATTERNS]
78
+
79
+ EPILOGUE_DEFECTS = {"StartBoundaryNotFoundDefect"}
80
+
81
+ ADDRESSES_HEADERS = set(["bcc", "cc", "delivered-to", "from", "reply-to", "to"])
82
+
83
+ # These parts are always returned
84
+ OTHERS_PARTS = set(
85
+ [
86
+ "attachments",
87
+ "body",
88
+ "date",
89
+ "message-id",
90
+ "received",
91
+ "subject",
92
+ "timezone",
93
+ "to_domains",
94
+ "user-agent",
95
+ "x-mailer",
96
+ "x-original-to",
97
+ ]
98
+ )