django-codenerix-email 4.0.34__py2.py3-none-any.whl → 4.0.35__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codenerix_email/__init__.py +1 -1
- codenerix_email/__pycache__/__init__.cpython-310.pyc +0 -0
- codenerix_email/__pycache__/__init__.cpython-311.pyc +0 -0
- codenerix_email/__pycache__/models.cpython-310.pyc +0 -0
- codenerix_email/__pycache__/models.cpython-311.pyc +0 -0
- codenerix_email/management/commands/__pycache__/send_emails.cpython-311.pyc +0 -0
- codenerix_email/management/commands/__pycache__/test_email.cpython-311.pyc +0 -0
- codenerix_email/management/commands/email_test.py +184 -0
- codenerix_email/management/commands/emails_recv.py +611 -0
- codenerix_email/management/commands/emails_send.py +248 -0
- codenerix_email/management/commands/recv_emails.py +10 -0
- codenerix_email/management/commands/send_emails.py +10 -6
- codenerix_email/management/commands/test_email.py +11 -1
- codenerix_email/models.py +16 -18
- {django_codenerix_email-4.0.34.dist-info → django_codenerix_email-4.0.35.dist-info}/METADATA +1 -1
- {django_codenerix_email-4.0.34.dist-info → django_codenerix_email-4.0.35.dist-info}/RECORD +19 -15
- {django_codenerix_email-4.0.34.dist-info → django_codenerix_email-4.0.35.dist-info}/LICENSE +0 -0
- {django_codenerix_email-4.0.34.dist-info → django_codenerix_email-4.0.35.dist-info}/WHEEL +0 -0
- {django_codenerix_email-4.0.34.dist-info → django_codenerix_email-4.0.35.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,611 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from django.conf import settings
|
|
7
|
+
from django.core.management.base import BaseCommand, CommandError
|
|
8
|
+
from zoneinfo import ZoneInfo
|
|
9
|
+
|
|
10
|
+
from email import message_from_bytes
|
|
11
|
+
from email.header import decode_header
|
|
12
|
+
from email.message import Message
|
|
13
|
+
from email.parser import HeaderParser
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
from codenerix_email.models import (
|
|
17
|
+
EmailMessage,
|
|
18
|
+
EmailReceived,
|
|
19
|
+
BOUNCE_SOFT,
|
|
20
|
+
BOUNCE_HARD,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Silence DEBUG logs from imapclient
|
|
25
|
+
logging.getLogger("imapclient").setLevel(logging.WARNING)
|
|
26
|
+
|
|
27
|
+
import imaplib # noqa: E402
|
|
28
|
+
from imapclient import IMAPClient # noqa: E402
|
|
29
|
+
from imapclient.exceptions import LoginError # noqa: E402
|
|
30
|
+
|
|
31
|
+
# Deprecation warning
# The command name is the first argument after the script (sys.argv[0] is the
# script itself, e.g. "manage.py"), so that is the value inspected here.
# NOTE(review): assumes the legacy 'recv_emails' command re-uses this module;
# confirm against recv_emails.py.
if len(sys.argv) > 1 and sys.argv[1] == "recv_emails":
    logger = logging.getLogger("codenerix")
    logger.warning(
        "WARNING: 'recv_emails' is DEPRECATED, switch to 'emails_recv' instead"
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Command(BaseCommand):
|
|
42
|
+
help = "Fetches new emails from the configured IMAP account."
|
|
43
|
+
|
|
44
|
+
def add_arguments(self, parser):
    """
    Register the command-line options of this command on ``parser``.

    Options: ``--silent``, the three filters ``--tracking-id``,
    ``--imap-id`` and ``--message-id`` (all strings), and the boolean
    switches ``--all`` and ``--rewrite``.
    """
    register = parser.add_argument

    # Silent mode switch (explicit dest/default, off unless requested)
    register(
        "--silent",
        action="store_true",
        dest="silent",
        default=False,
        help="Enable silent mode",
    )

    # String filters used to narrow down which emails are processed
    string_filters = (
        ("--tracking-id", "Tracking ID to filter"),
        ("--imap-id", "IMAP ID to filter"),
        ("--message-id", "Message-ID to filter"),
    )
    for flag, description in string_filters:
        register(flag, type=str, help=description)

    # Boolean switches
    switches = (
        ("--all", "Process all emails"),
        ("--rewrite", "Rewrite existing"),
    )
    for flag, description in switches:
        register(flag, action="store_true", help=description)
|
|
67
|
+
|
|
68
|
+
def handle(self, *args, **options):
    """
    Run the IMAP synchronization.

    Stores the command-line options on the instance, reads the IMAP_EMAIL_*
    settings, connects and logs in to the IMAP server, selects the inbox
    folder (when one is configured), delegates to ``process()`` and finally
    logs out.

    Settings read (with defaults): IMAP_EMAIL_HOST (None), IMAP_EMAIL_PORT
    (993), IMAP_EMAIL_USER (None), IMAP_EMAIL_PASSWORD (None),
    IMAP_EMAIL_SSL (True) and IMAP_EMAIL_INBOX_FOLDER ("INBOX").

    Raises:
        CommandError: when the configuration is incomplete (only if not in
            silent mode), or when connecting, logging in or selecting the
            folder fails. Any other exception raised while processing
            propagates unchanged.
    """
    # Get configuration
    self.silent = options["silent"]
    self.verbose = not self.silent
    self.imap_id = options.get("imap_id")
    self.message_id = options.get("message_id")
    self.tracking_id = options.get("tracking_id")
    self.rewrite = options.get("rewrite", False)
    self.process_all = options.get("all", False)

    # Show header
    if self.verbose:
        self.stdout.write(
            self.style.SUCCESS("Starting IMAP email synchronization...")
        )

    # Get configuration from settings
    host = getattr(settings, "IMAP_EMAIL_HOST", None)
    port = getattr(settings, "IMAP_EMAIL_PORT", 993)
    user = getattr(settings, "IMAP_EMAIL_USER", None)
    password = getattr(settings, "IMAP_EMAIL_PASSWORD", None)
    ssl = getattr(settings, "IMAP_EMAIL_SSL", True)
    folder = getattr(settings, "IMAP_EMAIL_INBOX_FOLDER", "INBOX")

    # Verify that IMAP settings are configured
    # (silent mode: leave quietly without doing anything)
    if host is None or not port:
        if self.verbose:
            raise CommandError(
                "IMAP settings not configured. Please set IMAP_EMAIL_HOST "
                "and IMAP_EMAIL_PORT in settings."
            )
        return

    # Validate credentials
    if user is None or password is None:
        if self.silent:
            return
        raise CommandError(
            "IMAP user or password not configured. Please set "
            "IMAP_EMAIL_USER and IMAP_EMAIL_PASSWORD in settings."
        )

    try:
        # Connect to the IMAP server
        server = IMAPClient(host, port=port, ssl=ssl)
    except Exception as e:
        raise CommandError(
            f"Failed to connect to IMAP server ("
            f"{host=}, "
            f"{port=}, "
            f"ssl={'yes' if ssl else 'no'}"
            f"): {e}"
        ) from e

    try:
        # Login and select the inbox
        try:
            server.login(user, password)
        except LoginError as e:
            raise CommandError(
                f"Failed to login to IMAP server with {user=}: {e}"
            ) from e

        if folder:
            try:
                server.select_folder(folder, readonly=False)
            except imaplib.IMAP4.error as e:
                raise CommandError(
                    f"Failed to select inbox {folder=}"
                ) from e

        # Process emails
        (created_count, overwritten_count) = self.process(server)
        count = created_count + overwritten_count

        # Show summary
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Successfully synchronized {count} emails "
                    f"(new: {created_count}, "
                    f"overwritten: {overwritten_count})"
                )
            )

    finally:
        # Logout from the server. This is best-effort on purpose: a
        # failing logout must not hide the outcome of the synchronization.
        try:
            server.logout()
        except Exception:
            pass
|
|
165
|
+
|
|
166
|
+
def process(self, server):
    """
    Fetch the selected emails through ``server`` and store each one as an
    EmailReceived object.

    The set of messages is chosen, in priority order, by ``--imap-id``,
    ``--message-id`` or ``--all``; with none of them only UNSEEN messages
    are searched. Emails whose Message-ID is already stored are skipped
    unless ``--rewrite`` was given. Every handled message (stored or
    skipped as duplicate) gets the \\Seen flag; messages discarded by the
    ``--tracking-id`` filter are left untouched.

    Args:
        server: IMAP client as prepared by ``handle()``; ``search``,
            ``fetch`` and ``add_flags`` are called on it.

    Returns:
        A tuple ``(created_count, overwrite_count)``.

    Raises:
        CommandError: if ``--imap-id`` is not an integer, or if linking a
            received email to a sent one fails.
    """

    # Processed emails count
    created_count = 0
    overwrite_count = 0

    # Numeric form of --imap-id (None when the option was not given)
    wanted_imap_id = None

    # Look up for emails
    if self.imap_id:
        # Search by specific IMAP ID
        try:
            wanted_imap_id = int(self.imap_id)
        except ValueError as e:
            raise CommandError(
                f"Invalid IMAP ID '{self.imap_id}'. Must be an integer."
            ) from e
        messages_ids = [wanted_imap_id]
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Processing email with IMAP ID {self.imap_id}."
                )
            )

    elif self.message_id:
        # Search by specific Message-ID
        messages_ids = server.search(
            ["HEADER", "Message-ID", self.message_id]
        )
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) with "
                    f"Message-ID {self.message_id}."
                )
            )

    elif self.process_all:
        # Process all emails
        messages_ids = server.search(["ALL"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) to process."
                )
            )

    else:
        # Search by UNSEEN
        messages_ids = server.search(["UNSEEN"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} new email(s) to process."
                )
            )

    # Nothing to do
    if not messages_ids:
        return (created_count, overwrite_count)

    # Fetch the full message and internal date
    # (BODY.PEEK so that fetching does not mark the message as read)
    fetched_data = server.fetch(
        messages_ids, ["BODY.PEEK[]", "INTERNALDATE"]
    )

    for imap_id, message_data in fetched_data.items():
        # Filter out by IMAP ID if specified. Compared as integers so
        # that inputs like "007" still match message 7.
        if wanted_imap_id is not None and imap_id != wanted_imap_id:
            continue

        # Get the raw email and internal date
        raw_email = message_data[b"BODY[]"]
        internal_date_naive = message_data[b"INTERNALDATE"]
        internal_date = internal_date_naive.replace(
            tzinfo=ZoneInfo(settings.TIME_ZONE)
        )

        # Parse the email
        email_msg = message_from_bytes(raw_email)

        # Extract subject, efrom, eto & eid
        # (a missing Subject header yields an empty subject)
        subject = self._decode_header_text(email_msg.get("Subject"))
        efrom = email_msg.get("From")
        eto = email_msg.get("To")
        eid = email_msg.get("Message-ID")

        # If we can't get a Message-ID, use the IMAP ID as fallback
        # to avoid duplicates
        if not eid:
            eid = f"<imapid-{imap_id}@{settings.IMAP_EMAIL_HOST}>"

        # Avoid processing duplicates
        email_received = EmailReceived.objects.filter(eid=eid).first()
        if self.rewrite or not email_received:
            # Process multipart emails: keep the first text/plain and
            # the first text/html part found
            body_plain = ""
            body_html = ""
            if email_msg.is_multipart():
                for part in email_msg.walk():
                    content_type = part.get_content_type()
                    if content_type == "text/plain" and not body_plain:
                        body_plain = self._decode_part_text(part)
                    elif content_type == "text/html" and not body_html:
                        body_html = self._decode_part_text(part)
            else:
                body_plain = self._decode_part_text(email_msg)

            # Logic to associate replies/bounces with sent emails
            try:
                email_message = None

                # Locate the tracking ID
                tracking_id = self.find_tracking_id(email_msg)

                # Filter out by tracking ID if specified
                if self.tracking_id and tracking_id != self.tracking_id:
                    continue

                # If found, try to link to the sent email
                if tracking_id:
                    try:
                        email_message = EmailMessage.objects.get(
                            uuid=tracking_id
                        )
                    except EmailMessage.DoesNotExist:
                        email_message = None
                        if self.verbose:
                            self.stdout.write(
                                self.style.WARNING(
                                    f"Tracking ID {tracking_id} found "
                                    "but no matching sent email."
                                )
                            )

            except Exception as e:
                raise CommandError(
                    "Error while linking email with IMAP ID "
                    f"{imap_id} to sent email: {e}"
                ) from e

            # Detect whether the email is a bounce and of which type
            (bounce_type, bounce_reason) = self.analyze_bounce(email_msg)

            # Extract all headers into a dictionary
            # (a repeated header keeps its last value)
            headers = {
                header: self._decode_header_text(value)
                for header, value in email_msg.items()
            }

            # Create EmailReceived object if doesn't exist
            overwriting = bool(email_received)
            if not overwriting:
                email_received = EmailReceived()

            # Populate fields
            email_received.imap_id = imap_id
            email_received.eid = eid
            email_received.efrom = efrom
            email_received.eto = eto
            email_received.subject = subject
            email_received.headers = headers
            email_received.body_text = body_plain
            email_received.body_html = body_html
            email_received.date_received = internal_date
            email_received.email = email_message
            email_received.bounce_type = bounce_type
            email_received.bounce_reason = bounce_reason

            # Save the received email
            email_received.save()

            # Count created or overwritten
            if overwriting:
                overwrite_count += 1
                verb = "Overwritten"
            else:
                created_count += 1
                verb = "Created"

            if self.verbose:
                summary = (
                    f"{verb} email with IMAP ID: "
                    f"{imap_id} (link={tracking_id})"
                )
                if bounce_type:
                    bounce_type_str = (
                        "Hard" if bounce_type == BOUNCE_HARD else "Soft"
                    )
                    bounce_reason_str = bounce_reason or "Unknown"
                    self.stdout.write(
                        self.style.WARNING(
                            f"{summary} "
                            f"[{bounce_type_str} bounce, "
                            f"reason={bounce_reason_str}]"
                        )
                    )
                else:
                    self.stdout.write(self.style.SUCCESS(summary))

        else:
            if self.verbose:
                self.stdout.write(
                    self.style.WARNING(
                        f"Skipping email with IMAP ID: {imap_id} (DUP)"
                    )
                )

        # Mark the message as read
        # (flag \Seen) avoid reprocessing
        server.add_flags(imap_id, [b"\\Seen"])

    return (created_count, overwrite_count)

def _decode_header_text(self, value):
    """
    Decode a (possibly RFC 2047 encoded) header value into text.

    Every chunk returned by ``decode_header`` is decoded and joined, so
    headers made of several encoded words are not truncated. ``None``
    (missing header) yields an empty string.
    """
    if value is None:
        return ""

    chunks = []
    for chunk, charset in decode_header(value):
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode(charset or "utf-8", "ignore")
            except LookupError:
                # Charset label unknown to Python: fall back to UTF-8
                chunk = chunk.decode("utf-8", "ignore")
        chunks.append(chunk)
    return "".join(chunks)

def _decode_part_text(self, part):
    """
    Return the decoded text payload of ``part`` (empty string if the part
    has no bytes payload), using its declared charset or UTF-8.
    """
    payload = part.get_payload(decode=True)
    if not isinstance(payload, bytes):
        return ""

    try:
        return payload.decode(
            part.get_content_charset() or "utf-8", "ignore"
        )
    except LookupError:
        # Charset label unknown to Python: fall back to UTF-8
        return payload.decode("utf-8", "ignore")
|
|
406
|
+
|
|
407
|
+
def find_tracking_id(self, msg: Message) -> str | None:
|
|
408
|
+
"""
|
|
409
|
+
Searches for the X-Codenerix-Tracking-ID robustly in an email.
|
|
410
|
+
|
|
411
|
+
It performs the search in three steps:
|
|
412
|
+
1. In the main headers of the email.
|
|
413
|
+
2. In the attached parts that are a complete email (message/rfc822).
|
|
414
|
+
3. As a last resort, searches the text in the body of the message.
|
|
415
|
+
"""
|
|
416
|
+
|
|
417
|
+
# Method 1: Search in main headers (for direct replies)
|
|
418
|
+
tracking_id = msg.get("X-Codenerix-Tracking-ID", None)
|
|
419
|
+
|
|
420
|
+
# Method 2: Search in attached parts (for bounces and forwards)
|
|
421
|
+
if not tracking_id:
|
|
422
|
+
# Not found directly in headers
|
|
423
|
+
# Search in attached parts (for bounces and forwards)
|
|
424
|
+
if msg.is_multipart():
|
|
425
|
+
# Iterate through parts
|
|
426
|
+
for part in msg.walk():
|
|
427
|
+
# Get the content type of the part
|
|
428
|
+
content_type = part.get_content_type()
|
|
429
|
+
|
|
430
|
+
# We look for an attachment that is itself an email
|
|
431
|
+
if content_type == "message/rfc822":
|
|
432
|
+
# The payload of this part is the original email
|
|
433
|
+
# The payload is a list of messages, take the first one
|
|
434
|
+
original_msg_payload = part.get_payload()
|
|
435
|
+
if (
|
|
436
|
+
isinstance(original_msg_payload, list)
|
|
437
|
+
and original_msg_payload
|
|
438
|
+
):
|
|
439
|
+
original_msg = original_msg_payload[0]
|
|
440
|
+
if isinstance(original_msg, Message):
|
|
441
|
+
tracking_id = original_msg.get(
|
|
442
|
+
"X-Codenerix-Tracking-ID"
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
elif content_type == "text/rfc822-headers":
|
|
446
|
+
# The payload is the raw headers of the original email
|
|
447
|
+
headers_payload = part.get_payload(decode=True)
|
|
448
|
+
if isinstance(headers_payload, bytes):
|
|
449
|
+
# Decode using the specified charset
|
|
450
|
+
charset = part.get_content_charset() or "utf-8"
|
|
451
|
+
headers_text = headers_payload.decode(
|
|
452
|
+
charset, errors="ignore"
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
# Parse headers text into a Message object
|
|
456
|
+
headers_msg = HeaderParser().parsestr(headers_text)
|
|
457
|
+
tracking_id = headers_msg.get(
|
|
458
|
+
"X-Codenerix-Tracking-ID"
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
# Method 3: Search in the body text (fallback)
|
|
462
|
+
if not tracking_id:
|
|
463
|
+
# The original email might be quoted as plain text.
|
|
464
|
+
body_text = ""
|
|
465
|
+
if msg.is_multipart():
|
|
466
|
+
# Concatenate all text/plain parts
|
|
467
|
+
for part in msg.walk():
|
|
468
|
+
# We only want text/plain parts
|
|
469
|
+
if part.get_content_type() == "text/plain":
|
|
470
|
+
# Get the decoded payload
|
|
471
|
+
payload = part.get_payload(decode=True)
|
|
472
|
+
if isinstance(payload, bytes):
|
|
473
|
+
# Decode using the specified charset
|
|
474
|
+
charset = part.get_content_charset() or "utf-8"
|
|
475
|
+
body_text += payload.decode(
|
|
476
|
+
charset, errors="ignore"
|
|
477
|
+
)
|
|
478
|
+
else:
|
|
479
|
+
# Single part email, check if it's text/plain
|
|
480
|
+
if msg.get_content_type() == "text/plain":
|
|
481
|
+
# Get the decoded payload
|
|
482
|
+
payload = msg.get_payload(decode=True)
|
|
483
|
+
if isinstance(payload, bytes):
|
|
484
|
+
# Decode using the specified charset
|
|
485
|
+
charset = msg.get_content_charset() or "utf-8"
|
|
486
|
+
body_text = payload.decode(
|
|
487
|
+
charset, errors="ignore"
|
|
488
|
+
)
|
|
489
|
+
|
|
490
|
+
# If we have body text, search for the header using regex
|
|
491
|
+
if body_text:
|
|
492
|
+
# We use a regex to find the header in the text
|
|
493
|
+
match = re.search(
|
|
494
|
+
r"X-Codenerix-Tracking-ID:\s*([a-fA-F0-9\-]{36})",
|
|
495
|
+
body_text,
|
|
496
|
+
)
|
|
497
|
+
|
|
498
|
+
if match:
|
|
499
|
+
# If found, extract the tracking ID
|
|
500
|
+
tracking_id = match.group(1).strip()
|
|
501
|
+
|
|
502
|
+
# Return the found tracking ID if any
|
|
503
|
+
return tracking_id
|
|
504
|
+
|
|
505
|
+
def analyze_bounce(
    self, msg: Message
) -> tuple[Optional[str], Optional[str]]:
    """
    Analyze an email to determine if it is a bounce and of what type.

    Three methods are tried in order, stopping at the first hit:
        1. DSN report (multipart/report with report-type=delivery-status):
           every header block of the message/delivery-status part is
           checked for ``Action: failed``.
        2. X-Failed-Recipients header, or an Auto-Submitted header
           combined with bounce keywords in the subject.
        3. Sender (mailer-daemon@ / postmaster@) or subject keywords.

    Args:
        msg: the parsed email.

    Returns:
        A tuple (bounce_type, bounce_reason).
        - bounce_type: BOUNCE_HARD, BOUNCE_SOFT, or None if not a bounce.
        - bounce_reason: the DSN Status value (e.g., '5.1.1'), an
          "Unknown ..." marker naming the heuristic used, or None.
    """

    # Initialize
    bounce_type: Optional[str] = None
    bounce_reason: Optional[str] = None

    # Method 1: Look for DSN reports
    if (
        msg.get_content_type() == "multipart/report"
        and msg.get_param("report-type") == "delivery-status"
    ):
        for part in msg.walk():
            # We look for the delivery-status part
            if part.get_content_type() != "message/delivery-status":
                continue

            # The payload is a list with one Message per header block
            payload = part.get_payload()
            if not payload or not isinstance(payload, list):
                continue

            # The first block holds the per-message fields and each
            # following block describes one recipient. Action and Status
            # are per-recipient fields (RFC 3464), so every block has to
            # be inspected and not only the first one.
            for status_headers in payload:
                if not isinstance(status_headers, Message):
                    continue

                # str(): header values are not guaranteed to be plain str
                action = str(status_headers.get("Action", ""))
                if action.strip().lower() != "failed":
                    continue

                # Determine Hard/Soft by status code (RFC 3463)
                status_code = str(status_headers.get("Status", "")).strip()
                if status_code.startswith("4."):
                    # 4.x.x: temporary failure (soft)
                    bounce_type = BOUNCE_SOFT
                    bounce_reason = status_code
                else:
                    # 5.x.x: permanent failure (hard); an unknown or
                    # missing status is assumed to be a hard bounce too
                    bounce_type = BOUNCE_HARD
                    bounce_reason = status_code or "Unknown"
                break

            # First failed recipient decides
            if bounce_type:
                break

    # Method 2: Some mail servers include headers indicating a bounce
    if not bounce_type:
        if msg.get("X-Failed-Recipients"):
            # Presence of this header usually indicates a hard bounce
            bounce_type = BOUNCE_HARD
            bounce_reason = "Unknown (X-Failed-Recipients)"

        elif str(msg.get("Auto-Submitted", "")).lower() in (
            "auto-replied",
            "auto-generated",
        ):
            # It could be a bounce, but also an "Out of Office",
            # so we combine it with a keyword search.
            subject = str(msg.get("Subject", "")).lower()
            bounce_keywords = [
                "undeliverable",
                "delivery failed",
                "failure notice",
            ]

            # If we find bounce keywords in the subject
            if any(keyword in subject for keyword in bounce_keywords):
                # Assume is a hard bounce
                bounce_type = BOUNCE_HARD
                bounce_reason = "Unknown (Auto-Submitted + Keyword)"

    # Method 3: keyword search (less reliable)
    if not bounce_type:
        # We look for common bounce keywords in the From or Subject headers
        # We avoid false positives by requiring specific keywords.
        # str(): with raw 8-bit bytes in a header the parser hands back a
        # Header object instead of str, which has no lower()
        from_header = str(msg.get("From", "")).lower()
        subject_header = str(msg.get("Subject", "")).lower()

        if "mailer-daemon@" in from_header or "postmaster@" in from_header:
            # Common bounce sender addresses
            bounce_type = BOUNCE_HARD
            bounce_reason = "Unknown (From Keyword)"
        else:
            # Check subject for common bounce keywords
            bounce_keywords = [
                "undelivered",
                "delivery error",
                "mail delivery failed",
            ]
            if any(
                keyword in subject_header for keyword in bounce_keywords
            ):
                bounce_type = BOUNCE_HARD
                bounce_reason = "Unknown (Subject Keyword)"

    return (bounce_type, bounce_reason)
|