django-codenerix-email 4.0.28__py2.py3-none-any.whl → 4.0.30__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,629 @@
1
+ import re
2
+
3
+ import logging
4
+
5
+ from django.conf import settings
6
+ from django.core.management.base import BaseCommand, CommandError
7
+ from zoneinfo import ZoneInfo
8
+
9
+ from email import message_from_bytes
10
+ from email.header import decode_header
11
+ from email.message import Message
12
+ from email.parser import HeaderParser
13
+ from typing import Optional
14
+
15
+ from codenerix_email.models import ( # type: ignore
16
+ EmailMessage,
17
+ EmailReceived,
18
+ BOUNCE_SOFT,
19
+ BOUNCE_HARD,
20
+ )
21
+
22
+
23
+ # Silence DEBUG logs from imapclient
24
+ logging.getLogger("imapclient").setLevel(logging.WARNING)
25
+
26
+ import imaplib # noqa: E402
27
+ from imapclient import IMAPClient # type: ignore # noqa: E402
28
+ from imapclient.exceptions import LoginError # type:ignore # noqa: E402
29
+
30
+
31
+ class Command(BaseCommand):
32
+ help = "Fetches new emails from the configured IMAP account."
33
+
34
def add_arguments(self, parser):
    """
    Register the command-line options accepted by this command.

    Options are added in this order: ``--silent``, the three string
    filters (``--tracking-id``, ``--imap-id``, ``--message-id``) and the
    two boolean switches (``--all``, ``--rewrite``).
    """

    # Silent mode is the only option declaring dest/default explicitly
    parser.add_argument(
        "--silent",
        action="store_true",
        dest="silent",
        default=False,
        help="Enable silent mode",
    )

    # String filters restricting which emails are processed
    string_filters = (
        ("--tracking-id", "Tracking ID to filter"),
        ("--imap-id", "IMAP ID to filter"),
        ("--message-id", "Message-ID to filter"),
    )
    for flag, description in string_filters:
        parser.add_argument(flag, type=str, help=description)

    # Boolean switches
    switches = (
        ("--all", "Process all emails"),
        ("--rewrite", "Rewrite existing"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)
58
+
59
def handle(self, *args, **options):
    """
    Entry point of the management command.

    Stores the command-line options on the instance, connects and logs
    in to the IMAP server described by the ``IMAP_EMAIL_*`` settings,
    selects the inbox folder, delegates the work to ``self.process`` and
    prints a summary unless ``--silent`` was given. The connection is
    always logged out, even when processing fails.

    Raises:
        CommandError: if ``IMAP_EMAIL_HOST`` / ``IMAP_EMAIL_PORT`` are
            missing or empty, or if connecting, logging in or selecting
            the inbox folder fails. Any other exception raised while
            processing propagates unchanged.
    """

    # Get configuration
    self.verbose = not options["silent"]
    self.imap_id = options.get("imap_id")
    self.message_id = options.get("message_id")
    self.tracking_id = options.get("tracking_id")
    self.rewrite = options.get("rewrite", False)
    self.process_all = options.get("all", False)

    # Show header
    if self.verbose:
        self.stdout.write(
            self.style.SUCCESS("Starting IMAP email synchronization...")
        )

    # Verify that IMAP settings are configured. getattr() is used so a
    # setting that is not defined at all produces the friendly error
    # below instead of an AttributeError.
    host = getattr(settings, "IMAP_EMAIL_HOST", None)
    port = getattr(settings, "IMAP_EMAIL_PORT", None)
    if not (host and port):
        raise CommandError(
            "IMAP settings not configured. Please set IMAP_EMAIL_HOST "
            "and IMAP_EMAIL_PORT in settings."
        )

    # Connect to the IMAP server
    try:
        server = IMAPClient(
            host,
            port=port,
            ssl=settings.IMAP_EMAIL_SSL,
        )
    except Exception as e:
        raise CommandError(
            f"Failed to connect to IMAP server ("
            f"host={host}, "
            f"port={port}, "
            f"ssl={'yes' if settings.IMAP_EMAIL_SSL else 'no'}"
            f"): {e}"
        ) from e

    try:

        # Login
        try:
            server.login(
                settings.IMAP_EMAIL_USER, settings.IMAP_EMAIL_PASSWORD
            )
        except LoginError as e:
            raise CommandError(
                f"Failed to login to IMAP server with user "
                f"'{settings.IMAP_EMAIL_USER}': {e}"
            ) from e

        # Select the inbox (read-write, messages get flagged later)
        try:
            server.select_folder(
                settings.IMAP_EMAIL_INBOX_FOLDER, readonly=False
            )
        except imaplib.IMAP4.error as e:
            raise CommandError(
                f"Failed to select inbox folder "
                f"'{settings.IMAP_EMAIL_INBOX_FOLDER}'"
            ) from e

        # Process emails
        (created_count, overwritten_count) = self.process(server)
        count = created_count + overwritten_count

        # Show summary
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Successfully synchronized {count} emails "
                    f"(new: {created_count}, "
                    f"overwritten: {overwritten_count})"
                )
            )

    finally:

        # Logout from the server. This is best effort on purpose: a
        # failing logout must not hide the outcome of the synchronization.
        try:
            server.logout()
        except Exception:
            pass
152
+
153
def process(self, server):
    """
    Fetch emails from the selected IMAP folder and save them as
    EmailReceived objects.

    The set of messages depends on the options stored by ``handle``: a
    single IMAP ID, a Message-ID header search, every message, or (by
    default) only the UNSEEN ones. Each message is parsed, linked to the
    sent EmailMessage whose ``uuid`` equals its tracking ID (when one is
    found), analyzed for bounces and stored. Messages already stored
    (same ``eid``) are skipped unless ``self.rewrite`` is set. Every
    message that is stored or skipped as duplicate is flagged as seen;
    messages discarded by the IMAP ID / tracking ID filters are not.

    Args:
        server: the logged-in IMAP client created by ``handle``; its
            ``search``, ``fetch`` and ``add_flags`` methods are used.

    Returns:
        A tuple ``(created_count, overwrite_count)``.

    Raises:
        CommandError: if ``self.imap_id`` is not an integer, or if
            locating/linking the tracking ID fails for a message.
    """

    def decode_bytes(data, charset):
        # An unknown charset label must not abort the whole run:
        # fall back to UTF-8 and drop undecodable bytes.
        try:
            return data.decode(charset or "utf-8", "ignore")
        except LookupError:
            return data.decode("utf-8", "ignore")

    def header_text(raw_value):
        # A header may be absent (None) or split into several RFC 2047
        # fragments; join them all instead of keeping only the first.
        if raw_value is None:
            return ""
        return "".join(
            decode_bytes(fragment, charset)
            if isinstance(fragment, bytes)
            else fragment
            for fragment, charset in decode_header(raw_value)
        )

    def part_text(part):
        # get_payload(decode=True) returns None when there is no
        # decodable payload; treat that as an empty body.
        payload = part.get_payload(decode=True)
        if isinstance(payload, bytes):
            return decode_bytes(payload, part.get_content_charset())
        return ""

    # Processed emails count
    created_count = 0
    overwrite_count = 0

    # IMAP ID requested by the user, already converted to int
    wanted_imap_id = None

    # Look up for emails
    if self.imap_id:
        # Search by specific IMAP ID
        try:
            wanted_imap_id = int(self.imap_id)
        except ValueError as e:
            raise CommandError(
                f"Invalid IMAP ID '{self.imap_id}'. Must be an integer."
            ) from e
        messages_ids = [wanted_imap_id]
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Processing email with IMAP ID {self.imap_id}."
                )
            )

    elif self.message_id:
        # Search by specific Message-ID
        messages_ids = server.search(
            ["HEADER", "Message-ID", self.message_id]
        )
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) with "
                    f"Message-ID {self.message_id}."
                )
            )

    elif self.process_all:
        # Process all emails
        messages_ids = server.search(["ALL"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) to process."
                )
            )

    else:
        # Search by UNSEEN
        messages_ids = server.search(["UNSEEN"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} new email(s) to process."
                )
            )

    # Nothing to do
    if not messages_ids:
        return (created_count, overwrite_count)

    # Fetch the full message and internal date
    fetched_data = server.fetch(
        messages_ids, ["BODY.PEEK[]", "INTERNALDATE"]
    )

    # Timezone attached to every INTERNALDATE (loop invariant)
    local_tz = ZoneInfo(settings.TIME_ZONE)

    # Use IMAP IDs so identifiers do not change between sessions
    for imap_id, message_data in fetched_data.items():

        # Filter out by IMAP ID if specified (compared as integers so
        # values like "007" still match the fetched message)
        if wanted_imap_id is not None and imap_id != wanted_imap_id:
            continue

        # Get the raw email and internal date
        raw_email = message_data[b"BODY[]"]
        internal_date = message_data[b"INTERNALDATE"].replace(
            tzinfo=local_tz
        )

        # Parse the email
        msg = message_from_bytes(raw_email)

        # Extract subject, efrom, eto & eid
        subject = header_text(msg["Subject"])
        efrom = msg.get("From")
        eto = msg.get("To")
        eid = msg.get("Message-ID")

        # If we can't get a Message-ID, use the IMAP ID as fallback
        # to avoid duplicates
        if not eid:
            eid = f"<imapid-{imap_id}@{settings.IMAP_EMAIL_HOST}>"

        # Avoid processing duplicates
        email_received = EmailReceived.objects.filter(eid=eid).first()
        if self.rewrite or not email_received:

            # Extract the first text/plain and text/html bodies
            body_plain = ""
            body_html = ""
            if msg.is_multipart():
                for part in msg.walk():
                    content_type = part.get_content_type()
                    if content_type == "text/plain" and not body_plain:
                        body_plain = part_text(part)
                    elif content_type == "text/html" and not body_html:
                        body_html = part_text(part)
            else:
                body_plain = part_text(msg)

            # Logic to associate replies/bounces with sent emails
            try:
                email_message = None

                # Locate the tracking ID
                tracking_id = self.find_tracking_id(msg)

                # Filter out by tracking ID if specified
                if self.tracking_id and tracking_id != self.tracking_id:
                    continue

                # If found, try to link to the sent email
                if tracking_id:
                    try:
                        email_message = EmailMessage.objects.get(
                            uuid=tracking_id
                        )
                    except EmailMessage.DoesNotExist:
                        email_message = None
                        if self.verbose:
                            self.stdout.write(
                                self.style.WARNING(
                                    f"Tracking ID {tracking_id} found "
                                    "but no matching sent email."
                                )
                            )

            except Exception as e:
                raise CommandError(
                    "Error while linking email with IMAP ID "
                    f"{imap_id} to sent email: {e}"
                ) from e

            # Detect whether this email is a bounce and of which type
            (bounce_type, bounce_reason) = self.analyze_bounce(msg)

            # Extract all headers into a dictionary (a repeated header
            # name keeps its last value)
            headers = {
                header: header_text(value) for header, value in msg.items()
            }

            # Create EmailReceived object if doesn't exist
            overwriting = bool(email_received)
            if not overwriting:
                email_received = EmailReceived()

            # Populate fields
            email_received.imap_id = imap_id
            email_received.eid = eid
            email_received.efrom = efrom
            email_received.eto = eto
            email_received.subject = subject
            email_received.headers = headers
            email_received.body_text = body_plain
            email_received.body_html = body_html
            email_received.date_received = internal_date
            email_received.email = email_message
            email_received.bounce_type = bounce_type
            email_received.bounce_reason = bounce_reason

            # Save the received email
            email_received.save()

            # Count created or overwritten
            if overwriting:
                overwrite_count += 1
                verb = "Overwritten"
            else:
                created_count += 1
                verb = "Created"

            if self.verbose:
                # Named "summary" so the parsed message in "msg" is
                # not shadowed by a log string
                summary = (
                    f"{verb} email with IMAP ID: "
                    f"{imap_id} (link={tracking_id})"
                )
                if bounce_type:
                    bounce_type_str = (
                        "Hard" if bounce_type == BOUNCE_HARD else "Soft"
                    )
                    bounce_reason_str = bounce_reason or "Unknown"
                    self.stdout.write(
                        self.style.WARNING(
                            f"{summary} "
                            f"[{bounce_type_str} bounce, "
                            f"reason={bounce_reason_str}]"
                        )
                    )
                else:
                    self.stdout.write(self.style.SUCCESS(summary))

        else:
            if self.verbose:
                self.stdout.write(
                    self.style.WARNING(
                        f"Skipping email with IMAP ID: {imap_id} (DUP)"
                    )
                )

        # Mark the message as read
        # (flag \Seen) avoid reprocessing
        server.add_flags(imap_id, [b"\\Seen"])

    return (created_count, overwrite_count)
396
+
397
def find_tracking_id(self, msg: Message) -> Optional[str]:
    """
    Searches for the X-Codenerix-Tracking-ID robustly in an email.

    It performs the search in three steps and returns the first hit:
    1. In the main headers of the email.
    2. In the attached parts that are a complete email (message/rfc822)
       or a block of raw headers (text/rfc822-headers).
    3. As a last resort, searches the text/plain body of the message
       for a quoted "X-Codenerix-Tracking-ID: <36 hex/dash chars>" line.

    Args:
        msg: the parsed email.

    Returns:
        The tracking ID with surrounding whitespace removed, or None
        when no tracking ID can be found.
    """

    header_name = "X-Codenerix-Tracking-ID"

    def clean(value):
        # Header values may be Header objects and may carry stray
        # whitespace; empty values count as "not found".
        if value is None:
            return None
        return str(value).strip() or None

    def part_text(part):
        # Decode the payload with the declared charset, falling back to
        # UTF-8 when the charset label is unknown to Python.
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            return ""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="ignore")
        except LookupError:
            return payload.decode("utf-8", errors="ignore")

    # Method 1: Search in main headers (for direct replies)
    tracking_id = clean(msg.get(header_name))
    if tracking_id:
        return tracking_id

    # Method 2: Search in attached parts (for bounces and forwards)
    if msg.is_multipart():
        for part in msg.walk():
            content_type = part.get_content_type()
            candidate = None

            if content_type == "message/rfc822":
                # The payload is a list of messages, the first one is
                # the original email
                embedded = part.get_payload()
                if (
                    isinstance(embedded, list)
                    and embedded
                    and isinstance(embedded[0], Message)
                ):
                    candidate = clean(embedded[0].get(header_name))

            elif content_type == "text/rfc822-headers":
                # The payload is the raw headers of the original email
                headers_text = part_text(part)
                if headers_text:
                    headers_msg = HeaderParser().parsestr(headers_text)
                    candidate = clean(headers_msg.get(header_name))

            # Stop at the first hit so a later part lacking the header
            # cannot overwrite an ID that was already found
            if candidate:
                return candidate

    # Method 3: Search in the body text (fallback), the original email
    # might be quoted as plain text
    if msg.is_multipart():
        body_text = "".join(
            part_text(part)
            for part in msg.walk()
            if part.get_content_type() == "text/plain"
        )
    elif msg.get_content_type() == "text/plain":
        body_text = part_text(msg)
    else:
        body_text = ""

    # Header names are case-insensitive, so is the search
    match = re.search(
        r"X-Codenerix-Tracking-ID:\s*([a-fA-F0-9\-]{36})",
        body_text,
        re.IGNORECASE,
    )
    return match.group(1).strip() if match else None
509
+
510
def analyze_bounce(
    self, msg: Message
) -> tuple[Optional[str], Optional[str]]:
    """
    Analyzes an email to determine if it is a bounce and of what type.

    Three methods are tried in order, the first one that matches wins:
    1. A DSN report (multipart/report with report-type=delivery-status)
       containing a header block with "Action: failed".
    2. The X-Failed-Recipients header, or an Auto-Submitted header
       combined with bounce keywords in the subject.
    3. A mailer-daemon@/postmaster@ sender or bounce keywords in the
       subject.

    Args:
        msg: the parsed email.

    Returns:
        A tuple (bounce_type, bounce_reason).
        - bounce_type: BOUNCE_HARD, BOUNCE_SOFT, or None if not a bounce.
        - bounce_reason: the status code taken from the DSN
          (e.g., '5.1.1'), an "Unknown (...)" label naming the heuristic
          that matched, or None if not a bounce.
    """

    def header(message, name):
        # Header values may be Header objects (raw 8-bit data), so they
        # are converted to str before any string method is used
        return str(message.get(name, "")).strip()

    # Method 1: Look for DSN reports
    if (
        msg.get_content_type() == "multipart/report"
        and msg.get_param("report-type") == "delivery-status"
    ):

        # Iterate through parts to find the delivery-status part
        for part in msg.walk():
            if part.get_content_type() != "message/delivery-status":
                continue

            # The payload is a list of header blocks: the first one
            # holds the per-message fields (e.g. Reporting-MTA) and the
            # following ones the per-recipient fields (Action, Status),
            # so every block has to be inspected
            blocks = part.get_payload()
            if not isinstance(blocks, list):
                continue

            for block in blocks:
                if not isinstance(block, Message):
                    continue

                # Only failed deliveries count as bounces
                if header(block, "Action").lower() != "failed":
                    continue

                # Determine Hard/Soft by status code (RFC3463)
                status_code = header(block, "Status")
                if status_code.startswith("5."):
                    # 5.x.x: permanent failure (hard)
                    return (BOUNCE_HARD, status_code)
                if status_code.startswith("4."):
                    # 4.x.x: temporary failure (soft)
                    return (BOUNCE_SOFT, status_code)

                # Unknown status, assume hard bounce
                return (BOUNCE_HARD, status_code or "Unknown")

    # Method 2: Some mail servers include headers indicating a bounce
    if msg.get("X-Failed-Recipients"):
        # Presence of this header usually indicates a hard bounce
        return (BOUNCE_HARD, "Unknown (X-Failed-Recipients)")

    subject = header(msg, "Subject").lower()

    # Auto-Submitted could be a bounce, but also an "Out of Office",
    # so we combine it with a keyword search.
    if header(msg, "Auto-Submitted").lower() in (
        "auto-replied",
        "auto-generated",
    ):
        auto_keywords = (
            "undeliverable",
            "delivery failed",
            "failure notice",
        )
        if any(keyword in subject for keyword in auto_keywords):
            # Assume is a hard bounce
            return (BOUNCE_HARD, "Unknown (Auto-Submitted + Keyword)")

    # Method 3: keyword search (less reliable)
    # We avoid false positives by requiring specific keywords.
    from_header = header(msg, "From").lower()
    if "mailer-daemon@" in from_header or "postmaster@" in from_header:
        # Common bounce sender addresses
        return (BOUNCE_HARD, "Unknown (From Keyword)")

    # Check subject for common bounce keywords
    subject_keywords = (
        "undelivered",
        "delivery error",
        "mail delivery failed",
    )
    if any(keyword in subject for keyword in subject_keywords):
        return (BOUNCE_HARD, "Unknown (Subject Keyword)")

    # Not a bounce
    return (None, None)