django-codenerix-email 4.0.34__py2.py3-none-any.whl → 4.0.35__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,611 @@
1
+ import sys
2
+ import re
3
+
4
+ import logging
5
+
6
+ from django.conf import settings
7
+ from django.core.management.base import BaseCommand, CommandError
8
+ from zoneinfo import ZoneInfo
9
+
10
+ from email import message_from_bytes
11
+ from email.header import decode_header
12
+ from email.message import Message
13
+ from email.parser import HeaderParser
14
+ from typing import Optional
15
+
16
+ from codenerix_email.models import (
17
+ EmailMessage,
18
+ EmailReceived,
19
+ BOUNCE_SOFT,
20
+ BOUNCE_HARD,
21
+ )
22
+
23
+
24
+ # Silence DEBUG logs from imapclient
25
+ logging.getLogger("imapclient").setLevel(logging.WARNING)
26
+
27
+ import imaplib # noqa: E402
28
+ from imapclient import IMAPClient # noqa: E402
29
+ from imapclient.exceptions import LoginError # noqa: E402
30
+
31
# Deprecation notice, emitted once when this module is imported, unless the
# name of the launching script (sys.argv[0]) begins with "email".
# NOTE(review): when started through "manage.py <command>", sys.argv[0] is
# the script path rather than the command name -- confirm that this check
# selects the invocations it is meant to.
if not sys.argv[0].startswith("email"):
    logger = logging.getLogger("codenerix")
    logger.warning(
        "WARNING: 'recv_emails' is DEPRECATED, switch to 'emails_recv' instead"
    )
39
+
40
+
41
+ class Command(BaseCommand):
42
+ help = "Fetches new emails from the configured IMAP account."
43
+
44
def add_arguments(self, parser):
    """
    Register the command-line options of this command on ``parser``.

    Options: --silent, --tracking-id, --imap-id, --message-id, --all
    and --rewrite.
    """

    # Quiet mode switch
    parser.add_argument(
        "--silent",
        dest="silent",
        action="store_true",
        default=False,
        help="Enable silent mode",
    )

    # Text filters: each one takes a string value
    text_filters = (
        ("--tracking-id", "Tracking ID to filter"),
        ("--imap-id", "IMAP ID to filter"),
        ("--message-id", "Message-ID to filter"),
    )
    for flag, description in text_filters:
        parser.add_argument(flag, type=str, help=description)

    # Boolean switches
    switches = (
        ("--all", "Process all emails"),
        ("--rewrite", "Rewrite existing"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)
67
+
68
def handle(self, *args, **options):
    """
    Entry point of the command: connect to the configured IMAP account,
    run ``self.process()`` on it and print a summary.

    Connection parameters are read from the Django settings
    IMAP_EMAIL_HOST, IMAP_EMAIL_PORT (default 993), IMAP_EMAIL_USER,
    IMAP_EMAIL_PASSWORD, IMAP_EMAIL_SSL (default True) and
    IMAP_EMAIL_INBOX_FOLDER (default "INBOX").

    In silent mode a missing configuration makes the command return
    quietly instead of failing.

    Raises:
        CommandError: missing configuration (non-silent mode only),
            connection failure, login failure or folder selection failure.
    """

    # Get configuration
    self.silent = options["silent"]
    self.verbose = not self.silent
    self.imap_id = options.get("imap_id")
    self.message_id = options.get("message_id")
    self.tracking_id = options.get("tracking_id")
    self.rewrite = options.get("rewrite", False)
    self.process_all = options.get("all", False)

    # Show header
    if self.verbose:
        self.stdout.write(
            self.style.SUCCESS("Starting IMAP email synchronization...")
        )

    # Get configuration from settings
    host = getattr(settings, "IMAP_EMAIL_HOST", None)
    port = getattr(settings, "IMAP_EMAIL_PORT", 993)
    user = getattr(settings, "IMAP_EMAIL_USER", None)
    password = getattr(settings, "IMAP_EMAIL_PASSWORD", None)
    ssl = getattr(settings, "IMAP_EMAIL_SSL", True)
    folder = getattr(settings, "IMAP_EMAIL_INBOX_FOLDER", "INBOX")

    # Verify that IMAP settings are configured
    if host is None or not port:
        if self.verbose:
            raise CommandError(
                "IMAP settings not configured. Please set IMAP_EMAIL_HOST "
                "and IMAP_EMAIL_PORT in settings."
            )
        return

    # Validate credentials
    if user is None or password is None:
        if self.silent:
            return
        raise CommandError(
            "IMAP user or password not configured. Please set "
            "IMAP_EMAIL_USER and IMAP_EMAIL_PASSWORD in settings."
        )

    try:
        # Connect to the IMAP server
        server = IMAPClient(host, port=port, ssl=ssl)
    except Exception as e:
        raise CommandError(
            f"Failed to connect to IMAP server ("
            f"{host=}, "
            f"{port=}, "
            f"ssl={'yes' if ssl else 'no'}"
            f"): {e}"
        ) from e

    # Any error below propagates to the caller; the finally clause only
    # guarantees that the session is closed.
    try:
        # Login and select the inbox
        try:
            server.login(user, password)
        except LoginError as e:
            raise CommandError(
                f"Failed to login to IMAP server with {user=}: {e}"
            ) from e

        if folder:
            try:
                server.select_folder(folder, readonly=False)
            except imaplib.IMAP4.error as e:
                raise CommandError(
                    f"Failed to select inbox {folder=}"
                ) from e

        # Process emails
        (created_count, overwritten_count) = self.process(server)
        count = created_count + overwritten_count

        # Show summary
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Successfully synchronized {count} emails "
                    f"(new: {created_count}, "
                    f"overwritten: {overwritten_count})"
                )
            )

    finally:
        # Logout from the server (best effort: a failing logout must not
        # hide the outcome of the synchronization)
        try:
            server.logout()
        except Exception:
            pass
165
+
166
def process(self, server):
    """
    Fetch the selected messages from the IMAP server and save them as
    EmailReceived objects.

    Selection, first match wins: a single IMAP ID (``self.imap_id``),
    a Message-ID header search (``self.message_id``), every message
    (``self.process_all``) or, by default, the UNSEEN messages.

    Messages whose Message-ID is already stored are skipped unless
    ``self.rewrite`` is set. Every stored or skipped message is flagged
    as Seen on the server; messages filtered out by ``self.tracking_id``
    are left untouched.

    Args:
        server: IMAP client on which ``search``, ``fetch`` and
            ``add_flags`` are called (``handle`` passes a logged-in
            IMAPClient with the folder selected).

    Returns:
        A tuple (created_count, overwritten_count).

    Raises:
        CommandError: if the IMAP ID is not an integer or if linking a
            message to its sent email fails.
    """
    from email.errors import HeaderParseError

    def decode_value(raw):
        """Return the full text of a (possibly RFC 2047 encoded) header."""
        if raw is None:
            return ""
        try:
            chunks = decode_header(raw)
        except HeaderParseError:
            # Malformed encoded word: keep the raw text
            return str(raw)
        pieces = []
        for chunk, charset in chunks:
            if isinstance(chunk, bytes):
                try:
                    chunk = chunk.decode(charset or "utf-8", "ignore")
                except LookupError:
                    # Charset label Python does not know
                    chunk = chunk.decode("utf-8", "ignore")
            pieces.append(chunk)
        return "".join(pieces)

    def text_of(part):
        """Return the decoded text payload of a message part ("" if none)."""
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            return ""
        try:
            return payload.decode(
                part.get_content_charset() or "utf-8", "ignore"
            )
        except LookupError:
            # Charset label Python does not know
            return payload.decode("utf-8", "ignore")

    # Processed emails count
    created_count = 0
    overwrite_count = 0

    # Parsed value of the IMAP ID filter (None when not filtering)
    wanted_imap_id = None

    # Look up for emails
    if self.imap_id:
        # Search by specific IMAP ID
        try:
            wanted_imap_id = int(self.imap_id)
        except ValueError as e:
            raise CommandError(
                f"Invalid IMAP ID '{self.imap_id}'. Must be an integer."
            ) from e
        messages_ids = [wanted_imap_id]
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Processing email with IMAP ID {self.imap_id}."
                )
            )

    elif self.message_id:
        # Search by specific Message-ID
        messages_ids = server.search(
            ["HEADER", "Message-ID", self.message_id]
        )
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) with "
                    f"Message-ID {self.message_id}."
                )
            )

    elif self.process_all:
        # Process all emails
        messages_ids = server.search(["ALL"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} email(s) to process."
                )
            )

    else:
        # Search by UNSEEN
        messages_ids = server.search(["UNSEEN"])
        if self.verbose:
            self.stdout.write(
                self.style.SUCCESS(
                    f"Found {len(messages_ids)} new email(s) to process."
                )
            )

    # Nothing to do
    if not messages_ids:
        return (created_count, overwrite_count)

    # Fetch the full message and internal date
    # (BODY.PEEK so that fetching does not flag the message by itself)
    fetched_data = server.fetch(
        messages_ids, ["BODY.PEEK[]", "INTERNALDATE"]
    )

    for imap_id, message_data in fetched_data.items():
        # Filter out by IMAP ID if specified (compared as integers so
        # that values such as "05" still match message 5)
        if wanted_imap_id is not None and imap_id != wanted_imap_id:
            continue

        # Get the raw email and internal date
        raw_email = message_data[b"BODY[]"]
        # NOTE(review): the date is stamped with settings.TIME_ZONE
        # without conversion -- confirm the fetched value is expressed
        # in that zone.
        internal_date = message_data[b"INTERNALDATE"].replace(
            tzinfo=ZoneInfo(settings.TIME_ZONE)
        )

        # Parse the email
        msg = message_from_bytes(raw_email)

        # Extract subject, efrom, eto & eid
        # (a missing Subject yields an empty string)
        subject = decode_value(msg["Subject"])
        efrom = msg.get("From")
        eto = msg.get("To")
        eid = msg.get("Message-ID")

        # If we can't get a Message-ID, use the IMAP ID as fallback
        # to avoid duplicates
        if not eid:
            eid = f"<imapid-{imap_id}@{settings.IMAP_EMAIL_HOST}>"

        # Avoid processing duplicates
        email_received = EmailReceived.objects.filter(eid=eid).first()
        if self.rewrite or not email_received:
            # First non-empty text/plain and text/html parts of a
            # multipart email; the whole payload otherwise
            body_plain = ""
            body_html = ""
            if msg.is_multipart():
                for part in msg.walk():
                    content_type = part.get_content_type()
                    if content_type == "text/plain" and not body_plain:
                        body_plain = text_of(part)
                    elif content_type == "text/html" and not body_html:
                        body_html = text_of(part)
            else:
                body_plain = text_of(msg)

            # Logic to associate replies/bounces with sent emails
            try:
                email_message = None

                # Locate the tracking ID
                tracking_id = self.find_tracking_id(msg)

                # Filter out by tracking ID if specified
                if self.tracking_id and tracking_id != self.tracking_id:
                    continue

                # If found, try to link to the sent email
                if tracking_id:
                    try:
                        email_message = EmailMessage.objects.get(
                            uuid=tracking_id
                        )
                    except EmailMessage.DoesNotExist:
                        email_message = None
                        if self.verbose:
                            self.stdout.write(
                                self.style.WARNING(
                                    f"Tracking ID {tracking_id} found "
                                    "but no matching sent email."
                                )
                            )

            except Exception as e:
                raise CommandError(
                    "Error while linking email with IMAP ID "
                    f"{imap_id} to sent email: {e}"
                ) from e

            # Classify the email as a bounce (or not)
            (bounce_type, bounce_reason) = self.analyze_bounce(msg)

            # Extract all headers into a dictionary (a header that is
            # repeated keeps its last value)
            headers = {
                header: decode_value(value) for header, value in msg.items()
            }

            # Create EmailReceived object if doesn't exist
            overwriting = bool(email_received)
            if not overwriting:
                email_received = EmailReceived()

            # Populate fields
            email_received.imap_id = imap_id
            email_received.eid = eid
            email_received.efrom = efrom
            email_received.eto = eto
            email_received.subject = subject
            email_received.headers = headers
            email_received.body_text = body_plain
            email_received.body_html = body_html
            email_received.date_received = internal_date
            email_received.email = email_message
            email_received.bounce_type = bounce_type
            email_received.bounce_reason = bounce_reason

            # Save the received email
            email_received.save()

            # Count created or overwritten
            if overwriting:
                overwrite_count += 1
                verb = "Overwritten"
            else:
                created_count += 1
                verb = "Created"

            if self.verbose:
                summary = (
                    f"{verb} email with IMAP ID: "
                    f"{imap_id} (link={tracking_id})"
                )
                if bounce_type:
                    bounce_type_str = (
                        "Hard" if bounce_type == BOUNCE_HARD else "Soft"
                    )
                    bounce_reason_str = bounce_reason or "Unknown"
                    self.stdout.write(
                        self.style.WARNING(
                            f"{summary} "
                            f"[{bounce_type_str} bounce, "
                            f"reason={bounce_reason_str}]"
                        )
                    )
                else:
                    self.stdout.write(self.style.SUCCESS(summary))

        else:
            if self.verbose:
                self.stdout.write(
                    self.style.WARNING(
                        f"Skipping email with IMAP ID: {imap_id} (DUP)"
                    )
                )

        # Mark the message as read
        # (flag \Seen) avoid reprocessing
        server.add_flags(imap_id, [b"\\Seen"])

    return (created_count, overwrite_count)
406
+
407
def find_tracking_id(self, msg: Message) -> Optional[str]:
    """
    Searches for the X-Codenerix-Tracking-ID robustly in an email.

    It performs the search in three steps and stops at the first hit:
    1. In the main headers of the email.
    2. In the attached parts that carry the original email
       (message/rfc822) or its headers (text/rfc822-headers).
    3. As a last resort, searches the text/plain body of the message.

    Args:
        msg: the parsed email.

    Returns:
        The tracking ID, or None when it is not found.
    """

    header_name = "X-Codenerix-Tracking-ID"

    def clean(value) -> Optional[str]:
        """Normalize a header value to a stripped string (None if empty)."""
        if not value:
            return None
        return str(value).strip() or None

    def text_of(part) -> str:
        """Return the decoded text payload of a part ("" if none)."""
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            return ""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="ignore")
        except LookupError:
            # Charset label Python does not know
            return payload.decode("utf-8", errors="ignore")

    # Method 1: Search in main headers (for direct replies)
    tracking_id = clean(msg.get(header_name, None))
    if tracking_id:
        return tracking_id

    # Method 2: Search in attached parts (for bounces and forwards)
    if msg.is_multipart():
        for part in msg.walk():
            content_type = part.get_content_type()
            candidate = None

            if content_type == "message/rfc822":
                # The payload is a list of messages: the first one is
                # the original email
                attached = part.get_payload()
                if (
                    isinstance(attached, list)
                    and attached
                    and isinstance(attached[0], Message)
                ):
                    candidate = attached[0].get(header_name)

            elif content_type == "text/rfc822-headers":
                # The payload is the raw headers of the original email
                headers_msg = HeaderParser().parsestr(text_of(part))
                candidate = headers_msg.get(header_name)

            # Return at the first hit, so that a later part without
            # the header cannot discard an ID that was already found
            candidate = clean(candidate)
            if candidate:
                return candidate

    # Method 3: Search in the body text (fallback)
    # The original email might be quoted as plain text.
    if msg.is_multipart():
        # Concatenate all text/plain parts
        body_text = "".join(
            text_of(part)
            for part in msg.walk()
            if part.get_content_type() == "text/plain"
        )
    elif msg.get_content_type() == "text/plain":
        body_text = text_of(msg)
    else:
        body_text = ""

    match = re.search(
        r"X-Codenerix-Tracking-ID:\s*([a-fA-F0-9\-]{36})",
        body_text,
    )
    return match.group(1).strip() if match else None
504
+
505
def analyze_bounce(
    self, msg: Message
) -> tuple[Optional[str], Optional[str]]:
    """
    Analyzes an email to determine if it is a bounce and of what type.

    Three methods are tried in order, the first hit wins:
    1. Delivery status notification (multipart/report): the first
       block of the message/delivery-status part whose Action is
       "failed" decides, based on its Status code.
    2. Bounce headers: X-Failed-Recipients, or Auto-Submitted combined
       with a bounce keyword in the subject.
    3. Keywords in the From or Subject headers (less reliable).

    Args:
        msg: the parsed email.

    Returns:
        A tuple (bounce_type, bounce_reason).
        - bounce_type: BOUNCE_HARD, BOUNCE_SOFT, or None if not a bounce.
        - bounce_reason: the status code (e.g., '5.1.1'), a text starting
          with "Unknown" when no code is available, or None if not a
          bounce.
    """

    def header_text(source: Message, name: str) -> str:
        """Return a header as stripped text ("" when it is missing)."""
        # str() because the value is not guaranteed to be a plain string
        return str(source.get(name, "")).strip()

    # Method 1: Look for DSN reports
    report_type = str(msg.get_param("report-type", "")).lower()
    if (
        msg.get_content_type() == "multipart/report"
        and report_type == "delivery-status"
    ):
        for part in msg.walk():
            # We look for the delivery-status part
            if part.get_content_type() != "message/delivery-status":
                continue

            # The payload is a list of header blocks. Action and Status
            # are per-recipient fields, so every block must be checked,
            # not only the first one.
            blocks = part.get_payload()
            if not isinstance(blocks, list):
                continue

            for block in blocks:
                if not isinstance(block, Message):
                    continue

                # Only failures are bounces
                if header_text(block, "Action").lower() != "failed":
                    continue

                # Determine Hard/Soft by status code (RFC3463)
                status_code = header_text(block, "Status")
                if status_code.startswith("4."):
                    # 4.x.x: temporary failure (soft)
                    return (BOUNCE_SOFT, status_code)

                # 5.x.x: permanent failure (hard); an unknown or
                # missing status is assumed to be a hard bounce too
                return (BOUNCE_HARD, status_code or "Unknown")

    # Method 2: Some mail servers include headers indicating a bounce
    if msg.get("X-Failed-Recipients"):
        # Presence of this header usually indicates a hard bounce
        return (BOUNCE_HARD, "Unknown (X-Failed-Recipients)")

    subject = header_text(msg, "Subject").lower()

    if header_text(msg, "Auto-Submitted").lower() in (
        "auto-replied",
        "auto-generated",
    ):
        # It could be a bounce, but also an "Out of Office",
        # so we combine it with a keyword search.
        auto_keywords = (
            "undeliverable",
            "delivery failed",
            "failure notice",
        )
        if any(keyword in subject for keyword in auto_keywords):
            # Assume is a hard bounce
            return (BOUNCE_HARD, "Unknown (Auto-Submitted + Keyword)")

    # Method 3: keyword search (less reliable)
    # We avoid false positives by requiring specific keywords.
    from_header = header_text(msg, "From").lower()
    if "mailer-daemon@" in from_header or "postmaster@" in from_header:
        # Common bounce sender addresses
        return (BOUNCE_HARD, "Unknown (From Keyword)")

    # Check subject for common bounce keywords
    subject_keywords = (
        "undelivered",
        "delivery error",
        "mail delivery failed",
    )
    if any(keyword in subject for keyword in subject_keywords):
        return (BOUNCE_HARD, "Unknown (Subject Keyword)")

    # Not a bounce
    return (None, None)