PyPI - epstein-files - Versions diffs - 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl - Mend

epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

epstein_files/__init__.py +15 -7
epstein_files/documents/communication.py +3 -3
epstein_files/documents/document.py +10 -3
epstein_files/documents/email.py +105 -107
epstein_files/documents/emails/email_header.py +4 -2
epstein_files/documents/imessage/text_message.py +8 -12
epstein_files/documents/messenger_log.py +8 -8
epstein_files/epstein_files.py +123 -119
epstein_files/person.py +350 -0
epstein_files/util/constant/names.py +66 -50
epstein_files/util/constant/output_files.py +1 -0
epstein_files/util/constant/strings.py +3 -1
epstein_files/util/constant/urls.py +14 -2
epstein_files/util/constants.py +134 -26
epstein_files/util/data.py +1 -12
epstein_files/util/doc_cfg.py +30 -14
epstein_files/util/env.py +3 -1
epstein_files/util/file_helper.py +4 -1
epstein_files/util/highlighted_group.py +228 -166
epstein_files/util/output.py +108 -165
epstein_files/util/rich.py +23 -45
epstein_files/util/word_count.py +2 -3
{epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/METADATA +2 -1
epstein_files-1.2.0.dist-info/RECORD +34 -0
epstein_files-1.1.3.dist-info/RECORD +0 -33
{epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/LICENSE +0 -0
{epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/WHEEL +0 -0
{epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/entry_points.txt +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -21,7 +21,8 @@ from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import exit_with_error, logger
 from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
-     print_other_files_section, print_text_messages_section, print_email_timeline, print_json_metadata, write_urls)
+     print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info_png,
+     print_json_metadata, write_urls)
 from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
      print_title_page_tables, print_subtitle_panel, write_html)
 from epstein_files.util.timer import Timer
@@ -43,8 +44,11 @@ def generate_html() -> None:
     elif args.json_files:
         print_json_files(epstein_files)
         exit()
+    elif args.emailers_info_png:
+        print_emailers_info_png(epstein_files)
+        exit()
-    print_title_page_header(epstein_files)
+    print_title_page_header()
     if args.email_timeline:
         print_color_key()
@@ -96,8 +100,7 @@ def epstein_search():
     for search_term in args.positional_args:
         temp_highlighter = build_highlighter(search_term)
         search_results = epstein_files.docs_matching(search_term, args.names)
-        console.line(2)
-        print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
+        print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
         for search_result in search_results:
             console.line()
@@ -115,11 +118,16 @@ def epstein_search():
 def epstein_show():
     """Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
     _assert_positional_args()
-    ids = [extract_file_id(arg) for arg in args.positional_args]
-    raw_docs = [Document(coerce_file_path(id)) for id in ids]
-    docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
+    raw_docs: list[Document] = []
     console.line()
+    try:
+        ids = [extract_file_id(arg) for arg in args.positional_args]
+        raw_docs = [Document(coerce_file_path(id)) for id in ids]
+        docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
+    except Exception as e:
+        exit_with_error(str(e))
     for doc in docs:
         console.print('\n', doc, '\n')

epstein_files/documents/communication.py CHANGED Viewed

@@ -9,7 +9,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, Document
 from epstein_files.util.constant.names import UNKNOWN
 from epstein_files.util.constants import FALLBACK_TIMESTAMP
 from epstein_files.util.doc_cfg import CommunicationCfg
-from epstein_files.util.highlighted_group import get_style_for_name
+from epstein_files.util.highlighted_group import get_style_for_name, styled_name
 from epstein_files.util.rich import key_value_txt
 TIMESTAMP_SECONDS_REGEX = re.compile(r":\d{2}$")
@@ -25,10 +25,10 @@ class Communication(Document):
         return self.author or UNKNOWN
     def author_style(self) -> str:
-        return get_style_for_name(self.author_or_unknown())
+        return get_style_for_name(self.author)
     def author_txt(self) -> Text:
-        return Text(self.author_or_unknown(), style=self.author_style())
+        return styled_name(self.author)
     def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""

epstein_files/documents/document.py CHANGED Viewed

@@ -63,7 +63,7 @@ class Document:
     Attributes:
         file_path (Path): Local path to file
-        author (str | None): Who is responsible for the text in the file
+        author (Name): Who is responsible for the text in the file
         config (DocCfg): Information about this fil
         file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
         filename (str): File's basename
@@ -74,7 +74,7 @@ class Document:
     """
     file_path: Path
     # Optional fields
-    author: str | None = None
+    author: Name = None
     config: EmailCfg | DocCfg | TextCfg | None = None
     file_id: str = field(init=False)
     filename: str = field(init=False)
@@ -88,6 +88,9 @@ class Document:
     strip_whitespace: ClassVar[bool] = True  # Overridden in JsonFile
     def __post_init__(self):
+        if not self.file_path.exists():
+            raise FileNotFoundError(f"File '{self.file_path.name}' does not exist!")
         self.filename = self.file_path.name
         self.file_id = extract_file_id(self.filename)
         # config and url_slug could have been pre-set in Email
@@ -118,6 +121,10 @@ class Document:
         txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
         return txt.append(epstein_media_doc_link_txt(self.config.duplicate_of_id, style='royal_blue1'))
+    def duplicate_of_id(self) -> str | None:
+        if self.config and self.config.duplicate_of_id:
+            return self.config.duplicate_of_id
     def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
         return self.external_link(epsteinify_doc_url, style, link_txt)
@@ -175,7 +182,7 @@ class Document:
         return None
     def is_duplicate(self) -> bool:
-        return bool(self.config and self.config.duplicate_of_id)
+        return bool(self.duplicate_of_id())
     def is_local_extract_file(self) -> bool:
         """True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""

epstein_files/documents/email.py CHANGED Viewed

@@ -20,11 +20,11 @@ from epstein_files.documents.emails.email_header import (BAD_EMAILER_REGEX, EMAI
 from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import REDACTED
 from epstein_files.util.constants import *
-from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
+from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes,
      flatten, listify, remove_timezone, uniquify)
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
-from epstein_files.util.highlighted_group import get_style_for_name
+from epstein_files.util.highlighted_group import JUNK_EMAILERS, get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
@@ -55,6 +55,7 @@ REPLY_SPLITTERS = [f"{field}:" for field in FIELD_NAMES] + [
 OCR_REPAIRS: dict[str | re.Pattern, str] = {
     re.compile(r'grnail\.com'): 'gmail.com',
+    'Newsmax. corn': 'Newsmax.com',
     re.compile(r"^(From|To)(: )?[_1.]{5,}", re.MULTILINE): rf"\1: {REDACTED}",  # Redacted email addresses
     # These 3 must come in this order!
     re.compile(r'([/vkT]|Ai|li|(I|7)v)rote:'): 'wrote:',
@@ -71,6 +72,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
     # Signatures
     'BlackBerry by AT &T': 'BlackBerry by AT&T',
     'BlackBerry from T- Mobile': 'BlackBerry from T-Mobile',
+    'Envoy& de mon iPhone': 'Envoyé de mon iPhone',
     "from my 'Phone": 'from my iPhone',
     'from Samsung Mob.le': 'from Samsung Mobile',
     'gJeremyRubin': '@JeremyRubin',
@@ -78,6 +80,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
     'twitter glhsummers': 'twitter @lhsummers',
     re.compile(r"twitter\.com[i/][lI]krauss[1lt]"): "twitter.com/lkrauss1",
     re.compile(r'from my BlackBerry[0°] wireless device'): 'from my BlackBerry® wireless device',
+    re.compile(r'^INW$', re.MULTILINE): REDACTED,
     # links
     'Imps ://': 'https://',
     re.compile(r'timestopics/people/t/landon jr thomas/inde\n?x\n?\.\n?h\n?tml'): 'timestopics/people/t/landon_jr_thomas/index.html',
@@ -133,19 +136,24 @@ MAILING_LISTS = [
     JP_MORGAN_USGIO,
 ]
-TRUNCATE_ALL_EMAILS_FROM = JUNK_EMAILERS + MAILING_LISTS + [
+BBC_LISTS = JUNK_EMAILERS + MAILING_LISTS
+TRUNCATE_ALL_EMAILS_FROM = BBC_LISTS + [
     'Alan S Halperin',
     'Mitchell Bard',
     'Skip Rimer',
+    'Steven Victor MD',
 ]
 TRUNCATION_LENGTHS = {
     '023627': 16_800,  # Micheal Wolff article with brock pierce
-    '030245': 7_500,   # Epstein rationalizes his behavior in an open letter to the world
-    '030781': 1_700,   # Bannon email about crypto coin issues
-    '032906': 750,     # David Blaine email
+    '030245': None,    # Epstein rationalizes his behavior in an open letter to the world
+    '030781': None,    # Bannon email about crypto coin issues
+    '032906': None,    # David Blaine email
     '026036': 6000,    # Gino Yu blockchain mention
-    '023208': 350_000, # Long discussion about leon black's finances
+    '023208': None,    # Long discussion about leon black's finances
+    '029609': None,    # Joi Ito
+    '025233': None,    # Reputation.com discussion
 }
 # These are long forwarded articles so we force a trim to 1,333 chars if these strings exist
@@ -242,66 +250,15 @@ TRUNCATE_TERMS = [
     'https://www.washingtonpost.com/politics/2018/09/04/transcript-phone-call',
 ]
-# Some Paul Krassner emails have a ton of CCed parties we don't care about
-KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id in ['025329', '024923', '033568']]))
-# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
-USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
-    'Alan Dlugash',                            # CCed with Richard Kahn
-    'Alan Rogers',                           # Random CC
-    'Andrew Friendly',                       # Presumably some relation of Kelly Friendly
-    'BS Stern',                              # A random fwd of email we have
-    'Cheryl Kleen',                          # Single email from Anne Boyles, displayed under Anne Boyles
-    'Connie Zaguirre',                       # Random CC
-    'Dan Fleuette',                          # CC from sean bannon
-    'Danny Goldberg',                        # Random Paul Krassner emails
-    GERALD_LEFCOURT,                         # Single CC
-    GORDON_GETTY,                            # Random CC
-    JEFF_FULLER,                             # Random Jean Luc Brunel CC
-    'Jojo Fontanilla',                       # Random CC
-    'Joseph Vinciguerra',                    # Random CC
-    'Larry Cohen',                           # Random Bill Gates CC
-    'Lyn Fontanilla',                        # Random CC
-    'Mark Albert',                           # Random CC
-    'Matthew Schafer',                       # Random CC
-    MICHAEL_BUCHHOLTZ,                       # Terry Kafka CC
-    'Nancy Dahl',                            # covered by Lawrence Krauss (her husband)
-    'Michael Simmons',                       # Random CC
-    'Nancy Portland',                        # Lawrence Krauss CC
-    'Oliver Goodenough',                     # Robert Trivers CC
-    'Peter Aldhous',                         # Lawrence Krauss CC
-    'Players2',                              # Hoffenberg CC
-    'Sam Harris',                            # Lawrence Krauss CC
-    SAMUEL_LEFF,                             # Random CC
-    'Sean T Lehane',                         # Random CC
-    'Stephen Rubin',                         # Random CC
-    'Tim Kane',                              # Random CC
-    'Travis Pangburn',                       # Random CC
-    'Vahe Stepanian',                        # Random CC
-    # Ross Gow BCC
-    'david.brown@thetimes.co.uk',
-    'io-anne.pugh@bbc.co.uk',
-    'martin.robinson@mailonline.co.uk',
-    'nick.alwav@bbc.co.uk'
-    'nick.sommerlad@mirror.co.uk',
-    'p.peachev@independent.co.uk',
-]
-# Emails sent by epstein to himself that are just notes
-SELF_EMAILS_FILE_IDS = [
-    '026677',
-    '029752',   # TODO: jokeland...
-    '030238',
-    # '033274',  # TODO: Epstein's note to self doesn't get printed if we don't set the recipients to [None]
-]
 METADATA_FIELDS = [
     'is_junk_mail',
+    'is_mailing_list',
     'recipients',
     'sent_from_device',
     'subject',
 ]
+# Note the line repair happens *after* 'Importance: High' is removed
 LINE_REPAIR_MERGES = {
     '017523': 4,
     '019407': [2, 4],
@@ -309,9 +266,14 @@ LINE_REPAIR_MERGES = {
     '022673': 9,
     '022684': 9,
     '022695': 4,
+    '029773': [2, 5],
     '023067': 3,
     '025790': 2,
+    '029841': 3,
+    '026345': 3,
     '026609': 4,
+    '033299': 3,
+    '026829': 3,
     '026924': [2, 4],
     '028931': [3, 6],
     '029154': [2, 5],
@@ -322,6 +284,7 @@ LINE_REPAIR_MERGES = {
     '029501': 2,
     '029835': [2, 4],
     '029889': 2,
+    '029545': [3, 5],
     '029976': 3,
     '030299': [7, 10],
     '030381': [2, 4],
@@ -337,6 +300,7 @@ LINE_REPAIR_MERGES = {
     '032405': 4,
     '033097': 2,
     '033144': [2, 4],
+    '033217': 3,
     '033228': [3, 5],
     '033357': [2, 4],
     '033486': [7, 9],
@@ -354,14 +318,14 @@ class Email(Communication):
         actual_text (str) - best effort at the text actually sent in this email, excluding quoted replies and forwards
         config (EmailCfg | None) - manual config for this email (if it exists)
         header (EmailHeader) - header data extracted from the text (from/to/sent/subject etc)
-        recipients (list[str | None]) - who this email was sent to
+        recipients (list[Name]) - who this email was sent to
         sent_from_device (str | None) - "Sent from my iPhone" style signature (if it exists)
         signature_substitution_counts (dict[str, int]) - count of how many times a signature was replaced with <...snipped...> for each participant
     """
     actual_text: str = field(init=False)
     config: EmailCfg | None = None
     header: EmailHeader = field(init=False)
-    recipients: list[str | None] = field(default_factory=list)
+    recipients: list[Name] = field(default_factory=list)
     sent_from_device: str | None = None
     signature_substitution_counts: dict[str, int] = field(default_factory=dict)  # defaultdict breaks asdict :(
@@ -382,25 +346,21 @@ class Email(Communication):
         super().__post_init__()
-        try:
-            if self.config and self.config.recipients:
-                self.recipients = self.config.recipients
-            else:
-                for recipient in self.header.recipients():
-                    self.recipients.extend(self._extract_emailer_names(recipient))
-                if self.author in MAILING_LISTS and (len(self.recipients) == 0 or self.recipients == [self.author]):
-                    self.recipients = [JEFFREY_EPSTEIN]   # Assume mailing list emails are to Epstein
-        except Exception as e:
-            console.print_exception()
-            console.line(2)
-            logger.fatal(f"Failed on {self.file_id}")
-            console.line(2)
-            raise e
-        # Remove self CCs
-        recipients = [r for r in self.recipients if r != self.author or self.file_id in SELF_EMAILS_FILE_IDS]
-        self.recipients = list(set(recipients))
+        if self.config and self.config.recipients:
+            self.recipients = self.config.recipients
+        else:
+            for recipient in self.header.recipients():
+                self.recipients.extend(self._extract_emailer_names(recipient))
+            # Assume mailing list emails are to Epstein
+            if self.author in BBC_LISTS and (self.is_note_to_self() or not self.recipients):
+                self.recipients = [JEFFREY_EPSTEIN]
+        # Remove self CCs but preserve self emails
+        if not self.is_note_to_self():
+            self.recipients = [r for r in self.recipients if r != self.author]
+        self.recipients = sorted(list(set(self.recipients)), key=lambda r: r or UNKNOWN)
         self.text = self._prettify_text()
         self.actual_text = self._actual_text()
         self.sent_from_device = self._sent_from_device()
@@ -410,18 +370,30 @@ class Email(Communication):
     def info_txt(self) -> Text:
         email_type = 'fwded article' if self.is_fwded_article() else 'email'
-        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt()).append(' to ')
-        return txt.append(self.recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
+        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt())
+        if self.config and self.config.is_attribution_uncertain:
+            txt.append(f" {QUESTION_MARKS}", style=self.author_style())
+        txt.append(' to ').append(self.recipients_txt())
+        return txt.append(highlighter(f" probably sent at {self.timestamp}"))
     def is_fwded_article(self) -> bool:
         return bool(self.config and self.config.is_fwded_article)
     def is_junk_mail(self) -> bool:
-        return self.author in JUNK_EMAILERS or self.author in MAILING_LISTS
+        return self.author in JUNK_EMAILERS
+    def is_mailing_list(self) -> bool:
+        return self.author in MAILING_LISTS or self.is_junk_mail()
+    def is_note_to_self(self) -> bool:
+        return self.recipients == [self.author]
     def metadata(self) -> Metadata:
         local_metadata = asdict(self)
         local_metadata['is_junk_mail'] = self.is_junk_mail()
+        local_metadata['is_mailing_list'] = self.is_junk_mail()
         local_metadata['subject'] = self.subject() or None
         metadata = super().metadata()
         metadata.update({k: v for k, v in local_metadata.items() if v and k in METADATA_FIELDS})
@@ -438,7 +410,10 @@ class Email(Communication):
         ], join=', ')
     def subject(self) -> str:
-        return self.header.subject or ''
+        if self.config and self.config.subject:
+            return self.config.subject
+        else:
+            return self.header.subject or ''
     def summary(self) -> Text:
         """One line summary mostly for logging."""
@@ -489,11 +464,8 @@ class Email(Communication):
     def _border_style(self) -> str:
         """Color emails from epstein to others with the color for the first recipient."""
-        if self.author == JEFFREY_EPSTEIN:
-            if len(self.recipients) == 0 or self.recipients == [None]:
-                style = self.author_style()
-            else:
-                style = get_style_for_name(self.recipients[0])
+        if self.author == JEFFREY_EPSTEIN and len(self.recipients) > 0:
+            style = get_style_for_name(self.recipients[0])
         else:
             style = self.author_style()
@@ -541,6 +513,8 @@ class Email(Communication):
             self.log_top_lines(msg='No email header match found!', level=log_level)
             self.header = EmailHeader(field_names=[])
+        logger.debug(f"{self.file_id} extracted header\n\n{self.header}\n")
     def _extract_timestamp(self) -> datetime:
         if self.config and self.config.timestamp:
             return self.config.timestamp
@@ -665,6 +639,9 @@ class Email(Communication):
         elif self.file_id in ['025329']:
             for _i in range(9):
                 self._merge_lines(2)
+        elif self.file_id in ['025812']:
+            for _i in range(2):
+                self._merge_lines(3)
         elif self.file_id == '014860':
             self._merge_lines(3)
             self._merge_lines(4)
@@ -763,7 +740,7 @@ class Email(Communication):
         if args.whole_file:
             num_chars = len(self.text)
         elif self.file_id in TRUNCATION_LENGTHS:
-            num_chars = TRUNCATION_LENGTHS[self.file_id]
+            num_chars = TRUNCATION_LENGTHS[self.file_id] or self.file_size()
         elif self.author in TRUNCATE_ALL_EMAILS_FROM or includes_truncate_term:
             num_chars = int(MAX_CHARS_TO_PRINT / 3)
         elif quote_cutoff and quote_cutoff < MAX_CHARS_TO_PRINT:
@@ -830,26 +807,47 @@ class Email(Communication):
             self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
     @staticmethod
-    def build_emails_table(emails: list['Email'], _author: str | None, include_title: bool = False) -> Table:
-        """Turn a set of Emails to/from a given _author into a Table."""
-        author = _author or UNKNOWN
-        table = Table(
-            title=f"Emails to/from {author} starting {emails[0].timestamp.date()}" if include_title else None,
-            border_style=get_style_for_name(author, allow_bold=False),
-            header_style="bold"
+    def build_emails_table(emails: list['Email'], name: Name = '', title: str = '', show_length: bool = False) -> Table:
+        """Turn a set of Emails into a Table."""
+        if title and name:
+            raise ValueError(f"Can't provide both 'author' and 'title' args")
+        elif name == '' and title == '':
+            raise ValueError(f"Must provide either 'author' or 'title' arg")
+        author_style = get_style_for_name(name, allow_bold=False)
+        link_style = author_style if name else ARCHIVE_LINK_COLOR
+        min_width = len(name or UNKNOWN)
+        max_width = max(20, min_width)
+        columns = [
+            {'name': 'Sent At', 'justify': 'left', 'style': TIMESTAMP_DIM},
+            {'name': 'From', 'justify': 'left', 'min_width': min_width, 'max_width': max_width},
+            {'name': 'To', 'justify': 'left', 'min_width': min_width, 'max_width': max_width + 2},
+            {'name': 'Length', 'justify': 'right', 'style': 'wheat4'},
+            {'name': 'Subject', 'justify': 'left', 'min_width': 35, 'style': 'honeydew2'},
+        ]
+        table = build_table(
+            title or None,
+            cols=[col for col in columns if show_length or col['name'] not in ['Length']],
+            border_style=DEFAULT_TABLE_KWARGS['border_style'] if title else author_style,
+            header_style="bold",
+            highlight=True,
         )
-        table.add_column('From', justify='left')
-        table.add_column('Timestamp', justify='center')
-        table.add_column('Subject', justify='left', style='honeydew2', min_width=70)
         for email in emails:
-            table.add_row(
+            fields = [
+                email.epstein_media_link(link_txt=email.timestamp_without_seconds(), style=link_style),
                 email.author_txt(),
-                email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
-                highlighter(email.subject())
-            )
+                email.recipients_txt(max_full_names=1),
+                f"{email.length()}",
+                email.subject(),
+            ]
+            if not show_length:
+                del fields[3]
+            table.add_row(*fields)
         return table

epstein_files/documents/emails/email_header.py CHANGED Viewed

@@ -8,13 +8,13 @@ from epstein_files.util.doc_cfg import EmailCfg
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import UNKNOWN
-FIELD_NAMES = ['From', 'Date', 'Sent', 'Subject']
+FIELD_NAMES = ['Date', 'From', 'Sent', 'Subject']
 NON_HEADER_FIELDS = ['field_names', 'num_header_rows', 'was_initially_empty']
 ON_BEHALF_OF = 'on behalf of'
 TO_FIELDS = ['bcc', 'cc', 'to']
 EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
-HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
+HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments|Classification|Flag):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
 EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
 EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
 EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL)  # Match up to the next email header section
@@ -41,6 +41,8 @@ class EmailHeader:
     subject: str | None = None
     bcc: list[str] | None = None
     cc: list[str] | None = None
+    classification: str | None = None
+    flag: str | None = None
     importance: str | None = None
     attachments: str | None = None
     to: list[str] | None = None

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -4,38 +4,35 @@ from datetime import datetime
 from rich.text import Text
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
+from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
 from epstein_files.util.constant.strings import TIMESTAMP_DIM
-from epstein_files.util.data import extract_last_name, iso_timestamp
+from epstein_files.util.data import iso_timestamp
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import TEXT_LINK, highlighter
+EPSTEIN_TEXTERS = ['e:', 'e:jeeitunes@gmail.com']
 MSG_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
 PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
+UNCERTAIN_SUFFIX = ' (?)'
 DISPLAY_LAST_NAME_ONLY = [
     JEFFREY_EPSTEIN,
     STEVE_BANNON,
 ]
-TEXTER_MAPPING = {
-    'e:': JEFFREY_EPSTEIN,
-    'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
-}
 @dataclass(kw_only=True)
 class TextMessage:
     """Class representing a single iMessage text message."""
-    author: str | None
+    author: Name
     author_str: str = ''
     is_id_confirmed: bool = False
     text: str
     timestamp_str: str
     def __post_init__(self):
-        self.author = TEXTER_MAPPING.get(self.author or UNKNOWN, self.author)
+        self.author = JEFFREY_EPSTEIN if self.author in EPSTEIN_TEXTERS else self.author
         if not self.author:
             self.author_str = UNKNOWN
@@ -45,7 +42,7 @@ class TextMessage:
             self.author_str = self.author_str or self.author
         if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
-            self.author_str += ' (?)'
+            self.author_str += UNCERTAIN_SUFFIX
         if self.is_link():
             self.text = self.text.replace('\n', '').replace(' ', '_')
@@ -59,12 +56,11 @@ class TextMessage:
         return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
     def timestamp_txt(self) -> Text:
-        timestamp_str = self.timestamp_str
         try:
             timestamp_str = iso_timestamp(self.parse_timestamp())
         except Exception as e:
             logger.warning(f"Failed to parse timestamp for {self}")
+            timestamp_str = self.timestamp_str
         return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -10,11 +10,11 @@ from rich.text import Text
 from epstein_files.documents.communication import Communication
 from epstein_files.documents.imessage.text_message import TextMessage
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
+from epstein_files.util.constant.names import JEFFREY_EPSTEIN, Name
 from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
 from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
-from epstein_files.util.highlighted_group import get_style_for_name
+from epstein_files.util.highlighted_group import styled_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
@@ -35,7 +35,7 @@ class MessengerLog(Communication):
         super().__post_init__()
         self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
-    def first_message_at(self, name: str | None) -> datetime:
+    def first_message_at(self, name: Name) -> datetime:
         return self.messages_by(name)[0].parse_timestamp()
     def info_txt(self) -> Text | None:
@@ -54,10 +54,10 @@ class MessengerLog(Communication):
         return txt.append(')')
-    def last_message_at(self, name: str | None) -> datetime:
+    def last_message_at(self, name: Name) -> datetime:
         return self.messages_by(name)[-1].parse_timestamp()
-    def messages_by(self, name: str | None) -> list[TextMessage]:
+    def messages_by(self, name: Name) -> list[TextMessage]:
         """Return all messages by 'name'."""
         return [m for m in self.messages if m.author == name]
@@ -129,9 +129,9 @@ class MessengerLog(Communication):
             yield message
     @classmethod
-    def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
+    def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[Name, int]:
         """Count up how many texts were sent by each author."""
-        sender_counts: dict[str | None, int] = defaultdict(int)
+        sender_counts: dict[Name, int] = defaultdict(int)
         for message_log in imessage_logs:
             for message in message_log.messages:
@@ -160,7 +160,7 @@ class MessengerLog(Communication):
             last_at = logs[-1].first_message_at(name)
             counts_table.add_row(
-                Text(name or UNKNOWN, get_style_for_name(name)),
+                styled_name(name),
                 str(len(logs)),
                 f"{count:,}",
                 iso_timestamp(first_at),

epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl