PyPI - epstein-files - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

epstein_files/__init__.py +10 -14
epstein_files/documents/communication.py +10 -14
epstein_files/documents/document.py +1 -1
epstein_files/documents/email.py +152 -66
epstein_files/documents/imessage/text_message.py +42 -25
epstein_files/documents/messenger_log.py +31 -12
epstein_files/documents/other_file.py +13 -12
epstein_files/epstein_files.py +18 -79
epstein_files/util/constant/common_words.py +3 -3
epstein_files/util/constant/html.py +4 -5
epstein_files/util/constant/names.py +9 -6
epstein_files/util/constant/strings.py +6 -2
epstein_files/util/constant/urls.py +1 -1
epstein_files/util/constants.py +18 -22
epstein_files/util/env.py +45 -36
epstein_files/util/file_helper.py +1 -2
epstein_files/util/highlighted_group.py +1005 -187
epstein_files/util/logging.py +8 -1
epstein_files/util/output.py +147 -60
epstein_files/util/rich.py +33 -67
epstein_files/util/timer.py +1 -1
epstein_files/util/word_count.py +3 -4
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/METADATA +1 -1
epstein_files-1.1.2.dist-info/RECORD +33 -0
epstein_files-1.1.0.dist-info/RECORD +0 -33
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/LICENSE +0 -0
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/WHEEL +0 -0
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/entry_points.txt +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -19,10 +19,10 @@ from epstein_files.documents.email import Email
 from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
-from epstein_files.util.logging import logger
+from epstein_files.util.logging import exit_with_error, logger
 from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
      print_other_files_section, print_text_messages_section, write_complete_emails_timeline, write_json_metadata, write_urls)
-from epstein_files.util.rich import build_highlighter, console, print_title_page_header, print_title_page_tables, print_panel, write_html
+from epstein_files.util.rich import build_highlighter, console, print_color_key, print_title_page_header, print_title_page_tables, print_subtitle_panel, write_html
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -45,15 +45,18 @@ def generate_html() -> None:
     print_title_page_header(epstein_files)
-    if not args.email_timeline:
+    if args.email_timeline:
+        print_color_key()
+    else:
         print_title_page_tables(epstein_files)
     if args.colors_only:
         exit()
     if args.output_texts:
-        print_text_messages_section(epstein_files)
-        timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
+        imessage_logs = [log for log in epstein_files.imessage_logs if not args.names or log.author in args.names]
+        print_text_messages_section(imessage_logs)
+        timer.print_at_checkpoint(f'Printed {len(imessage_logs)} text message log files')
     if args.output_emails:
         emails_that_were_printed = print_emails_section(epstein_files)
@@ -101,15 +104,12 @@ def epstein_search():
         temp_highlighter = build_highlighter(search_term)
         search_results = epstein_files.docs_matching(search_term, args.names)
         console.line(2)
-        print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
+        print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
         for search_result in search_results:
             console.line()
             if args.whole_file:
-                if isinstance(search_result.document, Email):
-                    search_result.document._truncation_allowed = False
                 console.print(search_result.document)
             else:
                 console.print(search_result.document.summary_panel())
@@ -128,9 +128,6 @@ def epstein_show():
     console.line()
     for doc in docs:
-        if isinstance(doc, Email):
-            doc._truncation_allowed = False
         console.print('\n', doc, '\n')
         if args.raw:
@@ -148,5 +145,4 @@ def epstein_word_count() -> None:
 def _assert_positional_args():
     if not args.positional_args:
-        console.print(f"\n  ERROR: No positional args!\n", style='red1')
-        exit(1)
+        exit_with_error(f"No positional args provided!\n")

epstein_files/documents/communication.py CHANGED Viewed

@@ -18,25 +18,24 @@ TIMESTAMP_SECONDS_REGEX = re.compile(r":\d{2}$")
 @dataclass
 class Communication(Document):
     """Superclass for Email and MessengerLog."""
-    author_style: str = 'white'
-    author_txt: Text = field(init=False)
     config: CommunicationCfg | None = None
     timestamp: datetime = FALLBACK_TIMESTAMP  # TODO this default sucks (though it never happens)
-    def __post_init__(self):
-        super().__post_init__()
-        self.author_style = get_style_for_name(self.author_or_unknown())
-        self.author_txt = Text(self.author_or_unknown(), style=self.author_style)
     def author_or_unknown(self) -> str:
         return self.author or UNKNOWN
-    def is_attribution_uncertain(self) -> bool:
-        return bool(self.config and self.config.is_attribution_uncertain)
+    def author_style(self) -> str:
+        return get_style_for_name(self.author_or_unknown())
+    def author_txt(self) -> Text:
+        return Text(self.author_or_unknown(), style=self.author_style())
     def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""
-        return super().external_links_txt(self.author_style, include_alt_links=include_alt_links)
+        return super().external_links_txt(self.author_style(), include_alt_links=include_alt_links)
+    def is_attribution_uncertain(self) -> bool:
+        return bool(self.config and self.config.is_attribution_uncertain)
     def summary(self) -> Text:
         return self._summary().append(CLOSE_PROPERTIES_CHAR)
@@ -47,7 +46,4 @@ class Communication(Document):
     def _summary(self) -> Text:
         """One line summary mostly for logging."""
         txt = super().summary().append(', ')
-        return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style)))
-CommunicationType = TypeVar('CommunicationType', bound=Document)
+        return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style())))

epstein_files/documents/document.py CHANGED Viewed

@@ -251,7 +251,7 @@ class Document:
     def summary(self) -> Text:
         """Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
         txt = Text('').append(self._class_name(), style=self._class_style())
-        txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
+        txt.append(f" {self.file_path.stem}", style=FILENAME_STYLE)
         if self.timestamp:
             timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')

epstein_files/documents/email.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 import logging
 import re
 from copy import deepcopy
@@ -20,7 +21,7 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import REDACTED
 from epstein_files.util.constants import *
 from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
-     flatten, remove_timezone, uniquify)
+     flatten, listify, remove_timezone, uniquify)
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
 from epstein_files.util.highlighted_group import get_style_for_name
@@ -42,7 +43,7 @@ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
 REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
 URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
-APPEARS_IN = 'Appears in'
+APPEARS_IN = 'appears in'
 MAX_CHARS_TO_PRINT = 4000
 MAX_NUM_HEADER_LINES = 14
 MAX_QUOTED_REPLIES = 2
@@ -152,6 +153,8 @@ TRUNCATION_LENGTHS = {
     '030245': 7_500,   # Epstein rationalizes his behavior in an open letter to the world
     '030781': 1_700,   # Bannon email about crypto coin issues
     '032906': 750,     # David Blaine email
+    '026036': 6000,    # Gino Yu blockchain mention
+    '023208': 350_000, # Long discussion about leon black's finances
 }
 # These are long forwarded articles so we force a trim to 1,333 chars if these strings exist
@@ -276,6 +279,7 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Nancy Portland',                        # Lawrence Krauss CC
     'Oliver Goodenough',                     # Robert Trivers CC
     'Peter Aldhous',                         # Lawrence Krauss CC
+    'Players2',                              # Hoffenberg CC
     'Sam Harris',                            # Lawrence Krauss CC
     SAMUEL_LEFF,                             # Random CC
     'Sean T Lehane',                         # Random CC
@@ -283,6 +287,13 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Tim Kane',                              # Random CC
     'Travis Pangburn',                       # Random CC
     'Vahe Stepanian',                        # Random CC
+    # Ross Gow BCC
+    'david.brown@thetimes.co.uk',
+    'io-anne.pugh@bbc.co.uk',
+    'martin.robinson@mailonline.co.uk',
+    'nick.alwav@bbc.co.uk'
+    'nick.sommerlad@mirror.co.uk',
+    'p.peachev@independent.co.uk',
 ]
 # Emails sent by epstein to himself that are just notes
@@ -300,6 +311,50 @@ METADATA_FIELDS = [
     'subject',
 ]
+LINE_REPAIR_MERGES = {
+    '017523': 4,
+    '019407': [2, 4],
+    '021729': 2,
+    '022673': 9,
+    '022684': 9,
+    '022695': 4,
+    '023067': 3,
+    '025790': 2,
+    '026609': 4,
+    '026924': [2, 4],
+    '028931': [3, 6],
+    '029154': [2, 5],
+    '029163': [2, 5],
+    '029282': 2,
+    '029402': 5,
+    '029498': 2,
+    '029501': 2,
+    '029835': [2, 4],
+    '029889': 2,
+    '029976': 3,
+    '030299': [7, 10],
+    '030381': [2, 4],
+    '030384': [2, 4],
+    '030626': 2,
+    '030999': [2, 4],
+    '031384': 2,
+    '031428': 2,
+    '031442': 0,
+    '031980': [2, 4],
+    '032063': [3, 5],
+    '032272': 3,
+    '032405': 4,
+    '033097': 2,
+    '033144': [2, 4],
+    '033228': [3, 5],
+    '033357': [2, 4],
+    '033486': [7, 9],
+    '033512': 2,
+    '033575': [2, 4],
+    '033576': 3,
+    '033583': 2,
+}
 @dataclass
 class Email(Communication):
@@ -318,7 +373,6 @@ class Email(Communication):
     recipients: list[str | None] = field(default_factory=list)
     sent_from_device: str | None = None
     signature_substitution_counts: dict[str, int] = field(default_factory=dict)  # defaultdict breaks asdict :(
-    _truncation_allowed: bool = True  # Hacky way to get __rich_console__() not to truncate in epstein_show script
     # For logging how many headers we prettified while printing, kind of janky
     rewritten_header_ids: ClassVar[set[str]] = set([])
@@ -342,7 +396,7 @@ class Email(Communication):
                 self.recipients = self.config.recipients
             else:
                 for recipient in self.header.recipients():
-                    self.recipients.extend(self._emailer_names(recipient))
+                    self.recipients.extend(self._extract_emailer_names(recipient))
                 if self.author in MAILING_LISTS and (len(self.recipients) == 0 or self.recipients == [self.author]):
                     self.recipients = [JEFFREY_EPSTEIN]   # Assume mailing list emails are to Epstein
@@ -365,7 +419,7 @@ class Email(Communication):
     def info_txt(self) -> Text:
         email_type = 'fwded article' if self.is_fwded_article() else 'email'
-        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
+        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt()).append(' to ')
         return txt.append(self.recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
     def is_fwded_article(self) -> bool:
@@ -446,15 +500,23 @@ class Email(Communication):
         """Color emails from epstein to others with the color for the first recipient."""
         if self.author == JEFFREY_EPSTEIN:
             if len(self.recipients) == 0 or self.recipients == [None]:
-                style = self.author_style
+                style = self.author_style()
             else:
                 style = get_style_for_name(self.recipients[0])
         else:
-            style = self.author_style
+            style = self.author_style()
         return style.replace('bold', '').strip()
-    def _emailer_names(self, emailer_str: str) -> list[str]:
+    def _extract_author(self) -> None:
+        self._extract_header()
+        super()._extract_author()
+        if not self.author and self.header.author:
+            authors = self._extract_emailer_names(self.header.author)
+            self.author = authors[0] if (len(authors) > 0 and authors[0]) else None
+    def _extract_emailer_names(self, emailer_str: str) -> list[str]:
         """Return a list of people's names found in 'emailer_str' (email author or recipients field)."""
         emailer_str = EmailHeader.cleanup_str(emailer_str)
@@ -474,14 +536,6 @@ class Email(Communication):
         names_found = names_found or [emailer_str]
         return [_reverse_first_and_last_names(name) for name in names_found]
-    def _extract_author(self) -> None:
-        self._extract_header()
-        super()._extract_author()
-        if not self.author and self.header.author:
-            authors = self._emailer_names(self.header.author)
-            self.author = authors[0] if (len(authors) > 0 and authors[0]) else None
     def _extract_header(self) -> None:
         """Extract an EmailHeader object from the OCR text."""
         header_match = EMAIL_SIMPLE_HEADER_REGEX.search(self.text)
@@ -579,44 +633,47 @@ class Email(Communication):
         self._set_computed_fields(lines=[line for line in self.lines if not BAD_LINE_REGEX.match(line)])
         old_text = self.text
-        if self.file_id in ['031442']:
-            self._merge_lines(0)  # Merge 1st and 2nd rows
-        elif self.file_id in '021729 025790 029282 029501 029889 030626 031384 031428 033097 033512 033583 029498 033583'.split():
-            self._merge_lines(2)  # Merge 3rd and 4th rows
+        if self.file_id in LINE_REPAIR_MERGES:
+            merge = LINE_REPAIR_MERGES[self.file_id]
+            merge_args = merge if isinstance(merge, list) else [merge]
+            self._merge_lines(*merge_args)
-            if self.file_id in ['030626']:  # Merge 6th and 7th (now 5th and 6th) rows
-                self._merge_lines(4)
-            elif self.file_id == '029889':
-                self._merge_lines(2, 5)
-            elif self.file_id in ['029498', '031428']:
-                self._merge_lines(2, 4)
-        elif self.file_id in ['029976', '023067', '033576']:
-            self._merge_lines(3)  # Merge 4th and 5th rows
-        elif self.file_id in '026609 029402 032405 022695'.split():
-            self._merge_lines(4)  # Merge 5th and 6th rows
-        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357', '026924']:
-            self._merge_lines(2, 4)
-        elif self.file_id in ['029154', '029163']:
+        # These already had 2nd line merged
+        if self.file_id in ['030626']:  # Merge 6th and 7th (now 5th and 6th) rows
+            self._merge_lines(4)
+        elif self.file_id == '029889':
             self._merge_lines(2, 5)
-        elif self.file_id in ['033228', '032063']:
-            self._merge_lines(3, 5)
-        elif self.file_id == '028931':
-            self._merge_lines(3, 6)
-        elif self.file_id == '013415':
+        elif self.file_id in ['029498', '031428']:
+            self._merge_lines(2, 4)
+        # Multiline
+        if self.file_id == '013415':
+            for _i in range(2):
+                self._merge_lines(4)
+        elif self.file_id == '013405':
+            for _i in range(2):
+                self._merge_lines(4)
+        elif self.file_id == '029458':
+            for _i in range(3):
+                self._merge_lines(4)
+        elif self.file_id in ['025233']:
             for _i in range(2):
                 self._merge_lines(4)
+            self.lines[4] = f"Attachments: {self.lines[4]}"
+            self._set_computed_fields(lines=self.lines)
+        elif self.file_id in ['023001']:
+            for _i in range(3):
+                self._merge_lines(5)
+        elif self.file_id in ['019105']:
+            for _i in range(4):
+                self._merge_lines(5)
         elif self.file_id in ['033568']:
             for _i in range(5):
                 self._merge_lines(5)
         elif self.file_id in ['025329']:
             for _i in range(9):
                 self._merge_lines(2)
-        elif self.file_id == '033486':
-            self._merge_lines(7, 9)
-        elif self.file_id == '030299':
-            self._merge_lines(7, 10)
-        elif self.file_id in ['022673', '022684']:
-            self._merge_lines(9)
         elif self.file_id == '014860':
             self._merge_lines(3)
             self._merge_lines(4)
@@ -629,7 +686,15 @@ class Email(Communication):
             self._merge_lines(4)
             self._merge_lines(2, 4)
-        elif self.file_id == '025041':
+        elif self.file_id in ['033252']:
+            for _i in range(2):
+                self._merge_lines(9)
+        elif self.file_id in ['032637']:
+            for _i in range(3):
+                self._merge_lines(9)
+        # Bad line removal
+        if self.file_id == '025041':
             self._remove_line(4)
             self._remove_line(4)
         elif self.file_id == '029692':
@@ -679,7 +744,7 @@ class Email(Communication):
         """Copy info from original config for file this document was extracted from."""
         if self.file_id in ALL_FILE_CONFIGS:
             self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
-            self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+            self.warn(f"Merging existing cfg for '{self.file_id}' with cfg for extracted document...")
         else:
             self.config = EmailCfg(id=self.file_id)
@@ -692,33 +757,55 @@ class Email(Communication):
                 extracted_description += ' email'
             if self.config.description:
-                self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+                self.warn(f"Overwriting description '{self.config.description}' with extract's '{self.config.description}'")
             self.config.description = extracted_description
         self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
         self.log(f"Constructed synthetic config: {self.config}")
-    def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
-        logger.debug(f"Printing '{self.filename}'...")
-        yield self.file_info_panel()
-        should_rewrite_header = self.header.was_initially_empty and self.header.num_header_rows > 0
+    def _truncate_to_length(self) -> int:
+        """When printing truncate this email to this length."""
         quote_cutoff = self._idx_of_nth_quoted_reply(text=self.text)  # Trim if there's many quoted replies
-        num_chars = MAX_CHARS_TO_PRINT
-        trim_footer_txt = None
-        text = self.text
+        includes_truncate_term = next((term for term in TRUNCATE_TERMS if term in self.text), None)
-        if self.file_id in TRUNCATION_LENGTHS:
+        if args.whole_file:
+            num_chars = len(self.text)
+        elif self.file_id in TRUNCATION_LENGTHS:
             num_chars = TRUNCATION_LENGTHS[self.file_id]
-        elif self.author in TRUNCATE_ALL_EMAILS_FROM or any((term in self.text) for term in TRUNCATE_TERMS):
+        elif self.author in TRUNCATE_ALL_EMAILS_FROM or includes_truncate_term:
             num_chars = int(MAX_CHARS_TO_PRINT / 3)
         elif quote_cutoff and quote_cutoff < MAX_CHARS_TO_PRINT:
             num_chars = quote_cutoff
+        else:
+            num_chars = MAX_CHARS_TO_PRINT
+        if num_chars != MAX_CHARS_TO_PRINT and not self.is_duplicate():
+            log_args = {
+                'num_chars': num_chars,
+                'author_truncate': self.author in TRUNCATE_ALL_EMAILS_FROM,
+                'is_fwded_article': self.is_fwded_article(),
+                'is_quote_cutoff': quote_cutoff == num_chars,
+                'includes_truncate_term': json.dumps(includes_truncate_term) if includes_truncate_term else None,
+                'quote_cutoff': quote_cutoff,
+            }
+            if quote_cutoff != num_chars:
+                logger.debug(f'{self.summary()} truncating: ' + ', '.join([f"{k}={v}" for k, v in log_args.items() if v]) + '\n')
+        return num_chars
+    def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
+        logger.debug(f"Printing '{self.filename}'...")
+        should_rewrite_header = self.header.was_initially_empty and self.header.num_header_rows > 0
+        num_chars = self._truncate_to_length()
+        trim_footer_txt = None
+        text = self.text
         # Truncate long emails but leave a note explaining what happened w/link to source document
-        if len(text) > num_chars and self._truncation_allowed:
+        if len(text) > num_chars:
             text = text[0:num_chars]
-            doc_link_markup = epstein_media_doc_link_markup(self.url_slug, self.author_style)
+            doc_link_markup = epstein_media_doc_link_markup(self.url_slug, self.author_style())
             trim_note = f"<...trimmed to {num_chars} characters of {self.length()}, read the rest at {doc_link_markup}...>"
             trim_footer_txt = Text.from_markup(wrap_in_markup_style(trim_note, 'dim'))
@@ -738,38 +825,37 @@ class Email(Communication):
             text = _add_line_breaks(text)  # This was skipped when _prettify_text() w/a broken header so we do it now
             self.rewritten_header_ids.add(self.file_id)
-        panel_txt = highlighter(text)
         email_txt_panel = Panel(
-            panel_txt.append('\n\n').append(trim_footer_txt) if trim_footer_txt else panel_txt,
+            highlighter(text).append('\n\n').append(trim_footer_txt) if trim_footer_txt else highlighter(text),
             border_style=self._border_style(),
             expand=False,
             subtitle=REWRITTEN_HEADER_MSG if should_rewrite_header else None,
         )
+        yield self.file_info_panel()
         yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
         if should_rewrite_header:
             self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
     @staticmethod
-    def build_table(emails: list['Email'], _author: str | None) -> Table:
-        """Turn a set of Email objects into a Table."""
+    def build_emails_table(emails: list['Email'], _author: str | None, include_title: bool = False) -> Table:
+        """Turn a set of Emails to/from a given _author into a Table."""
         author = _author or UNKNOWN
         table = Table(
-            title=f"Emails to/from {author} starting {emails[0].timestamp.date()}",
+            title=f"Emails to/from {author} starting {emails[0].timestamp.date()}" if include_title else None,
             border_style=get_style_for_name(author, allow_bold=False),
             header_style="bold"
         )
         table.add_column('From', justify='left')
         table.add_column('Timestamp', justify='center')
-        table.add_column('Subject', justify='left', style='honeydew2', min_width=60)
+        table.add_column('Subject', justify='left', style='honeydew2', min_width=70)
         for email in emails:
             table.add_row(
-                email.author_txt,
+                email.author_txt(),
                 email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
                 highlighter(email.subject())
             )

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import re
-from dataclasses import dataclass
+from dataclasses import dataclass, field, fields
 from datetime import datetime
 from rich.text import Text
 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
 from epstein_files.util.constant.strings import TIMESTAMP_DIM
-from epstein_files.util.data import extract_last_name
+from epstein_files.util.data import extract_last_name, iso_timestamp
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import TEXT_LINK, highlighter
@@ -30,7 +30,7 @@ class TextMessage:
     """Class representing a single iMessage text message."""
     author: str | None
     author_str: str = ''
-    id_confirmed: bool = False
+    is_id_confirmed: bool = False
     text: str
     timestamp_str: str
@@ -44,38 +44,55 @@ class TextMessage:
         else:
             self.author_str = self.author_str or self.author
-        if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
+        if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
             self.author_str += ' (?)'
-    def timestamp(self) -> datetime:
-        return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
+        if self.is_link():
+            self.text = self.text.replace('\n', '').replace(' ', '_')
+        else:
+            self.text = self.text.replace('\n', ' ')
-    def _message(self) -> Text:
-        lines = self.text.split('\n')
+    def is_link(self) -> bool:
+        return self.text.startswith('http')
-        # Fix multiline links
-        if self.text.startswith('http'):
-            text = self.text
+    def parse_timestamp(self) -> datetime:
+        return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
-            if len(lines) > 1 and not lines[0].endswith('html'):
-                if len(lines) > 2 and lines[1].endswith('-'):
-                    text = text.replace('\n', '', 2)
-                else:
-                    text = text.replace('\n', '', 1)
+    def timestamp_txt(self) -> Text:
+        timestamp_str = self.timestamp_str
-            lines = text.split('\n')
-            link_text = lines.pop()
-            msg_txt = Text('').append(Text.from_markup(f"[link={link_text}]{link_text}[/link]", style=TEXT_LINK))
+        try:
+            timestamp_str = iso_timestamp(self.parse_timestamp())
+        except Exception as e:
+            logger.warning(f"Failed to parse timestamp for {self}")
-            if len(lines) > 0:
-                msg_txt.append('\n' + ' '.join(lines))
-        else:
-            msg_txt = highlighter(' '.join(lines))  # remove newlines
+        return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
-        return msg_txt
+    def _message(self) -> Text:
+        if self.is_link():
+            return Text.from_markup(f"[link={self.text}]{self.text}[/link]", style=TEXT_LINK)
+        else:
+            return highlighter(self.text)
     def __rich__(self) -> Text:
-        timestamp_txt = Text(f"[{self.timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
+        timestamp_txt = self.timestamp_txt().append(' ')
         author_style = get_style_for_name(self.author_str if self.author_str.startswith('+') else self.author)
         author_txt = Text(self.author_str, style=author_style)
         return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(self._message())
+    def __repr__(self) -> str:
+        props = []
+        add_prop = lambda k, v: props.append(f"{k}={v}")
+        for _field in sorted(fields(self), key=lambda f: f.name):
+            key = _field.name
+            value = getattr(self, key)
+            if key == 'author_str' and self.author and self.author_str.startswith(value):
+                continue
+            elif isinstance(value, str):
+                add_prop(key, f'"{value}"')
+            else:
+                add_prop(key, value)
+        return f"{type(self).__name__}(" + ', '.join(props) + f')'

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl