PyPI - epstein-files - Versions diffs - 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

epstein_files/__init__.py +32 -13
epstein_files/documents/document.py +8 -1
epstein_files/documents/email.py +179 -97
epstein_files/documents/emails/email_header.py +17 -8
epstein_files/documents/other_file.py +8 -6
epstein_files/epstein_files.py +16 -1
epstein_files/person.py +40 -15
epstein_files/util/constant/names.py +10 -6
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constants.py +463 -225
epstein_files/util/doc_cfg.py +33 -27
epstein_files/util/env.py +10 -3
epstein_files/util/file_helper.py +2 -0
epstein_files/util/highlighted_group.py +66 -23
epstein_files/util/output.py +17 -31
epstein_files/util/rich.py +2 -1
epstein_files/util/word_count.py +1 -1
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
epstein_files-1.4.1.dist-info/RECORD +34 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
epstein_files-1.2.5.dist-info/RECORD +0 -34
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0

epstein_files/documents/other_file.py CHANGED Viewed

@@ -122,8 +122,8 @@ class OtherFile(Document):
             return Text(escape(self.preview_text()))
-    def is_interesting(self):
-        """False for lame prefixes, duplicates, and other boring files."""
+    def is_interesting(self) -> bool:
+        """Overloaded. False for lame prefixes, duplicates, and other boring files."""
         info_sentences = self.info()
         if self.is_duplicate():
@@ -164,8 +164,8 @@ class OtherFile(Document):
     def _extract_timestamp(self) -> datetime | None:
         """Return configured timestamp or value extracted by scanning text with datefinder."""
-        if self.config and self.config.timestamp:
-            return self.config.timestamp
+        if self.config and self.config.timestamp():
+            return self.config.timestamp()
         elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
             return None
@@ -210,9 +210,10 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
     @classmethod
-    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '', title: str = '') -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
+        title = title or f'{title_pfx}Other Files Details in Chronological Order'
+        table = build_table(title, show_lines=True, title_justify='left' if title else 'center')
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='right', style='dim')
@@ -244,6 +245,7 @@ class OtherFile(Document):
     @classmethod
     def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        """Table showing file count by category."""
         categories = uniquify([f.category() for f in files])
         categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
         table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')

epstein_files/epstein_files.py CHANGED Viewed

@@ -84,6 +84,7 @@ class EpsteinFiles:
         self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
         self._set_uninteresting_ccs()
         self._copy_duplicate_email_properties()
+        self._find_email_attachments_and_set_is_first_for_user()
     @classmethod
     def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
@@ -123,6 +124,9 @@ class EpsteinFiles:
             lines = doc.matching_lines(pattern)
+            if args.min_line_length:
+                lines = [line for line in lines if len(line.line) > args.min_line_length]
             if len(lines) > 0:
                 results.append(SearchResult(doc, lines))
@@ -251,7 +255,7 @@ class EpsteinFiles:
                 name=name,
                 emails=self.emails_for(name),
                 imessage_logs=self.imessage_logs_for(name),
-                is_uninteresting_cc=name in self.uninteresting_emailers(),
+                is_uninteresting=name in self.uninteresting_emailers(),
                 other_files=[f for f in self.other_files if name and name == f.author]
             )
             for name in names
@@ -276,6 +280,17 @@ class EpsteinFiles:
         return self._uninteresting_emailers
+    def _find_email_attachments_and_set_is_first_for_user(self) -> None:
+        for file in self.other_files:
+            if file.config and file.config.attached_to_email_id:
+                email = self.email_for_id(file.config.attached_to_email_id)
+                file.warn(f"Attaching to {email}")
+                email.attached_docs.append(file)
+        for emailer in self.emailers():
+            first_email = emailer.emails[0]
+            first_email._is_first_for_user = True
     def _copy_duplicate_email_properties(self) -> None:
         """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
         for email in self.emails:

epstein_files/person.py CHANGED Viewed

@@ -9,13 +9,13 @@ from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
+from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, flatten, without_falsey
+from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
      get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
@@ -42,7 +42,7 @@ class Person:
     emails: list[Email] = field(default_factory=list)
     imessage_logs: list[MessengerLog] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
-    is_uninteresting_cc: bool = False
+    is_uninteresting: bool = False
     def __post_init__(self):
         self.emails = Document.sort_by_timestamp(self.emails)
@@ -62,7 +62,7 @@ class Person:
             return None
         elif self.category():
             return styled_category(self.category())
-        elif self.is_a_mystery() or self.is_uninteresting_cc:
+        elif self.is_a_mystery() or self.is_uninteresting:
             return QUESTION_MARKS_TXT
     def email_conversation_length_in_days(self) -> int:
@@ -141,7 +141,7 @@ class Person:
             if info:
                 return info
-        if self.is_uninteresting_cc:
+        if self.is_uninteresting and len(self.emails_by()) == 0:
             if self.has_any_epstein_emails():
                 return UNINTERESTING_CC_INFO
             else:
@@ -157,8 +157,10 @@ class Person:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
             return Text(f"({JUNK} mail)", style='bright_black dim')
-        elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
-            if self.info_str() == UNINTERESTING_CC_INFO:
+        elif self.is_uninteresting and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
+            if self.sole_cc():
+                return Text(f"(cc: from {self.sole_cc()} only)", style='wheat4 dim')
+            elif self.info_str() == UNINTERESTING_CC_INFO:
                 return Text(f"({self.info_str()})", style='wheat4 dim')
             else:
                 return Text(f"({self.info_str()})", style='plum4 dim')
@@ -172,7 +174,7 @@ class Person:
             else:
                 return None
         else:
-            return Text(self.info_str(), style=self.style())
+            return Text(self.info_str(), style=self.style(allow_bold=False))
     def internal_link(self) -> Text:
         """Kind of like an anchor link to the section of the page containing these emails."""
@@ -180,7 +182,22 @@ class Person:
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theroetically could know more about."""
-        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting_cc)
+        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting)
+    def sole_cc(self) -> str | None:
+        """Return name if this person sent 0 emails and received CC from only one that name."""
+        email_authors = uniquify([e.author for e in self.emails_to()])
+        if len(self.unique_emails()) == 1 and len(email_authors) > 0:
+            logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
+        else:
+            logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
+        if len(self.unique_emails_by()) > 0:
+            return None
+        if len(email_authors) == 1:
+            return email_authors[0]
     def is_email_address(self) -> bool:
         return '@' in (self.name or '')
@@ -196,6 +213,10 @@ class Person:
         return True
+    def should_always_truncate(self) -> bool:
+        """True if we want to truncate all emails to/from this user."""
+        return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
     def is_unstyled(self) -> bool:
         """True if there's no highlight group for this name."""
         return self.style() == DEFAULT_NAME_STYLE
@@ -259,8 +280,8 @@ class Person:
         else:
             return counts + [self.name_str()]
-    def style(self) -> str:
-        return get_style_for_name(self.name)
+    def style(self, allow_bold: bool = True) -> str:
+        return get_style_for_name(self.name, allow_bold=allow_bold)
     def unique_emails(self) -> Sequence[Email]:
         return Document.without_dupes(self.emails)
@@ -290,7 +311,7 @@ class Person:
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
         is_selection = len(people) != len(highlighted) or args.emailers_info
-        all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
+        all_emails = Person.emails_from_people(people)
         email_authors = [p for p in people if p.emails_by() and p.name]
         attributed_emails = [email for email in all_emails if email.author]
         footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
@@ -331,14 +352,18 @@ class Person:
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
+                person.internal_link() if is_on_page and not person.is_uninteresting else person.name_txt(),
                 person.category_txt(),
                 f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
-                Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
-                Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
+                str(len(person.unique_emails_by())) if len(person.unique_emails_by()) > 0 else '',
+                str(len(person.unique_emails_to())) if len(person.unique_emails_to()) > 0 else '',
                 f"{person.email_conversation_length_in_days()}",
                 person.info_txt() or '',
                 style='' if show_epstein_total or is_on_page else 'dim',
             )
         return table
+    @staticmethod
+    def emails_from_people(people: list['Person']) -> Sequence[Email]:
+        return Document.uniquify(flatten([list(p.unique_emails()) for p in people]))

epstein_files/util/constant/names.py CHANGED Viewed

@@ -218,23 +218,23 @@ UBS = 'UBS'
 NAMES_TO_NOT_HIGHLIGHT = """
     al alain alan alfredo allen alex alexander amanda andres andrew anthony
     bard barrett barry bennet bernard bill black bob boris brad brenner bruce
-    caroline carolyn chris christina cohen
+    cameron caroline carolyn chris christina cohen
     dan daniel danny darren dave david debbie donald
     ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
     faith fisher forget fred friendly frost fuller
     gates gerald george gold gordon
-    haddad harry hay heather henry hill hoffman howard
+    haddad hanson harry hay heather henry hill hoffman howard
     ian ivan
     jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
-    kahn karl kate katherine kelly ken kevin krassner
-    larry laurie lawrence leon lesley linda link lisa
+    kafka kahn karl kate katherine kelly ken kevin krassner
+    larry larsen laurie lawrence leon lesley linda link lisa
     mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
     nancy neal new nicole norman
     owen
     paul paula pen peter philip prince
     randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
-    scott sean skip stanley stern stephen steve steven stone susan
-    the thomas tim tom tony tyler
+    scott sean skip smith stanley stern stephen steve steven stone susan
+    terry the thomas tim tom tony tyler
     victor
     wade waters
     y
@@ -269,6 +269,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 def constantize_name(name: str) -> str:
     if name == 'Andrzej Duda or entourage':
         return 'ANDRZEJ_DUDA'
+    elif name == MIROSLAV_LAJCAK:
+        return 'MIROSLAV_LAJCAK'
+    elif name == 'Paula Heil Fisher (???)':
+        return 'PAULA'
     variable_name = remove_question_marks(name)
     variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -80,6 +80,7 @@ OTHER_FILE_CLASS = 'OtherFile'
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
-def indented(s: str, spaces: int = 4) -> str:
+def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
     indent = ' ' * spaces
+    indent += prefix
     return indent + f"\n{indent}".join(s.split('\n'))

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl