PyPI - epstein-files - Versions diffs - 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl - Mend

epstein-files 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

epstein_files/__init__.py +42 -30
epstein_files/documents/communication.py +0 -3
epstein_files/documents/document.py +66 -19
epstein_files/documents/email.py +203 -208
epstein_files/documents/emails/email_header.py +10 -2
epstein_files/documents/imessage/text_message.py +3 -2
epstein_files/documents/other_file.py +16 -34
epstein_files/epstein_files.py +24 -35
epstein_files/person.py +67 -73
epstein_files/util/constant/names.py +21 -12
epstein_files/util/constant/output_files.py +8 -5
epstein_files/util/constant/strings.py +2 -2
epstein_files/util/constant/urls.py +14 -2
epstein_files/util/constants.py +38 -12
epstein_files/util/data.py +2 -1
epstein_files/util/doc_cfg.py +3 -3
epstein_files/util/env.py +10 -7
epstein_files/util/highlighted_group.py +366 -202
epstein_files/util/logging.py +1 -1
epstein_files/util/output.py +54 -21
epstein_files/util/rich.py +21 -16
epstein_files/util/timer.py +14 -0
epstein_files/util/word_count.py +1 -1
{epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/METADATA +5 -2
epstein_files-1.2.5.dist-info/RECORD +34 -0
epstein_files-1.2.0.dist-info/RECORD +0 -34
{epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
{epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
{epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -4,7 +4,7 @@ from datetime import datetime
 from rich.text import Text
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
+from epstein_files.util.constant.names import ANTHONY_SCARAMUCCI, JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
 from epstein_files.util.constant.strings import TIMESTAMP_DIM
 from epstein_files.util.data import iso_timestamp
 from epstein_files.util.highlighted_group import get_style_for_name
@@ -17,6 +17,7 @@ PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
 UNCERTAIN_SUFFIX = ' (?)'
 DISPLAY_LAST_NAME_ONLY = [
+    ANTHONY_SCARAMUCCI,
     JEFFREY_EPSTEIN,
     STEVE_BANNON,
 ]
@@ -59,7 +60,7 @@ class TextMessage:
         try:
             timestamp_str = iso_timestamp(self.parse_timestamp())
         except Exception as e:
-            logger.warning(f"Failed to parse timestamp for {self}")
+            logger.info(f"Failed to parse timestamp for {self}")
             timestamp_str = self.timestamp_str
         return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)

epstein_files/documents/other_file.py CHANGED Viewed

@@ -22,7 +22,7 @@ from epstein_files.util.data import days_between, escape_single_quotes, remove_t
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import add_cols_to_table, build_table, highlighter
 from epstein_files.util.logging import logger
 FIRST_FEW_LINES = 'First Few Lines'
@@ -209,39 +209,8 @@ class OtherFile(Document):
         if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
-    @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
-        counts = defaultdict(int)
-        category_bytes = defaultdict(int)
-        for file in files:
-            if file.category() is None:
-                logger.warning(f"file {file.file_id} has no category")
-            counts[file.category()] += 1
-            category_bytes[file.category()] += file.file_size()
-        table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
-        table.columns[-1].justify = 'right'
-        table.columns[0].min_width = 14
-        table.columns[-1].style = 'dim'
-        for (category, count) in sort_dict(counts):
-            category_files = [f for f in files if f.category() == category]
-            known_author_count = Document.known_author_count(category_files)
-            table.add_row(
-                styled_category(category),
-                str(count),
-                str(known_author_count),
-                str(count - known_author_count),
-                file_size_to_str(category_bytes[category]),
-            )
-        return table
-    @staticmethod
-    def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    @classmethod
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         """Build a table of OtherFile documents."""
         table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
@@ -272,3 +241,16 @@ class OtherFile(Document):
             )
         return table
+    @classmethod
+    def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        categories = uniquify([f.category() for f in files])
+        categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
+        table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
+        for category in categories:
+            category_files = [f for f in files if f.category() == category]
+            table.add_row(styled_category(category), *cls.files_info_row(category_files))
+        table.columns = table.columns[:-2] + [table.columns[-1]]  # Removee unknown author col
+        return table

epstein_files/epstein_files.py CHANGED Viewed

@@ -9,6 +9,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Sequence, Type, cast
+from rich.table import Table
 from epstein_files.documents.document import Document
 from epstein_files.documents.email import DETECT_EMAIL_REGEX, Email
 from epstein_files.documents.json_file import JsonFile
@@ -22,7 +24,6 @@ from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
-from epstein_files.util.rich import NA_TXT, add_cols_to_table, build_table, console, print_centered
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
@@ -31,9 +32,13 @@ PICKLED_PATH = Path("the_epstein_files.pkl.gz")
 SLOW_FILE_SECONDS = 1.0
 EMAILS_WITH_UNINTERESTING_CCS = [
-    '025329',  # Krassner
-    '024923',  # Krassner
-    '033568',  # Krassner
+    '025329',    # Krassner
+    '024923',    # Krassner
+    '033568',    # Krassner
+]
+EMAILS_WITH_UNINTERESTING_BCCS = [
+    '014797_1',  # Ross Gow
 ]
@@ -45,7 +50,7 @@ class EpsteinFiles:
     json_files: list[JsonFile] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
     timer: Timer = field(default_factory=lambda: Timer())
-    uninteresting_ccs: list[Name] = field(init=False)
+    uninteresting_ccs: list[Name] = field(default_factory=list)
     def __post_init__(self):
         """Iterate through files and build appropriate objects."""
@@ -88,13 +93,12 @@ class EpsteinFiles:
         if PICKLED_PATH.exists() and not args.overwrite_pickle and not args.skip_other_files:
             with gzip.open(PICKLED_PATH, 'rb') as file:
                 epstein_files = pickle.load(file)
-                epstein_files.timer = timer
                 timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
-                epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
+                timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
                 return epstein_files
         logger.warning(f"Building new cache file, this will take a few minutes...")
-        epstein_files = EpsteinFiles(timer=timer)
+        epstein_files = EpsteinFiles()
         if args.skip_other_files:
             logger.warning(f"Not writing pickled data because --skip-other-files")
@@ -235,7 +239,7 @@ class EpsteinFiles:
         return json.dumps(metadata, indent=4, sort_keys=True)
     def non_duplicate_emails(self) -> list[Email]:
-        return [email for email in self.emails if not email.is_duplicate()]
+        return Document.without_dupes(self.emails)
     def non_json_other_files(self) -> list[OtherFile]:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
@@ -253,34 +257,20 @@ class EpsteinFiles:
             for name in names
         ]
-    def print_files_summary(self) -> None:
-        table = build_table('File Overview')
-        add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
-        table.columns[1].justify = 'right'
-        def add_row(label: str, docs: list):
-            known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
-            table.add_row(
-                label,
-                f"{len(docs):,}",
-                f"{known:,}" if known is not None else NA_TXT,
-                f"{len(docs) - known:,}" if known is not None else NA_TXT,
-                f"{len([d for d in docs if d.is_duplicate()])}",
-            )
-        add_row('Emails', self.emails)
-        add_row('iMessage Logs', self.imessage_logs)
-        add_row('JSON Data', self.json_files)
-        add_row('Other', self.non_json_other_files())
-        print_centered(table)
-        console.line()
+    def overview_table(self) -> Table:
+        table = Document.file_info_table('Files Overview', 'File Type')
+        table.add_row('Emails', *Document.files_info_row(self.emails))
+        table.add_row('iMessage Logs', *Document.files_info_row(self.imessage_logs))
+        table.add_row('JSON Data', *Document.files_info_row(self.json_files, True))
+        table.add_row('Other', *Document.files_info_row(self.non_json_other_files()))
+        return table
     def unknown_recipient_ids(self) -> list[str]:
         """IDs of emails whose recipient is not known."""
         return sorted([e.file_id for e in self.emails if None in e.recipients or not e.recipients])
     def uninteresting_emailers(self) -> list[Name]:
+        """Emailers whom we don't want to print a separate section for because they're just CCed."""
         if '_uninteresting_emailers' not in vars(self):
             self._uninteresting_emailers = sorted(uniquify(UNINTERESTING_EMAILERS + self.uninteresting_ccs))
@@ -306,8 +296,8 @@ class EpsteinFiles:
         self.emails = Document.sort_by_timestamp(self.emails)
     def _set_uninteresting_ccs(self) -> None:
-        ross_gow_email = self.email_for_id('014797_1')
-        self.uninteresting_ccs = copy(cast(list[Name], ross_gow_email.header.bcc))
+        for id in EMAILS_WITH_UNINTERESTING_BCCS:
+            self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
         for id in EMAILS_WITH_UNINTERESTING_CCS:
             self.uninteresting_ccs += self.email_for_id(id).recipients
@@ -344,5 +334,4 @@ def document_cls(doc: Document) -> Type[Document]:
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
-    docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
-    return [json_safe(d.metadata()) for d in docs_sorted_by_id]
+    return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]

epstein_files/person.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from datetime import datetime, date
+from typing import Sequence
 from rich.console import Group, RenderableType
 from rich.padding import Padding
@@ -18,13 +19,14 @@ from epstein_files.util.data import days_between, flatten, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
      get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
-from epstein_files.util.rich import GREY_NUMBERS, LAST_TIMESTAMP_STYLE, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
+from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
 ALT_INFO_STYLE = 'medium_purple4'
 CC = 'cc:'
 MIN_AUTHOR_PANEL_WIDTH = 80
 EMAILER_INFO_TITLE = 'Email Conversations Will Appear'
-UNINTERESTING_CC_INFO = "CC: or BCC: recipient only"
+UNINTERESTING_CC_INFO = "cc: or bcc: recipient only"
+UNINTERESTING_CC_INFO_NO_CONTACT = f"{UNINTERESTING_CC_INFO}, no direct contact with Epstein"
 INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + MAILING_LISTS + [
     'ACT for America',
@@ -100,6 +102,10 @@ class Person:
         links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
         return Text('', justify='center', style='dim').append(join_texts(links, join=' / '))  #, encloser='()'))#, encloser='‹›'))
+    def has_any_epstein_emails(self) -> bool:
+        contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
+        return JEFFREY_EPSTEIN in contacts
     def highlight_group(self) -> HighlightedNames | None:
         return get_highlight_group_for_name(self.name)
@@ -114,7 +120,7 @@ class Person:
         else:
             email_count = len(self.unique_emails())
             num_days = self.email_conversation_length_in_days()
-            title_suffix = f"to/from {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
+            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
         title = f"Found {email_count} emails {title_suffix}"
         width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -130,9 +136,16 @@ class Person:
         highlight_group = self.highlight_group()
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
-            return highlight_group.info_for(self.name)
-        elif self.is_uninteresting_cc:
-            return UNINTERESTING_CC_INFO
+            info = highlight_group.info_for(self.name)
+            if info:
+                return info
+        if self.is_uninteresting_cc:
+            if self.has_any_epstein_emails():
+                return UNINTERESTING_CC_INFO
+            else:
+                return UNINTERESTING_CC_INFO_NO_CONTACT
     def info_with_category(self) -> str:
         return ', '.join(without_falsey([self.category(), self.info_str()]))
@@ -143,18 +156,27 @@ class Person:
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
-            return Text(f"({JUNK} mail)", style='tan dim')
+            return Text(f"({JUNK} mail)", style='bright_black dim')
+        elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
+            if self.info_str() == UNINTERESTING_CC_INFO:
+                return Text(f"({self.info_str()})", style='wheat4 dim')
+            else:
+                return Text(f"({self.info_str()})", style='plum4 dim')
         elif self.is_a_mystery():
-            return Text(QUESTION_MARKS, style='magenta dim')
-        elif self.is_uninteresting_cc and self.info_str() == UNINTERESTING_CC_INFO:
-            return Text(f"({self.info_str()})", style='wheat4 dim')
+            return Text(QUESTION_MARKS, style='honeydew2 bold')
         elif self.info_str() is None:
             if self.name in MAILING_LISTS:
-                return Text('(mailing list)', style=f"{self.style()} dim")
+                return Text('(mailing list)', style=f"pale_turquoise4 dim")
+            elif self.category():
+                return Text(QUESTION_MARKS, style=self.style())
             else:
                 return None
         else:
-            return Text(self.info_str())
+            return Text(self.info_str(), style=self.style())
+    def internal_link(self) -> Text:
+        """Kind of like an anchor link to the section of the page containing these emails."""
+        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theroetically could know more about."""
@@ -214,8 +236,8 @@ class Person:
         return self._printable_emails()
     def print_emails_table(self) -> None:
-        emails = [email for email in self._printable_emails() if not email.is_duplicate()]  # Remove dupes
-        print_centered(Padding(Email.build_emails_table(emails, self.name), (0, 5, 0, 5)))
+        table = Email.build_emails_table(self._unique_printable_emails(), self.name)
+        print_centered(Padding(table, (0, 5, 0, 5)))
         if self.is_linkable():
             print_centered(self.external_links_line())
@@ -223,7 +245,13 @@ class Person:
         console.line()
     def sort_key(self) -> list[int | str]:
-        counts = [len(self.unique_emails())]
+        counts = [
+            len(self.unique_emails()),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails()),
+        ]
         counts = [-1 * count for count in counts]
         if args.sort_alphabetical:
@@ -234,14 +262,14 @@ class Person:
     def style(self) -> str:
         return get_style_for_name(self.name)
-    def unique_emails(self) -> list[Email]:
-        return [email for email in self.emails if not email.is_duplicate()]
+    def unique_emails(self) -> Sequence[Email]:
+        return Document.without_dupes(self.emails)
     def unique_emails_by(self) -> list[Email]:
-        return [email for email in self.emails_by() if not email.is_duplicate()]
+        return Document.without_dupes(self.emails_by())
     def unique_emails_to(self) -> list[Email]:
-        return [email for email in self.emails_to() if not email.is_duplicate()]
+        return Document.without_dupes(self.emails_to())
     def _printable_emails(self):
         """For Epstein we only want to print emails he sent to himself."""
@@ -250,24 +278,32 @@ class Person:
         else:
             return self.emails
+    def _unique_printable_emails(self):
+        return Document.without_dupes(self._printable_emails())
     def __str__(self):
         return f"{self.name_str()}"
     @staticmethod
-    def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None) -> Table:
+    def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
         """Table of info about emailers."""
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
-        is_selection = len(people) != len(highlighted) or args.emailers_info_png
+        is_selection = len(people) != len(highlighted) or args.emailers_info
+        all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
+        email_authors = [p for p in people if p.emails_by() and p.name]
+        attributed_emails = [email for email in all_emails if email.author]
+        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
+                 f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
         if is_selection:
-            title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (see ", style=TABLE_TITLE_STYLE)
-            title.append(THE_OTHER_PAGE_TXT).append(" for the rest)")
+            title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
+            title.append(THE_OTHER_PAGE_TXT).append(" has the rest)")
         else:
             title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
-        table = build_table(title)
-        table.add_column('Start')
+        table = build_table(title, caption=footer)
+        table.add_column('First')
         table.add_column('Name', max_width=24, no_wrap=True)
         table.add_column('Category', justify='left', style='dim italic')
         table.add_column('Num', justify='right', style='white')
@@ -281,6 +317,7 @@ class Person:
         for person in people:
             earliest_email_date = person.earliest_email_date()
+            is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
             # Color year rollovers more brightly
@@ -294,57 +331,14 @@ class Person:
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.name_txt(),  # TODO: make link?
+                person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
                 person.category_txt(),
-                f"{len(person._printable_emails())}",
-                f"{len(person.unique_emails_by())}",
-                f"{len(person.unique_emails_to())}",
+                f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
+                Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
+                Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
                 f"{person.email_conversation_length_in_days()}",
                 person.info_txt() or '',
-                style='' if person.name in highlighted_names else 'dim',
+                style='' if show_epstein_total or is_on_page else 'dim',
             )
         return table
-    @staticmethod
-    def emailer_stats_table(people: list['Person']) -> Table:
-        email_authors = [p for p in people if p.emails_by() and p.name]
-        all_emails = Document.uniquify(flatten([p.unique_emails() for p in people]))
-        attributed_emails = [email for email in all_emails if email.author]
-        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}"
-        footer = f"{footer} out of {len(attributed_emails):,} emails)"
-        counts_table = build_table(
-            f"All {len(email_authors)} People Who Sent or Received an Email in the Files",
-            caption=footer,
-            cols=[
-                'Name',
-                {'name': 'Count', 'justify': 'right', 'style': 'bold bright_white'},
-                {'name': 'Sent', 'justify': 'right', 'style': 'gray74'},
-                {'name': 'Recv', 'justify': 'right', 'style': 'gray74'},
-                {'name': 'First', 'style': TIMESTAMP_STYLE},
-                {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
-                {'name': 'Days', 'justify': 'right', 'style': 'dim'},
-                JMAIL,
-                EPSTEIN_MEDIA,
-                EPSTEIN_WEB,
-                'Twitter',
-            ]
-        )
-        for person in sorted(people, key=lambda person: person.sort_key()):
-            counts_table.add_row(
-                person.name_link(),
-                f"{len(person.unique_emails()):,}",
-                f"{len(person.unique_emails_by()):,}",
-                f"{len(person.unique_emails_to()):,}",
-                str(person.earliest_email_date()),
-                str(person.last_email_date()),
-                f"{person.email_conversation_length_in_days()}",
-                person.external_link_txt(JMAIL),
-                person.external_link_txt(EPSTEIN_MEDIA) if person.is_linkable() else '',
-                person.external_link_txt(EPSTEIN_WEB) if person.is_linkable() else '',
-                person.external_link_txt(TWITTER),
-            )
-        return counts_table

epstein_files/util/constant/names.py CHANGED Viewed

@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
 DONALD_TRUMP = 'Donald Trump'
 EDUARDO_ROBLES = 'Eduardo Robles'
 EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
+EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
 EHUD_BARAK = 'Ehud Barak'
 ERIC_ROTH = 'Eric Roth'
 FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
 NADIA_MARCINKO = 'Nadia Marcinko'
 NEAL_KASSELL = 'Neal Kassell'
 NICHOLAS_RIBIS = 'Nicholas Ribis'
+NILI_PRIELL_BARAK = 'Nili Priell Barak'
 NOAM_CHOMSKY = 'Noam Chomsky'
 NORMAN_D_RAU = 'Norman D. Rau'
 OLIVIER_COLOM = 'Olivier Colom'
@@ -214,23 +216,23 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
-    al alain alan alfredo allen alex alexander amanda andres andrew
-    bard barrett barry bill black bob boris brad bruce
-    carolyn chris christina
-    dan daniel danny darren dave david donald
-    ed edward edwards enterprise enterprises entourage epstein eric erika etienne
-    faith forget fred friendly frost fuller
-    gerald george gold gordon
-    haddad harry hay heather henry hill hoffman
+    al alain alan alfredo allen alex alexander amanda andres andrew anthony
+    bard barrett barry bennet bernard bill black bob boris brad brenner bruce
+    caroline carolyn chris christina cohen
+    dan daniel danny darren dave david debbie donald
+    ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
+    faith fisher forget fred friendly frost fuller
+    gates gerald george gold gordon
+    haddad harry hay heather henry hill hoffman howard
     ian ivan
     jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
     kahn karl kate katherine kelly ken kevin krassner
     larry laurie lawrence leon lesley linda link lisa
-    mann marc marie mark martin melanie michael mike miller mitchell miles morris moskowitz
-    nancy neal new nicole
+    mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
+    nancy neal new nicole norman
     owen
     paul paula pen peter philip prince
-    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
+    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
     scott sean skip stanley stern stephen steve steven stone susan
     the thomas tim tom tony tyler
     victor
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
     baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
-    davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+    davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
     edmond elizabeth emily entwistle erik evelyn
     ferguson flachsbart francis franco frank
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -296,3 +298,10 @@ def extract_last_name(name: str) -> str:
         return ' '.join(first_last_names[-2:])
     else:
         return first_last_names[-1]
+def reversed_name(name: str) -> str:
+    if ' ' not in name:
+        return name
+    return f"{extract_last_name(name)}, {extract_first_name(name)}"

epstein_files/util/constant/output_files.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from pathlib import Path
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
+from epstein_files.util.logging import logger
 # Files output by the code
 HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
 # Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh!
+# NOTE: don't rename these variables without changing deploy.sh
+GH_REPO_NAME = 'epstein_text_messages'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
 CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
 def make_clean() -> None:
     """Delete all build artifacts."""
     for build_file in BUILD_ARTIFACTS:
-        if build_file.exists():
-            print(f"Removing build file '{build_file}'...")
-            build_file.unlink()
+        for file in [build_file, Path(f"{build_file}.txt")]:
+            if file.exists():
+                logger.warning(f"Removing build file '{file}'...")
+                file.unlink()

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -9,7 +9,6 @@ ARTICLE = 'article'
 BOOK = 'book'
 BUSINESS = 'business'
 CONFERENCE = 'conference'
-ENTERTAINER = 'entertainer'
 FINANCE = 'finance'
 FRIEND = 'friend'
 FLIGHT_LOG = 'flight log'
@@ -65,7 +64,8 @@ REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
 # Regexes
-FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
+ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
 FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -6,6 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 from epstein_files.util.constant.output_files import *
+from epstein_files.util.constant.strings import remove_question_marks
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_stem
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
 ROLLCALL = 'RollCall'
 TWITTER = 'search X'
-GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
+GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
 EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+TO_FROM = 'to/from'
 extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
@@ -33,6 +35,7 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 # External URLs
 COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
 COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
+EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
 OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
 RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
 SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
@@ -71,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
 PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
     EPSTEIN_MEDIA: epstein_media_person_url,
     EPSTEIN_WEB: epstein_web_person_url,
@@ -97,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
     return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
+def internal_link_to_emails(name: str) -> str:
+    """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
+    search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
+    return f"{this_site_url()}#:~:text={search_term}"
 def link_markup(
     url: str,
     link_text: str | None = None,
@@ -120,6 +128,10 @@ def other_site_url() -> str:
     return SITE_URLS[other_site_type()]
+def this_site_url() -> str:
+    return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
 THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
 THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)

epstein-files 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl

epstein-files 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl