PyPI - epstein-files - Versions diffs - 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

epstein-files 1.2.5py3-none-any.whl → 1.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

epstein_files/__init__.py +55 -23
epstein_files/documents/communication.py +9 -5
epstein_files/documents/document.py +231 -135
epstein_files/documents/doj_file.py +242 -0
epstein_files/documents/doj_files/full_text.py +166 -0
epstein_files/documents/email.py +289 -232
epstein_files/documents/emails/email_header.py +35 -16
epstein_files/documents/emails/emailers.py +223 -0
epstein_files/documents/imessage/text_message.py +2 -3
epstein_files/documents/json_file.py +18 -14
epstein_files/documents/messenger_log.py +23 -39
epstein_files/documents/other_file.py +54 -48
epstein_files/epstein_files.py +65 -29
epstein_files/person.py +151 -94
epstein_files/util/constant/names.py +37 -10
epstein_files/util/constant/output_files.py +2 -0
epstein_files/util/constant/strings.py +14 -7
epstein_files/util/constant/urls.py +17 -0
epstein_files/util/constants.py +556 -391
epstein_files/util/data.py +2 -0
epstein_files/util/doc_cfg.py +44 -33
epstein_files/util/env.py +34 -19
epstein_files/util/file_helper.py +30 -6
epstein_files/util/helpers/debugging_helper.py +13 -0
epstein_files/util/helpers/env_helpers.py +21 -0
epstein_files/util/highlighted_group.py +121 -37
epstein_files/util/layout/left_bar_panel.py +26 -0
epstein_files/util/logging.py +28 -13
epstein_files/util/output.py +49 -40
epstein_files/util/rich.py +30 -3
epstein_files/util/word_count.py +7 -7
{epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/METADATA +16 -3
epstein_files-1.5.0.dist-info/RECORD +40 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +1 -1
epstein_files-1.2.5.dist-info/RECORD +0 -34
{epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0

epstein_files/person.py CHANGED Viewed

@@ -9,17 +9,18 @@ from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
+from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, flatten, without_falsey
+from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
      get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
-from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
+from epstein_files.util.rich import (GREY_NUMBERS, SKIPPED_FILE_MSG_PADDING, TABLE_TITLE_STYLE, build_table,
+     console, join_texts, print_centered)
 ALT_INFO_STYLE = 'medium_purple4'
 CC = 'cc:'
@@ -42,14 +43,15 @@ class Person:
     emails: list[Email] = field(default_factory=list)
     imessage_logs: list[MessengerLog] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
-    is_uninteresting_cc: bool = False
+    is_uninteresting: bool = False
     def __post_init__(self):
         self.emails = Document.sort_by_timestamp(self.emails)
         self.imessage_logs = Document.sort_by_timestamp(self.imessage_logs)
+    @property
     def category(self) -> str | None:
-        highlight_group = self.highlight_group()
+        highlight_group = self.highlight_group
         if highlight_group and isinstance(highlight_group, HighlightedNames):
             category = highlight_group.category or highlight_group.label
@@ -57,60 +59,63 @@ class Person:
             if category != self.name and category != 'paula':  # TODO: this sucks
                 return category
+    @property
     def category_txt(self) -> Text | None:
         if self.name is None:
             return None
-        elif self.category():
-            return styled_category(self.category())
-        elif self.is_a_mystery() or self.is_uninteresting_cc:
+        elif self.category:
+            return styled_category(self.category)
+        elif self.is_a_mystery or self.is_uninteresting:
             return QUESTION_MARKS_TXT
+    @property
     def email_conversation_length_in_days(self) -> int:
+        """Result of `days_between()` for the timestamps of the first and last entries of `self.emails`."""
+        # `self.emails` is sorted by timestamp in `__post_init__`, so [0] / [-1] are earliest / latest.
+        # NOTE(review): indexing raises IndexError for a Person with no emails - confirm callers never build one.
         return days_between(self.emails[0].timestamp, self.emails[-1].timestamp)
+    @property
     def earliest_email_at(self) -> datetime:
         return self.emails[0].timestamp
+    @property
     def earliest_email_date(self) -> date:
-        return self.earliest_email_at().date()
+        return self.earliest_email_at.date()
+    @property
     def last_email_at(self) -> datetime:
         return self.emails[-1].timestamp
+    @property
     def last_email_date(self) -> date:
-        return self.last_email_at().date()
+        return self.last_email_at.date()
+    @property
     def emails_by(self) -> list[Email]:
         return [e for e in self.emails if self.name == e.author]
+    @property
     def emails_to(self) -> list[Email]:
+        """
+        Emails in `self.emails` whose `recipients` contains `self.name`. When `self.name` is None,
+        emails that have no recipients at all are included as well.
+        """
         return [
             e for e in self.emails
             if self.name in e.recipients or (self.name is None and len(e.recipients) == 0)
         ]
-    def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
-        return PERSON_LINK_BUILDERS[site](self.name_str())
-    def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
-        if self.name is None:
-            return Text('')
-        return link_text_obj(self.external_link(site), link_str or site, style=self.style())
+    @property
     def external_links_line(self) -> Text:
         links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
         return Text('', justify='center', style='dim').append(join_texts(links, join=' / '))  #, encloser='()'))#, encloser='‹›'))
+    @property
     def has_any_epstein_emails(self) -> bool:
         contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
         return JEFFREY_EPSTEIN in contacts
+    @property
     def highlight_group(self) -> HighlightedNames | None:
         return get_highlight_group_for_name(self.name)
+    @property
     def info_panel(self) -> Padding:
-        """Print a panel with the name of an emailer and a few tidbits of information about them."""
+        """Return a `Panel` with the name of an emailer and a few tidbits of information about them."""
         style = 'white' if (not self.style() or self.style() == DEFAULT) else self.style()
         panel_style = f"black on {style} bold"
@@ -118,22 +123,23 @@ class Person:
             email_count = len(self._printable_emails())
             title_suffix = f"sent by {JEFFREY_EPSTEIN} to himself"
         else:
-            email_count = len(self.unique_emails())
-            num_days = self.email_conversation_length_in_days()
-            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
+            email_count = len(self.unique_emails)
+            num_days = self.email_conversation_length_in_days
+            title_suffix = f"{TO_FROM} {self.name_str} starting {self.earliest_email_date} covering {num_days:,} days"
         title = f"Found {email_count} emails {title_suffix}"
-        width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
+        width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category) + 8)
         panel = Panel(Text(title, justify='center'), width=width, style=panel_style)
         elements: list[RenderableType] = [panel]
-        if self.info_with_category():
-            elements.append(Text(f"({self.info_with_category()})", justify='center', style=f"{style} italic"))
+        if self.info_with_category:
+            elements.append(Text(f"({self.info_with_category})", justify='center', style=f"{style} italic"))
         return Padding(Group(*elements), (2, 0, 1, 0))
+    @property
     def info_str(self) -> str | None:
-        highlight_group = self.highlight_group()
+        highlight_group = self.highlight_group
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
             info = highlight_group.info_for(self.name)
@@ -141,90 +147,161 @@ class Person:
             if info:
                 return info
-        if self.is_uninteresting_cc:
-            if self.has_any_epstein_emails():
+        if self.is_uninteresting and len(self.emails_by) == 0:
+            if self.has_any_epstein_emails:
                 return UNINTERESTING_CC_INFO
             else:
                 return UNINTERESTING_CC_INFO_NO_CONTACT
-    def info_with_category(self) -> str:
-        return ', '.join(without_falsey([self.category(), self.info_str()]))
+    @property
     def info_txt(self) -> Text | None:
         if self.name == JEFFREY_EPSTEIN:
             return Text('(emails sent by Epstein to himself are here)', style=ALT_INFO_STYLE)
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
-        elif self.category() == JUNK:
+        elif self.category == JUNK:
             return Text(f"({JUNK} mail)", style='bright_black dim')
-        elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
-            if self.info_str() == UNINTERESTING_CC_INFO:
-                return Text(f"({self.info_str()})", style='wheat4 dim')
+        elif self.is_uninteresting and (self.info_str or '').startswith(UNINTERESTING_CC_INFO):
+            if self.sole_cc:
+                return Text(f"(cc: from {self.sole_cc} only)", style='wheat4 dim')
+            elif self.info_str == UNINTERESTING_CC_INFO:
+                return Text(f"({self.info_str})", style='wheat4 dim')
             else:
-                return Text(f"({self.info_str()})", style='plum4 dim')
-        elif self.is_a_mystery():
+                return Text(f"({self.info_str})", style='plum4 dim')
+        elif self.is_a_mystery:
             return Text(QUESTION_MARKS, style='honeydew2 bold')
-        elif self.info_str() is None:
+        elif self.info_str is None:
             if self.name in MAILING_LISTS:
                 return Text('(mailing list)', style=f"pale_turquoise4 dim")
-            elif self.category():
+            elif self.category:
                 return Text(QUESTION_MARKS, style=self.style())
             else:
                 return None
         else:
-            return Text(self.info_str(), style=self.style())
+            return Text(self.info_str, style=self.style(allow_bold=False))
+    @property
+    def info_with_category(self) -> str:
+        return ', '.join(without_falsey([self.category, self.info_str]))
+    @property
     def internal_link(self) -> Text:
         """Kind of like an anchor link to the section of the page containing these emails."""
-        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
+        return link_text_obj(internal_link_to_emails(self.name_str), self.name_str, style=self.style())
+    @property
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theoretically could know more about."""
-        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting_cc)
+        return self.is_unstyled and not (self.is_email_address or self.info_str or self.is_uninteresting)
+    @property
     def is_email_address(self) -> bool:
         return '@' in (self.name or '')
+    @property
     def is_linkable(self) -> bool:
         """Return True if it's likely that EpsteinWeb has a page for this name."""
         if self.name is None or ' ' not in self.name:
             return False
-        elif self.is_email_address() or '/' in self.name or QUESTION_MARKS in self.name:
+        elif self.is_email_address or '/' in self.name or QUESTION_MARKS in self.name:
             return False
         elif self.name in INVALID_FOR_EPSTEIN_WEB:
             return False
         return True
+    @property
     def is_unstyled(self) -> bool:
         """True if there's no highlight group for this name."""
         return self.style() == DEFAULT_NAME_STYLE
-    def name_str(self) -> str:
-        return self.name or UNKNOWN
+    @property
     def name_link(self) -> Text:
         """Will only link if it's worth linking, otherwise just a Text object."""
-        if not self.is_linkable():
-            return self.name_txt()
+        if not self.is_linkable:
+            return self.name_txt
         else:
-            return Text.from_markup(link_markup(self.external_link(), self.name_str(), self.style()))
+            return Text.from_markup(link_markup(self.external_link(), self.name_str, self.style()))
+    @property
+    def name_str(self) -> str:
+        return self.name or UNKNOWN
+    @property
     def name_txt(self) -> Text:
         return styled_name(self.name)
+    @property  # TODO: unused?
+    def should_always_truncate(self) -> bool:
+        """True if we want to truncate all emails to/from this user."""
+        return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
+    @property
+    def sole_cc(self) -> str | None:
+        """Return name if this person sent 0 emails and received CC from only one that name."""
+        email_authors = uniquify([e.author for e in self.emails_to])
+        if len(self.unique_emails) == 1 and len(email_authors) > 0:
+            logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
+        else:
+            logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
+        if len(self.unique_emails_by) > 0:
+            return None
+        if len(email_authors) == 1:
+            return email_authors[0]
+    @property
+    def sort_key(self) -> list[int | str]:
+        """
+        Key used to sort `Person` objects by the number of emails sent/received.
+        Every numeric entry is negated before being returned, so an ascending sort puts people
+        with the most unique emails first. Ties are then broken by placing people whose `info_str`
+        equals one of the two UNINTERESTING_CC_INFO* strings later, and people for whom
+        `has_any_epstein_emails` is true earlier. `name_str` is the first element when
+        `args.sort_alphabetical` is set, otherwise the last.
+        """
+        counts = [
+            len(self.unique_emails),
+            -1 * int((self.info_str or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails),
+        ]
+        # Flip every sign so larger values come first under Python's default ascending sort.
+        counts = [-1 * count for count in counts]
+        if args.sort_alphabetical:
+            return [self.name_str] + counts
+        else:
+            return counts + [self.name_str]
+    @property
+    def unique_emails(self) -> Sequence[Email]:
+        return Document.without_dupes(self.emails)
+    @property
+    def unique_emails_by(self) -> list[Email]:
+        return Document.without_dupes(self.emails_by)
+    @property
+    def unique_emails_to(self) -> list[Email]:
+        return Document.without_dupes(self.emails_to)
+    def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
+        return PERSON_LINK_BUILDERS[site](self.name_str)
+    def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
+        if self.name is None:
+            return Text('')
+        return link_text_obj(self.external_link(site), link_str or site, style=self.style())
     def print_emails(self) -> list[Email]:
         """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
-        print_centered(self.info_panel())
+        print_centered(self.info_panel)
         self.print_emails_table()
         last_printed_email_was_duplicate = False
-        if self.category() == JUNK:
+        if self.category == JUNK:
             logger.warning(f"Not printing junk emailer '{self.name}'")
         else:
             for email in self._printable_emails():
-                if email.is_duplicate():
-                    console.print(Padding(email.duplicate_file_txt().append('...'), (0, 0, 0, 4)))
+                if email.is_duplicate:
+                    console.print(Padding(email.duplicate_file_txt.append('...'), SKIPPED_FILE_MSG_PADDING))
                     last_printed_email_was_duplicate = True
                 else:
                     if last_printed_email_was_duplicate:
@@ -239,42 +316,18 @@ class Person:
         table = Email.build_emails_table(self._unique_printable_emails(), self.name)
         print_centered(Padding(table, (0, 5, 0, 5)))
-        if self.is_linkable():
-            print_centered(self.external_links_line())
+        if self.is_linkable:
+            print_centered(self.external_links_line)
         console.line()
-    def sort_key(self) -> list[int | str]:
-        counts = [
-            len(self.unique_emails()),
-            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
-            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
-            int(self.has_any_epstein_emails()),
-        ]
-        counts = [-1 * count for count in counts]
-        if args.sort_alphabetical:
-            return [self.name_str()] + counts
-        else:
-            return counts + [self.name_str()]
-    def style(self) -> str:
-        return get_style_for_name(self.name)
-    def unique_emails(self) -> Sequence[Email]:
-        return Document.without_dupes(self.emails)
-    def unique_emails_by(self) -> list[Email]:
-        return Document.without_dupes(self.emails_by())
-    def unique_emails_to(self) -> list[Email]:
-        return Document.without_dupes(self.emails_to())
+    def style(self, allow_bold: bool = True) -> str:
+        return get_style_for_name(self.name, allow_bold=allow_bold)
     def _printable_emails(self):
         """For Epstein we only want to print emails he sent to himself."""
         if self.name == JEFFREY_EPSTEIN:
-            return [e for e in self.emails if e.is_note_to_self()]
+            return [e for e in self.emails if e.is_note_to_self]
         else:
             return self.emails
@@ -282,7 +335,7 @@ class Person:
         return Document.without_dupes(self._printable_emails())
     def __str__(self):
-        return f"{self.name_str()}"
+        return f"{self.name_str}"
     @staticmethod
     def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
@@ -290,8 +343,8 @@ class Person:
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
         is_selection = len(people) != len(highlighted) or args.emailers_info
-        all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
-        email_authors = [p for p in people if p.emails_by() and p.name]
+        all_emails = Person.emails_from_people(people)
+        email_authors = [p for p in people if p.emails_by and p.name]
         attributed_emails = [email for email in all_emails if email.author]
         footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
                  f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
@@ -316,7 +369,7 @@ class Person:
         grey_idx = 0
         for person in people:
-            earliest_email_date = person.earliest_email_date()
+            earliest_email_date = person.earliest_email_date
             is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
@@ -331,14 +384,18 @@ class Person:
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
-                person.category_txt(),
-                f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
-                Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
-                Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
-                f"{person.email_conversation_length_in_days()}",
-                person.info_txt() or '',
+                person.internal_link if is_on_page and not person.is_uninteresting else person.name_txt,
+                person.category_txt,
+                f"{len(person.unique_emails if show_epstein_total else person._unique_printable_emails())}",
+                str(len(person.unique_emails_by)) if len(person.unique_emails_by) > 0 else '',
+                str(len(person.unique_emails_to)) if len(person.unique_emails_to) > 0 else '',
+                f"{person.email_conversation_length_in_days}",
+                person.info_txt or '',
                 style='' if show_epstein_total or is_on_page else 'dim',
             )
         return table
+    @staticmethod
+    def emails_from_people(people: list['Person']) -> Sequence[Email]:
+        return Document.uniquify(flatten([list(p.unique_emails) for p in people]))

epstein_files/util/constant/names.py CHANGED Viewed

@@ -177,6 +177,16 @@ ZUBAIR_KHAN = 'Zubair Khan'
 UNKNOWN = '(unknown)'
+# DOJ files emails
+ALISON_J_NATHAN = 'Alison J. Nathan'
+AMIR_TAAKI = 'Amir Taaki'
+BROCK_PIERCE = 'Brock Pierce'
+CHRISTIAN_EVERDELL = 'Christian Everdell'
+CHRISTOPHER_DILORIO = 'Christopher Dilorio'
+DOUGLAS_WIGDOR = 'Douglas Wigdor'
+KARYNA_SHULIAK = 'Karyna Shuliak'
+STACEY_RICHMAN = 'Stacey Richman'
 # No communications but name is in the files
 BILL_GATES = 'Bill Gates'
 DONALD_TRUMP = 'Donald Trump'
@@ -216,25 +226,25 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
-    al alain alan alfredo allen alex alexander amanda andres andrew anthony
+    al alain alan alison alfredo allen alex alexander amanda andres andrew anthony
     bard barrett barry bennet bernard bill black bob boris brad brenner bruce
-    caroline carolyn chris christina cohen
-    dan daniel danny darren dave david debbie donald
+    cameron caroline carolyn chris christian christina cohen
+    dan daniel danny darren dave david debbie donald douglas
     ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
     faith fisher forget fred friendly frost fuller
     gates gerald george gold gordon
-    haddad harry hay heather henry hill hoffman howard
+    haddad hanson harry hay heather henry hill hoffman howard
     ian ivan
     jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
-    kahn karl kate katherine kelly ken kevin krassner
-    larry laurie lawrence leon lesley linda link lisa
+    kafka kahn karl kate katherine kelly ken kevin krassner
+    larry larsen laurie lawrence leon lesley linda link lisa
     mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
-    nancy neal new nicole norman
+    nancy nathan neal new nicole norman
     owen
-    paul paula pen peter philip prince
+    paul paula pen peter philip pierce prince
     randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
-    scott sean skip stanley stern stephen steve steven stone susan
-    the thomas tim tom tony tyler
+    scott sean skip smith stacey stanley stern stephen steve steven stone susan
+    terry the thomas tim tom tony tyler
     victor
     wade waters
     y
@@ -269,6 +279,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 def constantize_name(name: str) -> str:
     if name == 'Andrzej Duda or entourage':
         return 'ANDRZEJ_DUDA'
+    elif name == MIROSLAV_LAJCAK:
+        return 'MIROSLAV_LAJCAK'
+    elif name == 'Paula Heil Fisher (???)':
+        return 'PAULA'
     variable_name = remove_question_marks(name)
     variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
@@ -300,7 +314,20 @@ def extract_last_name(name: str) -> str:
         return first_last_names[-1]
+def reverse_first_and_last_names(name: str) -> str:
+    """
+    If there's a comma in the name in the style 'Lastname, Firstname', reverse it and remove comma.
+    A string containing '@' (presumably an email address) is returned lowercased and is never
+    reordered. A name without ', ' is returned unchanged.
+    """
+    if '@' in name:
+        return name.lower()
+    if ', ' in name:
+        # NOTE: only the first two ', '-separated parts are used, so for 'Last, First, Jr'
+        # everything after the second ', ' is dropped from the result.
+        names = name.split(', ')
+        return f"{names[1]} {names[0]}"
+    else:
+        return name
 def reversed_name(name: str) -> str:
+    """'Jeffrey Epstein' becomes 'Epstein Jeffrey'."""
     if ' ' not in name:
         return name

epstein_files/util/constant/output_files.py CHANGED Viewed

@@ -13,6 +13,7 @@ JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.
 TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
 WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
 # EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
+DOJ_2026_HTML_PATH = HTML_DIR.joinpath('doj_2026-01-30_files.html')
 URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
@@ -26,6 +27,7 @@ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
 JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
 WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
+DOJ_2026_URL = f"{TEXT_MSGS_URL}/{DOJ_2026_HTML_PATH.name}"
 SITE_URLS: dict[SiteType, str] = {
     EMAIL: ALL_EMAILS_URL,

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -57,29 +57,36 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 # Misc
 AUTHOR = 'author'
 DEFAULT = 'default'
+EFTA_PREFIX = 'EFTA'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
 JSON = 'json'
 NA = 'n/a'
 REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
-# Regexes
-ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
-FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
-FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
-QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 # Document subclass names (this sucks)
 DOCUMENT_CLASS = 'Document'
+DOJ_FILE_CLASS = 'DojFile'
 EMAIL_CLASS = 'Email'
 JSON_FILE_CLASS = 'JsonFile'
 MESSENGER_LOG_CLASS = 'MessengerLog'
 OTHER_FILE_CLASS = 'OtherFile'
+# Regexes
+DOJ_FILE_STEM_REGEX = re.compile(fr"{EFTA_PREFIX}\d{{8}}")
+DOJ_FILE_NAME_REGEX = re.compile(fr"{DOJ_FILE_STEM_REGEX.pattern}(\.txt)?")
+HOUSE_OVERSIGHT_NOV_2025_ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.pattern})")
+HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
+QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
-def indented(s: str, spaces: int = 4) -> str:
+def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
+    """Prepend `spaces` spaces followed by `prefix` to every newline-separated line of `s`."""
     indent = ' ' * spaces
+    indent += prefix
     return indent + f"\n{indent}".join(s.split('\n'))

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -12,6 +12,7 @@ from epstein_files.util.file_helper import coerce_file_stem
 # Style stuff
 ARCHIVE_LINK_COLOR = 'slate_blue3'
+ARCHIVE_ALT_LINK_STYLE = 'medium_purple4 italic'
 TEXT_LINK = 'text_link'
 # External site names
@@ -39,6 +40,9 @@ EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
 OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
 RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
 SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
+# DOJ docs
+DOJ_2026_URL = 'https://www.justice.gov/epstein/doj-disclosures'
+DOJ_SEARCH_URL = 'https://www.justice.gov/epstein/search'
 # Document source sites
 EPSTEINIFY_URL = 'https://epsteinify.com'
@@ -53,6 +57,9 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
     ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
 }
+# Example: https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf
+DOJ_2026_FILE_BASE_URL = "https://www.justice.gov/epstein/files/DataSet%20"
 epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
 epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
@@ -90,6 +97,16 @@ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower
     return f"{base_url}{file_stem}"
+def doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
+    """
+    Link to justice.gov for a DOJ file.
+    Builds `{DOJ_2026_FILE_BASE_URL}{dataset_id}/{file_stem}.pdf`, e.g. dataset 8 and
+    'EFTA00009802' give 'https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf'.
+    `file_stem` is inserted as given (no case change or escaping is applied here).
+    """
+    return f"{DOJ_2026_FILE_BASE_URL}{dataset_id}/{file_stem}.pdf"
+def jmail_doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
+    """
+    Link to Jmail backup of DOJ file.
+    `dataset_id` is zero-padded to 5 digits and `file_stem` is lowercased, e.g. dataset 8 and
+    'EFTA00009802' give a URL ending in '/drive/vol00008-efta00009802-pdf'.
+    """
+    return f"{JMAIL_URL}/drive/vol{dataset_id:05}-{file_stem.lower()}-pdf"
 def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
     url = build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id)
     return link_markup(url, coerce_file_stem(filename_or_id), style)

epstein-files 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

epstein-files 1.2.5py3-none-any.whl → 1.5.0py3-none-any.whl