epstein-files 1.2.1-py3-none-any.whl → 1.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +55 -11
- epstein_files/documents/document.py +13 -2
- epstein_files/documents/email.py +329 -258
- epstein_files/documents/emails/email_header.py +17 -8
- epstein_files/documents/other_file.py +8 -6
- epstein_files/epstein_files.py +18 -4
- epstein_files/person.py +65 -20
- epstein_files/util/constant/names.py +18 -12
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +4 -2
- epstein_files/util/constant/urls.py +13 -2
- epstein_files/util/constants.py +486 -224
- epstein_files/util/data.py +1 -0
- epstein_files/util/doc_cfg.py +33 -27
- epstein_files/util/env.py +18 -8
- epstein_files/util/file_helper.py +2 -0
- epstein_files/util/highlighted_group.py +321 -132
- epstein_files/util/output.py +19 -24
- epstein_files/util/rich.py +9 -3
- epstein_files/util/word_count.py +2 -2
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
- epstein_files-1.4.1.dist-info/RECORD +34 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.1.dist-info/RECORD +0 -34
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0
@@ -2,7 +2,7 @@ import json
 import re
 from dataclasses import asdict, dataclass, field
 
-from epstein_files.util.constant.strings import AUTHOR, REDACTED
+from epstein_files.util.constant.strings import AUTHOR, REDACTED, indented
 from epstein_files.util.constants import ALL_CONFIGS
 from epstein_files.util.doc_cfg import EmailCfg
 from epstein_files.util.logging import logger
@@ -13,7 +13,10 @@ ON_BEHALF_OF = 'on behalf of'
 TO_FIELDS = ['bcc', 'cc', 'to']
 EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
 
-
+FIELD_PATTERNS = ['Date', 'From', 'Sent', 'To', r"C[cC]", r"B[cC][cC]", 'Importance', 'Subject', 'Attachments', 'Classification', 'Flag', 'Reply-To']
+FIELDS_PATTERN = '|'.join(FIELD_PATTERNS)
+FIELDS_COLON_PATTERN = fr"^({FIELDS_PATTERN}):"
+HEADER_REGEX_STR = fr"(((?:(?:{FIELDS_PATTERN}|Bee):|on behalf of ?)(?! +(by |from my|via )).*\n){{3,}})"
 EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
 EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
 EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL)  # Match up to the next email header section
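The header-detection constants added above can be exercised on their own. Below is a minimal sketch (rebuilding the pattern locally rather than importing it from the package) showing how a block of three or more consecutive `Field:` lines would be picked up by the anchored form of the regex:

    import re

    # Rebuilt locally from the constants in the hunk above (not imported from epstein_files).
    FIELD_PATTERNS = ['Date', 'From', 'Sent', 'To', r"C[cC]", r"B[cC][cC]", 'Importance',
                      'Subject', 'Attachments', 'Classification', 'Flag', 'Reply-To']
    FIELDS_PATTERN = '|'.join(FIELD_PATTERNS)
    HEADER_REGEX_STR = fr"(((?:(?:{FIELDS_PATTERN}|Bee):|on behalf of ?)(?! +(by |from my|via )).*\n){{3,}})"

    sample = "From: REDACTED\nSent: Tuesday, August 11, 2015\nTo: Jeffrey E.\nSubject: Re: lunch\nbody text follows...\n"
    match = re.compile(rf"^{HEADER_REGEX_STR}").match(sample)
    print(match.group(1) if match else "no header found")  # prints the four header lines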
@@ -53,6 +56,7 @@ class EmailHeader:
     importance: str | None = None
     attachments: str | None = None
     to: list[str] | None = None
+    reply_to: str | None = None
 
     def __post_init__(self):
         self.num_header_rows = len(self.field_names)
@@ -95,13 +99,10 @@ class EmailHeader:
                 logger.info(f"{log_prefix}, trying next line...")
                 num_headers += 1
                 value = email_lines[i + num_headers]
-            elif BAD_EMAILER_REGEX.match(value):
+            elif BAD_EMAILER_REGEX.match(value) or value.startswith('http'):
                 logger.info(f"{log_prefix}, decrementing num_headers and skipping...")
                 num_headers -= 1
                 continue
-            elif value.startswith('http'):
-                logger.info(f"{log_prefix}, using empty string instead...")
-                value = ''
 
             value = [v.strip() for v in value.split(';') if len(v.strip()) > 0]
 
@@ -110,7 +111,12 @@ class EmailHeader:
         self.num_header_rows = len(self.field_names) + num_headers
         self.header_chars = '\n'.join(email_lines[0:self.num_header_rows])
         log_msg = f"Corrected empty header using {self.num_header_rows} lines to:\n"
-
+
+        logger.warning(
+            f"{log_msg}{self}\n\n[top lines]:\n\n%s\n\n[body_lines]:\n\n%s\n\n",
+            indented('\n'.join(email_lines[0:(num_headers + 1) * 2]), prefix='> '),
+            indented('\n'.join(email_lines[self.num_header_rows:self.num_header_rows + 5]), prefix='> '),
+        )
 
     def rewrite_header(self) -> str:
         header_fields = {}
@@ -151,7 +157,7 @@ class EmailHeader:
             #logger.debug(f"extracting header line: '{line}'")
             key, value = [element.strip() for element in line.split(':', 1)]
             value = value.rstrip('_')
-            key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower())
+            key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower().replace('-', '_'))
             key = 'bcc' if key == 'bee' else key
 
             if kw_args.get(key):
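The key normalization above now maps hyphenated field names like `Reply-To` to snake_case attribute names. A small standalone sketch of that single line, with `AUTHOR` assumed to be the string `'author'` (the diff does not show its value):

    AUTHOR = 'author'  # assumed value; the real constant lives in epstein_files.util.constant.strings

    def normalize_key(key: str) -> str:
        key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower().replace('-', '_'))
        return 'bcc' if key == 'bee' else key

    assert normalize_key('Reply-To') == 'reply_to'  # new: hyphen becomes underscore
    assert normalize_key('Bee') == 'bcc'            # OCR'd "Bee" still treated as Bcc
    assert normalize_key('Sent') == 'sent_at'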
@@ -161,6 +167,9 @@ class EmailHeader:
 
             field_names.append(key)
 
+            if key == 'reply_to':
+                logger.warning(f"Found value for Reply-To field: '{value}'")
+
             if key in TO_FIELDS:
                 recipients = [element.strip() for element in value.split(';')]
                 recipients = [r for r in recipients if len(r) > 0]
@@ -122,8 +122,8 @@ class OtherFile(Document):
 
         return Text(escape(self.preview_text()))
 
-    def is_interesting(self):
-        """False for lame prefixes, duplicates, and other boring files."""
+    def is_interesting(self) -> bool:
+        """Overloaded. False for lame prefixes, duplicates, and other boring files."""
         info_sentences = self.info()
 
         if self.is_duplicate():
@@ -164,8 +164,8 @@ class OtherFile(Document):
 
     def _extract_timestamp(self) -> datetime | None:
         """Return configured timestamp or value extracted by scanning text with datefinder."""
-        if self.config and self.config.timestamp:
-            return self.config.timestamp
+        if self.config and self.config.timestamp():
+            return self.config.timestamp()
         elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
             return None
 
@@ -210,9 +210,10 @@ class OtherFile(Document):
         self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
 
     @classmethod
-    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '', title: str = '') -> Table:
         """Build a table of OtherFile documents."""
-
+        title = title or f'{title_pfx}Other Files Details in Chronological Order'
+        table = build_table(title, show_lines=True, title_justify='left' if title else 'center')
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='right', style='dim')
@@ -244,6 +245,7 @@ class OtherFile(Document):
 
     @classmethod
     def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        """Table showing file count by category."""
         categories = uniquify([f.category() for f in files])
         categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
         table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
epstein_files/epstein_files.py
CHANGED
@@ -84,6 +84,7 @@ class EpsteinFiles:
         self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
         self._set_uninteresting_ccs()
         self._copy_duplicate_email_properties()
+        self._find_email_attachments_and_set_is_first_for_user()
 
     @classmethod
     def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
@@ -123,6 +124,9 @@ class EpsteinFiles:
 
             lines = doc.matching_lines(pattern)
 
+            if args.min_line_length:
+                lines = [line for line in lines if len(line.line) > args.min_line_length]
+
             if len(lines) > 0:
                 results.append(SearchResult(doc, lines))
 
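The new `min_line_length` option drops short matches before a `SearchResult` is built. A rough sketch of that filtering step, using a hypothetical stand-in for the matched-line objects (the diff only shows that they expose a `.line` attribute):

    from dataclasses import dataclass

    @dataclass
    class MatchedLine:          # hypothetical stand-in for the real matched-line type
        line: str

    lines = [MatchedLine('ok'), MatchedLine('a much longer matching line')]
    min_line_length = 10        # would come from args.min_line_length
    lines = [line for line in lines if len(line.line) > min_line_length]
    print([l.line for l in lines])  # ['a much longer matching line']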
@@ -251,7 +255,7 @@ class EpsteinFiles:
                 name=name,
                 emails=self.emails_for(name),
                 imessage_logs=self.imessage_logs_for(name),
-
+                is_uninteresting=name in self.uninteresting_emailers(),
                 other_files=[f for f in self.other_files if name and name == f.author]
             )
             for name in names
@@ -276,6 +280,17 @@ class EpsteinFiles:
 
         return self._uninteresting_emailers
 
+    def _find_email_attachments_and_set_is_first_for_user(self) -> None:
+        for file in self.other_files:
+            if file.config and file.config.attached_to_email_id:
+                email = self.email_for_id(file.config.attached_to_email_id)
+                file.warn(f"Attaching to {email}")
+                email.attached_docs.append(file)
+
+        for emailer in self.emailers():
+            first_email = emailer.emails[0]
+            first_email._is_first_for_user = True
+
     def _copy_duplicate_email_properties(self) -> None:
         """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
         for email in self.emails:
@@ -297,7 +312,7 @@ class EpsteinFiles:
 
     def _set_uninteresting_ccs(self) -> None:
         for id in EMAILS_WITH_UNINTERESTING_BCCS:
-            self.uninteresting_ccs +=
+            self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
 
         for id in EMAILS_WITH_UNINTERESTING_CCS:
             self.uninteresting_ccs += self.email_for_id(id).recipients
@@ -334,5 +349,4 @@ def document_cls(doc: Document) -> Type[Document]:
 
 
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
-
-    return [json_safe(d.metadata()) for d in docs_sorted_by_id]
+    return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]
epstein_files/person.py
CHANGED
@@ -9,13 +9,13 @@ from rich.table import Table
 from rich.text import Text
 
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
+from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, flatten, without_falsey
+from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
     get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
@@ -42,7 +42,7 @@ class Person:
     emails: list[Email] = field(default_factory=list)
     imessage_logs: list[MessengerLog] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
-
+    is_uninteresting: bool = False
 
     def __post_init__(self):
         self.emails = Document.sort_by_timestamp(self.emails)
@@ -62,7 +62,7 @@ class Person:
             return None
         elif self.category():
             return styled_category(self.category())
-        elif self.is_a_mystery() or self.
+        elif self.is_a_mystery() or self.is_uninteresting:
             return QUESTION_MARKS_TXT
 
     def email_conversation_length_in_days(self) -> int:
@@ -120,7 +120,7 @@ class Person:
         else:
             email_count = len(self.unique_emails())
             num_days = self.email_conversation_length_in_days()
-            title_suffix = f"
+            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
 
         title = f"Found {email_count} emails {title_suffix}"
         width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -136,8 +136,12 @@ class Person:
         highlight_group = self.highlight_group()
 
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
-
-
+            info = highlight_group.info_for(self.name)
+
+            if info:
+                return info
+
+        if self.is_uninteresting and len(self.emails_by()) == 0:
             if self.has_any_epstein_emails():
                 return UNINTERESTING_CC_INFO
             else:
@@ -152,9 +156,11 @@ class Person:
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
-            return Text(f"({JUNK} mail)", style='
-        elif self.
-            if self.
+            return Text(f"({JUNK} mail)", style='bright_black dim')
+        elif self.is_uninteresting and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
+            if self.sole_cc():
+                return Text(f"(cc: from {self.sole_cc()} only)", style='wheat4 dim')
+            elif self.info_str() == UNINTERESTING_CC_INFO:
                 return Text(f"({self.info_str()})", style='wheat4 dim')
             else:
                 return Text(f"({self.info_str()})", style='plum4 dim')
@@ -168,11 +174,30 @@ class Person:
             else:
                 return None
         else:
-            return Text(self.info_str())
+            return Text(self.info_str(), style=self.style(allow_bold=False))
+
+    def internal_link(self) -> Text:
+        """Kind of like an anchor link to the section of the page containing these emails."""
+        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
 
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theroetically could know more about."""
-        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.
+        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting)
+
+    def sole_cc(self) -> str | None:
+        """Return name if this person sent 0 emails and received CC from only one that name."""
+        email_authors = uniquify([e.author for e in self.emails_to()])
+
+        if len(self.unique_emails()) == 1 and len(email_authors) > 0:
+            logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
+        else:
+            logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
+
+        if len(self.unique_emails_by()) > 0:
+            return None
+
+        if len(email_authors) == 1:
+            return email_authors[0]
 
     def is_email_address(self) -> bool:
         return '@' in (self.name or '')
@@ -188,6 +213,10 @@ class Person:
 
         return True
 
+    def should_always_truncate(self) -> bool:
+        """True if we want to truncate all emails to/from this user."""
+        return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
+
     def is_unstyled(self) -> bool:
         """True if there's no highlight group for this name."""
         return self.style() == DEFAULT_NAME_STYLE
@@ -237,7 +266,13 @@ class Person:
         console.line()
 
     def sort_key(self) -> list[int | str]:
-        counts = [
+        counts = [
+            len(self.unique_emails()),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails()),
+        ]
+
         counts = [-1 * count for count in counts]
 
         if args.sort_alphabetical:
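The expanded `sort_key()` still relies on the negate-the-counts trick: every numeric component is multiplied by -1 so larger counts sort first, while the trailing name string keeps ties in alphabetical order. A toy illustration of that idea:

    # Toy data; the real counts come from unique_emails(), info_str(), etc.
    people = [('Larry', 3), ('Ghislaine', 12), ('Alan', 12)]
    sort_key = lambda person: [-person[1], person[0]]   # negated count, then name

    print(sorted(people, key=sort_key))
    # [('Alan', 12), ('Ghislaine', 12), ('Larry', 3)]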
@@ -245,8 +280,8 @@ class Person:
         else:
             return counts + [self.name_str()]
 
-    def style(self) -> str:
-        return get_style_for_name(self.name)
+    def style(self, allow_bold: bool = True) -> str:
+        return get_style_for_name(self.name, allow_bold=allow_bold)
 
     def unique_emails(self) -> Sequence[Email]:
         return Document.without_dupes(self.emails)
@@ -276,6 +311,11 @@ class Person:
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
         is_selection = len(people) != len(highlighted) or args.emailers_info
+        all_emails = Person.emails_from_people(people)
+        email_authors = [p for p in people if p.emails_by() and p.name]
+        attributed_emails = [email for email in all_emails if email.author]
+        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
+                 f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
 
         if is_selection:
             title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
@@ -283,7 +323,7 @@ class Person:
         else:
             title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
 
-        table = build_table(title)
+        table = build_table(title, caption=footer)
         table.add_column('First')
         table.add_column('Name', max_width=24, no_wrap=True)
         table.add_column('Category', justify='left', style='dim italic')
@@ -298,6 +338,7 @@ class Person:
 
         for person in people:
             earliest_email_date = person.earliest_email_date()
+            is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
 
             # Color year rollovers more brightly
@@ -311,14 +352,18 @@ class Person:
 
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.
+                person.internal_link() if is_on_page and not person.is_uninteresting else person.name_txt(),
                 person.category_txt(),
                 f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
-
-
+                str(len(person.unique_emails_by())) if len(person.unique_emails_by()) > 0 else '',
+                str(len(person.unique_emails_to())) if len(person.unique_emails_to()) > 0 else '',
                 f"{person.email_conversation_length_in_days()}",
                 person.info_txt() or '',
-                style='' if
+                style='' if show_epstein_total or is_on_page else 'dim',
             )
 
         return table
+
+    @staticmethod
+    def emails_from_people(people: list['Person']) -> Sequence[Email]:
+        return Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
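The new `emails_from_people()` helper de-duplicates the union of every person's emails. `flatten()` and `Document.uniquify()` are not shown in this diff; the sketch below assumes they behave like the generic order-preserving versions:

    from itertools import chain

    def flatten(lists):
        return list(chain.from_iterable(lists))

    def uniquify(items):
        return list(dict.fromkeys(items))   # keeps first occurrence, preserves order

    per_person_emails = [['email_1', 'email_2'], ['email_2', 'email_3']]
    print(uniquify(flatten(per_person_emails)))   # ['email_1', 'email_2', 'email_3']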
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
 DONALD_TRUMP = 'Donald Trump'
 EDUARDO_ROBLES = 'Eduardo Robles'
 EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
+EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
 EHUD_BARAK = 'Ehud Barak'
 ERIC_ROTH = 'Eric Roth'
 FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
 NADIA_MARCINKO = 'Nadia Marcinko'
 NEAL_KASSELL = 'Neal Kassell'
 NICHOLAS_RIBIS = 'Nicholas Ribis'
+NILI_PRIELL_BARAK = 'Nili Priell Barak'
 NOAM_CHOMSKY = 'Noam Chomsky'
 NORMAN_D_RAU = 'Norman D. Rau'
 OLIVIER_COLOM = 'Olivier Colom'
@@ -215,24 +217,24 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
 al alain alan alfredo allen alex alexander amanda andres andrew anthony
-bard barrett barry bennet bill black bob boris brad bruce
-caroline carolyn chris christina cohen
-dan daniel danny darren dave david donald
+bard barrett barry bennet bernard bill black bob boris brad brenner bruce
+cameron caroline carolyn chris christina cohen
+dan daniel danny darren dave david debbie donald
 ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
-faith forget fred friendly frost fuller
-gerald george gold gordon
-haddad harry hay heather henry hill hoffman
+faith fisher forget fred friendly frost fuller
+gates gerald george gold gordon
+haddad hanson harry hay heather henry hill hoffman howard
 ian ivan
 jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
-kahn karl kate katherine kelly ken kevin krassner
-larry laurie lawrence leon lesley linda link lisa
+kafka kahn karl kate katherine kelly ken kevin krassner
+larry larsen laurie lawrence leon lesley linda link lisa
 mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
 nancy neal new nicole norman
 owen
 paul paula pen peter philip prince
-randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
-scott sean skip stanley stern stephen steve steven stone susan
-the thomas tim tom tony tyler
+randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
+scott sean skip smith stanley stern stephen steve steven stone susan
+terry the thomas tim tom tony tyler
 victor
 wade waters
 y
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
 baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
 chapman charles charlie christopher clint cohen colin collins conway
-davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
 edmond elizabeth emily entwistle erik evelyn
 ferguson flachsbart francis franco frank
 gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -267,6 +269,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 def constantize_name(name: str) -> str:
     if name == 'Andrzej Duda or entourage':
         return 'ANDRZEJ_DUDA'
+    elif name == MIROSLAV_LAJCAK:
+        return 'MIROSLAV_LAJCAK'
+    elif name == 'Paula Heil Fisher (???)':
+        return 'PAULA'
 
     variable_name = remove_question_marks(name)
     variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
+from epstein_files.util.logging import logger
 
 # Files output by the code
 HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
 
 # Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh
+# NOTE: don't rename these variables without changing deploy.sh
+GH_REPO_NAME = 'epstein_text_messages'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
 CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
 def make_clean() -> None:
     """Delete all build artifacts."""
     for build_file in BUILD_ARTIFACTS:
-
-
-
+        for file in [build_file, Path(f"{build_file}.txt")]:
+            if file.exists():
+                logger.warning(f"Removing build file '{file}'...")
+                file.unlink()
@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
 
 # Regexes
-
+ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
 FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 
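With the new `ID_REGEX`, document IDs (six digits plus an optional `_N` suffix) can be matched on their own as well as inside file stems. A standalone sketch; `HOUSE_OVERSIGHT_PREFIX` is not shown in this diff, so the value below is only an assumption for illustration:

    import re

    HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'   # assumed value, not confirmed by the diff
    ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
    FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
    FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")

    print(bool(ID_REGEX.fullmatch('012345_2')))                                # True
    print(bool(FILE_NAME_REGEX.fullmatch('HOUSE_OVERSIGHT_012345_2.txt')))     # True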
@@ -79,6 +80,7 @@ OTHER_FILE_CLASS = 'OtherFile'
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
 
 
-def indented(s: str, spaces: int = 4) -> str:
+def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
     indent = ' ' * spaces
+    indent += prefix
     return indent + f"\n{indent}".join(s.split('\n'))
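The `prefix` argument added to `indented()` is what the email-header logging earlier in this diff uses for its `'> '` quote style. Usage sketch (the function body is copied verbatim from the hunk above):

    def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
        indent = ' ' * spaces
        indent += prefix
        return indent + f"\n{indent}".join(s.split('\n'))

    print(indented("From: X\nTo: Y", spaces=2, prefix='> '))
    #   > From: X
    #   > To: Y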
@@ -6,6 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.output_files import *
+from epstein_files.util.constant.strings import remove_question_marks
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_stem
 
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
 ROLLCALL = 'RollCall'
 TWITTER = 'search X'
 
-GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/
+GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
 EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+TO_FROM = 'to/from'
 
 extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
 
-
 PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
     EPSTEIN_MEDIA: epstein_media_person_url,
     EPSTEIN_WEB: epstein_web_person_url,
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
     return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
 
 
+def internal_link_to_emails(name: str) -> str:
+    """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
+    search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
+    return f"{this_site_url()}#:~:text={search_term}"
+
+
 def link_markup(
     url: str,
     link_text: str | None = None,
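The new `internal_link_to_emails()` builds a text-fragment anchor (`#:~:text=...`) to a person's section of the rendered page. A self-contained sketch with a hard-coded base URL taken from the docstring; it skips the question-mark stripping, and `safe=''` is used here so the `/` in `to/from` is percent-encoded as in that docstring example (the package's own call may rely on a different quoting default):

    import urllib.parse

    TO_FROM = 'to/from'
    BASE_URL = 'https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html'

    def internal_link_to_emails(name: str) -> str:
        search_term = urllib.parse.quote(f"{TO_FROM} {name}", safe='')
        return f"{BASE_URL}#:~:text={search_term}"

    print(internal_link_to_emails('Jack Goldberger'))
    # ...all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger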
@@ -121,6 +128,10 @@ def other_site_url() -> str:
     return SITE_URLS[other_site_type()]
 
 
+def this_site_url() -> str:
+    return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
+
+
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
 THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
 THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)