PyPI - epstein-files - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

epstein_files/__init__.py +10 -14
epstein_files/documents/communication.py +10 -14
epstein_files/documents/document.py +1 -1
epstein_files/documents/email.py +152 -66
epstein_files/documents/imessage/text_message.py +42 -25
epstein_files/documents/messenger_log.py +31 -12
epstein_files/documents/other_file.py +13 -12
epstein_files/epstein_files.py +18 -79
epstein_files/util/constant/common_words.py +3 -3
epstein_files/util/constant/html.py +4 -5
epstein_files/util/constant/names.py +9 -6
epstein_files/util/constant/strings.py +6 -2
epstein_files/util/constant/urls.py +1 -1
epstein_files/util/constants.py +18 -22
epstein_files/util/env.py +45 -36
epstein_files/util/file_helper.py +1 -2
epstein_files/util/highlighted_group.py +1005 -187
epstein_files/util/logging.py +8 -1
epstein_files/util/output.py +147 -60
epstein_files/util/rich.py +33 -67
epstein_files/util/timer.py +1 -1
epstein_files/util/word_count.py +3 -4
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/METADATA +1 -1
epstein_files-1.1.2.dist-info/RECORD +33 -0
epstein_files-1.1.0.dist-info/RECORD +0 -33
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/LICENSE +0 -0
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/WHEEL +0 -0
{epstein_files-1.1.0.dist-info → epstein_files-1.1.2.dist-info}/entry_points.txt +0 -0

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -2,7 +2,7 @@ import logging
 import re
 from collections import defaultdict
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timedelta
 from rich.console import Console, ConsoleOptions, RenderResult
 from rich.table import Table
@@ -36,10 +36,10 @@ class MessengerLog(Communication):
         self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
     def first_message_at(self, name: str | None) -> datetime:
-        return self.messages_by(name)[0].timestamp()
+        return self.messages_by(name)[0].parse_timestamp()
     def info_txt(self) -> Text | None:
-        num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
+        num_days_str = days_between_str(self.timestamp, self.messages[-1].parse_timestamp())
         txt = Text(f"(Covers {num_days_str} starting ", style='dim')
         txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
@@ -47,7 +47,7 @@ class MessengerLog(Communication):
             txt.append('with unknown counterparty')
         else:
             txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
-            txt.append(Text(self.author, style=self.author_style + ' bold'))
+            txt.append(Text(self.author, style=self.author_style() + ' bold'))
         if self.phone_number:
             txt.append(highlighter(f" using the phone number {self.phone_number}"))
@@ -55,7 +55,7 @@ class MessengerLog(Communication):
         return txt.append(')')
     def last_message_at(self, name: str | None) -> datetime:
-        return self.messages_by(name)[-1].timestamp()
+        return self.messages_by(name)[-1].parse_timestamp()
     def messages_by(self, name: str | None) -> list[TextMessage]:
         """Return all messages by 'name'."""
@@ -71,7 +71,7 @@ class MessengerLog(Communication):
         return metadata
     def _border_style(self) -> str:
-        return self.author_style
+        return self.author_style()
     def _build_message(self, match: re.Match) -> TextMessage:
         """Turn a regex match into a TextMessage."""
@@ -86,7 +86,7 @@ class MessengerLog(Communication):
         return TextMessage(
             author=self.author if (is_phone_number or not author_str) else author_str,
             author_str=author_str if is_phone_number else '',  # Preserve phone numbers
-            id_confirmed=not self.is_attribution_uncertain(),
+            is_id_confirmed=not self.is_attribution_uncertain(),
             text=match.group(4).strip(),
             timestamp_str=match.group(2).strip(),
         )
@@ -96,12 +96,31 @@ class MessengerLog(Communication):
             message = self._build_message(match)
             try:
-                return message.timestamp()
+                return message.parse_timestamp()
             except ValueError as e:
                 logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
         raise RuntimeError(f"{self}: No timestamp found!")
+    def _set_message_timestamps(self) -> None:
+        raise NotImplementedError(f"TextMessage.timestamp no longer exists")
+        last_message: TextMessage | None = None
+        for i, message in enumerate(self.messages):
+            try:
+                message.timestamp = message.parse_timestamp()
+            except Exception as e:
+                msg = f"Failed to parse timestamp for TextMessage {i + 1}, {message}: {e}"
+                if i == 0:
+                    message.timestamp = self.timestamp
+                    self.warn(f"{msg}\nit's the first message so using the MessengerLog timestamp property {self.timestamp}")
+                else:
+                    message.timestamp = last_message.timestamp + timedelta(milliseconds=1)
+                    self.warn(f"{msg}\nadding 1 millisecond to last timestamp {last_message.timestamp}")
+            last_message = message
     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         yield self.file_info_panel()
         yield Text('')
@@ -126,13 +145,13 @@ class MessengerLog(Communication):
         author_counts = cls.count_authors(log_files)
         msg_count = sum([len(log.messages) for log in log_files])
-        footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
-        counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
+        footer = f"deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
+        counts_table = build_table("Text Message Counts By Author", caption=f"({footer} {len(log_files)} files)")
         counts_table.add_column(AUTHOR.title(), justify='left', width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
-        counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
+        counts_table.add_column('First Sent At', justify='center', highlight=True)
+        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE)
         counts_table.add_column('Days', justify='right', style='dim')
         for name, count in sort_dict(author_counts):

epstein_files/documents/other_file.py CHANGED Viewed

@@ -21,8 +21,8 @@ from epstein_files.util.doc_cfg import DocCfg, Metadata
 from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
-from epstein_files.util.highlighted_group import styled_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
+from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
+from epstein_files.util.rich import build_table, highlighter
 from epstein_files.util.logging import logger
 FIRST_FEW_LINES = 'First Few Lines'
@@ -105,7 +105,7 @@ class OtherFile(Document):
         return self.config and self.config.category
     def category_txt(self) -> Text | None:
-        return styled_category(self.category() or UNKNOWN)
+        return styled_category(self.category())
     def config_description(self) -> str | None:
         """Overloads superclass method."""
@@ -184,7 +184,7 @@ class OtherFile(Document):
                     if len(timestamps) >= MAX_EXTRACTED_TIMESTAMPS:
                         break
             except ValueError as e:
-                self.log(f"Error while iterating through datefinder.find_dates(): {e}", logging.WARNING)
+                self.warn(f"Error while iterating through datefinder.find_dates(): {e}")
         if len(timestamps) == 0:
             if not (self.is_duplicate() or VAST_HOUSE in self.text):
@@ -210,7 +210,7 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
     @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
+    def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         counts = defaultdict(int)
         category_bytes = defaultdict(int)
@@ -221,7 +221,8 @@ class OtherFile(Document):
             counts[file.category()] += 1
             category_bytes[file.category()] += file.file_size()
-        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[-1].justify = 'right'
         table.columns[0].min_width = 14
         table.columns[-1].style = 'dim'
@@ -230,7 +231,7 @@ class OtherFile(Document):
             known_author_count = Document.known_author_count(category_files)
             table.add_row(
-                styled_category(category or UNKNOWN),
+                styled_category(category),
                 str(count),
                 str(known_author_count),
                 str(count - known_author_count),
@@ -240,13 +241,13 @@ class OtherFile(Document):
         return table
     @staticmethod
-    def files_preview_table(files: Sequence['OtherFile']) -> Table:
+    def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table('Other Files Details', show_lines=True)
+        table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
-        table.add_column('Size', justify='center')
-        table.add_column('Type', justify='center')
+        table.add_column('Size', justify='right', style='dim')
+        table.add_column('Category', justify='center')
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
         for file in files:
@@ -263,7 +264,7 @@ class OtherFile(Document):
             table.add_row(
                 Group(*link_and_info),
-                Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
+                Text(date_str, style=TIMESTAMP_STYLE) if date_str else QUESTION_MARKS_TXT,
                 file.file_size_str(),
                 file.category_txt(),
                 preview_text,

epstein_files/epstein_files.py CHANGED Viewed

@@ -8,45 +8,35 @@ from datetime import datetime
 from pathlib import Path
 from typing import Sequence, Type
-from rich.align import Align
 from rich.padding import Padding
 from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import DETECT_EMAIL_REGEX, JUNK_EMAILERS, KRASSNER_RECIPIENTS, USELESS_EMAILERS, Email
+from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
 from epstein_files.documents.emails.email_header import AUTHOR
 from epstein_files.documents.json_file import JsonFile
 from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
-from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
-     epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
+from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
-     print_other_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_panel, print_centered,
-     print_panel, print_section_header, vertically_pad)
+from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
+     print_author_panel, print_centered, print_subtitle_panel)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
-EXCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
-PICKLED_PATH = Path("the_epstein_files.pkl.gz")
+EXCLUDED_EMAILERS = USELESS_EMAILERS + [JEFFREY_EPSTEIN]
+DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
 DEVICE_SIGNATURE = 'Device Signature'
 DEVICE_SIGNATURE_PADDING = (1, 0)
+PICKLED_PATH = Path("the_epstein_files.pkl.gz")
 SLOW_FILE_SECONDS = 1.0
-INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
-    'ACT for America',
-    'BS Stern',
-    INTELLIGENCE_SQUARED,
-    UNKNOWN,
-]
 @dataclass
 class EpsteinFiles:
@@ -128,7 +118,7 @@ class EpsteinFiles:
     def all_emailers(self, include_useless: bool = False) -> list[str | None]:
         """Returns all emailers except Epstein and EXCLUDED_EMAILERS, sorted from least frequent to most."""
         names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
-        names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
+        names = names if include_useless else [e for e in names if e not in EXCLUDED_EMAILERS]
         return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
     def docs_matching(
@@ -177,7 +167,7 @@ class EpsteinFiles:
     def emails_for(self, author: str | None) -> list[Email]:
         """Returns emails to or from a given 'author' sorted chronologically."""
-        emails = self.emails if author == EVERYONE else (self.emails_by(author) + self.emails_to(author))
+        emails = self.emails_by(author) + self.emails_to(author)
         if len(emails) == 0:
             raise RuntimeError(f"No emails found for '{author}'")
@@ -230,6 +220,7 @@ class EpsteinFiles:
     def print_files_summary(self) -> None:
         table = build_table('Summary of Document Types')
         add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
+        table.columns[1].justify = 'right'
         def add_row(label: str, docs: list):
             known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
@@ -246,18 +237,19 @@ class EpsteinFiles:
         add_row('iMessage Logs', self.imessage_logs)
         add_row('JSON Data', self.json_files)
         add_row('Other', self.non_json_other_files())
-        console.print(Align.center(table))
+        print_centered(table)
         console.line()
     def print_emails_for(self, _author: str | None) -> list[Email]:
         """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
-        conversation_length = self.email_conversation_length_in_days(_author)
         emails = self.emails_for(_author)
+        num_days = self.email_conversation_length_in_days(_author)
         unique_emails = [email for email in emails if not email.is_duplicate()]
+        start_date = emails[0].timestamp.date()
         author = _author or UNKNOWN
         print_author_panel(
-            f"Found {len(unique_emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
+            f"Found {len(unique_emails)} emails to/from {author} starting {start_date} covering {num_days:,} days",
             get_style_for_name(author),
             get_info_for_name(author)
         )
@@ -280,54 +272,13 @@ class EpsteinFiles:
     def print_emails_table_for(self, author: str | None) -> None:
         emails = [email for email in self.emails_for(author) if not email.is_duplicate()]  # Remove dupes
-        console.print(Align.center(Email.build_table(emails, author)), '\n')
+        print_centered(Email.build_emails_table(emails, author))
+        console.line()
     def print_email_device_info(self) -> None:
-        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
-        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
+        print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE, padding=(2, 0, 0, 0), centered=True)
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
-    def table_of_emailers(self) -> Table:
-        attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
-        footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
-        counts_table = build_table("Email Counts", caption=footer)
-        add_cols_to_table(counts_table, [
-            'Name',
-            'Num',
-            'Sent',
-            "Recv",
-            {'name': 'First', 'highlight': True},
-            {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
-            JMAIL,
-            'eMedia',
-            'eWeb',
-            'Twitter',
-        ])
-        emailer_counts = {
-            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
-            for emailer in self.all_emailers(True)
-        }
-        for name, count in sort_dict(emailer_counts):
-            style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
-            emails = self.emails_for(name)
-            counts_table.add_row(
-                Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
-                str(count),
-                str(self.email_author_counts[name]),
-                str(self.email_recipient_counts[name]),
-                emails[0].timestamp_without_seconds(),
-                emails[-1].timestamp_without_seconds(),
-                link_text_obj(search_jmail_url(name), JMAIL) if name else '',
-                link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
-                link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
-                link_text_obj(search_twitter_url(name), 'search X') if name else '',
-            )
-        return counts_table
+        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
     def _tally_email_data(self) -> None:
         """Tally up summary info about Email objects."""
@@ -373,18 +324,6 @@ def document_cls(doc: Document) -> Type[Document]:
         return OtherFile
-def is_ok_for_epstein_web(name: str | None) -> bool:
-    """Return True if it's likely that EpsteinWeb has a page for this name."""
-    if name is None or ' ' not in name:
-        return False
-    elif '@' in name or '/' in name or '??' in name:
-        return False
-    elif name in INVALID_FOR_EPSTEIN_WEB:
-        return False
-    return True
 def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
     title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
     table = build_table(title, header_style="bold reverse", show_lines=True)

epstein_files/util/constant/common_words.py CHANGED Viewed

@@ -89,6 +89,6 @@ UNSINGULARIZABLE_WORDS = """
 """.strip().split()
-if args.deep_debug:
-    word_str = '\n'.join(COMMON_WORDS_LIST)
-    print(f"common words:\n\n{word_str}")
+# if args.deep_debug:
+#     word_str = '\n'.join(COMMON_WORDS_LIST)
+#     print(f"common words:\n\n{word_str}")

epstein_files/util/constant/html.py CHANGED Viewed

@@ -13,7 +13,8 @@ else:
     page_type = 'Text Messages'
-CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
+CONSOLE_HTML_FORMAT = """
+<!DOCTYPE html>
 <html>
 <head>
     <meta charset="UTF-8">
@@ -22,17 +23,15 @@ CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
     <style>
         {stylesheet}
         body {{
-            color: {foreground};
             background-color: {background};
+            color: {foreground};
         }}
     </style>
 """ + f"<title>Epstein {page_type}</title>" + """
 </head>
 <body>
     <pre style="font-family: Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace; white-space: pre-wrap; overflow-wrap: break-word;">
-        <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">
-            {code}
-        </code>
+        <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">{code}</code>
     </pre>
 </body>
 </html>

epstein_files/util/constant/names.py CHANGED Viewed

@@ -166,6 +166,7 @@ TOM_BARRACK = 'Tom Barrack'
 TOM_PRITZKER = 'Tom Pritzker'
 TONJA_HADDAD_COLEMAN = 'Tonja Haddad Coleman'
 TYLER_SHEARS = 'Tyler Shears'  # Reputation manager, like Al Seckel
+VINCENZO_IOZZO = 'Vincenzo Iozzo'
 VINIT_SAHNI = 'Vinit Sahni'
 ZUBAIR_KHAN = 'Zubair Khan'
@@ -197,9 +198,11 @@ GOLDMAN_SACHS = 'Goldman Sachs'
 GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
 HARVARD = 'Harvard'
 INSIGHTS_POD = f"InsightsPod"  # Zubair bots
+MIT_MEDIA_LAB = 'MIT Media Lab'
 NEXT_MANAGEMENT = 'Next Management LLC'
 JP_MORGAN = 'JP Morgan'
 OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP"  # Ian Osborne's PR firm
+ROTHSTEIN_ROSENFELDT_ADLER = 'Rothstein Rosenfeldt Adler (Rothstein was a crook & partner of Roger Stone)'
 TRUMP_ORG = 'Trump Organization'
 UBS = 'UBS'
@@ -231,12 +234,12 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
 # Names to color white in the word counts
 OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
-    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
+    baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
     danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
-    edmond elizabeth emily entwistle erik evelyn
+    edmond elizabeth emily enterprises entwistle erik evelyn
     ferguson flachsbart francis franco frank frost
-    gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
+    gardner gary geoff geoffrey gerald gilbert gloria gold goldberg gonzalez gould graham greene guarino gwyneth
     hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
     ian isaac isaacson
     james jamie jane janet jason jeffrey jen jim joe johnson jones josh julie justin
@@ -245,12 +248,12 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     marcus marianne matt matthew melissa michele michelle moore moscowitz
     nancy nicole nussbaum
     owen
-    paulson philippe
-    rafael ray richard richardson rob robin ron rubin rudolph ryan
+    paulson peter philippe
+    rafael ray richard richardson rob robert robin ron rubin rudolph ryan
     sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
     ted theresa thompson tiffany timothy tony
     valeria
-    walter warren weinstein weiss william
+    walter warren waters weinstein weiss william
     zach zack
 """.strip().split()

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -11,7 +11,7 @@ BUSINESS = 'business'
 CONFERENCE = 'conference'
 ENTERTAINER = 'entertainer'
 FINANCE = 'finance'
-FLIGHT_LOGS = 'flight logs'
+FLIGHT_LOG = 'flight log'
 JOURNALIST = 'journalist'
 JUNK = 'junk'
 LEGAL = 'legal'
@@ -56,7 +56,6 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 # Misc
 AUTHOR = 'author'
 DEFAULT = 'default'
-EVERYONE = 'everyone'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
 JSON = 'json'
 NA = 'n/a'
@@ -77,3 +76,8 @@ OTHER_FILE_CLASS = 'OtherFile'
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name)
+def indented(s: str, spaces: int = 4) -> str:
+    indent = ' ' * spaces
+    return indent + f"\n{indent}".join(s.split('\n'))

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -73,7 +73,7 @@ search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(tx
 def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
     file_stem = coerce_file_stem(filename_or_id)
-    file_stem = file_stem.lower() if case == 'lower' else file_stem
+    file_stem = file_stem.lower() if case == 'lower' or EPSTEIN_MEDIA in base_url else file_stem
     file_stem = file_stem.title() if case == 'title' else file_stem
     return f"{base_url}{file_stem}"

epstein_files/util/constants.py CHANGED Viewed

@@ -25,8 +25,8 @@ HEADER_ABBREVIATIONS = {
     'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
     'JEGE': "Epstein's airplane holding company",
     'Jeffrey Wernick': 'right wing crypto bro, former COO of Parler',
-    'Joi': 'Joi Ito (MIT Media Lab, MIT Digital Currency Initiative)',
-    "Hoffenberg": "Steven Hoffenberg (Epstein's ponzi scheme partner)",
+    'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
+    "Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
     'KSA': "Kingdom of Saudi Arabia",
     'Kurz': 'Sebastian Kurz (former Austrian Chancellor)',
     'Kwok': "Chinese criminal Miles Kwok AKA Miles Guo AKA Guo Wengui",
@@ -91,17 +91,17 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
     JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
     KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
-    KEN_STARR: re.compile(r'starr, ken|Ken(neth W.)?\s+starr?|starr', re.IGNORECASE),
+    KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
     LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
     LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|Ihsofficel', re.IGNORECASE),
     LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
-    LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|[jl]awkrauss|kruase', re.IGNORECASE),
-    LEON_BLACK: re.compile(r'Leon Black?', re.IGNORECASE),
+    LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
+    LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
+    LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
+    LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
     MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
     MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
     MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
-    LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
-    LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
     MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
     MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
     MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -128,10 +128,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
     REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
     RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
-    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
+    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
     ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
     ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
-    ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
+    ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
     SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
     SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
     SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
@@ -145,7 +145,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
     THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
     THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
-    TONJA_HADDAD_COLEMAN: re.compile(fr"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE)
+    TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
+    VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
 }
 # If found as substring consider them the author
@@ -194,7 +195,6 @@ EMAILERS = [
     'Steven Victor MD',
     'Susan Edelman',
     TOM_BARRACK,
-    'Vincenzo Lozzo',
     'Vladimir Yudashkin',
 ]
@@ -387,6 +387,7 @@ EMAILS_CONFIG = [
     EmailCfg(
         id='023208',
         author=JEFFREY_EPSTEIN,
+        description=f"very long email chain about Leon Black's finances and things like Gratitude America",
         fwded_text_after='Date: Tue, Oct 27',
         recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
         duplicate_ids=['023291'],
@@ -499,7 +500,7 @@ EMAILS_CONFIG = [
         author=STEVEN_HOFFENBERG,
         recipients=["Players2"],
         timestamp=parse('2016-08-11 09:36:01'),
-        attribution_reason='Actually a fwd by Charles Michael but Hoffenberg email more interesting',
+        attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
     ),
     EmailCfg(
         id='026620',
@@ -859,7 +860,6 @@ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
 WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
 ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
 OTHER_FILES_BOOKS = [
     DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
     DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -1139,11 +1139,7 @@ OTHER_FILES_LEGAL = [
     DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
     DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
     DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
-    DocCfg(
-        id='031447',
-        author=MARTIN_WEINBERG,
-        description=f"letter from to Melanie Ann Pustay and Sean O'Neill re: an Epstein FOIA request"
-    ),
+    DocCfg(id='031447', author=MARTIN_WEINBERG, description=f"letter from to Melanie Ann Pustay & Sean O'Neill re: Epstein FOIA request"),
     DocCfg(
         id='028965',
         author=MARTIN_WEINBERG,
@@ -1223,7 +1219,7 @@ OTHER_FILES_CONFERENCES = [
 OTHER_FILES_FINANCE = [
     DocCfg(id='024631', author='Ackrell Capital', description=f"Cannabis Investment Report 2018", is_interesting=True),
     DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
-    DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
+    DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump's effect on MXN", date='2016-09-22'),
     DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
     DocCfg(id='014404', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
     DocCfg(id='014410', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
@@ -1515,8 +1511,8 @@ OTHER_FILES_ARTS = [
 ]
 OTHER_FILES_MISC = [
-    DocCfg(id='022780', category=FLIGHT_LOGS),
-    DocCfg(id='022816', category=FLIGHT_LOGS),
+    DocCfg(id='022780', category=FLIGHT_LOG),
+    DocCfg(id='022816', category=FLIGHT_LOG),
     DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
@@ -1541,7 +1537,7 @@ OTHER_FILES_MISC = [
     DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
     DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
     DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
-    DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
+    DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
     DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
     DocCfg(
         id='030142',

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl