PyPI - epstein-files - Versions diffs - 1.0.14__tar.gz → 1.0.15__tar.gz - Mend

epstein-files 1.0.14.tar.gz → 1.0.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{epstein_files-1.0.14 → epstein_files-1.0.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.0.14
+Version: 1.0.15
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/__init__.py RENAMED Viewed

@@ -17,11 +17,11 @@ from epstein_files.epstein_files import EpsteinFiles, document_cls
 from epstein_files.documents.document import INFO_PADDING, Document
 from epstein_files.documents.email import Email
 from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
-from epstein_files.util.env import args, specified_names
+from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
-     write_json_metadata, write_urls)
+from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
+     print_other_files_section, print_text_messages_section, write_json_metadata, write_urls)
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -49,12 +49,12 @@ def generate_html() -> None:
         exit()
     if args.output_texts:
-        epstein_files.print_text_messages_section()
+        print_text_messages_section(epstein_files)
         timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
     if args.output_emails:
-        emails_printed = print_emails(epstein_files)
-        timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
+        emails_that_were_printed = print_emails_section(epstein_files)
+        timer.print_at_checkpoint(f"Printed {len(emails_that_were_printed):,} emails")
     if args.output_other:
         if args.uninteresting:
@@ -62,7 +62,7 @@ def generate_html() -> None:
         else:
             files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting()]
-        epstein_files.print_other_files_section(files)
+        print_other_files_section(files, epstein_files)
         timer.print_at_checkpoint(f"Printed {len(files)} other files (skipped {len(epstein_files.other_files) - len(files)})")
     # Save output
@@ -86,7 +86,7 @@ def epstein_search():
     for search_term in args.positional_args:
         temp_highlighter = build_highlighter(search_term)
-        search_results = epstein_files.docs_matching(search_term, specified_names)
+        search_results = epstein_files.docs_matching(search_term, args.names)
         console.line(2)
         print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/document.py RENAMED Viewed

@@ -160,8 +160,8 @@ class Document:
     def file_size(self) -> int:
         return file_size(self.file_path)
-    def file_size_str(self) -> str:
-        return file_size_str(self.file_path)
+    def file_size_str(self, decimal_places: int | None = None) -> str:
+        return file_size_str(self.file_path, decimal_places)
     def info(self) -> list[Text]:
         """0 to 2 sentences containing the info_txt() as well as any configured description."""
@@ -171,14 +171,14 @@ class Document:
         ])
     def info_txt(self) -> Text | None:
-        """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
+        """Secondary info about this file (description recipients, etc). Overload in subclasses."""
         return None
     def is_duplicate(self) -> bool:
         return bool(self.config and self.config.duplicate_of_id)
     def is_local_extract_file(self) -> bool:
-        """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
+        """True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
         return is_local_extract_file(self.filename)
     def length(self) -> int:
@@ -234,6 +234,7 @@ class Document:
         return text
     def sort_key(self) -> tuple[datetime, str, int]:
+        """Sort by timestamp, file_id, then whether or not it's a duplicate file."""
         if self.is_duplicate():
             sort_id = self.config.duplicate_of_id
             dupe_idx = 1
@@ -253,7 +254,7 @@ class Document:
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
-        txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
+        txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(0), style='aquamarine1')))
         txt.append(", ").append(key_value_txt('lines', self.num_lines()))
         if self.config and self.config.duplicate_of_id:
@@ -271,6 +272,7 @@ class Document:
         return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
     def top_lines(self, n: int = 10) -> str:
+        """First n lines."""
         return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
     def warn(self, msg: str) -> None:

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/email.py RENAMED Viewed

@@ -17,7 +17,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, INFO_INDENT
 from epstein_files.documents.emails.email_header import (BAD_EMAILER_REGEX, EMAIL_SIMPLE_HEADER_REGEX,
      EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX, FIELD_NAMES, TIME_REGEX, EmailHeader)
 from epstein_files.util.constant.names import *
-from epstein_files.util.constant.strings import REDACTED, URL_SIGNIFIERS
+from epstein_files.util.constant.strings import REDACTED
 from epstein_files.util.constants import *
 from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
      flatten, remove_timezone, uniquify)
@@ -41,6 +41,7 @@ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
 REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
 APPEARS_IN = 'Appears in'
 MAX_CHARS_TO_PRINT = 4000
 MAX_NUM_HEADER_LINES = 14

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/messenger_log.py RENAMED Viewed

@@ -121,23 +121,22 @@ class MessengerLog(Communication):
         return sender_counts
     @classmethod
-    def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
-        authors = listify(author)
-        return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
-    @classmethod
-    def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
+    def summary_table(cls, log_files: list['MessengerLog']) -> Table:
         """Build a table summarizing the text messages in 'imessage_logs'."""
-        counts_table = build_table("Text Message Counts By Author")
-        counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
+        author_counts = cls.count_authors(log_files)
+        msg_count = sum([len(log.messages) for log in log_files])
+        footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
+        counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
+        counts_table.add_column(AUTHOR.title(), justify='left', width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
         counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
         counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
         counts_table.add_column('Days', justify='right', style='dim')
-        for name, count in sort_dict(cls.count_authors(imessage_logs)):
-            logs = cls.logs_for(name, imessage_logs)
+        for name, count in sort_dict(author_counts):
+            logs = log_files if name == JEFFREY_EPSTEIN else [log for log in log_files if log.author == name]
             first_at = logs[0].first_message_at(name)
             last_at = logs[-1].first_message_at(name)

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/other_file.py RENAMED Viewed

@@ -17,14 +17,15 @@ from rich.text import Text
 from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constants import *
-from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
+from epstein_files.util.doc_cfg import DocCfg, Metadata
 from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import styled_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, add_cols_to_table, build_table, highlighter
+from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
 from epstein_files.util.logging import logger
+FIRST_FEW_LINES = 'First Few Lines'
 MAX_DAYS_SPANNED_TO_BE_VALID = 10
 MAX_EXTRACTED_TIMESTAMPS = 100
 MIN_TIMESTAMP = datetime(2000, 1, 1)
@@ -208,6 +209,36 @@ class OtherFile(Document):
         if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
+    @staticmethod
+    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
+        counts = defaultdict(int)
+        category_bytes = defaultdict(int)
+        for file in files:
+            if file.category() is None:
+                logger.warning(f"file {file.file_id} has no category")
+            counts[file.category()] += 1
+            category_bytes[file.category()] += file.file_size()
+        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[0].min_width = 14
+        table.columns[-1].style = 'dim'
+        for (category, count) in sort_dict(counts):
+            category_files = [f for f in files if f.category() == category]
+            known_author_count = Document.known_author_count(category_files)
+            table.add_row(
+                styled_category(category or UNKNOWN),
+                str(count),
+                str(known_author_count),
+                str(count - known_author_count),
+                file_size_to_str(category_bytes[category]),
+            )
+        return table
     @staticmethod
     def files_preview_table(files: Sequence['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
@@ -240,33 +271,3 @@ class OtherFile(Document):
             )
         return table
-    @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
-        counts = defaultdict(int)
-        category_bytes = defaultdict(int)
-        for file in files:
-            if file.category() is None:
-                logger.warning(f"file {file.file_id} has no category")
-            counts[file.category()] += 1
-            category_bytes[file.category()] += file.file_size()
-        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
-        table.columns[0].min_width = 14
-        table.columns[-1].style = 'dim'
-        for (category, count) in sort_dict(counts):
-            category_files = [f for f in files if f.category() == category]
-            known_author_count = Document.known_author_count(category_files)
-            table.add_row(
-                styled_category(category or UNKNOWN),
-                str(count),
-                str(known_author_count),
-                str(count - known_author_count),
-                file_size_to_str(category_bytes[category]),
-            )
-        return table

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/epstein_files.py RENAMED Viewed

@@ -25,7 +25,7 @@ from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL,
 from epstein_files.util.constants import *
 from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
-from epstein_files.util.env import DOCS_DIR, args, logger, specified_names
+from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
 from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
@@ -201,9 +201,6 @@ class EpsteinFiles:
         return docs
-    def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
-        return MessengerLog.logs_for(author, self.imessage_logs)
     def json_metadata(self) -> str:
         """Create a JSON string containing metadata for all the files."""
         metadata = {
@@ -290,44 +287,6 @@ class EpsteinFiles:
         console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
-    def print_other_files_section(self, files: list[OtherFile]) -> None:
-        """Returns the OtherFile objects that were interesting enough to print."""
-        category_table = OtherFile.count_by_category_table(files)
-        other_files_preview_table = OtherFile.files_preview_table(files)
-        header_pfx = '' if args.all_other_files else 'Selected '
-        print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
-        if args.all_other_files:
-            console.line(1)
-        else:
-            print_all_files_page_link(self)
-            console.line(2)
-            for table in [category_table, other_files_preview_table]:
-                table.title = f"{header_pfx}{table.title}"
-        print_centered(category_table)
-        console.line(2)
-        console.print(other_files_preview_table)
-    def print_text_messages_section(self) -> None:
-        """Print summary table and stats for text messages."""
-        print_section_header('All of His Text Messages')
-        print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
-        authors: list[str | None] = specified_names if specified_names else [JEFFREY_EPSTEIN]
-        log_files = self.imessage_logs_for(authors)
-        for log_file in log_files:
-            console.print(Padding(log_file))
-            console.line(2)
-        print_centered(MessengerLog.summary_table(self.imessage_logs))
-        text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
-        text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
-        console.print(text_summary_msg)
-        imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
-        console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
     def table_of_emailers(self) -> Table:
         attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
         footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/names.py RENAMED Viewed

@@ -187,9 +187,11 @@ VIRGINIA_GIUFFRE = 'Virginia Giuffre'
 # Organizations
 BOFA = 'BofA'
+BOFA_MERRILL = f'{BOFA} / Merrill Lynch'
 CNN = 'CNN'
 DEUTSCHE_BANK = 'Deutsche Bank'
 ELECTRON_CAPITAL_PARTNERS = 'Electron Capital Partners'
+EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
 GOLDMAN_SACHS = 'Goldman Sachs'
 GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
 HARVARD = 'Harvard'

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/strings.py RENAMED Viewed

@@ -57,12 +57,10 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 AUTHOR = 'author'
 DEFAULT = 'default'
 EVERYONE = 'everyone'
-FIRST_FEW_LINES = 'First Few Lines'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
 JSON = 'json'
 NA = 'n/a'
 REDACTED = '<REDACTED>'
-URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
 QUESTION_MARKS = '(???)'
 # Regexes

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/urls.py RENAMED Viewed

@@ -49,7 +49,7 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
 }
-epsteinify_api_url = lambda file_id: f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"
+epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
 epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
 epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(external_doc_link_markup(filename_or_id, style))
 epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
@@ -66,8 +66,6 @@ epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.q
 rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
-search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
-search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
@@ -103,8 +101,4 @@ def link_text_obj(url: str, link_text: str | None = None, style: str = ARCHIVE_L
     return Text.from_markup(link_markup(url, link_text, style))
-def search_coffeezilla_link(text: str, link_txt: str, style: str = ARCHIVE_LINK_COLOR) -> Text:
-    return link_text_obj(search_coffeezilla_url(text), link_txt or text, style)
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constants.py RENAMED Viewed

@@ -208,73 +208,6 @@ for emailer in EMAILERS:
     EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
-##########################
-# OtherFile config stuff #
-##########################
-# strings
-FBI = 'FBI'
-MEME = 'meme of'
-PRESS_RELEASE = 'press release'
-RESUME_OF = 'professional resumé'
-SCREENSHOT = 'screenshot of'
-TRANSLATION = 'translation of'
-TWEET = 'tweet'
-# Legal cases
-BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
-EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
-EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
-GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
-GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
-GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
-JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
-JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
-NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
-# Descriptions of non-email, non-text message files
-ARTICLE_DRAFT = 'draft of an article about'
-BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
-BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
-BROCKMAN_INC = 'Brockman, Inc.'
-CVRA = "Crime Victims' Rights Act [CVRA]"
-DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
-DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
-DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
-DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
-EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
-FBI_REPORT = f"report on Epstein investigation (redacted)"
-FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
-FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
-FIRE_AND_FURY = f"Fire And Fury"
-HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
-HBS_APPLICATION = f"{HARVARD} Business School application letter"
-JASTA = 'JASTA'
-JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
-JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
-LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
-LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
-KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
-MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
-NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
-NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
-OBAMA_JOKE = 'joke about Obama'
-PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
-PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
-PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
-PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
-PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
-REAL_DEAL_ARTICLE = 'article by Keith Larsen'
-SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
-SINGLE_PAGE = 'single page of'
-STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
-SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
-TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
-UBS_CIO_REPORT = 'CIO Monthly Extended report'
-UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
-WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
-ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
 # Atribution reasons
 BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
 KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
@@ -369,6 +302,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
 ################################################ EMAILS ################################################
 ########################################################################################################
+MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
 # Some emails have a lot of uninteresting CCs
 IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
 FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
@@ -866,6 +801,65 @@ EMAILS_CONFIG = [
 ####################################### OTHER FILES ############################################
 ################################################################################################
+# strings
+FBI = 'FBI'
+MEME = 'meme of'
+PRESS_RELEASE = 'press release'
+RESUME_OF = 'professional resumé'
+SCREENSHOT = 'screenshot of'
+TRANSLATION = 'translation of'
+TWEET = 'tweet'
+# Legal cases
+BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
+EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
+EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
+GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
+GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
+GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
+JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
+JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
+NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
+# Descriptions of non-email, non-text message files
+ARTICLE_DRAFT = 'draft of an article about'
+BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
+BROCKMAN_INC = 'Brockman, Inc.'
+CVRA = "Crime Victims' Rights Act [CVRA]"
+DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
+DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
+DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
+DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
+FBI_REPORT = f"report on Epstein investigation (redacted)"
+FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
+FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
+FIRE_AND_FURY = f"Fire And Fury"
+HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
+HBS_APPLICATION = f"{HARVARD} Business School application letter"
+JASTA = 'JASTA'
+JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
+JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
+LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
+KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
+LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
+NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
+OBAMA_JOKE = 'joke about Obama'
+PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
+PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
+PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
+PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
+PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
+REAL_DEAL_ARTICLE = 'article by Keith Larsen'
+SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
+STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
+SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
+TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
+UBS_CIO_REPORT = 'CIO Monthly Extended report'
+UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
+WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
+ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
 OTHER_FILES_BOOKS = [
     DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
     DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -873,7 +867,7 @@ OTHER_FILES_BOOKS = [
     DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
     DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
     DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
-    DocCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK, date='2006-07-12', duplicate_ids=['027849']),  # date from _extract_timestamp()
+    DocCfg(id='011472', author=EHUD_BARAK, description=f'"Night Flight" (draft)', date='2006-07-12', duplicate_ids=['027849']),  # date from _extract_timestamp()
     DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
     DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
     DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
@@ -1002,7 +996,7 @@ OTHER_FILES_ARTICLES = [
     DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
     DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
     DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
-    DocCfg(id='021092', author='Tatler', description=f'{SINGLE_PAGE} of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
+    DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
     DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
     DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
     DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1030,7 +1024,7 @@ OTHER_FILES_ARTICLES = [
     DocCfg(id='033480', description=f"John Bolton press clipping", date='2018-04-06', duplicate_ids=['033481']),
     DocCfg(id='013403', description=f"{LEXIS_NEXIS} result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
     DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
-    DocCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
+    DocCfg(id='031191', description=f"single page of unknown article about Epstein and Trump's relationship in 1997"),
     DocCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
     DocCfg(
         id='031736',
@@ -1186,16 +1180,16 @@ OTHER_FILES_LEGAL = [
 ]
 OTHER_FILES_CONFERENCES = [
-    DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
+    DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference', attached_to_email_id='014312'),
     DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"),  # Really "Deutsche Asset" which may not be Deutsche Bank?
     DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
     DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
-    DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f"Earth Environment Convention about ESG investing"),
+    DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f'"Earth Environment Convention" about ESG investing'),
     DocCfg(id='031354', author=NOBEL_CHARITABLE_TRUST, description=f'"Thinking About the Environment and Technology" report 2011'),
     DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
     DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
     DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
-    DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program emailed to epstein BY {BARBRO_C_EHNBOM} in 031226", date='2012-08-18'),
+    DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
     DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
     DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
     DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
@@ -1411,7 +1405,7 @@ OTHER_FILES_POLITICS = [
         description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
         date='2017-06-02',
     ),
-    DocCfg(id='026856', author='Kevin Rudd', description=f"speech 'Xi Jinping, China And The Global Order'", date='2018-06-26'),
+    DocCfg(id='026856', author='Kevin Rudd', description=f'speech "Xi Jinping, China And The Global Order"', date='2018-06-26'),
     DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
     DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
     DocCfg(

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/doc_cfg.py RENAMED Viewed

@@ -36,7 +36,7 @@ FIELD_SORT_KEY = {
 }
 FINANCIAL_REPORTS_AUTHORS = [
-    BOFA,
+    BOFA_MERRILL,
     DEUTSCHE_BANK,
     ELECTRON_CAPITAL_PARTNERS,
     GOLDMAN_INVESTMENT_MGMT,
@@ -73,6 +73,7 @@ class DocCfg:
         is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
     """
     id: str
+    attached_to_email_id: str | None = None
     author: str | None = None
     category: str | None = None
     date: str | None = None
@@ -102,10 +103,10 @@ class DocCfg:
             return f"{msg} {self.description}" if self.description else msg
         elif self.author and self.description:
             if self.category in [ACADEMIA, BOOK]:
-                title = self.description if '"' in self.description else f"'{self.description}'"
+                title = self.description if '"' in self.description else f'"{self.description}"'
                 return f"{title} by {self.author}"
             elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
-                return f"{self.author} report: '{self.description}'"
+                return f'{self.author} report: "{self.description}"'
             elif self.category == LEGAL and 'v.' in self.author:
                 return f"{self.author}: {self.description}"
         elif self.category and self.author is None and self.description is None:

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/env.py RENAMED Viewed

@@ -8,10 +8,8 @@ from rich_argparse_plus import RichHelpFormatterPlus
 from epstein_files.util.logging import env_log_level, logger
-COUNT_WORDS_SCRIPT = 'epstein_word_count'
 DEFAULT_WIDTH = 145
-HTML_SCRIPTS = ['epstein_generate', COUNT_WORDS_SCRIPT]
-EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
+HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
 RichHelpFormatterPlus.choose_theme('morning_glory')
@@ -50,6 +48,7 @@ args = parser.parse_args()
 # Verify Epstein docs can be found
+EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
 DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
 DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
@@ -65,13 +64,13 @@ is_env_var_set = lambda s: len(environ.get(s) or '') > 0
 is_html_script = current_script in HTML_SCRIPTS
 args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
+args.names = [None if n == 'None' else n for n in (args.names or [])]
 args.output_emails = args.output_emails or args.all_emails
 args.output_other = args.output_other or args.all_other_files or args.uninteresting
 args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
 args.width = args.width if is_html_script else None
-is_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
-is_output_selected = is_output_selected or args.json_metadata or args.colors_only
-specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
+is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
+is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
 # Log level args
 if args.deep_debug:
@@ -86,9 +85,12 @@ elif not env_log_level:
 logger.info(f'Log level set to {logger.level}...')
 # Massage args that depend on other args to the appropriate state
-if current_script == 'epstein_generate' and not (is_output_selected or args.make_clean):
+if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
     logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
     args.output_texts = args.output_emails = args.output_other = True
 if args.debug:
-    logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_output_selected}\nspecified_names={specified_names},\nargs={args}")
+    logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
+if args.names:
+    logger.warning(f"Output restricted to {args.names}")

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/file_helper.py RENAMED Viewed

@@ -12,7 +12,6 @@ KB = 1024
 MB = KB * KB
 file_size = lambda file_path: Path(file_path).stat().st_size
-file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
 # Coerce methods handle both string and int arguments.
 coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
@@ -46,8 +45,12 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
     return file_match.group(1)
-def file_size_to_str(size: int) -> str:
-    digits = 2
+def file_size_str(file_path, digits: int | None = None):
+    return file_size_to_str(file_size(file_path), digits)
+def file_size_to_str(size: int, digits: int | None = None) -> str:
+    _digits = 2
     if size > MB:
         size_num = float(size) / MB
@@ -55,10 +58,11 @@ def file_size_to_str(size: int) -> str:
     elif size > KB:
         size_num = float(size) / KB
         size_str = 'kb'
-        digits = 1
+        _digits = 1
     else:
         return f"{size} b"
+    digits = _digits if digits is None else digits
     return f"{size_num:,.{digits}f} {size_str}"

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/highlighted_group.py RENAMED Viewed

@@ -302,7 +302,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label=FINANCE,
         style='green',
-        pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
+        pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche?\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
         emailers={
             AMANDA_ENS: 'Citigroup',
             BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
@@ -396,7 +396,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label='law enforcement',
         style='color(24) bold',
-        pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|Southern\s*District\s*of\s*(Florida|New\s*York)|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
+        pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|Southern\s*District\s*of\s*(Florida|New\s*York)|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
         emailers = {
             ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
             DANNY_FROST: 'Director of Communications at Manhattan DA',

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/output.py RENAMED Viewed

@@ -4,13 +4,14 @@ from rich.padding import Padding
 from epstein_files.documents.email import Email
 from epstein_files.documents.messenger_log import MessengerLog
+from epstein_files.documents.other_file import FIRST_FEW_LINES, OtherFile
 from epstein_files.epstein_files import EpsteinFiles, count_by_month
 from epstein_files.util.constant import output_files
 from epstein_files.util.constant.html import *
 from epstein_files.util.constant.names import *
 from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
 from epstein_files.util.data import dict_sets_to_lists
-from epstein_files.util.env import args, specified_names
+from epstein_files.util.env import args
 from epstein_files.util.file_helper import log_file_write
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
@@ -20,17 +21,17 @@ PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
 # Order matters. Default names to print emails for.
 DEFAULT_EMAILERS = [
     JEREMY_RUBIN,
-    AL_SECKEL,
     JOI_ITO,
     JABOR_Y,
     STEVEN_SINOFSKY,
+    AL_SECKEL,
     DANIEL_SIAD,
     JEAN_LUC_BRUNEL,
     STEVEN_HOFFENBERG,
+    RENATA_BOLOTOVA,
+    MASHA_DROKOVA,
     EHUD_BARAK,
     MARTIN_NOWAK,
-    MASHA_DROKOVA,
-    RENATA_BOLOTOVA,
     STEVE_BANNON,
     PRINCE_ANDREW,
     JIDE_ZEITLIN,
@@ -39,6 +40,7 @@ DEFAULT_EMAILERS = [
     JENNIFER_JACQUET,
     TYLER_SHEARS,
     CHRISTINA_GALBRAITH,
+    ZUBAIR_KHAN,
     None,
 ]
@@ -55,8 +57,8 @@ if len(set(DEFAULT_EMAILERS).intersection(set(DEFAULT_EMAILER_TABLES))) > 0:
     raise RuntimeError(f"Some names appear in both DEFAULT_EMAILERS and DEFAULT_EMAILER_TABLES")
-def print_emails(epstein_files: EpsteinFiles) -> int:
-    """Returns number of emails printed."""
+def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
+    """Returns emails that were printed (may contain dupes if printed for both author and recipient)."""
     print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
     print_all_files_page_link(epstein_files)
     emailers_to_print: list[str | None]
@@ -64,8 +66,8 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
     already_printed_emails: list[Email] = []
     num_emails_printed_since_last_color_key = 0
-    if specified_names:
-        emailers_to_print = specified_names
+    if args.names:
+        emailers_to_print = args.names
     else:
         print_centered(Padding(epstein_files.table_of_emailers(), (2, 0)))
@@ -97,7 +99,7 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
         for name in DEFAULT_EMAILER_TABLES:
             epstein_files.print_emails_table_for(name)
-    if not specified_names:
+    if not args.names:
         epstein_files.print_email_device_info()
     if args.all_emails:
@@ -106,7 +108,7 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
     fwded_articles = [e for e in already_printed_emails if e.config and e.is_fwded_article()]
     log_msg = f"Rewrote {len(Email.rewritten_header_ids)} of {len(already_printed_emails)} email headers"
     logger.warning(f"{log_msg}, {len(fwded_articles)} of the emails were forwarded articles.")
-    return len(already_printed_emails)
+    return already_printed_emails
 def print_json_files(epstein_files: EpsteinFiles):
@@ -136,6 +138,39 @@ def print_json_stats(epstein_files: EpsteinFiles) -> None:
     print_json("count_by_month", count_by_month(epstein_files.all_documents()))
+def print_other_files_section(files: list[OtherFile], epstein_files: EpsteinFiles) -> None:
+    """Print the section header, the category count table, and the preview table for the given OtherFile objects."""
+    category_table = OtherFile.count_by_category_table(files)
+    other_files_preview_table = OtherFile.files_preview_table(files)
+    header_pfx = '' if args.all_other_files else 'Selected '
+    print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
+    if args.all_other_files:
+        console.line(1)
+    else:
+        print_all_files_page_link(epstein_files)
+        console.line(2)
+        for table in [category_table, other_files_preview_table]:
+            table.title = f"{header_pfx}{table.title}"
+    print_centered(category_table)
+    console.line(2)
+    console.print(other_files_preview_table)
+def print_text_messages_section(epstein_files: EpsteinFiles) -> None:
+    """Print every text message conversation log, followed by a summary table of all the logs."""
+    print_section_header('All of His Text Messages')
+    print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
+    for log_file in epstein_files.imessage_logs:
+        console.print(Padding(log_file))
+        console.line(2)
+    print_centered(MessengerLog.summary_table(epstein_files.imessage_logs))
 def write_json_metadata(epstein_files: EpsteinFiles) -> None:
     json_str = epstein_files.json_metadata()

{epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/word_count.py RENAMED Viewed

@@ -14,7 +14,7 @@ from epstein_files.util.constant.common_words import COMMON_WORDS_LIST, COMMON_W
 from epstein_files.util.constant.names import OTHER_NAMES
 from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
 from epstein_files.util.data import ALL_NAMES, flatten, sort_dict
-from epstein_files.util.env import args, specified_names
+from epstein_files.util.env import args
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import (console, highlighter, print_centered, print_color_key, print_page_title,
      print_panel, print_starred_header, write_html)
@@ -201,7 +201,7 @@ def write_word_counts_html() -> None:
     emails = [e for e in epstein_files.non_duplicate_emails() if not (e.is_junk_mail() or e.is_fwded_article())]
     for email in emails:
-        if specified_names and email.author not in specified_names:
+        if args.names and email.author not in args.names:
             continue
         logger.info(f"Counting words in {email}\n  [SUBJECT] {email.subject()}")
@@ -218,14 +218,12 @@ def write_word_counts_html() -> None:
             for word in line.split():
                 word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
-    # Add in iMessage conversation words
-    imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
-    for imessage_log in imessage_logs:
+    # Add in iMessage conversations
+    for imessage_log in epstein_files.imessage_logs:
         logger.info(f"Counting words in {imessage_log}")
         for i, msg in enumerate(imessage_log.messages):
-            if specified_names and msg.author not in specified_names:
+            if args.names and msg.author not in args.names:
                 continue
             elif HTML_REGEX.search(line):
                 continue
@@ -234,7 +232,7 @@ def write_word_counts_html() -> None:
                 word_count.tally_word(word, SearchResult(imessage_log, [MatchedLine(msg.text, i)]))
     print_page_title(expand=False)
-    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
+    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(epstein_files.imessage_logs)} iMessage Logs")
     print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
     console.line()
     print_color_key()

{epstein_files-1.0.14 → epstein_files-1.0.15}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "epstein-files"
-version = "1.0.14"
+version = "1.0.15"
 description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
 authors = ["Michel de Cryptadamus"]
 readme = "README.md"