epstein-files 1.0.5__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.5 → epstein_files-1.0.6}/PKG-INFO +4 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/README.md +3 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/__init__.py +21 -17
- epstein_files-1.0.6/epstein_files/count_words.py +72 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/json_file.py +4 -4
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/messenger_log.py +2 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/other_file.py +2 -2
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/epstein_files.py +22 -22
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/output_files.py +18 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/strings.py +0 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/urls.py +5 -20
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/env.py +4 -3
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/output.py +25 -7
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/rich.py +51 -27
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/word_count.py +10 -10
- {epstein_files-1.0.5 → epstein_files-1.0.6}/pyproject.toml +2 -2
- {epstein_files-1.0.5 → epstein_files-1.0.6}/LICENSE +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/document.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/email.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/imessage/text_message.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/names.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constants.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/data.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/doc_cfg.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/file_helper.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/highlighted_group.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/logging.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/timer.py +0 -0

{epstein_files-1.0.5 → epstein_files-1.0.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.0.5
+Version: 1.0.6
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later
@@ -70,6 +70,9 @@ epstein_show --raw 030999
 # This also works:
 epstein_show HOUSE_OVERSIGHT_030999
 
+# Count words used by Epstein and Bannon
+epstein_word_count --name 'Jeffrey Epstein' --name 'Steve Bannon'
+
 # Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
 epstein_diff 030999 020442
 ```

{epstein_files-1.0.5 → epstein_files-1.0.6}/README.md

@@ -38,6 +38,9 @@ epstein_show --raw 030999
 # This also works:
 epstein_show HOUSE_OVERSIGHT_030999
 
+# Count words used by Epstein and Bannon
+epstein_word_count --name 'Jeffrey Epstein' --name 'Steve Bannon'
+
 # Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
 epstein_diff 030999 020442
 ```

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/__init__.py

@@ -10,11 +10,12 @@ from sys import exit
 
 from dotenv import load_dotenv
 load_dotenv()
-
 from rich.markup import escape
 from rich.padding import Padding
 from rich.panel import Panel
+from rich.text import Text
 
+from epstein_files.count_words import write_word_counts_html
 from epstein_files.epstein_files import EpsteinFiles, document_cls
 from epstein_files.documents.document import INFO_PADDING, Document
 from epstein_files.documents.email import Email
@@ -24,22 +24,25 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
+from epstein_files.util.output import print_emails, print_json_files, print_json_metadata, print_json_stats, print_text_messages, write_urls
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 
+timer = Timer()
+epstein_files = EpsteinFiles.get_files(timer)
+
 
 def generate_html() -> None:
     if args.make_clean:
         make_clean()
+        write_urls()
         exit()
-
-    timer = Timer()
-    epstein_files = EpsteinFiles.get_files(timer)
-
-    if args.json_metadata:
+    elif args.json_metadata:
         print_json_metadata(epstein_files)
         exit()
+    elif args.output_json_files:
+        print_json_files(epstein_files)
+        exit()
 
     print_header(epstein_files)
 
@@ -108,22 +112,22 @@ def epstein_show():
         console.line()
 
     for doc in docs:
-
-
+        if isinstance(doc, Email):
+            doc.truncation_allowed = False
+
+        console.print('\n', doc, '\n')
 
         if args.raw:
-            console.
-            console.print(
-            console.print(escape(doc.raw_text()))
+            console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc._border_style()))
+            console.print(escape(doc.raw_text()), '\n')
 
         if isinstance(doc, Email):
-            console.
-            console.print(
-            console.print(escape(doc._actual_text()))
+            console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc._border_style()))
+            console.print(escape(doc._actual_text()), '\n')
 
 
-def
-
+def epstein_word_count() -> None:
+    write_word_counts_html()
 
 
 def _assert_positional_args():

epstein_files-1.0.6/epstein_files/count_words.py (new file)

@@ -0,0 +1,72 @@
+# Count word usage in emails and texts
+import re
+
+from epstein_files.epstein_files import EpsteinFiles
+from epstein_files.util.constant.common_words import COMMON_WORDS_LIST
+from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
+from epstein_files.util.env import args, specified_names
+from epstein_files.util.logging import logger
+from epstein_files.util.rich import (console, print_centered, print_color_key, print_page_title, print_panel,
+    print_starred_header, write_html)
+from epstein_files.util.search_result import MatchedLine, SearchResult
+from epstein_files.util.timer import Timer
+from epstein_files.util.word_count import WordCount
+
+HTML_REGEX = re.compile(r"^http|#yiv")
+
+
+def write_word_counts_html() -> None:
+    timer = Timer()
+    epstein_files = EpsteinFiles.get_files(timer)
+    email_subjects: set[str] = set()
+    word_count = WordCount()
+
+    # Remove dupes, junk mail, and fwded articles from emails
+    emails = [
+        e for e in epstein_files.emails
+        if not (e.is_duplicate or e.is_junk_mail() or (e.config and e.config.is_fwded_article)) \
+            and (len(specified_names) == 0 or e.author in specified_names)
+    ]
+
+    for email in emails:
+        logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
+        lines = email.actual_text.split('\n')
+
+        if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
+            email_subjects.add(email.subject())
+            lines.append(email.subject())
+
+        for i, line in enumerate(lines):
+            if HTML_REGEX.search(line):
+                continue
+
+            for word in line.split():
+                word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
+
+    # Add in iMessage conversation words
+    imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
+
+    for imessage_log in imessage_logs:
+        logger.info(f"Counting words in {imessage_log}")
+
+        for msg in imessage_log.messages():
+            if len(specified_names) > 0 and msg.author not in specified_names:
+                continue
+            elif HTML_REGEX.search(line):
+                continue
+
+            for word in msg.text.split():
+                word_count.tally_word(word, SearchResult(imessage_log, [msg.text]))
+
+    print_page_title(expand=False)
+    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
+    print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
+    console.line()
+    print_color_key()
+    console.line()
+    console.print(word_count)
+    console.line(2)
+    print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
+    console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
+    write_html(WORD_COUNT_HTML_PATH)
+    timer.print_at_checkpoint(f"Finished counting words")

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/json_file.py

@@ -21,14 +21,11 @@ class JsonFile(OtherFile):
         if self.url_slug.endswith('.txt') or self.url_slug.endswith('.json'):
             self.url_slug = Path(self.url_slug).stem
 
-        self._set_computed_fields(text=self.
+        self._set_computed_fields(text=self.json_str())
 
     def category(self) -> str:
         return JSON
 
-    def formatted_json(self) -> str:
-        return json.dumps(self.json_data(), indent=4)
-
     def info_txt(self) -> Text | None:
         return Text(f"JSON file, possibly iMessage or similar app metadata", style='white dim italic')
 
@@ -38,3 +35,6 @@ class JsonFile(OtherFile):
     def json_data(self) -> object:
         with open(self.file_path, encoding='utf-8-sig') as f:
             return json.load(f)
+
+    def json_str(self) -> str:
+        return json.dumps(self.json_data(), indent=4)

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/messenger_log.py

@@ -15,6 +15,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
+from epstein_files.util.rich import build_table
 
 CONFIRMED_MSG = 'Found confirmed counterparty'
 GUESSED_MSG = 'This is probably a conversation with'
@@ -111,7 +112,7 @@ class MessengerLog(Communication):
     @classmethod
     def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
         """Build a table summarizing the text messages in 'imessage_logs'."""
-        counts_table =
+        counts_table = build_table("Text Message Counts By Author")
         counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/other_file.py

@@ -20,7 +20,7 @@ from epstein_files.util.data import escape_single_quotes, remove_timezone, uniqu
 from epstein_files.util.file_helper import FILENAME_LENGTH
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import get_style_for_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, highlighter
+from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
 from epstein_files.util.logging import logger
 
 MAX_DAYS_SPANNED_TO_BE_VALID = 10
@@ -233,7 +233,7 @@ class OtherFile(Document):
     @staticmethod
     def build_table(docs: list['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
-        table =
+        table = build_table(None, show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='center')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/epstein_files.py

@@ -28,9 +28,9 @@ from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import args, logger
 from epstein_files.util.file_helper import DOCS_DIR, file_size_str
 from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT,
-    link_text_obj, link_markup, print_author_header, print_centered,
-    print_section_header, vertically_pad)
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, TABLE_BORDER_STYLE, add_cols_to_table,
+    build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
+    print_other_site_link, print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
 
@@ -212,7 +212,7 @@ class EpsteinFiles:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
 
     def print_files_summary(self) -> None:
-        table =
+        table = build_table('Summary of Document Types')
         add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
 
         def add_row(label: str, docs: list):
@@ -268,12 +268,12 @@ class EpsteinFiles:
 
     def print_email_device_info(self) -> None:
         print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
-        console.print(
-        console.print(
+        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
+        console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
 
     def print_emailer_counts_table(self) -> None:
         footer = f"Identified authors of {self.attributed_email_count():,} emails out of {len(self.emails):,}."
-        counts_table =
+        counts_table = build_table("Email Counts", caption=footer)
         add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
 
         emailer_counts = {
@@ -345,21 +345,6 @@ class EpsteinFiles:
             self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
 
 
-def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
-    title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
-    table = Table(header_style="bold reverse", show_lines=True, title=title)
-
-    for i, col in enumerate(cols):
-        table.add_column(col.title() + ('s' if i == 1 else ''))
-
-    new_dict = dict_sets_to_lists(keyed_sets)
-
-    for k in sorted(new_dict.keys()):
-        table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
-
-    return Padding(table, DEVICE_SIGNATURE_PADDING)
-
-
 def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
     counts: dict[str | None, int] = defaultdict(int)
 
@@ -397,6 +382,21 @@ def is_ok_for_epstein_web(name: str | None) -> bool:
     return True
 
 
+def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
+    title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
+    table = build_table(title, header_style="bold reverse", show_lines=True)
+
+    for i, col in enumerate(cols):
+        table.add_column(col.title() + ('s' if i == 1 else ''))
+
+    new_dict = dict_sets_to_lists(keyed_sets)
+
+    for k in sorted(new_dict.keys()):
+        table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
+
+    return Padding(table, DEVICE_SIGNATURE_PADDING)
+
+
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
     docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
     return [json_safe(d.metadata()) for d in docs_sorted_by_id]

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/output_files.py

@@ -1,19 +1,36 @@
 from pathlib import Path
 
-
+from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
 
 # Files output by the code
 HTML_DIR = Path('docs')
 EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
 ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
+JSON_FILES_JSON_PATH = HTML_DIR.joinpath(f'json_files_from_{EPSTEIN_FILES_NOV_2025}.json')
 JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
 TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
 WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
 # EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
+URLS_ENV = '.urls.env'
+
+# Deployment URLS
+# NOTE: don't rename these variables without changing deploy.sh!
+GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
+JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
+JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
+WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
+
+SITE_URLS: dict[SiteType, str] = {
+    EMAIL: ALL_EMAILS_URL,
+    TEXT_MESSAGE: TEXT_MSGS_URL,
+}
 
 BUILD_ARTIFACTS = [
     ALL_EMAILS_PATH,
     # EPSTEIN_WORD_COUNT_HTML_PATH,
+    JSON_FILES_JSON_PATH,
     JSON_METADATA_PATH,
     TEXT_MSGS_HTML_PATH,
     WORD_COUNT_HTML_PATH,

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/urls.py

@@ -6,7 +6,6 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.output_files import *
-from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
 from epstein_files.util.file_helper import coerce_file_stem
 
 # Style stuff
@@ -15,26 +14,11 @@ TEXT_LINK = 'text_link'
 
 # External site names
 ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
-
 EPSTEIN_MEDIA = 'epstein.media'
 EPSTEIN_WEB = 'EpsteinWeb'
 EPSTEINIFY = 'epsteinify'
 JMAIL = 'Jmail'
 
-
-# Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh!
-GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
-ALL_EMAILS_URL = f'{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}'
-JSON_METADATA_URL = f'{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}'
-WORD_COUNT_URL = f'{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}'
-
-SITE_URLS: dict[SiteType, str] = {
-    EMAIL: ALL_EMAILS_URL,
-    TEXT_MESSAGE: TEXT_MSGS_URL,
-}
-
 GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
@@ -46,13 +30,15 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 # External URLs
 COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
 COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
+OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
+RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
+SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
+
+# Document source sites
 EPSTEINIFY_URL = 'https://epsteinify.com'
 EPSTEIN_MEDIA_URL = 'https://epstein.media'
 EPSTEIN_WEB_URL = 'https://epsteinweb.org'
 JMAIL_URL = 'https://jmail.world'
-OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
-RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
-SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
 
 DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
     EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
@@ -61,7 +47,6 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
 }
 
 
-# TODO: epsteinify.com seems to be down as of 2025-12-30, switched to epstein.web for links
 epsteinify_api_url = lambda file_id: f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"
 epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
 epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(external_doc_link_markup(filename_or_id, style))

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/env.py

@@ -6,9 +6,9 @@ from sys import argv
 
 from epstein_files.util.logging import datefinder_logger, env_log_level, logger
 
-COUNT_WORDS_SCRIPT = '
+COUNT_WORDS_SCRIPT = 'epstein_word_count'
 DEFAULT_WIDTH = 145
-HTML_SCRIPTS = ['epstein_generate',
+HTML_SCRIPTS = ['epstein_generate', COUNT_WORDS_SCRIPT]
 
 
 parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
@@ -19,8 +19,9 @@ output = parser.add_argument_group('OUTPUT')
 output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
 output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
 output.add_argument('--build', '-b', action='store_true', help='write output to HTML file')
-output.add_argument('--make-clean',
+output.add_argument('--make-clean', action='store_true', help='delete all HTML build artifact and write latest URLs to .urls.env')
 output.add_argument('--output-emails', '-oe', action='store_true', help='generate other files section')
+output.add_argument('--output-json-files', action='store_true', help='pretty print all the raw JSON data files in the collection')
 output.add_argument('--output-other-files', '-oo', action='store_true', help='generate other files section')
 output.add_argument('--output-texts', '-ot', action='store_true', help='generate other files section')
 output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/output.py

@@ -1,12 +1,14 @@
+import json
+
 from rich.padding import Padding
 
 from epstein_files.documents.email import Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.epstein_files import EpsteinFiles, count_by_month
-from epstein_files.util.constant
-from epstein_files.util.constant import urls
+from epstein_files.util.constant import output_files
 from epstein_files.util.constant.html import *
 from epstein_files.util.constant.names import *
+from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
 from epstein_files.util.data import dict_sets_to_lists
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.logging import log_file_write, logger
@@ -107,6 +109,20 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
     return len(already_printed_emails)
 
 
+def print_json_files(epstein_files: EpsteinFiles):
+    if args.build:
+        json_data = {json_file.url_slug: json_file.json_data() for json_file in epstein_files.json_files}
+
+        with open(JSON_FILES_JSON_PATH, 'w') as f:
+            f.write(json.dumps(json_data, sort_keys=True))
+            log_file_write(JSON_FILES_JSON_PATH)
+    else:
+        for json_file in epstein_files.json_files:
+            console.line(2)
+            console.print(json_file.description_panel())
+            console.print_json(json_file.json_str(), indent=4, sort_keys=False)
+
+
 def print_json_metadata(epstein_files: EpsteinFiles) -> None:
     json_str = epstein_files.json_metadata()
 
@@ -146,10 +162,10 @@ def print_text_messages(epstein_files: EpsteinFiles) -> None:
 
 def write_urls() -> None:
     """Write _URL style constant variables to a file bash scripts can load as env vars."""
-    url_vars = {
-
-
-
+    url_vars = {k: v for k, v in vars(output_files).items() if k.endswith('URL') and not k.startswith('GH')}
+
+    if not args.suppress_output:
+        console.line()
 
     with open(URLS_ENV, 'w') as f:
         for var_name, url in url_vars.items():
@@ -160,7 +176,9 @@ def write_urls() -> None:
 
             f.write(f"{key_value}\n")
 
-
+    if not args.suppress_output:
+        console.line()
+
     logger.warning(f"Wrote {len(url_vars)} URL variables to '{URLS_ENV}'\n")
 
 

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/rich.py

@@ -14,7 +14,8 @@ from rich.theme import Theme
 
 from epstein_files.util.constant.html import CONSOLE_HTML_FORMAT, HTML_TERMINAL_THEME, PAGE_TITLE
 from epstein_files.util.constant.names import UNKNOWN
-from epstein_files.util.constant.
+from epstein_files.util.constant.output_files import SITE_URLS
+from epstein_files.util.constant.strings import DEFAULT, EMAIL, NA, QUESTION_MARKS, TEXT_MESSAGE, SiteType
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import FALLBACK_TIMESTAMP, HEADER_ABBREVIATIONS
 from epstein_files.util.data import json_safe
@@ -31,11 +32,22 @@ GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
 DEFAULT_NAME_STYLE = 'gray46'
 KEY_STYLE='honeydew2 bold'
 SECTION_HEADER_STYLE = 'bold white on blue3'
-SOCIAL_MEDIA_LINK_STYLE = '
+SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
 SUBSTACK_POST_LINK_STYLE = 'bright_cyan'
 SYMBOL_STYLE = 'grey70'
+TABLE_BORDER_STYLE = 'grey46'
+TABLE_TITLE_STYLE = f"gray85 italic"
 TITLE_STYLE = 'black on bright_white bold'
 
+AUX_SITE_LINK_STYLE = 'dark_orange3'
+OTHER_SITE_LINK_STYLE = 'dark_goldenrod'
+
+DEFAULT_TABLE_KWARGS = {
+    'border_style': TABLE_BORDER_STYLE,
+    'header_style': "bold",
+    'title_style': TABLE_TITLE_STYLE,
+}
+
 HIGHLIGHTED_GROUP_COLOR_KEYS = [
     Text(highlight_group.label.replace('_', ' '), style=highlight_group.style)
     for highlight_group in sorted(HIGHLIGHTED_NAMES, key=lambda hg: hg.label)
@@ -79,7 +91,11 @@ def build_highlighter(pattern: str) -> EpsteinHighlighter:
     return TempHighlighter()
 
 
-def join_texts(txts: list[Text], join: str = ' ', encloser: str = '') -> Text:
+def build_table(title: str | None, **kwargs) -> Table:
+    return Table(title=title, **{**DEFAULT_TABLE_KWARGS, **kwargs})
+
+
+def join_texts(txts: list[Text], join: str = ' ', encloser: str = '', encloser_style: str = 'wheat4') -> Text:
     """Join rich.Text objs into one."""
     if encloser:
         if len(encloser) != 2:
@@ -91,8 +107,9 @@ def join_texts(txts: list[Text], join: str = ' ', encloser: str = '') -> Text:
 
     txt = Text('')
 
-    for i,
-        txt.append(join if i >= 1 else '').append(enclose_start)
+    for i, _txt in enumerate(txts):
+        txt.append(join if i >= 1 else '').append(enclose_start, style=encloser_style)
+        txt.append(_txt).append(enclose_end, style=encloser_style)
 
     return txt
 
@@ -132,7 +149,7 @@ def print_centered_link(url: str, link_text: str, style: str | None = None) -> N
 
 
 def print_color_key() -> None:
-    color_table =
+    color_table = build_table('Rough Guide to Highlighted Colors', show_header=False)
     num_colors = len(HIGHLIGHTED_GROUP_COLOR_KEYS)
     row_number = 0
 
@@ -164,7 +181,7 @@ def print_header(epstein_files: 'EpsteinFiles') -> None:
     print_centered(f"if you think there's an attribution error or can deanonymize an {UNKNOWN} contact {CRYPTADAMUS_TWITTER}", 'grey46')
     print_centered('note this site is based on the OCR text provided by Congress which is not always the greatest', 'grey23')
     print_centered(f"(thanks to {link_markup('https://x.com/ImDrinknWyn', '@ImDrinknWyn', 'dodger_blue3')} + others for help attributing redacted emails)")
-    print_centered_link(
+    print_centered_link(JSON_METADATA_URL, "(explanations of author attributions)", style='magenta')
 
 
 def print_json(label: str, obj: object, skip_falsey: bool = False) -> None:
@@ -233,17 +250,18 @@ def print_other_site_link(is_header: bool = True) -> None:
     print_centered(parenthesize(Text.from_markup(markup_msg)), style='bold')
 
     if is_header:
-
-        print_centered(parenthesize(metadata_link))
-        word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words', OTHER_SITE_LINK_STYLE)
+        word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
         print_centered(parenthesize(word_count_link))
-
+        metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
+        print_centered(parenthesize(metadata_link))
+        json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
+        print_centered(parenthesize(json_link))
 
 
 def print_page_title(expand: bool = True, width: int | None = None) -> None:
     title_panel = Panel(Text(PAGE_TITLE, justify='center'), expand=expand, style=TITLE_STYLE, width=width)
     console.print(Align.center(vertically_pad(title_panel)))
-
+    _print_social_media_links()
     console.line(2)
 
 
@@ -265,19 +283,6 @@ def print_section_header(msg: str, style: str = SECTION_HEADER_STYLE, is_centere
     console.print(Padding(panel, (3, 0, 1, 0)))
 
 
-def print_social_media_links() -> None:
-    print_centered_link(SUBSTACK_URL, "I Made Epstein's Text Messages Great Again (And You Should Read Them)", style=f'{SUBSTACK_POST_LINK_STYLE} bold')
-    print_centered_link(SUBSTACK_URL, SUBSTACK_URL.removeprefix('https://'), style=f'{SUBSTACK_POST_LINK_STYLE} dim')
-
-    social_links = [
-        link_text_obj('https://x.com/Cryptadamist/status/1990866804630036988', '@cryptadamist', style=SOCIAL_MEDIA_LINK_STYLE),
-        link_text_obj('https://cryptadamus.substack.com/', 'substack', style=SOCIAL_MEDIA_LINK_STYLE),
-        link_text_obj('https://universeodon.com/@cryptadamist/115572634993386057', 'mastodon', style=SOCIAL_MEDIA_LINK_STYLE),
-    ]
-
-    print_centered(join_texts(social_links, join=' ', encloser='[]'))
-
-
 def print_starred_header(msg: str, num_stars: int = 7, num_spaces: int = 2, style: str = TITLE_STYLE) -> None:
     stars = '*' * num_stars
     spaces = ' ' * num_spaces
@@ -317,7 +322,7 @@ def write_html(output_path: Path) -> None:
 
 
 def _print_abbreviations_table() -> None:
-    table =
+    table = build_table(title="Abbreviations Used Frequently In These Conversations", show_header=False)
     table.add_column("Abbreviation", justify="center", style='bold')
     table.add_column("Translation", style="white", justify="center")
 
@@ -329,7 +334,7 @@ def _print_abbreviations_table() -> None:
 
 def _print_external_links() -> None:
     console.line()
-
+    print_centered(Text('External Links', style=TABLE_TITLE_STYLE))
     presser_link = link_text_obj(OVERSIGHT_REPUBLICANS_PRESSER_URL, 'Official Oversight Committee Press Release')
     raw_docs_link = join_texts([link_text_obj(RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL, 'raw files', style=f"{ARCHIVE_LINK_COLOR} dim")], encloser='()')
     print_centered(join_texts([presser_link, raw_docs_link]))
@@ -341,5 +346,24 @@ def _print_external_links() -> None:
     print_centered(link_markup(EPSTEIN_MEDIA_URL) + " (raw document images)")
 
 
+def _print_social_media_links() -> None:
+    print_centered_link(
+        SUBSTACK_URL,
+        "I Made Epstein's Text Messages Great Again (And You Should Read Them)",
+        style=f'{SUBSTACK_POST_LINK_STYLE} bold'
+    )
+
+    print_centered_link(SUBSTACK_URL, SUBSTACK_URL.removeprefix('https://'), style=f'{SUBSTACK_POST_LINK_STYLE} dim')
+
+    social_links = [
+        link_text_obj('https://universeodon.com/@cryptadamist/115572634993386057', '@mastodon', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj(SUBSTACK_URL, '@substack', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj('https://x.com/Cryptadamist/status/1990866804630036988', '@twitter', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj('https://github.com/michelcrypt4d4mus/epstein_text_messages', '@github', style=SOCIAL_MEDIA_LINK_STYLE)
+    ]
+
+    print_centered(join_texts(social_links, join=' / '))#, encloser='()'))#, encloser='‹›'))
+
+
 # if args.deep_debug:
 #     print_json('THEME_STYLES', THEME_STYLES)

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/word_count.py

@@ -20,6 +20,14 @@ from epstein_files.util.search_result import SearchResult
 FIRST_AND_LAST_NAMES = flatten([n.split() for n in ALL_NAMES])
 FIRST_AND_LAST_NAMES = [n.lower() for n in FIRST_AND_LAST_NAMES] + OTHER_NAMES
 
+HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
+HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
+OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
+ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
+SYMBOL_WORD_REGEX = re.compile(r"^[-—–@%/?.,&=]+$")
+SPLIT_WORDS_BY = ['@', '/']
+FLAGGED_WORDS = [] # For debugging, log extra info when one of these is encountered
+
 NON_SINGULARIZABLE = UNSINGULARIZABLE_WORDS + [n for n in FIRST_AND_LAST_NAMES if n.endswith('s')]
 SKIP_WORDS_REGEX = re.compile(r"^(asmallworld@|enwiki|http|imagepng|nymagcomnymetro|addresswww|mailto|www|/font|colordu|classdms|targetdblank|nymagcom|palmbeachdailynews)|jee[vy]acation|fontfamily|(gif|html?|jpe?g|utm)$")
 BAD_CHARS_REGEX = re.compile(r"[-–=+()$€£©°«—^&%!#_`,.;:'‘’\"„“”?\d\\]")
@@ -100,21 +108,13 @@ SINGULARIZATIONS = {
     'twittercom': 'twitter',
 }
 
-HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
-HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
-OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
-SYMBOL_WORD_REGEX = re.compile(r"^[-—–@%/?.,&=]+$")
-ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
-SPLIT_WORDS_BY = ['@', '/']
-FLAGGED_WORDS = [] # For debugging, log extra info when one of these is encountered
-
 
 @dataclass
 class WordCount:
     count: dict[str, int] = field(default_factory=lambda: defaultdict(int))
     singularized: dict[str, int] = field(default_factory=lambda: defaultdict(int))
 
-    def
+    def tally_word(self, word: str, document_line: SearchResult) -> None:
         word = EmailHeader.cleanup_str(word).lower().strip()
         raw_word = word
 
@@ -148,7 +148,7 @@ class WordCount:
                 continue
 
             for w in word.split(symbol):
-                self.
+                self.tally_word(w, document_line)
 
             logger.info(f" Split word with '{symbol}' in it '{word}'...")
             return

{epstein_files-1.0.5 → epstein_files-1.0.6}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "epstein-files"
-version = "1.0.5"
+version = "1.0.6"
 description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
 authors = ["Michel de Cryptadamus"]
 readme = "README.md"
@@ -44,10 +44,10 @@ pytest = "^9.0.1"
 
 [tool.poetry.scripts]
 epstein_diff = 'epstein_files:epstein_diff'
-epstein_dump_urls = 'epstein_files:epstein_dump_urls'
 epstein_generate = 'epstein_files:generate_html'
 epstein_search = 'epstein_files:epstein_search'
 epstein_show = 'epstein_files:epstein_show'
+epstein_word_count = 'epstein_files:epstein_word_count'
 
 
 [tool.poetry.urls]

The 17 remaining files listed above with +0 -0 are unchanged; only their enclosing directory was renamed from epstein_files-1.0.5 to epstein_files-1.0.6.