PyPI - epstein-files - Versions diffs - 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

epstein_files/__init__.py +26 -17
epstein_files/documents/communication.py +10 -14
epstein_files/documents/document.py +5 -1
epstein_files/documents/email.py +164 -78
epstein_files/documents/imessage/text_message.py +42 -25
epstein_files/documents/messenger_log.py +31 -12
epstein_files/documents/other_file.py +13 -12
epstein_files/epstein_files.py +19 -80
epstein_files/util/constant/common_words.py +3 -3
epstein_files/util/constant/html.py +13 -6
epstein_files/util/constant/names.py +10 -7
epstein_files/util/constant/output_files.py +3 -0
epstein_files/util/constant/strings.py +6 -2
epstein_files/util/constant/urls.py +1 -1
epstein_files/util/constants.py +18 -22
epstein_files/util/env.py +46 -36
epstein_files/util/file_helper.py +1 -2
epstein_files/util/highlighted_group.py +1007 -187
epstein_files/util/logging.py +8 -1
epstein_files/util/output.py +166 -51
epstein_files/util/rich.py +55 -79
epstein_files/util/timer.py +1 -1
epstein_files/util/word_count.py +3 -4
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/METADATA +1 -1
epstein_files-1.1.2.dist-info/RECORD +33 -0
epstein_files-1.0.16.dist-info/RECORD +0 -33
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/LICENSE +0 -0
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/WHEEL +0 -0
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/entry_points.txt +0 -0

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import re
-from dataclasses import dataclass
+from dataclasses import dataclass, field, fields
 from datetime import datetime
 from rich.text import Text
 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
 from epstein_files.util.constant.strings import TIMESTAMP_DIM
-from epstein_files.util.data import extract_last_name
+from epstein_files.util.data import extract_last_name, iso_timestamp
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import TEXT_LINK, highlighter
@@ -30,7 +30,7 @@ class TextMessage:
     """Class representing a single iMessage text message."""
     author: str | None
     author_str: str = ''
-    id_confirmed: bool = False
+    is_id_confirmed: bool = False
     text: str
     timestamp_str: str
@@ -44,38 +44,55 @@ class TextMessage:
         else:
             self.author_str = self.author_str or self.author
-        if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
+        if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
             self.author_str += ' (?)'
-    def timestamp(self) -> datetime:
-        return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
+        if self.is_link():
+            self.text = self.text.replace('\n', '').replace(' ', '_')
+        else:
+            self.text = self.text.replace('\n', ' ')
-    def _message(self) -> Text:
-        lines = self.text.split('\n')
+    def is_link(self) -> bool:
+        return self.text.startswith('http')
-        # Fix multiline links
-        if self.text.startswith('http'):
-            text = self.text
+    def parse_timestamp(self) -> datetime:
+        return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
-            if len(lines) > 1 and not lines[0].endswith('html'):
-                if len(lines) > 2 and lines[1].endswith('-'):
-                    text = text.replace('\n', '', 2)
-                else:
-                    text = text.replace('\n', '', 1)
+    def timestamp_txt(self) -> Text:
+        timestamp_str = self.timestamp_str
-            lines = text.split('\n')
-            link_text = lines.pop()
-            msg_txt = Text('').append(Text.from_markup(f"[link={link_text}]{link_text}[/link]", style=TEXT_LINK))
+        try:
+            timestamp_str = iso_timestamp(self.parse_timestamp())
+        except Exception as e:
+            logger.warning(f"Failed to parse timestamp for {self}")
-            if len(lines) > 0:
-                msg_txt.append('\n' + ' '.join(lines))
-        else:
-            msg_txt = highlighter(' '.join(lines))  # remove newlines
+        return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
-        return msg_txt
+    def _message(self) -> Text:
+        if self.is_link():
+            return Text.from_markup(f"[link={self.text}]{self.text}[/link]", style=TEXT_LINK)
+        else:
+            return highlighter(self.text)
     def __rich__(self) -> Text:
-        timestamp_txt = Text(f"[{self.timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
+        timestamp_txt = self.timestamp_txt().append(' ')
         author_style = get_style_for_name(self.author_str if self.author_str.startswith('+') else self.author)
         author_txt = Text(self.author_str, style=author_style)
         return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(self._message())
+    def __repr__(self) -> str:
+        props = []
+        add_prop = lambda k, v: props.append(f"{k}={v}")
+        for _field in sorted(fields(self), key=lambda f: f.name):
+            key = _field.name
+            value = getattr(self, key)
+            if key == 'author_str' and self.author and self.author_str.startswith(value):
+                continue
+            elif isinstance(value, str):
+                add_prop(key, f'"{value}"')
+            else:
+                add_prop(key, value)
+        return f"{type(self).__name__}(" + ', '.join(props) + f')'

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -2,7 +2,7 @@ import logging
 import re
 from collections import defaultdict
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timedelta
 from rich.console import Console, ConsoleOptions, RenderResult
 from rich.table import Table
@@ -36,10 +36,10 @@ class MessengerLog(Communication):
         self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
     def first_message_at(self, name: str | None) -> datetime:
-        return self.messages_by(name)[0].timestamp()
+        return self.messages_by(name)[0].parse_timestamp()
     def info_txt(self) -> Text | None:
-        num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
+        num_days_str = days_between_str(self.timestamp, self.messages[-1].parse_timestamp())
         txt = Text(f"(Covers {num_days_str} starting ", style='dim')
         txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
@@ -47,7 +47,7 @@ class MessengerLog(Communication):
             txt.append('with unknown counterparty')
         else:
             txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
-            txt.append(Text(self.author, style=self.author_style + ' bold'))
+            txt.append(Text(self.author, style=self.author_style() + ' bold'))
         if self.phone_number:
             txt.append(highlighter(f" using the phone number {self.phone_number}"))
@@ -55,7 +55,7 @@ class MessengerLog(Communication):
         return txt.append(')')
     def last_message_at(self, name: str | None) -> datetime:
-        return self.messages_by(name)[-1].timestamp()
+        return self.messages_by(name)[-1].parse_timestamp()
     def messages_by(self, name: str | None) -> list[TextMessage]:
         """Return all messages by 'name'."""
@@ -71,7 +71,7 @@ class MessengerLog(Communication):
         return metadata
     def _border_style(self) -> str:
-        return self.author_style
+        return self.author_style()
     def _build_message(self, match: re.Match) -> TextMessage:
         """Turn a regex match into a TextMessage."""
@@ -86,7 +86,7 @@ class MessengerLog(Communication):
         return TextMessage(
             author=self.author if (is_phone_number or not author_str) else author_str,
             author_str=author_str if is_phone_number else '',  # Preserve phone numbers
-            id_confirmed=not self.is_attribution_uncertain(),
+            is_id_confirmed=not self.is_attribution_uncertain(),
             text=match.group(4).strip(),
             timestamp_str=match.group(2).strip(),
         )
@@ -96,12 +96,31 @@ class MessengerLog(Communication):
             message = self._build_message(match)
             try:
-                return message.timestamp()
+                return message.parse_timestamp()
             except ValueError as e:
                 logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
         raise RuntimeError(f"{self}: No timestamp found!")
+    def _set_message_timestamps(self) -> None:
+        raise NotImplementedError(f"TextMessage.timestamp no longer exists")
+        last_message: TextMessage | None = None
+        for i, message in enumerate(self.messages):
+            try:
+                message.timestamp = message.parse_timestamp()
+            except Exception as e:
+                msg = f"Failed to parse timestamp for TextMessage {i + 1}, {message}: {e}"
+                if i == 0:
+                    message.timestamp = self.timestamp
+                    self.warn(f"{msg}\nit's the first message so using the MessengerLog timestamp property {self.timestamp}")
+                else:
+                    message.timestamp = last_message.timestamp + timedelta(milliseconds=1)
+                    self.warn(f"{msg}\nadding 1 millisecond to last timestamp {last_message.timestamp}")
+            last_message = message
     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         yield self.file_info_panel()
         yield Text('')
@@ -126,13 +145,13 @@ class MessengerLog(Communication):
         author_counts = cls.count_authors(log_files)
         msg_count = sum([len(log.messages) for log in log_files])
-        footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
-        counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
+        footer = f"deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
+        counts_table = build_table("Text Message Counts By Author", caption=f"({footer} {len(log_files)} files)")
         counts_table.add_column(AUTHOR.title(), justify='left', width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
-        counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
+        counts_table.add_column('First Sent At', justify='center', highlight=True)
+        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE)
         counts_table.add_column('Days', justify='right', style='dim')
         for name, count in sort_dict(author_counts):

epstein_files/documents/other_file.py CHANGED Viewed

@@ -21,8 +21,8 @@ from epstein_files.util.doc_cfg import DocCfg, Metadata
 from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
-from epstein_files.util.highlighted_group import styled_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
+from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
+from epstein_files.util.rich import build_table, highlighter
 from epstein_files.util.logging import logger
 FIRST_FEW_LINES = 'First Few Lines'
@@ -105,7 +105,7 @@ class OtherFile(Document):
         return self.config and self.config.category
     def category_txt(self) -> Text | None:
-        return styled_category(self.category() or UNKNOWN)
+        return styled_category(self.category())
     def config_description(self) -> str | None:
         """Overloads superclass method."""
@@ -184,7 +184,7 @@ class OtherFile(Document):
                     if len(timestamps) >= MAX_EXTRACTED_TIMESTAMPS:
                         break
             except ValueError as e:
-                self.log(f"Error while iterating through datefinder.find_dates(): {e}", logging.WARNING)
+                self.warn(f"Error while iterating through datefinder.find_dates(): {e}")
         if len(timestamps) == 0:
             if not (self.is_duplicate() or VAST_HOUSE in self.text):
@@ -210,7 +210,7 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
     @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
+    def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         counts = defaultdict(int)
         category_bytes = defaultdict(int)
@@ -221,7 +221,8 @@ class OtherFile(Document):
             counts[file.category()] += 1
             category_bytes[file.category()] += file.file_size()
-        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[-1].justify = 'right'
         table.columns[0].min_width = 14
         table.columns[-1].style = 'dim'
@@ -230,7 +231,7 @@ class OtherFile(Document):
             known_author_count = Document.known_author_count(category_files)
             table.add_row(
-                styled_category(category or UNKNOWN),
+                styled_category(category),
                 str(count),
                 str(known_author_count),
                 str(count - known_author_count),
@@ -240,13 +241,13 @@ class OtherFile(Document):
         return table
     @staticmethod
-    def files_preview_table(files: Sequence['OtherFile']) -> Table:
+    def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table('Other Files Details', show_lines=True)
+        table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
-        table.add_column('Size', justify='center')
-        table.add_column('Type', justify='center')
+        table.add_column('Size', justify='right', style='dim')
+        table.add_column('Category', justify='center')
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
         for file in files:
@@ -263,7 +264,7 @@ class OtherFile(Document):
             table.add_row(
                 Group(*link_and_info),
-                Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
+                Text(date_str, style=TIMESTAMP_STYLE) if date_str else QUESTION_MARKS_TXT,
                 file.file_size_str(),
                 file.category_txt(),
                 preview_text,

epstein_files/epstein_files.py CHANGED Viewed

@@ -8,45 +8,35 @@ from datetime import datetime
 from pathlib import Path
 from typing import Sequence, Type
-from rich.align import Align
 from rich.padding import Padding
 from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import DETECT_EMAIL_REGEX, JUNK_EMAILERS, KRASSNER_RECIPIENTS, USELESS_EMAILERS, Email
+from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
 from epstein_files.documents.emails.email_header import AUTHOR
 from epstein_files.documents.json_file import JsonFile
 from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
-from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
-     epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
+from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
-     print_other_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
-     print_panel, print_section_header, vertically_pad)
+from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
+     print_author_panel, print_centered, print_subtitle_panel)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
-EXCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
-PICKLED_PATH = Path("the_epstein_files.pkl.gz")
+EXCLUDED_EMAILERS = USELESS_EMAILERS + [JEFFREY_EPSTEIN]
+DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
 DEVICE_SIGNATURE = 'Device Signature'
 DEVICE_SIGNATURE_PADDING = (1, 0)
+PICKLED_PATH = Path("the_epstein_files.pkl.gz")
 SLOW_FILE_SECONDS = 1.0
-INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
-    'ACT for America',
-    'BS Stern',
-    INTELLIGENCE_SQUARED,
-    UNKNOWN,
-]
 @dataclass
 class EpsteinFiles:
@@ -128,7 +118,7 @@ class EpsteinFiles:
     def all_emailers(self, include_useless: bool = False) -> list[str | None]:
         """Returns all emailers except Epstein and EXCLUDED_EMAILERS, sorted from least frequent to most."""
         names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
-        names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
+        names = names if include_useless else [e for e in names if e not in EXCLUDED_EMAILERS]
         return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
     def docs_matching(
@@ -177,7 +167,7 @@ class EpsteinFiles:
     def emails_for(self, author: str | None) -> list[Email]:
         """Returns emails to or from a given 'author' sorted chronologically."""
-        emails = self.emails if author == EVERYONE else (self.emails_by(author) + self.emails_to(author))
+        emails = self.emails_by(author) + self.emails_to(author)
         if len(emails) == 0:
             raise RuntimeError(f"No emails found for '{author}'")
@@ -230,6 +220,7 @@ class EpsteinFiles:
     def print_files_summary(self) -> None:
         table = build_table('Summary of Document Types')
         add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
+        table.columns[1].justify = 'right'
         def add_row(label: str, docs: list):
             known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
@@ -246,18 +237,19 @@ class EpsteinFiles:
         add_row('iMessage Logs', self.imessage_logs)
         add_row('JSON Data', self.json_files)
         add_row('Other', self.non_json_other_files())
-        console.print(Align.center(table))
+        print_centered(table)
         console.line()
     def print_emails_for(self, _author: str | None) -> list[Email]:
         """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
-        conversation_length = self.email_conversation_length_in_days(_author)
         emails = self.emails_for(_author)
+        num_days = self.email_conversation_length_in_days(_author)
         unique_emails = [email for email in emails if not email.is_duplicate()]
+        start_date = emails[0].timestamp.date()
         author = _author or UNKNOWN
-        print_author_header(
-            f"Found {len(unique_emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
+        print_author_panel(
+            f"Found {len(unique_emails)} emails to/from {author} starting {start_date} covering {num_days:,} days",
             get_style_for_name(author),
             get_info_for_name(author)
         )
@@ -280,54 +272,13 @@ class EpsteinFiles:
     def print_emails_table_for(self, author: str | None) -> None:
         emails = [email for email in self.emails_for(author) if not email.is_duplicate()]  # Remove dupes
-        console.print(Align.center(Email.build_table(emails, author)), '\n')
+        print_centered(Email.build_emails_table(emails, author))
+        console.line()
     def print_email_device_info(self) -> None:
-        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
-        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
+        print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE, padding=(2, 0, 0, 0), centered=True)
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
-    def table_of_emailers(self) -> Table:
-        attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
-        footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
-        counts_table = build_table("Email Counts", caption=footer)
-        add_cols_to_table(counts_table, [
-            'Name',
-            'Num',
-            'Sent',
-            "Recv",
-            {'name': 'First', 'highlight': True},
-            {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
-            JMAIL,
-            'eMedia',
-            'eWeb',
-            'Twitter',
-        ])
-        emailer_counts = {
-            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
-            for emailer in self.all_emailers(True)
-        }
-        for name, count in sort_dict(emailer_counts):
-            style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
-            emails = self.emails_for(name)
-            counts_table.add_row(
-                Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
-                str(count),
-                str(self.email_author_counts[name]),
-                str(self.email_recipient_counts[name]),
-                emails[0].timestamp_without_seconds(),
-                emails[-1].timestamp_without_seconds(),
-                link_text_obj(search_jmail_url(name), JMAIL) if name else '',
-                link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
-                link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
-                link_text_obj(search_twitter_url(name), 'search X') if name else '',
-            )
-        return counts_table
+        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
     def _tally_email_data(self) -> None:
         """Tally up summary info about Email objects."""
@@ -373,18 +324,6 @@ def document_cls(doc: Document) -> Type[Document]:
         return OtherFile
-def is_ok_for_epstein_web(name: str | None) -> bool:
-    """Return True if it's likely that EpsteinWeb has a page for this name."""
-    if name is None or ' ' not in name:
-        return False
-    elif '@' in name or '/' in name or '??' in name:
-        return False
-    elif name in INVALID_FOR_EPSTEIN_WEB:
-        return False
-    return True
 def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
     title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
     table = build_table(title, header_style="bold reverse", show_lines=True)

epstein_files/util/constant/common_words.py CHANGED Viewed

@@ -89,6 +89,6 @@ UNSINGULARIZABLE_WORDS = """
 """.strip().split()
-if args.deep_debug:
-    word_str = '\n'.join(COMMON_WORDS_LIST)
-    print(f"common words:\n\n{word_str}")
+# if args.deep_debug:
+#     word_str = '\n'.join(COMMON_WORDS_LIST)
+#     print(f"common words:\n\n{word_str}")

epstein_files/util/constant/html.py CHANGED Viewed

@@ -5,7 +5,16 @@ from epstein_files.util.env import args
 PAGE_TITLE = '   ∞ Michel de Cryptadamus ∞   '
-CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
+if args.all_emails:
+    page_type = 'Emails'
+elif args.email_timeline:
+    page_type = 'Chronological Emails'
+else:
+    page_type = 'Text Messages'
+CONSOLE_HTML_FORMAT = """
+<!DOCTYPE html>
 <html>
 <head>
     <meta charset="UTF-8">
@@ -14,17 +23,15 @@ CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
     <style>
         {stylesheet}
         body {{
-            color: {foreground};
             background-color: {background};
+            color: {foreground};
         }}
     </style>
-""" + f"<title>Epstein {'Emails' if args.all_emails else 'Text Messages'}</title>" + """
+""" + f"<title>Epstein {page_type}</title>" + """
 </head>
 <body>
     <pre style="font-family: Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace; white-space: pre-wrap; overflow-wrap: break-word;">
-        <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">
-            {code}
-        </code>
+        <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">{code}</code>
     </pre>
 </body>
 </html>

epstein_files/util/constant/names.py CHANGED Viewed

@@ -166,6 +166,7 @@ TOM_BARRACK = 'Tom Barrack'
 TOM_PRITZKER = 'Tom Pritzker'
 TONJA_HADDAD_COLEMAN = 'Tonja Haddad Coleman'
 TYLER_SHEARS = 'Tyler Shears'  # Reputation manager, like Al Seckel
+VINCENZO_IOZZO = 'Vincenzo Iozzo'
 VINIT_SAHNI = 'Vinit Sahni'
 ZUBAIR_KHAN = 'Zubair Khan'
@@ -197,9 +198,11 @@ GOLDMAN_SACHS = 'Goldman Sachs'
 GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
 HARVARD = 'Harvard'
 INSIGHTS_POD = f"InsightsPod"  # Zubair bots
+MIT_MEDIA_LAB = 'MIT Media Lab'
 NEXT_MANAGEMENT = 'Next Management LLC'
 JP_MORGAN = 'JP Morgan'
 OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP"  # Ian Osborne's PR firm
+ROTHSTEIN_ROSENFELDT_ADLER = 'Rothstein Rosenfeldt Adler (Rothstein was a crook & partner of Roger Stone)'
 TRUMP_ORG = 'Trump Organization'
 UBS = 'UBS'
@@ -231,26 +234,26 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
 # Names to color white in the word counts
 OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
-    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
+    baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
     danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
-    edmond elizabeth emily entwistle erik evelyn
+    edmond elizabeth emily enterprises entwistle erik evelyn
     ferguson flachsbart francis franco frank frost
-    gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
+    gardner gary geoff geoffrey gerald gilbert gloria gold goldberg gonzalez gould graham greene guarino gwyneth
     hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
     ian isaac isaacson
-    james jamie jane janet jason jen jim joe johnson jones josh julie justin
+    james jamie jane janet jason jeffrey jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
     laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
     nancy nicole nussbaum
     owen
-    paulson philippe
-    rafael ray richard richardson rob robin ron rubin rudolph ryan
+    paulson peter philippe
+    rafael ray richard richardson rob robert robin ron rubin rudolph ryan
     sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
     ted theresa thompson tiffany timothy tony
     valeria
-    walter warren weinstein weiss william
+    walter warren waters weinstein weiss william
     zach zack
 """.strip().split()

epstein_files/util/constant/output_files.py CHANGED Viewed

@@ -6,6 +6,7 @@ from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
 HTML_DIR = Path('docs')
 EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
 ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
+CHRONOLOGICAL_EMAILS_PATH = HTML_DIR.joinpath(f'chronological_emails_{EPSTEIN_FILES_NOV_2025}.html')
 JSON_FILES_JSON_PATH = HTML_DIR.joinpath(f'json_files_from_{EPSTEIN_FILES_NOV_2025}.json')
 JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
 TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
@@ -18,6 +19,7 @@ URLS_ENV = '.urls.env'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
 TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
+CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
 JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
 WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
@@ -29,6 +31,7 @@ SITE_URLS: dict[SiteType, str] = {
 BUILD_ARTIFACTS = [
     ALL_EMAILS_PATH,
+    CHRONOLOGICAL_EMAILS_PATH,
     # EPSTEIN_WORD_COUNT_HTML_PATH,
     JSON_FILES_JSON_PATH,
     JSON_METADATA_PATH,

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -11,7 +11,7 @@ BUSINESS = 'business'
 CONFERENCE = 'conference'
 ENTERTAINER = 'entertainer'
 FINANCE = 'finance'
-FLIGHT_LOGS = 'flight logs'
+FLIGHT_LOG = 'flight log'
 JOURNALIST = 'journalist'
 JUNK = 'junk'
 LEGAL = 'legal'
@@ -56,7 +56,6 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 # Misc
 AUTHOR = 'author'
 DEFAULT = 'default'
-EVERYONE = 'everyone'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
 JSON = 'json'
 NA = 'n/a'
@@ -77,3 +76,8 @@ OTHER_FILE_CLASS = 'OtherFile'
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name)
+def indented(s: str, spaces: int = 4) -> str:
+    indent = ' ' * spaces
+    return indent + f"\n{indent}".join(s.split('\n'))

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -73,7 +73,7 @@ search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(tx
 def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
     file_stem = coerce_file_stem(filename_or_id)
-    file_stem = file_stem.lower() if case == 'lower' else file_stem
+    file_stem = file_stem.lower() if case == 'lower' or EPSTEIN_MEDIA in base_url else file_stem
     file_stem = file_stem.title() if case == 'title' else file_stem
     return f"{base_url}{file_stem}"

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl