PyPI - epstein-files - Versions diffs - 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl - Mend

epstein-files 1.0.11-py3-none-any.whl → 1.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

epstein_files/__init__.py +3 -3
epstein_files/documents/communication.py +2 -2
epstein_files/documents/document.py +43 -69
epstein_files/documents/email.py +48 -6
epstein_files/documents/imessage/text_message.py +1 -1
epstein_files/documents/json_file.py +1 -1
epstein_files/documents/messenger_log.py +3 -3
epstein_files/documents/other_file.py +2 -2
epstein_files/epstein_files.py +27 -12
epstein_files/util/constant/names.py +12 -9
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constant/urls.py +13 -8
epstein_files/util/constants.py +21 -15
epstein_files/util/data.py +1 -1
epstein_files/util/doc_cfg.py +20 -42
epstein_files/util/file_helper.py +3 -9
epstein_files/util/highlighted_group.py +32 -21
epstein_files/util/logging.py +1 -1
epstein_files/util/output.py +1 -1
epstein_files/util/rich.py +11 -2
{epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/METADATA +1 -1
epstein_files-1.0.13.dist-info/RECORD +33 -0
epstein_files-1.0.11.dist-info/RECORD +0 -33
{epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/LICENSE +0 -0
{epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/WHEEL +0 -0
{epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/entry_points.txt +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
-     print_text_messages, write_urls)
+from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
+     print_text_messages, write_json_metadata, write_urls)
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -37,7 +37,7 @@ def generate_html() -> None:
     epstein_files = EpsteinFiles.get_files(timer)
     if args.json_metadata:
-        print_json_metadata(epstein_files)
+        write_json_metadata(epstein_files)
         exit()
     elif args.json_files:
         print_json_files(epstein_files)

epstein_files/documents/communication.py CHANGED Viewed

@@ -34,9 +34,9 @@ class Communication(Document):
     def is_attribution_uncertain(self) -> bool:
         return bool(self.config and self.config.is_attribution_uncertain)
-    def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
+    def external_links(self, _style: str = '', include_alt_links: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""
-        return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
+        return super().external_links(self.author_style, include_alt_links=include_alt_links)
     def summary(self) -> Text:
         return self._summary().append(CLOSE_PROPERTIES_CHAR)

epstein_files/documents/document.py CHANGED Viewed

@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from pathlib import Path
 from subprocess import run
-from typing import ClassVar, Sequence, TypeVar
+from typing import Callable, ClassVar, Sequence, TypeVar
 from rich.console import Console, ConsoleOptions, Group, RenderResult
 from rich.padding import Padding
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
-from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
-from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
+from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
+from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
 from epstein_files.util.env import DOCS_DIR, args
 from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
      file_size_str, is_local_extract_file)
@@ -31,10 +31,8 @@ INFO_INDENT = 2
 INFO_PADDING = (0, 0, 0, INFO_INDENT)
 MAX_TOP_LINES_LEN = 4000  # Only for logging
 MIN_DOCUMENT_ID = 10477
-LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
-EXTRACTED_FROM = 'Extracted from'
 MIN_TIMESTAMP = datetime(1991, 1, 1)
 MID_TIMESTAMP = datetime(2007, 1, 1)
 MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -96,15 +94,9 @@ class Document:
     def __post_init__(self):
         self.filename = self.file_path.name
         self.file_id = extract_file_id(self.filename)
-        self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
+        self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
-        if self.is_local_extract_file():
-            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
-            extracted_from_doc_id = self.url_slug.split('_')[-1]
-            if extracted_from_doc_id in ALL_FILE_CONFIGS:
-                self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
-        else:
+        if 'url_slug' not in vars(self):
             self.url_slug = self.file_path.stem
         self._set_computed_fields(text=self.text or self._load_file())
@@ -122,28 +114,51 @@ class Document:
     def duplicate_file_txt(self) -> Text:
         """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
-        if not self.config or not self.config.dupe_of_id:
+        if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
             raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
         txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
-        txt.append(f" because it's {self.config.duplicate_reason()} ")
+        txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
         return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
     def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-        """Create a Text obj link to this document on epsteinify.com."""
-        return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_url(epsteinify_doc_url, style, link_txt)
     def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-        """Create a Text obj link to this document on epstein.media."""
-        return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_url(epstein_media_doc_url, style, link_txt)
     def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-        """Create a Text obj link to this document on EpsteinWeb."""
-        return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_url(epstein_web_doc_url, style, link_txt)
+    def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+        return self.external_url(rollcall_doc_url, style, link_txt)
+    def external_url(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+        return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)
+    def external_links(self, style: str = '', include_alt_links: bool = False) -> Text:
+        """Returns colored links to epstein.media and and epsteinweb in a Text object."""
+        txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
+        if args.use_epstein_web:
+            txt.append(self.epstein_web_link(style=style))
+            alt_link = self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)
+        else:
+            txt.append(self.epstein_media_link(style=style))
+            alt_link = self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)
+        if include_alt_links:
+            txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+            txt.append(' (').append(alt_link).append(')')
+            if self._class_name() == 'Email':
+                txt.append(' (').append(self.rollcall_link(style='white dim', link_txt=ROLLCALL)).append(')')
+        return txt
     def file_info_panel(self) -> Group:
         """Panel with filename linking to raw file plus any additional info about the file."""
-        panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
+        panel = Panel(self.external_links(include_alt_links=True), border_style=self._border_style(), expand=False)
         padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
         return Group(*([panel] + padded_info))
@@ -155,12 +170,10 @@ class Document:
     def info(self) -> list[Text]:
         """0 to 2 sentences containing the info_txt() as well as any configured description."""
-        sentences = [
+        return without_falsey([
             self.info_txt(),
             highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
-        ]
-        return without_falsey(sentences)
+        ])
     def info_txt(self) -> Text | None:
         """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
@@ -197,9 +210,9 @@ class Document:
         if self.is_local_extract_file():
             metadata['extracted_file'] = {
-                'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
-                'extracted_from_file': self.url_slug + '.txt',
-                'extracted_file_url': extracted_file_url(self.filename),
+                'explanation': 'Manually extracted from one of the court filings.',
+                'extracted_from': self.url_slug + '.txt',
+                'url': extracted_file_url(self.filename),
             }
         return metadata
@@ -208,25 +221,6 @@ class Document:
         with open(self.file_path) as f:
             return f.read()
-    def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
-        """Returns colored links to epstein.media and and epsteinweb in a Text object."""
-        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
-        if args.use_epstein_web:
-            txt.append(self.epstein_web_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
-        else:
-            txt.append(self.epstein_media_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
-        return txt
     def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
         """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
         for k, v in repairs.items():
@@ -253,7 +247,7 @@ class Document:
         txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
         if self.timestamp:
-            timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
+            timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
@@ -327,26 +321,6 @@ class Document:
         self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
         self.num_lines = len(self.lines)
-    def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
-        """Copy info from original config for file this document was extracted from."""
-        if self.config:
-            self.warn(f"Merging existing config with config for file this document was extracted from")
-        else:
-            self.config = EmailCfg(id=self.file_id)
-        extracted_from_description = doc_cfg.complete_description()
-        if extracted_from_description:
-            extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
-            if self.config.description:
-                self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
-            self.config.description = extracted_description
-        self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
-        self.warn(f"Constructed local config\n{self.config}")
     def _write_clean_text(self, output_path: Path) -> None:
         """Write self.text to 'output_path'. Used only for diffing files."""
         if output_path.exists():

epstein_files/documents/email.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import re
+from copy import deepcopy
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
 from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
      flatten, remove_timezone, uniquify)
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
+from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
 BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
 DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
 TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
+LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
 REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+APPEARS_IN = 'Appears in'
 MAX_CHARS_TO_PRINT = 4000
 MAX_NUM_HEADER_LINES = 14
 MAX_QUOTED_REPLIES = 2
@@ -128,7 +132,6 @@ JUNK_EMAILERS = [
     'How To Academy',
     'Jokeland',
     JP_MORGAN_USGIO,
-    'Saved by Internet Explorer 11',
 ]
 MAILING_LISTS = [
@@ -248,6 +251,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
 # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
 USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
+    'Alan Dlugash',                            # CCed with Richard Kahn
     'Alan Rogers',                           # Random CC
     'Andrew Friendly',                       # Presumably some relation of Kelly Friendly
     'BS Stern',                              # A random fwd of email we have
@@ -264,14 +268,14 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Lyn Fontanilla',                        # Random CC
     'Mark Albert',                           # Random CC
     'Matthew Schafer',                       # Random CC
+    MICHAEL_BUCHHOLTZ,                       # Terry Kafka CC
+    'Nancy Dahl',                            # covered by Lawrence Krauss (her husband)
     'Michael Simmons',                       # Random CC
     'Nancy Portland',                        # Lawrence Krauss CC
     'Oliver Goodenough',                     # Robert Trivers CC
-    'Owen Blicksilver',                      # Landon Thomas CC
     'Peter Aldhous',                         # Lawrence Krauss CC
     'Sam Harris',                            # Lawrence Krauss CC
     SAMUEL_LEFF,                             # Random CC
-    "Saved by Internet Explorer 11",
     'Sean T Lehane',                         # Random CC
     'Stephen Rubin',                         # Random CC
     'Tim Kane',                              # Random CC
@@ -318,6 +322,17 @@ class Email(Communication):
     rewritten_header_ids: ClassVar[set[str]] = set([])
     def __post_init__(self):
+        self.filename = self.file_path.name
+        self.file_id = extract_file_id(self.filename)
+        # Special handling for copying properties out of the config for the document this one was extracted from
+        if self.is_local_extract_file():
+            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
+            extracted_from_doc_id = self.url_slug.split('_')[-1]
+            if extracted_from_doc_id in ALL_FILE_CONFIGS:
+                self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
         super().__post_init__()
         try:
@@ -340,8 +355,12 @@ class Email(Communication):
         self.actual_text = self._actual_text()
         self.sent_from_device = self._sent_from_device()
+    def attachments(self) -> list[str]:
+        return (self.header.attachments or '').split(';')
     def info_txt(self) -> Text:
-        txt = Text("OCR text of email from ", style='grey46').append(self.author_txt).append(' to ')
+        email_type = 'fwded article' if self.is_fwded_article() else 'email'
+        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
         return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
     def is_fwded_article(self) -> bool:
@@ -566,11 +585,11 @@ class Email(Communication):
                 self._merge_lines(2, 5)
             elif self.file_id in ['029498', '031428']:
                 self._merge_lines(2, 4)
-        elif self.file_id in ['029976', '023067']:
+        elif self.file_id in ['029976', '023067', '033576']:
             self._merge_lines(3)  # Merge 4th and 5th rows
         elif self.file_id in '026609 029402 032405 022695'.split():
             self._merge_lines(4)  # Merge 5th and 6th rows
-        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
+        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
             self._merge_lines(2, 4)
         elif self.file_id in ['029154', '029163']:
             self._merge_lines(2, 5)
@@ -591,6 +610,8 @@ class Email(Communication):
             self._merge_lines(7, 9)
         elif self.file_id == '030299':
             self._merge_lines(7, 10)
+        elif self.file_id in ['022673', '022684']:
+            self._merge_lines(9)
         elif self.file_id == '014860':
             self._merge_lines(3)
             self._merge_lines(4)
@@ -649,6 +670,27 @@ class Email(Communication):
             sent_from = sent_from_match.group(0)
             return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
+    def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
+        """Copy info from original config for file this document was extracted from."""
+        if self.file_id in ALL_FILE_CONFIGS:
+            self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
+            self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+        else:
+            self.config = EmailCfg(id=self.file_id)
+        extracted_from_description = extracted_from_doc_cfg.complete_description()
+        if extracted_from_description:
+            extracted_description = f"{APPEARS_IN} {extracted_from_description}"
+            if self.config.description:
+                self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+            self.config.description = extracted_description
+        self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
+        self.warn(f"Constructed synthetic config: {self.config}")
     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         logger.debug(f"Printing '{self.filename}'...")
         yield self.file_info_panel()

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -45,7 +45,7 @@ class TextMessage:
             self.author_str = self.author_str or self.author
         if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
-            self.author_str = self.author + ' (?)'
+            self.author_str += ' (?)'
     def timestamp(self) -> datetime:
         return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)

epstein_files/documents/json_file.py CHANGED Viewed

@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
         return JSON
     def info_txt(self) -> Text | None:
-        return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
+        return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
     def is_interesting(self):
         return False

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -16,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
 CONFIRMED_MSG = 'Found confirmed counterparty'
 GUESSED_MSG = 'This is probably a conversation with'
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
         is_phone_number = author_str.startswith('+')
         if is_phone_number:
-            logger.warning(f"{self.summary()} Found phone number: {author_str}")
+            logger.info(f"{self.summary()} Found phone number: {author_str}")
             self.phone_number = author_str
         # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
@@ -130,7 +130,7 @@ class MessengerLog(Communication):
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
         counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
+        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
         counts_table.add_column('Days', justify='right', style='dim')
         for name, count in sort_dict(cls.count_authors(imessage_logs)):

epstein_files/documents/other_file.py CHANGED Viewed

@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
     TEXT_OF_US_LAW,
     TRANSLATION,
     TWEET,
-    THE_REAL_DEAL_ARTICLE,
+    REAL_DEAL_ARTICLE,
     TRUMP_DISCLOSURES,
     UBS_CIO_REPORT,
     UN_GENERAL_ASSEMBLY,
@@ -240,7 +240,7 @@ class OtherFile(Document):
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
         for file in files:
-            link_and_info = [file.raw_document_link_txt()]
+            link_and_info = [file.external_links()]
             date_str = file.date_str()
             if file.is_duplicate():

epstein_files/epstein_files.py CHANGED Viewed

@@ -23,12 +23,12 @@ from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
      epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
+from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
      build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
      print_other_site_link, print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
@@ -278,25 +278,40 @@ class EpsteinFiles:
     def print_emailer_counts_table(self) -> None:
         footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
         counts_table = build_table("Email Counts", caption=footer)
-        add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
+        add_cols_to_table(counts_table, [
+            'Name',
+            'Num',
+            'Sent',
+            "Recv",
+            {'name': 'First', 'highlight': True},
+            {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
+            JMAIL,
+            'eMedia',
+            'eWeb',
+            'Twitter',
+        ])
         emailer_counts = {
             emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
             for emailer in self.all_emailers(True)
         }
-        for p, count in sort_dict(emailer_counts):
-            style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)
+        for name, count in sort_dict(emailer_counts):
+            style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
+            emails = self.emails_for(name)
             counts_table.add_row(
-                Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
+                Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
                 str(count),
-                str(self.email_author_counts[p]),
-                str(self.email_recipient_counts[p]),
-                '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
-                '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
+                str(self.email_author_counts[name]),
+                str(self.email_recipient_counts[name]),
+                emails[0].timestamp_without_seconds(),
+                emails[-1].timestamp_without_seconds(),
+                '' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
+                '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
+                '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
+                '' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
             )
         console.print(vertically_pad(counts_table, 2))

epstein_files/util/constant/names.py CHANGED Viewed

@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
 CECILIA_STEEN = 'Cecilia Steen'
 CELINA_DUBIN = 'Celina Dubin'
 CHRISTINA_GALBRAITH = 'Christina Galbraith'  # Works with Tyler Shears on reputation stuff
+DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
 DANIEL_SABBA = 'Daniel Sabba'
 DANIEL_SIAD = 'Daniel Siad'
 DANNY_FROST = 'Danny Frost'
@@ -143,7 +144,7 @@ REID_HOFFMAN = 'Reid Hoffman'
 REID_WEINGARTEN = 'Reid Weingarten'
 RENATA_BOLOTOVA = 'Renata Bolotova'
 RICHARD_KAHN = 'Richard Kahn'
-ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
+ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
 ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
 ROBERT_TRIVERS = 'Robert Trivers'
 ROGER_SCHANK = 'Roger Schank'
@@ -178,6 +179,7 @@ JARED_KUSHNER = 'Jared Kushner'
 JULIE_K_BROWN = 'Julie K. Brown'
 KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
 MICHAEL_J_BOCCIO = 'Michael J. Boccio'
+NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
 PAUL_G_CASSELL = 'Paul G. Cassell'
 RUDY_GIULIANI = 'Rudy Giuliani'
 TULSI_GABBARD = 'Tulsi Gabbard'
@@ -226,22 +228,23 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
 # Names to color white in the word counts
 OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
-    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
+    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
-    davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+    danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
     edmond elizabeth emily entwistle erik evelyn
-    ferguson flachsbart francis franco frank
+    ferguson flachsbart francis franco frank frost
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
-    hancock harold harrison harry helen hirsch hofstadter horowitz hussein
+    hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
     ian isaac isaacson
-    jamie jane janet jason jen jim joe johnson jones josh julie justin
+    james jamie jane janet jason jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
-    leo leonard lenny leslie lieberman louis lynch lynn
+    laurie leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
-    nicole nussbaum
+    nancy nicole nussbaum
+    owen
     paulson philippe
     rafael ray richard richardson rob robin ron rubin rudolph ryan
-    sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
+    sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
     ted theresa thompson tiffany timothy tony
     valeria
     walter warren weinstein weiss william

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -20,7 +20,7 @@ POLITICS = 'politics'
 PROPERTY = 'property'
 PUBLICIST = 'publicist'
 REPUTATION = 'reputation'
-SKYPE_LOG= 'skype log'
+SKYPE_LOG = 'Skype log'
 SOCIAL = 'social'
 SPEECH = 'speech'
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
 NYT = "New York Times"
 PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
 PALM_BEACH_POST = f'{PALM_BEACH} Post'
+SHIMON_POST = 'The Shimon Post'
 THE_REAL_DEAL = 'The Real Deal'
 WAPO = 'WaPo'
 VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -13,11 +13,12 @@ ARCHIVE_LINK_COLOR = 'slate_blue3'
 TEXT_LINK = 'text_link'
 # External site names
-ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
+ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb', 'RollCall']
 EPSTEIN_MEDIA = 'epstein.media'
 EPSTEIN_WEB = 'EpsteinWeb'
 EPSTEINIFY = 'epsteinify'
 JMAIL = 'Jmail'
+ROLLCALL = 'RollCall'
 GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
@@ -41,9 +42,10 @@ EPSTEIN_WEB_URL = 'https://epsteinweb.org'
 JMAIL_URL = 'https://jmail.world'
 DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
-    EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
-    EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
-    EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
+    EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files/",
+    EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images/',
+    EPSTEINIFY: f"{EPSTEINIFY_URL}/document/",
+    ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
 }
@@ -53,7 +55,7 @@ epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_ma
 epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
 epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(name)}"
-epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, True)
+epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, 'lower')
 epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
 epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
 epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
@@ -62,16 +64,19 @@ epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{fil
 epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
 epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.quote(s)}&ewmfilepp=20"
+rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
 search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
 search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
-def build_doc_url(base_url: str, filename_or_id: int | str, lowercase: bool = False) -> str:
+def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
     file_stem = coerce_file_stem(filename_or_id)
-    file_stem = file_stem.lower() if lowercase else file_stem
-    return f"{base_url}/{file_stem}"
+    file_stem = file_stem.lower() if case == 'lower' else file_stem
+    file_stem = file_stem.title() if case == 'title' else file_stem
+    return f"{base_url}{file_stem}"
 def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:

epstein-files 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl

epstein-files 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl