PyPI - epstein-files - Versions diffs - 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl - Mend

epstein-files 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

epstein_files/__init__.py +3 -3
epstein_files/documents/communication.py +2 -2
epstein_files/documents/document.py +32 -62
epstein_files/documents/email.py +40 -1
epstein_files/documents/imessage/text_message.py +1 -1
epstein_files/documents/json_file.py +1 -1
epstein_files/documents/messenger_log.py +1 -1
epstein_files/documents/other_file.py +2 -2
epstein_files/util/constant/names.py +9 -8
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constants.py +17 -13
epstein_files/util/data.py +1 -1
epstein_files/util/doc_cfg.py +20 -42
epstein_files/util/file_helper.py +3 -9
epstein_files/util/highlighted_group.py +13 -4
epstein_files/util/logging.py +1 -1
epstein_files/util/output.py +1 -1
{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/METADATA +1 -1
epstein_files-1.0.12.dist-info/RECORD +33 -0
epstein_files-1.0.11.dist-info/RECORD +0 -33
{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/LICENSE +0 -0
{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/WHEEL +0 -0
{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/entry_points.txt +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
-     print_text_messages, write_urls)
+from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
+     print_text_messages, write_json_metadata, write_urls)
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -37,7 +37,7 @@ def generate_html() -> None:
     epstein_files = EpsteinFiles.get_files(timer)
     if args.json_metadata:
-        print_json_metadata(epstein_files)
+        write_json_metadata(epstein_files)
         exit()
     elif args.json_files:
         print_json_files(epstein_files)

epstein_files/documents/communication.py CHANGED Viewed

@@ -34,9 +34,9 @@ class Communication(Document):
     def is_attribution_uncertain(self) -> bool:
         return bool(self.config and self.config.is_attribution_uncertain)
-    def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
+    def external_links(self, _style: str = '', include_alt_link: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""
-        return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
+        return super().external_links(self.author_style, include_alt_link=include_alt_link)
     def summary(self) -> Text:
         return self._summary().append(CLOSE_PROPERTIES_CHAR)

epstein_files/documents/document.py CHANGED Viewed

@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
-from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
-from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
+from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_time_from_timestamp_str, without_falsey
+from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
 from epstein_files.util.env import DOCS_DIR, args
 from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
      file_size_str, is_local_extract_file)
@@ -31,10 +31,8 @@ INFO_INDENT = 2
 INFO_PADDING = (0, 0, 0, INFO_INDENT)
 MAX_TOP_LINES_LEN = 4000  # Only for logging
 MIN_DOCUMENT_ID = 10477
-LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
-EXTRACTED_FROM = 'Extracted from'
 MIN_TIMESTAMP = datetime(1991, 1, 1)
 MID_TIMESTAMP = datetime(2007, 1, 1)
 MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -96,15 +94,9 @@ class Document:
     def __post_init__(self):
         self.filename = self.file_path.name
         self.file_id = extract_file_id(self.filename)
-        self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
+        self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
-        if self.is_local_extract_file():
-            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
-            extracted_from_doc_id = self.url_slug.split('_')[-1]
-            if extracted_from_doc_id in ALL_FILE_CONFIGS:
-                self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
-        else:
+        if 'url_slug' not in vars(self):
             self.url_slug = self.file_path.stem
         self._set_computed_fields(text=self.text or self._load_file())
@@ -122,11 +114,11 @@ class Document:
     def duplicate_file_txt(self) -> Text:
         """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
-        if not self.config or not self.config.dupe_of_id:
+        if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
             raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
         txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
-        txt.append(f" because it's {self.config.duplicate_reason()} ")
+        txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
         return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
     def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
@@ -141,9 +133,28 @@ class Document:
         """Create a Text obj link to this document on EpsteinWeb."""
         return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+    def external_links(self, style: str = '', include_alt_link: bool = False) -> Text:
+        """Returns colored links to epstein.media and and epsteinweb in a Text object."""
+        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
+        if args.use_epstein_web:
+            txt.append(self.epstein_web_link(style=style))
+            if include_alt_link:
+                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+                txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
+        else:
+            txt.append(self.epstein_media_link(style=style))
+            if include_alt_link:
+                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+                txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
+        return txt
     def file_info_panel(self) -> Group:
         """Panel with filename linking to raw file plus any additional info about the file."""
-        panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
+        panel = Panel(self.external_links(include_alt_link=True), border_style=self._border_style(), expand=False)
         padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
         return Group(*([panel] + padded_info))
@@ -155,12 +166,10 @@ class Document:
     def info(self) -> list[Text]:
         """0 to 2 sentences containing the info_txt() as well as any configured description."""
-        sentences = [
+        return without_falsey([
             self.info_txt(),
             highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
-        ]
-        return without_falsey(sentences)
+        ])
     def info_txt(self) -> Text | None:
         """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
@@ -197,9 +206,9 @@ class Document:
         if self.is_local_extract_file():
             metadata['extracted_file'] = {
-                'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
-                'extracted_from_file': self.url_slug + '.txt',
-                'extracted_file_url': extracted_file_url(self.filename),
+                'explanation': 'Manually extracted from one of the court filings.',
+                'extracted_from': self.url_slug + '.txt',
+                'url': extracted_file_url(self.filename),
             }
         return metadata
@@ -208,25 +217,6 @@ class Document:
         with open(self.file_path) as f:
             return f.read()
-    def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
-        """Returns colored links to epstein.media and and epsteinweb in a Text object."""
-        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
-        if args.use_epstein_web:
-            txt.append(self.epstein_web_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
-        else:
-            txt.append(self.epstein_media_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
-        return txt
     def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
         """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
         for k, v in repairs.items():
@@ -253,7 +243,7 @@ class Document:
         txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
         if self.timestamp:
-            timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
+            timestamp_str = remove_time_from_timestamp_str(self.timestamp)
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
@@ -327,26 +317,6 @@ class Document:
         self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
         self.num_lines = len(self.lines)
-    def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
-        """Copy info from original config for file this document was extracted from."""
-        if self.config:
-            self.warn(f"Merging existing config with config for file this document was extracted from")
-        else:
-            self.config = EmailCfg(id=self.file_id)
-        extracted_from_description = doc_cfg.complete_description()
-        if extracted_from_description:
-            extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
-            if self.config.description:
-                self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
-            self.config.description = extracted_description
-        self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
-        self.warn(f"Constructed local config\n{self.config}")
     def _write_clean_text(self, output_path: Path) -> None:
         """Write self.text to 'output_path'. Used only for diffing files."""
         if output_path.exists():

epstein_files/documents/email.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import re
+from copy import deepcopy
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
 from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
      flatten, remove_timezone, uniquify)
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
+from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
 BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
 DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
 TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
+LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
 REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+APPEARS_IN = 'Appears in'
 MAX_CHARS_TO_PRINT = 4000
 MAX_NUM_HEADER_LINES = 14
 MAX_QUOTED_REPLIES = 2
@@ -248,6 +252,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
 # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
 USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
+    'Alan Dlugash',                            # CCed with Richard Kahn
     'Alan Rogers',                           # Random CC
     'Andrew Friendly',                       # Presumably some relation of Kelly Friendly
     'BS Stern',                              # A random fwd of email we have
@@ -264,6 +269,8 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Lyn Fontanilla',                        # Random CC
     'Mark Albert',                           # Random CC
     'Matthew Schafer',                       # Random CC
+    MICHAEL_BUCHHOLTZ,                       # Terry Kafka CC
+    'Nancy Dahl',                            # covered by Lawrence Krauss (her husband)
     'Michael Simmons',                       # Random CC
     'Nancy Portland',                        # Lawrence Krauss CC
     'Oliver Goodenough',                     # Robert Trivers CC
@@ -318,6 +325,17 @@ class Email(Communication):
     rewritten_header_ids: ClassVar[set[str]] = set([])
     def __post_init__(self):
+        self.filename = self.file_path.name
+        self.file_id = extract_file_id(self.filename)
+        # Special handling for copying properties out of the config for the document this one was extracted from
+        if self.is_local_extract_file():
+            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
+            extracted_from_doc_id = self.url_slug.split('_')[-1]
+            if extracted_from_doc_id in ALL_FILE_CONFIGS:
+                self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
         super().__post_init__()
         try:
@@ -570,7 +588,7 @@ class Email(Communication):
             self._merge_lines(3)  # Merge 4th and 5th rows
         elif self.file_id in '026609 029402 032405 022695'.split():
             self._merge_lines(4)  # Merge 5th and 6th rows
-        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
+        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
             self._merge_lines(2, 4)
         elif self.file_id in ['029154', '029163']:
             self._merge_lines(2, 5)
@@ -649,6 +667,27 @@ class Email(Communication):
             sent_from = sent_from_match.group(0)
             return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
+    def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
+        """Copy info from original config for file this document was extracted from."""
+        if self.file_id in ALL_FILE_CONFIGS:
+            self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
+            self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+        else:
+            self.config = EmailCfg(id=self.file_id)
+        extracted_from_description = extracted_from_doc_cfg.complete_description()
+        if extracted_from_description:
+            extracted_description = f"{APPEARS_IN} {extracted_from_description}"
+            if self.config.description:
+                self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+            self.config.description = extracted_description
+        self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
+        self.warn(f"Constructed synthetic config: {self.config}")
     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         logger.debug(f"Printing '{self.filename}'...")
         yield self.file_info_panel()

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -45,7 +45,7 @@ class TextMessage:
             self.author_str = self.author_str or self.author
         if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
-            self.author_str = self.author + ' (?)'
+            self.author_str += ' (?)'
     def timestamp(self) -> datetime:
         return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)

epstein_files/documents/json_file.py CHANGED Viewed

@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
         return JSON
     def info_txt(self) -> Text | None:
-        return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
+        return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
     def is_interesting(self):
         return False

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -76,7 +76,7 @@ class MessengerLog(Communication):
         is_phone_number = author_str.startswith('+')
         if is_phone_number:
-            logger.warning(f"{self.summary()} Found phone number: {author_str}")
+            logger.info(f"{self.summary()} Found phone number: {author_str}")
             self.phone_number = author_str
         # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author

epstein_files/documents/other_file.py CHANGED Viewed

@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
     TEXT_OF_US_LAW,
     TRANSLATION,
     TWEET,
-    THE_REAL_DEAL_ARTICLE,
+    REAL_DEAL_ARTICLE,
     TRUMP_DISCLOSURES,
     UBS_CIO_REPORT,
     UN_GENERAL_ASSEMBLY,
@@ -240,7 +240,7 @@ class OtherFile(Document):
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
         for file in files:
-            link_and_info = [file.raw_document_link_txt()]
+            link_and_info = [file.external_links()]
             date_str = file.date_str()
             if file.is_duplicate():

epstein_files/util/constant/names.py CHANGED Viewed

@@ -143,7 +143,7 @@ REID_HOFFMAN = 'Reid Hoffman'
 REID_WEINGARTEN = 'Reid Weingarten'
 RENATA_BOLOTOVA = 'Renata Bolotova'
 RICHARD_KAHN = 'Richard Kahn'
-ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
+ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
 ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
 ROBERT_TRIVERS = 'Robert Trivers'
 ROGER_SCHANK = 'Roger Schank'
@@ -178,6 +178,7 @@ JARED_KUSHNER = 'Jared Kushner'
 JULIE_K_BROWN = 'Julie K. Brown'
 KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
 MICHAEL_J_BOCCIO = 'Michael J. Boccio'
+NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
 PAUL_G_CASSELL = 'Paul G. Cassell'
 RUDY_GIULIANI = 'Rudy Giuliani'
 TULSI_GABBARD = 'Tulsi Gabbard'
@@ -226,22 +227,22 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
 # Names to color white in the word counts
 OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
-    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
+    baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
-    davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+    danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
     edmond elizabeth emily entwistle erik evelyn
-    ferguson flachsbart francis franco frank
+    ferguson flachsbart francis franco frank frost
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
-    hancock harold harrison harry helen hirsch hofstadter horowitz hussein
+    hancock harold harrison harry hay helen hirsch hofstadter horowitz hussein
     ian isaac isaacson
     jamie jane janet jason jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
-    leo leonard lenny leslie lieberman louis lynch lynn
+    laurie leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
-    nicole nussbaum
+    nancy nicole nussbaum
     paulson philippe
     rafael ray richard richardson rob robin ron rubin rudolph ryan
-    sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
+    sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
     ted theresa thompson tiffany timothy tony
     valeria
     walter warren weinstein weiss william

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -20,7 +20,7 @@ POLITICS = 'politics'
 PROPERTY = 'property'
 PUBLICIST = 'publicist'
 REPUTATION = 'reputation'
-SKYPE_LOG= 'skype log'
+SKYPE_LOG = 'Skype log'
 SOCIAL = 'social'
 SPEECH = 'speech'
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
 NYT = "New York Times"
 PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
 PALM_BEACH_POST = f'{PALM_BEACH} Post'
+SHIMON_POST = 'The Shimon Post'
 THE_REAL_DEAL = 'The Real Deal'
 WAPO = 'WaPo'
 VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'

epstein_files/util/constants.py CHANGED Viewed

@@ -66,7 +66,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     BRAD_EDWARDS:  re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
     BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
     'Dangene and Jennie Enterprise': re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
-    DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
+    DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
     DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
     DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
     DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
     REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
     RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
-    ROBERT_D_CRITTON: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
+    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
     ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
     ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
     ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
@@ -163,6 +163,7 @@ EMAILERS = [
     DEEPAK_CHOPRA,
     GLENN_DUBIN,
     GORDON_GETTY,
+    'Kevin Bright',
     'Jack Lang',
     JACK_SCAROLA,
     JAY_LEFKOWITZ,
@@ -257,7 +258,6 @@ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
 LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
 KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
 MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
-NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
 NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
 NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
 OBAMA_JOKE = 'joke about Obama'
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
 PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
 PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
 PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
-SHIMON_POST = 'The Shimon Post'
+REAL_DEAL_ARTICLE = 'article by Keith Larsen'
 SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
 SINGLE_PAGE = 'single page of'
 STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
 SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
-THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
 TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
 UBS_CIO_REPORT = 'CIO Monthly Extended report'
 UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
     EmailCfg(
         id='029977',
         author=LAWRANCE_VISOSKI,
-        recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
+        recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
         attribution_reason=LARRY_REASON,
         duplicate_ids=['031129'],
     ),
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
     EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
     EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
     EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
-    EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
+    EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
     EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
     EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
     EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
         attribution_reason='ends with "Respectfully, terry"',
         author=TERRY_KAFKA,
         fwded_text_after='From: Mike Cohen',
-        recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
+        recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
         duplicate_ids=['028482'],
     ),
     EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
@@ -665,6 +664,10 @@ EMAILS_CONFIG = [
     EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']),  # Fareed Zakaria: Trump sells America short),
     EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']),  # American-Israeli Cooperative Enterprise Newsletter
     EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']),  # Radar Online article about Epstein's early prison release
+    EmailCfg(id='031774', is_fwded_article=True),  # Krassner fwd of Palmer Report article
+    EmailCfg(id='033345', is_fwded_article=True),  # Krassner fwd of Palmer Report article
+    EmailCfg(id='029903', is_fwded_article=True),  # Krassner fwd of Ann Coulter article about Epstein
+    EmailCfg(id='030266', is_fwded_article=True),  # Krassner fwd of article about Dershowitz
     EmailCfg(id='030868', is_fwded_article=True),  # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
     EmailCfg(id='026755', is_fwded_article=True),  # HuffPo
     EmailCfg(id='016218', is_fwded_article=True),  # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
@@ -710,6 +713,7 @@ EMAILS_CONFIG = [
     EmailCfg(id='033311', is_fwded_article=True),  # 2016 election polls
     EmailCfg(id='026580', is_fwded_article=True),  # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
     EmailCfg(id='031340', is_fwded_article=True),  # Article about Alex Jones threatening Robert Mueller
+    EmailCfg(id='030209', is_fwded_article=True),  # Atlantic Council  Syria: Blackberry Diplomacy
     EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']),  # Sultan Sulayem fwding article about Trump and Russia
     EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
     EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
@@ -852,9 +856,9 @@ EMAILS_CONFIG = [
     EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
     EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
     EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
-    EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
     EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
     EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
+    EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
 ]
@@ -1335,8 +1339,8 @@ OTHER_FILES_PROPERTY = [
     DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
     DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
     DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
-    DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
-    DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
+    DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
+    DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
     DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
     DocCfg(id='018743', description=f"Las Vegas property listing"),
     DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
@@ -1497,13 +1501,13 @@ OTHER_FILES_MISC = [
     DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
+    DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
     DocCfg(
         id='018224',
         category=SKYPE_LOG,
-        description=f'Skype conversations with linkspirit (French?) and {LAWRENCE_KRAUSS}',
+        author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
         is_interesting=True,  # we don't know who linkspirit is yet
     ),
-    DocCfg(id='032210', category=SKYPE_LOG, description=f'Skype conversation with linkspirit', is_interesting=True),
     DocCfg(
         id='025147',
         author=BROCKMAN_INC,

epstein_files/util/data.py CHANGED Viewed

@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
 PACIFIC_TZ = tz.gettz("America/Los_Angeles")
 TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ}  # Suppresses annoying warnings from parse() calls
 collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
 date_str = lambda dt: dt.isoformat()[0:10] if dt else None
 escape_double_quotes = lambda text: text.replace('"', r'\"')
 escape_single_quotes = lambda text: text.replace("'", r"\'")
 iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
+remove_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
 uniquify = lambda _list: list(set(_list))
 without_falsey = lambda _list: [e for e in _list if e]

epstein_files/util/doc_cfg.py CHANGED Viewed

@@ -8,7 +8,7 @@ from dateutil.parser import parse
 from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
-from epstein_files.util.data import without_falsey
+from epstein_files.util.data import remove_time_from_timestamp_str, without_falsey
 DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
 Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
@@ -47,12 +47,11 @@ FINANCIAL_REPORTS_AUTHORS = [
 ]
 # Fields like timestamp and author are better added from the Document object
-INVALID_FOR_METADATA = [
+NON_METADATA_FIELDS = [
     'actual_text',
     'date',
     'id',
-    'timestamp',
-    'was_generated',
+    'is_synthetic',
 ]
@@ -68,10 +67,10 @@ class DocCfg:
         date (str | None): If passed will be immediated parsed into the 'timestamp' field
         dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
         dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
-        duplicate_ids (list[str]): Inverse of 'dupe_of_id' - this file will NOT be suppressed but 'duplicate_ids' will be
+        duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
         is_interesting (bool): Override other considerations and always consider this file interesting
         timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
-        was_generated (bool): True if this object was generated by the duplicate_cfgs() method
+        is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
     """
     id: str
     author: str | None = None
@@ -82,8 +81,8 @@ class DocCfg:
     dupe_type: DuplicateType | None = None
     duplicate_ids: list[str] = field(default_factory=list)
     is_interesting: bool = False
+    is_synthetic: bool = False
     timestamp: datetime | None = None
-    was_generated: bool = False
     def __post_init__(self):
         if self.date:
@@ -94,13 +93,17 @@ class DocCfg:
     def complete_description(self) -> str | None:
         """String that summarizes what is known about this document."""
-        if self.category and not self.description:
+        if self.category and not self.description and not self.author:
             return self.category
         elif self.category == REPUTATION:
             return f"{REPUTATION_MGMT}: {self.description}"
+        elif self.category == SKYPE_LOG:
+            msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
+            return f"{msg} {self.description}" if self.description else msg
         elif self.author and self.description:
             if self.category in [ACADEMIA, BOOK]:
-                return self.title_by_author()
+                title = self.description if '"' in self.description else f"'{self.description}'"
+                return f"{title} by {self.author}"
             elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
                 return f"{self.author} report: '{self.description}'"
             elif self.category == LEGAL and 'v.' in self.author:
@@ -111,10 +114,6 @@ class DocCfg:
         pieces = without_falsey([self.author, self.description])
         return ' '.join(pieces) if pieces else None
-    def duplicate_reason(self) -> str | None:
-        if self.dupe_type is not None:
-            return DUPE_TYPE_STRS[self.dupe_type]
     def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
         """Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
         for id in self.duplicate_ids:
@@ -123,35 +122,17 @@ class DocCfg:
             dupe_cfg.dupe_of_id = self.id
             dupe_cfg.duplicate_ids = []
             dupe_cfg.dupe_type = self.dupe_type
-            dupe_cfg.was_generated = True
+            dupe_cfg.is_synthetic = True
             yield dupe_cfg
     def metadata(self) -> Metadata:
-        non_null_fields = {k: v for k, v in asdict(self).items() if v and k not in INVALID_FOR_METADATA}
-        if self.category in [EMAIL, TEXT_MESSAGE]:
-            del non_null_fields['category']
-        return non_null_fields
-    def non_null_field_names(self) -> list[str]:
-        return [f.name for f in self.sorted_fields() if getattr(self, f.name)]
-    def sorted_fields(self) -> list[Field]:
-        return sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name))
-    def title_by_author(self) -> str:
-        if not (self.author and self.description):
-            raise RuntimeError(f"Can't call title_by_author() without author and description!")
-        title = self.description if '"' in self.description else f"'{self.description}'"
-        return f"{title} by {self.author}"
+        return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
     def _props_strs(self) -> list[str]:
         props = []
         add_prop = lambda f, value: props.append(f"{f.name}={value}")
-        for _field in self.sorted_fields():
+        for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
             value = getattr(self, _field.name)
             if value is None or value is False or (isinstance(value, list) and len(value) == 0):
@@ -160,13 +141,13 @@ class DocCfg:
                 add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
             elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
                 continue
-            elif _field.name == 'recipients' and isinstance(value, list):
+            elif _field.name == 'recipients' and value:
                 recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
                 add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
             elif _field.name == 'timestamp' and self.date is not None:
                 continue  # Don't print both timestamp and date
             elif isinstance(value, datetime):
-                value_str = re.sub(' 00:00:00', '', str(value))
+                value_str = remove_time_from_timestamp_str(value)
                 add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
             elif isinstance(value, str):
                 if "'" in value:
@@ -221,18 +202,15 @@ class EmailCfg(CommunicationCfg):
     """
     Attributes:
         actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
+        fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
         is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
         recipients (list[str | None]): Who received the email
     """
-    actual_text: str | None = None  # Override for the Email._actual_text() method for particularly broken emails
-    fwded_text_after: str | None = None  # If set, any text after this is a fwd of an article or similar
+    actual_text: str | None = None
+    fwded_text_after: str | None = None
     is_fwded_article: bool = False
     recipients: list[str | None] = field(default_factory=list)
-    @classmethod
-    def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
-        return cls(**asdict(cfg))
     # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
     def __repr__(self) -> str:
         return super().__repr__()

epstein_files/util/file_helper.py CHANGED Viewed

@@ -11,8 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
 KB = 1024
 MB = KB * KB
+file_size = lambda file_path: Path(file_path).stat().st_size
+file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
-# Coerce methods hands both string and int arguments.
+# Coerce methods handle both string and int arguments.
 coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
 coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
 id_str = lambda id: f"{int(id):06d}"
@@ -44,14 +46,6 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
     return file_match.group(1)
-def file_size(file_path: str | Path) -> int:
-    return Path(file_path).stat().st_size
-def file_size_str(file_path: str | Path) -> str:
-    return file_size_to_str(file_size(file_path))
 def file_size_to_str(size: int) -> str:
     digits = 2

epstein_files/util/highlighted_group.py CHANGED Viewed

@@ -223,6 +223,7 @@ HIGHLIGHTED_NAMES = [
             'Linda Pinto': 'interior design at Alberto Pinto Cabinet',
             MERWIN_DELA_CRUZ: None,  # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
             NADIA_MARCINKO: 'pilot',
+            'Sean J. Lancaster': 'airplane reseller',
         }
     ),
     HighlightedNames(
@@ -260,6 +261,8 @@ HIGHLIGHTED_NAMES = [
             MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
             MICHAEL_MILLER: 'Steptoe LLP partner',
             REID_WEINGARTEN: 'Steptoe LLP partner',
+            ROBERT_D_CRITTON_JR: 'criminal defense attorney',
+            'Robert Gold': None,
             'Roy Black': CRIMINAL_DEFENSE_2008,
             SCOTT_J_LINK: None,
             TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
@@ -310,15 +313,17 @@ HIGHLIGHTED_NAMES = [
         }
     ),
     HighlightedNames(
-        label='finance',
+        label=FINANCE,
         style='green',
         pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
         emailers={
             AMANDA_ENS: 'Citigroup',
+            BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
             DANIEL_SABBA: 'UBS Investment Bank',
             DAVID_FISZEL: 'CIO Honeycomb Asset Management',
             JES_STALEY: 'former CEO of Barclays',
             JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
+            'Laurie Cameron': 'currency trading',
             LEON_BLACK: 'Apollo CEO',
             MARC_LEON: 'Luxury Properties Sari Morrocco',
             MELANIE_SPINELLA: f'representative of {LEON_BLACK}',
@@ -378,7 +383,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label=JOURNALIST,
         style='bright_yellow',
-        pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
+        pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?(Hill|Patterson)|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
         emailers = {
             EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
             'James Hill': 'ABC News',
@@ -458,6 +463,7 @@ HIGHLIGHTED_NAMES = [
             IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
             MICHAEL_SITRICK: 'crisis PR',
             PEGGY_SIEGAL: 'socialite',
+            'R. Couri Hay': None,
             ROSS_GOW: 'Acuity Reputation Management',
             TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
         }
@@ -485,6 +491,7 @@ HIGHLIGHTED_NAMES = [
         style='red bold',
         pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
         emailers = {
+            'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
             MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
             RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
             SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
@@ -493,14 +500,16 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label=ACADEMIA,
         style='light_goldenrod2',
-        pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
+        pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Norman\s*Finkelstein|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
         emailers = {
             DAVID_HAIG: None,
             JOSCHA_BACH: 'cognitive science / AI research',
             'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
+            'Ed Boyden': 'Associate Professor, MIT Media Lab neurobiology',
             LAWRENCE_KRAUSS: 'theoretical physicist',
             LINDA_STONE: 'ex-Microsoft, MIT Media Lab',
             MARK_TRAMO: 'professor of neurology at UCLA',
+            'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
             NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
             PETER_ATTIA: 'longevity medicine',
             ROBERT_TRIVERS: 'evolutionary biology',
@@ -661,7 +670,7 @@ def get_style_for_category(category: str) -> str | None:
     elif category in [CONFERENCE, SPEECH]:
         return f"{get_style_for_category(ACADEMIA)} dim"
     elif category == SOCIAL:
-        return f"{get_style_for_category(PUBLICIST)}"
+        return get_style_for_category(PUBLICIST)
     category = CATEGORY_STYLE_MAPPING.get(category, category)

epstein_files/util/logging.py CHANGED Viewed

@@ -32,7 +32,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
 # Augment the standard log highlighter with 'epstein_filename' matcher
 class LogHighlighter(ReprHighlighter):
     highlights = ReprHighlighter.highlights + [
-        *[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
+        *[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
         "(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
     ]

epstein_files/util/output.py CHANGED Viewed

@@ -125,7 +125,7 @@ def print_json_files(epstein_files: EpsteinFiles):
             console.print_json(json_file.json_str(), indent=4, sort_keys=False)
-def print_json_metadata(epstein_files: EpsteinFiles) -> None:
+def write_json_metadata(epstein_files: EpsteinFiles) -> None:
     json_str = epstein_files.json_metadata()
     if args.build:

{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.0.11
+Version: 1.0.12
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later

epstein_files-1.0.12.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,33 @@
+epstein_files/__init__.py,sha256=qVFB7sS6XSlZX-ByyDwdbGSn2h06aoX2Mx8WcgRb-To,4710
+epstein_files/documents/communication.py,sha256=XapJlNfcaww3TpSkZIBE5c1Skqv_pFEFlIVi06V7k3E,2046
+epstein_files/documents/document.py,sha256=2FxyqWKROi7w2SmaQ493oGKekNvYAHSuv2YsDhPNQBU,16987
+epstein_files/documents/email.py,sha256=y8QTq349LWlm2LLUJ8rGcdkDbaGYJCV99wJytPcEMew,40587
+epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
+epstein_files/documents/imessage/text_message.py,sha256=4gFvTfulj_Su10lNQl6Hq_p9ArTrSmn5pfC22YRJXjI,2794
+epstein_files/documents/json_file.py,sha256=tIYTwA3FYkwVZSpXvFYyUoH9m2sGYCD1U0ttamH6r1o,1306
+epstein_files/documents/messenger_log.py,sha256=yT4WQyTE_W6yelug_YGpBMRJ0YxWNtX4rKoEj8n5TMA,6260
+epstein_files/documents/other_file.py,sha256=pnl_q1o7ur3eeqGPwsYL2qbM3Y8O9LX6j6LbWnoxAiE,9939
+epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
+epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
+epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
+epstein_files/util/constant/names.py,sha256=CLWXrln8J-Dth6C-YF7Wdy7UoA8dybKJyqOLETrBeek,10284
+epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
+epstein_files/util/constant/strings.py,sha256=02DwbhAe8qBRq5HOUFx5FafXJ1P2-RJf9TCVu2b7UDQ,1932
+epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
+epstein_files/util/constants.py,sha256=BpPRivoDYFI0uLU35kKpOdrSI6Rr9cmcrRj9-kANVrs,111834
+epstein_files/util/data.py,sha256=X3AutdW-ascIlE2bz1BtN0Bywqpe4OwYzJ-diEpfogI,2992
+epstein_files/util/doc_cfg.py,sha256=_f03qtA7qVbViHwqMXC4O5nfNbh90zDSq6El9Ior6f0,8996
+epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
+epstein_files/util/file_helper.py,sha256=tacTe1GcAnckPFvjMgxRRSLnFgr2aVIYsgfDR_C9uXk,2780
+epstein_files/util/highlighted_group.py,sha256=xrDLB05YUYGsU6vDvhvENMvIyjEz-9eb9xN-RjfCQbQ,36531
+epstein_files/util/logging.py,sha256=fuREq06xUUI3DfCV2JE-8QM-sQKxpLDj0_AYFO6qR1M,1983
+epstein_files/util/output.py,sha256=XcflgSOlzUGj6FsFaK6j4Dljld8A0h_uVV7ERcI_EYw,8120
+epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
+epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
+epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
+epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
+epstein_files-1.0.12.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+epstein_files-1.0.12.dist-info/METADATA,sha256=imTDdrHjWC-bWuw58SAyjYyiziZsqHkO7ODQUntw6YQ,5480
+epstein_files-1.0.12.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+epstein_files-1.0.12.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
+epstein_files-1.0.12.dist-info/RECORD,,

epstein_files-1.0.11.dist-info/RECORD DELETED Viewed

@@ -1,33 +0,0 @@
-epstein_files/__init__.py,sha256=4zxX1tw-0xMwpM-Sbq7PezV0YNS9zN-P6gc9BQ1BqKU,4710
-epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
-epstein_files/documents/document.py,sha256=dECV0bSnOJzPfOIHyHeG5rNxKd6uwuiso35-sQZg9No,18353
-epstein_files/documents/email.py,sha256=yXiW7mB4myU8G9DY7PnnqazaCqeAR3dHr35NfBplfRU,38519
-epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
-epstein_files/documents/imessage/text_message.py,sha256=3HlNp75JIoMlWj7PaUWIFry3qlGEmpGu5OmdmsBYS34,2807
-epstein_files/documents/json_file.py,sha256=HsnVWPZXVxTF_DadL2YtJtsiXKXOd18PUs05O33tjNc,1317
-epstein_files/documents/messenger_log.py,sha256=uSPlg85jGTwod1cV9f7MtxSNqmMZ61JBFzoiRNqg52M,6263
-epstein_files/documents/other_file.py,sha256=S_Y-SxYYYXtx42JHmhFWl5BbTduNI7cwQjeYHBJA7sc,9950
-epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
-epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
-epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
-epstein_files/util/constant/names.py,sha256=KKJEYFpdOp4xDwXe5dhrqYgF12oJODvVSFpAB28Q76A,10153
-epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
-epstein_files/util/constant/strings.py,sha256=FDtksfH50PSxtSBw9XhmqxtrgRgGxdIvGiAR2bbPpu4,1899
-epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
-epstein_files/util/constants.py,sha256=LPSI6Z0n3ChFDnMGYVO80cGuSKZf0OoyUzLih_jlRKI,111434
-epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
-epstein_files/util/doc_cfg.py,sha256=OZlocAWldfR8Nomiad4FxQeyhNMbd0PQ-rumKn2nWBg,9641
-epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
-epstein_files/util/file_helper.py,sha256=-higKqc9J5IfNpzMzg-9j1ps3beV4N2cw8kdAxfm7NA,2835
-epstein_files/util/highlighted_group.py,sha256=fU-8ns50uUolzPEAxadF5AnPLjn383KpEeyRXfFbv_U,35971
-epstein_files/util/logging.py,sha256=8e22WaBfDAKEmkcr3Gb4TdqtFSkU4FQDpk3Z6hfSzbw,1977
-epstein_files/util/output.py,sha256=UzTU0mNHEmeJr3w2TXAp19X497GB6_-HyW0mfztI1jk,8120
-epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
-epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
-epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
-epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
-epstein_files-1.0.11.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-epstein_files-1.0.11.dist-info/METADATA,sha256=HBW3t1F9lkoN6GIR7ySV2kBYnJhNEF9otDZWnf03jUo,5480
-epstein_files-1.0.11.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
-epstein_files-1.0.11.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
-epstein_files-1.0.11.dist-info/RECORD,,

{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/LICENSE RENAMED Viewed

File without changes

{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

epstein-files 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl

epstein-files 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl