PyPI - epstein-files - Versions diffs - 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl - Mend

epstein-files 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

epstein_files/__init__.py +16 -11
epstein_files/documents/communication.py +2 -2
epstein_files/documents/document.py +59 -51
epstein_files/documents/email.py +34 -30
epstein_files/documents/imessage/text_message.py +4 -4
epstein_files/documents/json_file.py +9 -3
epstein_files/documents/messenger_log.py +29 -27
epstein_files/documents/other_file.py +80 -100
epstein_files/epstein_files.py +50 -69
epstein_files/util/constant/names.py +3 -1
epstein_files/util/constant/strings.py +1 -3
epstein_files/util/constant/urls.py +1 -7
epstein_files/util/constants.py +126 -114
epstein_files/util/data.py +2 -0
epstein_files/util/doc_cfg.py +11 -10
epstein_files/util/env.py +12 -13
epstein_files/util/file_helper.py +8 -4
epstein_files/util/highlighted_group.py +8 -16
epstein_files/util/output.py +56 -36
epstein_files/util/rich.py +29 -29
epstein_files/util/word_count.py +7 -9
{epstein_files-1.0.13.dist-info → epstein_files-1.0.15.dist-info}/METADATA +10 -3
epstein_files-1.0.15.dist-info/RECORD +33 -0
epstein_files-1.0.13.dist-info/RECORD +0 -33
{epstein_files-1.0.13.dist-info → epstein_files-1.0.15.dist-info}/LICENSE +0 -0
{epstein_files-1.0.13.dist-info → epstein_files-1.0.15.dist-info}/WHEEL +0 -0
{epstein_files-1.0.13.dist-info → epstein_files-1.0.15.dist-info}/entry_points.txt +0 -0

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -9,17 +9,17 @@ from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.communication import Communication
-from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
+from epstein_files.documents.imessage.text_message import TextMessage
 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
-from epstein_files.util.constant.strings import AUTHOR
-from epstein_files.util.data import iso_timestamp, listify, sort_dict
+from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
+from epstein_files.util.data import days_between, days_between_str, iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
-CONFIRMED_MSG = 'Found confirmed counterparty'
-GUESSED_MSG = 'This is probably a conversation with'
+CONFIRMED_MSG = 'with confirmed counterparty'
+GUESSED_MSG = 'and is probably with'
 MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)
 REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
@@ -39,17 +39,20 @@ class MessengerLog(Communication):
         return self.messages_by(name)[0].timestamp()
     def info_txt(self) -> Text | None:
-        if self.author is None:
-            return None
+        num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
+        txt = Text(f"(Covers {num_days_str} starting ", style='dim')
+        txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
-        info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
-        author_txt = Text(self.author, style=self.author_style + ' bold')
-        txt = Text(f"({info_msg} ", style='dim').append(author_txt)
+        if not self.author:
+            txt.append('with unknown counterparty')
+        else:
+            txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
+            txt.append(Text(self.author, style=self.author_style + ' bold'))
         if self.phone_number:
-            txt.append(f" using the phone number {self.phone_number}")
+            txt.append(highlighter(f" using the phone number {self.phone_number}"))
-        return highlighter(txt.append(')'))
+        return txt.append(')')
     def last_message_at(self, name: str | None) -> datetime:
         return self.messages_by(name)[-1].timestamp()
@@ -82,7 +85,7 @@ class MessengerLog(Communication):
         # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
         return TextMessage(
             author=self.author if (is_phone_number or not author_str) else author_str,
-            author_str=author_str if is_phone_number else None,  # Preserve phone numbers
+            author_str=author_str if is_phone_number else '',  # Preserve phone numbers
             id_confirmed=not self.is_attribution_uncertain(),
             text=match.group(4).strip(),
             timestamp_str=match.group(2).strip(),
@@ -90,12 +93,12 @@ class MessengerLog(Communication):
     def _extract_timestamp(self) -> datetime:
         for match in MSG_REGEX.finditer(self.text):
-            timestamp_str = match.group(2).strip()
+            message = self._build_message(match)
             try:
-                return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
+                return message.timestamp()
             except ValueError as e:
-                logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
+                logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
         raise RuntimeError(f"{self}: No timestamp found!")
@@ -118,23 +121,22 @@ class MessengerLog(Communication):
         return sender_counts
     @classmethod
-    def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
-        authors = listify(author)
-        return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
-    @classmethod
-    def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
+    def summary_table(cls, log_files: list['MessengerLog']) -> Table:
         """Build a table summarizing the text messages in 'imessage_logs'."""
-        counts_table = build_table("Text Message Counts By Author")
-        counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
+        author_counts = cls.count_authors(log_files)
+        msg_count = sum([len(log.messages) for log in log_files])
+        footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
+        counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
+        counts_table.add_column(AUTHOR.title(), justify='left', width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
         counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
         counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
         counts_table.add_column('Days', justify='right', style='dim')
-        for name, count in sort_dict(cls.count_authors(imessage_logs)):
-            logs = cls.logs_for(name, imessage_logs)
+        for name, count in sort_dict(author_counts):
+            logs = log_files if name == JEFFREY_EPSTEIN else [log for log in log_files if log.author == name]
             first_at = logs[0].first_message_at(name)
             last_at = logs[-1].first_message_at(name)
@@ -144,7 +146,7 @@ class MessengerLog(Communication):
                 f"{count:,}",
                 iso_timestamp(first_at),
                 iso_timestamp(last_at),
-                str((last_at - first_at).days + 1),
+                str(days_between(first_at, last_at)),
             )
         return counts_table

epstein_files/documents/other_file.py CHANGED Viewed

@@ -17,14 +17,15 @@ from rich.text import Text
 from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constants import *
-from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
-from epstein_files.util.data import escape_single_quotes, remove_timezone, sort_dict, uniquify
+from epstein_files.util.doc_cfg import DocCfg, Metadata
+from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import styled_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, add_cols_to_table, build_table, highlighter
+from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
 from epstein_files.util.logging import logger
+FIRST_FEW_LINES = 'First Few Lines'
 MAX_DAYS_SPANNED_TO_BE_VALID = 10
 MAX_EXTRACTED_TIMESTAMPS = 100
 MIN_TIMESTAMP = datetime(2000, 1, 1)
@@ -36,94 +37,62 @@ TIMESTAMP_LOG_INDENT = f'{LOG_INDENT}    '
 VAST_HOUSE = 'vast house'  # Michael Wolff article draft about Epstein indicator
 VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
-UNINTERESTING_CATEGORES = [
+SKIP_TIMESTAMP_EXTRACT = [
+    PALM_BEACH_TSV,
+    PALM_BEACH_PROPERTY_INFO,
+]
+UNINTERESTING_CATEGORIES = [
+    ACADEMIA,
+    ARTICLE,
     ARTS,
     BOOK,
+    CONFERENCE,
     JUNK,
+    POLITICS,
     SKYPE_LOG,
-    SPEECH,
 ]
 # OtherFiles whose descriptions/info match these prefixes are not displayed unless --all-other-files is used
-UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
+UNINTERESTING_PREFIXES = [
     'article about',
-    ARTICLE_DRAFT,
-    'Aviation International',
-    BBC,
-    BLOOMBERG,
-    'Boston Globe',
     BROCKMAN_INC,
-    CHINA_DAILY,
-    CNN,
-    'completely redacted',
     CVRA,
-    DAILY_MAIL,
-    DAILY_TELEGRAPH,
-    CVRA_LEXIS_SEARCH[0:-12],  # Because date at end :(
     DERSH_GIUFFRE_TWEET,
-    'Financial Times',
-    'Forbes',
-    'Frontlines',
-    'Future Science',
-    'Globe and Mail',
     GORDON_GETTY,
     f"{HARVARD} Econ",
     HARVARD_POETRY,
-    'Inference',
     JASTA,
-    'JetGala',
-    JOHN_BOLTON_PRESS_CLIPPING,
-    'Journal of Criminal',
-    LA_TIMES,
-    'Litigation Daily',
-    LAWRENCE_KRAUSS,
-    LAWRENCE_KRAUSS_ASU_ORIGINS,
-    'MarketWatch',
-    MARTIN_NOWAK,
-    'Morning News',
+    LEXIS_NEXIS,
     NOBEL_CHARITABLE_TRUST,
-    'Nautilus',
-    'New Yorker',
-    NYT,
     PALM_BEACH_CODE_ENFORCEMENT,
-    PALM_BEACH_DAILY_NEWS,
-    PALM_BEACH_POST,
     PALM_BEACH_TSV,
     PALM_BEACH_WATER_COMMITTEE,
-    PAUL_KRASSNER,
-    PEGGY_SIEGAL,
-    'Politifact',
-    'Rafanelli',
-    ROBERT_LAWRENCE_KUHN,
-    ROBERT_TRIVERS,
-    'SCMP',
-    'SciencExpress',
-    'Scowcroft',
-    SHIMON_POST_ARTICLE,
-    SINGLE_PAGE,
-    STACEY_PLASKETT,
-    'Tatler',
-    TERJE_ROD_LARSEN,
-    TEXT_OF_US_LAW,
-    TRANSLATION,
     TWEET,
-    REAL_DEAL_ARTICLE,
-    TRUMP_DISCLOSURES,
-    UBS_CIO_REPORT,
     UN_GENERAL_ASSEMBLY,
-    'U.S. News',
     'US Office',
-    'Vanity Fair',
-    VI_DAILY_NEWS,
-    WAPO,
+]
+INTERESTING_AUTHORS = [
+    EDWARD_JAY_EPSTEIN,
+    EHUD_BARAK,
+    JOI_ITO,
+    NOAM_CHOMSKY,
+    MICHAEL_WOLFF,
+    SVETLANA_POZHIDAEVA,
 ]
 @dataclass
 class OtherFile(Document):
-    """File that is not an email, an iMessage log, or JSON data."""
+    """
+    File that is not an email, an iMessage log, or JSON data.
-    include_description_in_summary_panel: ClassVar[bool] = True
+    Attributes:
+        was_timestamp_extracted (bool): True if the timestamp was programmatically extracted (and could be wrong)
+    """
+    was_timestamp_extracted: bool = False
+    include_description_in_summary_panel: ClassVar[bool] = True  # Class var for logging output
     def __post_init__(self):
         super().__post_init__()
@@ -162,11 +131,13 @@ class OtherFile(Document):
         elif len(info_sentences) == 0:
             return True
         elif self.config:
-            if self.config.is_interesting:
+            if self.config.is_interesting is not None:
+                return self.config.is_interesting
+            elif self.config.author in INTERESTING_AUTHORS:
                 return True
             elif self.category() == FINANCE and self.author is not None:
                 return False
-            elif self.category() in UNINTERESTING_CATEGORES:
+            elif self.category() in UNINTERESTING_CATEGORIES:
                 return False
         for prefix in UNINTERESTING_PREFIXES:
@@ -178,6 +149,10 @@ class OtherFile(Document):
     def metadata(self) -> Metadata:
         metadata = super().metadata()
         metadata['is_interesting'] = self.is_interesting()
+        if self.was_timestamp_extracted:
+            metadata['was_timestamp_extracted'] = self.was_timestamp_extracted
         return metadata
     def preview_text(self) -> str:
@@ -191,6 +166,8 @@ class OtherFile(Document):
         """Return configured timestamp or value extracted by scanning text with datefinder."""
         if self.config and self.config.timestamp:
             return self.config.timestamp
+        elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
+            return None
         timestamps: list[datetime] = []
@@ -214,7 +191,10 @@ class OtherFile(Document):
                 self.log_top_lines(15, msg=f"No timestamps found")
             return None
-        elif len(timestamps) == 1:
+        self.was_timestamp_extracted = True
+        if len(timestamps) == 1:
             return timestamps[0]
         else:
             timestamps = sorted(uniquify(timestamps), reverse=True)
@@ -222,7 +202,7 @@ class OtherFile(Document):
             return timestamps[0]  # Most recent timestamp appearing in text is usually the closest
     def _log_extracted_timestamps_info(self, timestamps: list[datetime]) -> None:
-        num_days_spanned = (timestamps[0] - timestamps[-1]).days
+        num_days_spanned = days_between(timestamps[-1], timestamps[0])
         timestamps_log_msg = f"Extracted {len(timestamps)} timestamps spanning {num_days_spanned} days{TIMESTAMP_LOG_INDENT}"
         timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
@@ -230,9 +210,39 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
     @staticmethod
-    def build_table(files: Sequence['OtherFile']) -> Table:
+    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
+        counts = defaultdict(int)
+        category_bytes = defaultdict(int)
+        for file in files:
+            if file.category() is None:
+                logger.warning(f"file {file.file_id} has no category")
+            counts[file.category()] += 1
+            category_bytes[file.category()] += file.file_size()
+        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[0].min_width = 14
+        table.columns[-1].style = 'dim'
+        for (category, count) in sort_dict(counts):
+            category_files = [f for f in files if f.category() == category]
+            known_author_count = Document.known_author_count(category_files)
+            table.add_row(
+                styled_category(category or UNKNOWN),
+                str(count),
+                str(known_author_count),
+                str(count - known_author_count),
+                file_size_to_str(category_bytes[category]),
+            )
+        return table
+    @staticmethod
+    def files_preview_table(files: Sequence['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table(None, show_lines=True)
+        table = build_table('Other Files Details', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='center')
@@ -240,7 +250,7 @@ class OtherFile(Document):
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
         for file in files:
-            link_and_info = [file.external_links()]
+            link_and_info = [file.external_links_txt()]
             date_str = file.date_str()
             if file.is_duplicate():
@@ -261,33 +271,3 @@ class OtherFile(Document):
             )
         return table
-    @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile']) -> Table:
-        counts = defaultdict(int)
-        category_bytes = defaultdict(int)
-        for file in files:
-            if file.category() is None:
-                logger.warning(f"file {file.file_id} has no category")
-            counts[file.category()] += 1
-            category_bytes[file.category()] += file.length
-        table = build_table('Other Files Summary')
-        add_cols_to_table(table, ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
-        table.columns[-1].style = 'dim'
-        for (category, count) in sort_dict(counts):
-            category_files = [f for f in files if f.category() == category]
-            known_author_count = Document.known_author_count(category_files)
-            table.add_row(
-                styled_category(category or UNKNOWN),
-                str(count),
-                str(known_author_count),
-                str(count - known_author_count),
-                file_size_to_str(category_bytes[category]),
-            )
-        return table

epstein_files/epstein_files.py CHANGED Viewed

@@ -23,14 +23,14 @@ from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
      epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
+from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
-from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
+from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
 from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
-     build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
-     print_other_site_link, print_panel, print_section_header, vertically_pad)
+     print_all_files_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
+     print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
@@ -72,18 +72,18 @@ class EpsteinFiles:
         # Read through and classify all the files
         for file_arg in self.all_files:
-            doc_timer = Timer(decimals=4)
+            doc_timer = Timer(decimals=2)
             document = Document(file_arg)
             cls = document_cls(document)
-            if document.length == 0:
+            if document.length() == 0:
                 logger.warning(f"Skipping empty file: {document}]")
                 continue
             elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
-                logger.warning(f"Skipping {document.filename}...")
+                document.log(f"Skipping OtherFile...")
                 continue
-            documents.append(cls(file_arg, text=document.text))
+            documents.append(cls(file_arg, lines=document.lines, text=document.text))
             logger.info(str(documents[-1]))
             file_type_count[cls.__name__] += 1
@@ -104,16 +104,20 @@ class EpsteinFiles:
         if PICKLED_PATH.exists() and not args.overwrite_pickle:
             with gzip.open(PICKLED_PATH, 'rb') as file:
                 epstein_files = pickle.load(file)
-                timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
                 epstein_files.timer = timer
+                timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
+                epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
                 return epstein_files
         logger.warning(f"Building new cache file, this will take a few minutes...")
         epstein_files = EpsteinFiles(timer=timer)
-        with gzip.open(PICKLED_PATH, 'wb') as file:
-            pickle.dump(epstein_files, file)
-            logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
+        if args.skip_other_files:
+            logger.warning(f"Not writing pickled data because --skip-other-files")
+        else:
+            with gzip.open(PICKLED_PATH, 'wb') as file:
+                pickle.dump(epstein_files, file)
+                logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
         timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
         return epstein_files
@@ -127,9 +131,6 @@ class EpsteinFiles:
         names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
         return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
-    def attributed_email_count(self) -> int:
-        return sum([i for author, i in self.email_author_counts.items() if author != UNKNOWN])
     def docs_matching(
             self,
             pattern: re.Pattern | str,
@@ -156,7 +157,7 @@ class EpsteinFiles:
         return self.emails_for(author)[-1].timestamp
     def email_conversation_length_in_days(self, author: str | None) -> int:
-        return (self.last_email_at(author) - self.earliest_email_at(author)).days + 1
+        return days_between(self.earliest_email_at(author), self.last_email_at(author))
     def email_signature_substitution_counts(self) -> dict[str, int]:
         """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -172,7 +173,7 @@ class EpsteinFiles:
         return sorted(list(self.unknown_recipient_email_ids))
     def emails_by(self, author: str | None) -> list[Email]:
-        return [e for e in self.emails if e.author == author]
+        return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
     def emails_for(self, author: str | None) -> list[Email]:
         """Returns emails to or from a given 'author' sorted chronologically."""
@@ -185,9 +186,11 @@ class EpsteinFiles:
     def emails_to(self, author: str | None) -> list[Email]:
         if author is None:
-            return [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
+            emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
         else:
-            return [e for e in self.emails if author in e.recipients]
+            emails = [e for e in self.emails if author in e.recipients]
+        return Document.sort_by_timestamp(emails)
     def get_documents_by_id(self, file_ids: str | list[str]) -> list[Document]:
         file_ids = listify(file_ids)
@@ -198,20 +201,29 @@ class EpsteinFiles:
         return docs
-    def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
-        return MessengerLog.logs_for(author, self.imessage_logs)
     def json_metadata(self) -> str:
         """Create a JSON string containing metadata for all the files."""
         metadata = {
-            Email.__name__: _sorted_metadata(self.emails),
-            JsonFile.__name__: _sorted_metadata(self.json_files),
-            MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
-            OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
+            'files': {
+                Email.__name__: _sorted_metadata(self.emails),
+                JsonFile.__name__: _sorted_metadata(self.json_files),
+                MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
+                OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
+            },
+            'people': {
+                name: highlighted_group.get_info(name)
+                for highlighted_group in HIGHLIGHTED_NAMES
+                if isinstance(highlighted_group, HighlightedNames)
+                for name, description in highlighted_group.emailers.items()
+                if description
+            }
         }
         return json.dumps(metadata, indent=4, sort_keys=True)
+    def non_duplicate_emails(self) -> list[Email]:
+        return [email for email in self.emails if not email.is_duplicate()]
     def non_json_other_files(self) -> list[OtherFile]:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
@@ -230,8 +242,8 @@ class EpsteinFiles:
                 f"{len([d for d in docs if d.is_duplicate()])}",
             )
-        add_row('iMessage Logs', self.imessage_logs)
         add_row('Emails', self.emails)
+        add_row('iMessage Logs', self.imessage_logs)
         add_row('JSON Data', self.json_files)
         add_row('Other', self.non_json_other_files())
         console.print(Align.center(table))
@@ -271,12 +283,13 @@ class EpsteinFiles:
         console.print(Align.center(Email.build_table(emails, author)), '\n')
     def print_email_device_info(self) -> None:
-        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
+        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
         console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
-    def print_emailer_counts_table(self) -> None:
-        footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
+    def table_of_emailers(self) -> Table:
+        attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
+        footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
         counts_table = build_table("Email Counts", caption=footer)
         add_cols_to_table(counts_table, [
@@ -308,49 +321,17 @@ class EpsteinFiles:
                 str(self.email_recipient_counts[name]),
                 emails[0].timestamp_without_seconds(),
                 emails[-1].timestamp_without_seconds(),
-                '' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
-                '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
-                '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
-                '' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
+                link_text_obj(search_jmail_url(name), JMAIL) if name else '',
+                link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(search_twitter_url(name), 'search X') if name else '',
             )
-        console.print(vertically_pad(counts_table, 2))
-    def print_imessage_summary(self) -> None:
-        """Print summary table and stats for text messages."""
-        console.print(MessengerLog.summary_table(self.imessage_logs))
-        text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
-        text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
-        console.print(text_summary_msg)
-        imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
-        console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
-    def print_other_files_table(self) -> list[OtherFile]:
-        """Returns the OtherFile objects that were interesting enough to print."""
-        interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
-        header_pfx = '' if args.all_other_files else 'Selected '
-        print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
-        if not args.all_other_files:
-            print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
-            print_other_site_link(False)
-            console.line(2)
-        console.print(OtherFile.build_table(interesting_files))
-        console.print(Padding(OtherFile.count_by_category_table(interesting_files), (2, 0, 2, 2)))
-        skipped_file_count = len(self.other_files) - len(interesting_files)
-        if skipped_file_count > 0:
-            logger.warning(f"Skipped {skipped_file_count} uninteresting other files...")
-        return interesting_files
+        return counts_table
     def _tally_email_data(self) -> None:
         """Tally up summary info about Email objects."""
-        for email in self.emails:
-            if email.is_duplicate():
-                continue
+        for email in self.non_duplicate_emails():
             self.email_author_counts[email.author] += 1
             if len(email.recipients) == 0:
@@ -380,7 +361,7 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
 def document_cls(doc: Document) -> Type[Document]:
     search_area = doc.text[0:5000]  # Limit search area to avoid pointless scans of huge files
-    if doc.length == 0:
+    if doc.length() == 0:
         return Document
     if doc.text[0] == '{':
         return JsonFile

epstein_files/util/constant/names.py CHANGED Viewed

@@ -187,9 +187,11 @@ VIRGINIA_GIUFFRE = 'Virginia Giuffre'
 # Organizations
 BOFA = 'BofA'
+BOFA_MERRILL = f'{BOFA} / Merrill Lynch'
 CNN = 'CNN'
 DEUTSCHE_BANK = 'Deutsche Bank'
 ELECTRON_CAPITAL_PARTNERS = 'Electron Capital Partners'
+EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
 GOLDMAN_SACHS = 'Goldman Sachs'
 GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
 HARVARD = 'Harvard'
@@ -238,7 +240,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     ian isaac isaacson
     james jamie jane janet jason jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
-    laurie leo leonard lenny leslie lieberman louis lynch lynn
+    laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
     nancy nicole nussbaum
     owen

epstein-files 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

epstein-files 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl