PyPI - epstein-files - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

epstein_files/__init__.py +59 -51
epstein_files/documents/communication.py +9 -9
epstein_files/documents/document.py +111 -87
epstein_files/documents/email.py +154 -85
epstein_files/documents/emails/email_header.py +7 -6
epstein_files/documents/imessage/text_message.py +3 -2
epstein_files/documents/json_file.py +17 -0
epstein_files/documents/messenger_log.py +62 -3
epstein_files/documents/other_file.py +165 -17
epstein_files/epstein_files.py +100 -143
epstein_files/util/constant/names.py +6 -0
epstein_files/util/constant/strings.py +27 -0
epstein_files/util/constant/urls.py +22 -9
epstein_files/util/constants.py +968 -1015
epstein_files/util/data.py +14 -28
epstein_files/util/{file_cfg.py → doc_cfg.py} +120 -34
epstein_files/util/env.py +16 -18
epstein_files/util/file_helper.py +56 -17
epstein_files/util/highlighted_group.py +227 -175
epstein_files/util/logging.py +57 -0
epstein_files/util/rich.py +18 -13
epstein_files/util/search_result.py +14 -6
epstein_files/util/timer.py +24 -0
epstein_files/util/word_count.py +2 -1
{epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/METADATA +3 -2
epstein_files-1.0.1.dist-info/RECORD +30 -0
epstein_files-1.0.0.dist-info/RECORD +0 -28
{epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/LICENSE +0 -0
{epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/WHEEL +0 -0

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -1,13 +1,20 @@
 import re
+from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime
 from rich.console import Console, ConsoleOptions, RenderResult
+from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.communication import Communication
 from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
-from epstein_files.util.rich import logger
+from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
+from epstein_files.util.constant.strings import AUTHOR
+from epstein_files.util.data import iso_timestamp, listify, sort_dict
+from epstein_files.util.doc_cfg import Metadata, TextCfg
+from epstein_files.util.highlighted_group import get_style_for_name
+from epstein_files.util.logging import logger
 CONFIRMED_MSG = 'Found confirmed counterparty'
 GUESSED_MSG = 'This is probably a conversation with'
@@ -18,12 +25,16 @@ REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
 @dataclass
 class MessengerLog(Communication):
     """Class representing one iMessage log file (one conversation between Epstein and some counterparty)."""
+    config: TextCfg | None = None
     _messages: list[TextMessage] = field(default_factory=list)
     def first_message_at(self, name: str | None) -> datetime:
         # Timestamp of the first entry returned by messages_by(name).
         # NOTE(review): the [0] index raises IndexError when this log has no messages by 'name'.
         return self.messages_by(name)[0].timestamp()
     def info_txt(self) -> Text | None:
+        # Without an author there is no counterparty to report, so no info line is produced.
+        if self.author is None:
+            return None
         # Use the hedged wording when the attribution is uncertain, the confirmed wording otherwise.
         hint_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
         # The author's name gets the author's own style plus bold.
         author_txt = Text(self.author_or_unknown(), style=self.author_style + ' bold')
         # Result reads "(<hint_msg> <author>)"; the base Text is created with the 'dim' style.
         return Text(f"({hint_msg} ", style='dim').append(author_txt).append(')')
@@ -51,6 +62,11 @@ class MessengerLog(Communication):
         """Return all messages by 'name'."""
         return [m for m in self.messages() if m.author == name]
+    def metadata(self) -> Metadata:
+        metadata = super().metadata()
+        metadata.update({'num_messages': len(self.messages())})
+        return metadata
     def _border_style(self) -> str:
         # The border style is the same style string used for the author.
         return self.author_style
@@ -61,13 +77,56 @@ class MessengerLog(Communication):
             try:
                 return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
             except ValueError as e:
-                logger.info(f"[WARNING] Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
+                logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
         raise RuntimeError(f"{self}: No timestamp found!")
-    def __rich_console__(self, _console: Console, _options: ConsoleOptions) -> RenderResult:
+    def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         # Rendering order: the file info panel, an empty Text, then every message in turn.
         # 'console' and 'options' are not used in this body.
         yield self.file_info_panel()
         yield Text('')
         for message in self.messages():
             yield message
+    @classmethod
+    def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
+        """Count up how many texts were sent by each author."""
+        sender_counts: dict[str | None, int] = defaultdict(int)
+        for message_log in imessage_logs:
+            for message in message_log.messages():
+                sender_counts[message.author] += 1
+        return sender_counts
+    @classmethod
+    def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
+        authors = listify(author)
+        return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
+    @classmethod
+    def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
+        """Build a table summarizing the text messages in 'imessage_logs'."""
+        counts_table = Table(title="Text Message Counts By Author", header_style="bold")
+        counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
+        counts_table.add_column('Files', justify='right', style='white')
+        counts_table.add_column("Msgs", justify='right')
+        counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
+        counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
+        counts_table.add_column('Days', justify='right', style='dim')
+        for name, count in sort_dict(cls.count_authors(imessage_logs)):
+            logs = cls.logs_for(name, imessage_logs)
+            first_at = logs[0].first_message_at(name)
+            last_at = logs[-1].first_message_at(name)
+            counts_table.add_row(
+                Text(name or UNKNOWN, get_style_for_name(name)),
+                str(len(logs)),
+                f"{count:,}",
+                iso_timestamp(first_at),
+                iso_timestamp(last_at),
+                str((last_at - first_at).days + 1),
+            )
+        return counts_table

epstein_files/documents/other_file.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import re
 import logging
 import warnings
 from dataclasses import dataclass
@@ -5,18 +6,25 @@ from datetime import datetime
 import datefinder
 import dateutil
+from rich.console import Group
 from rich.markup import escape
 from rich.panel import Panel
+from rich.table import Table
 from rich.text import Text
 from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
-from epstein_files.util.constants import UNINTERESTING_PREFIXES
+from epstein_files.util.constant.strings import *
+from epstein_files.util.constants import *
+from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg
 from epstein_files.util.data import escape_single_quotes, remove_timezone, uniquify
-from epstein_files.util.env import args, logger
-from epstein_files.util.rich import highlighter, logger
+from epstein_files.util.file_helper import FILENAME_LENGTH
+from epstein_files.util.env import args
+from epstein_files.util.highlighted_group import get_style_for_category
+from epstein_files.util.rich import QUESTION_MARK_TXT, highlighter
+from epstein_files.util.logging import logger
-MAX_EXTRACTED_TIMESTAMPS = 100
 MAX_DAYS_SPANNED_TO_BE_VALID = 10
+MAX_EXTRACTED_TIMESTAMPS = 100
 MIN_TIMESTAMP = datetime(2000, 1, 1)
 MID_TIMESTAMP = datetime(2007, 1, 1)
 MAX_TIMESTAMP = datetime(2022, 12, 31)
@@ -24,23 +32,110 @@ PREVIEW_CHARS = int(580 * (1 if args.all_other_files else 1.5))
 LOG_INDENT = '\n         '
 TIMESTAMP_LOG_INDENT = f'{LOG_INDENT}    '
 VAST_HOUSE = 'vast house'  # Michael Wolff article draft about Epstein indicator
+# Case-insensitive match for "Virgin Islands Daily News". Whitespace between words is optional and
+# "islands" may appear with k/l and a/i swapped (presumably to tolerate OCR noise - TODO confirm).
+VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
+# Categories that OtherFile.is_interesting() checks in order to return False.
+# NOTE(review): "CATEGORES" is a misspelling of "CATEGORIES"; renaming means updating every user.
+UNINTERESTING_CATEGORES = [
+    ARTS,
+    BOOK,
+    JUNK,
+    SPEECH,
+]
+# File IDs for which OtherFile.is_interesting() returns False.
+UNINTERESTING_IDS = [
+    '031794',
+]
+# OtherFiles whose description/hints match these prefixes are not displayed unless --all-other-files is used.
+# OtherFile.is_interesting() tests each entry with a case-sensitive str.startswith() against the
+# plain text of the file's first hint.
+UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
+    'article about',
+    ARTICLE_DRAFT,
+    'Aviation International',
+    BBC,
+    BLOOMBERG,
+    'Boston Globe',
+    BROCKMAN_INC,
+    CHINA_DAILY,
+    CNN,
+    'completely redacted',
+    CVRA,
+    DAILY_MAIL,
+    DAILY_TELEGRAPH,
+    DAVID_SCHOEN_CVRA_LEXIS_SEARCH[0:-12],  # Because date at end :(
+    DERSH_GIUFFRE_TWEET,
+    'Financial Times',
+    'Forbes',
+    'Frontlines',
+    'Future Science',
+    'Globe and Mail',
+    GORDON_GETTY,
+    f"{HARVARD} Econ",
+    HARVARD_POETRY,
+    'Inference',
+    JASTA,
+    'JetGala',
+    JOHN_BOLTON_PRESS_CLIPPING,
+    'Journal of Criminal',
+    LA_TIMES,
+    'Litigation Daily',
+    LAWRENCE_KRAUSS,
+    'MarketWatch',
+    MARTIN_NOWAK,
+    NOBEL_CHARITABLE_TRUST,
+    'Nautilus',
+    'New Yorker',
+    NYT_ARTICLE,
+    NYT_COLUMN,
+    PALM_BEACH_CODE_ENFORCEMENT,
+    PALM_BEACH_DAILY_ARTICLE,
+    PALM_BEACH_POST_ARTICLE,
+    PALM_BEACH_TSV,
+    PALM_BEACH_WATER_COMMITTEE,
+    PAUL_KRASSNER,
+    PEGGY_SIEGAL,
+    'Politifact',
+    'Rafanelli',
+    ROBERT_LAWRENCE_KUHN,
+    ROBERT_TRIVERS,
+    'SCMP',
+    'SciencExpress',
+    'Scowcroft',
+    SHIMON_POST_ARTICLE,
+    SINGLE_PAGE,
+    STACEY_PLASKETT,
+    TERJE_ROD_LARSEN,
+    TEXT_OF_US_LAW,
+    TRANSLATION,
+    TWEET,
+    THE_REAL_DEAL_ARTICLE,
+    TRUMP_DISCLOSURES,
+    UBS_CIO_REPORT,
+    UN_GENERAL_ASSEMBLY,
+    'U.S. News',
+    'US Office',
+    'Vanity Fair',
+    VI_DAILY_NEWS_ARTICLE,
+    WAPO,
+]
 @dataclass
 class OtherFile(Document):
     """File that is not an email, an iMessage log, or JSON data."""
-    def configured_description(self) -> str | None:
-        """Overloads superclass method."""
-        if self.config is None:
-            return None
+    def __post_init__(self):
+        super().__post_init__()
-        pieces = [p for p in [self.config.author, self.config.description] if p]
-        return ' '.join(pieces) if pieces else None
+        if self.config is None and VI_DAILY_NEWS_REGEX.search(self.text):
+            self.log(f"Creating synthetic config for VI Daily News article...", logging.INFO)
+            self.config = DocCfg(id=self.file_id, description=VI_DAILY_NEWS_ARTICLE, category=ARTICLE)
-    def description(self) -> Text:
-        """One line summary mostly for logging."""
-        return super().description().append(CLOSE_PROPERTIES_CHAR)
+    def category(self) -> str | None:
+        return self.config and self.config.category
+    def configured_description(self) -> str | None:
+        """Overloads superclass method."""
+        if self.config is not None:
+            return self.config.info_str()
     def description_panel(self, include_hints=True) -> Panel:
         """Panelized description() with info_txt(), used in search results."""
@@ -57,13 +152,22 @@ class OtherFile(Document):
             return Text(escape(self.preview_text()))
     def is_interesting(self):
-        """False for lame prefixes and duplicates."""
+        """False for lame prefixes, duplicates, and other boring files."""
         hints = self.hints()
         if self.is_duplicate:
             return False
+        elif self.file_id in UNINTERESTING_IDS:
+            return False
         elif len(hints) == 0:
             return True
+        elif self.config:
+            if self.config.is_interesting:
+                return True
+            elif self.category() == FINANCE and self.author is not None:
+                return False
+            elif self.category() in UNINTERESTING_CATEGORES:
+                return False
         for prefix in UNINTERESTING_PREFIXES:
             if hints[0].plain.startswith(prefix):
@@ -74,6 +178,10 @@ class OtherFile(Document):
     def preview_text(self) -> str:
         # Replace every WHITESPACE_REGEX match in the text with a single space, then keep
         # only the first PREVIEW_CHARS characters.
         return WHITESPACE_REGEX.sub(' ', self.text)[0:PREVIEW_CHARS]
+    def summary(self) -> Text:
+        """One line summary mostly for logging."""
+        return super().summary().append(CLOSE_PROPERTIES_CHAR)
     def _extract_timestamp(self) -> datetime | None:
         """Return configured timestamp or value extracted by scanning text with datefinder."""
         if self.config and self.config.timestamp:
@@ -98,7 +206,9 @@ class OtherFile(Document):
                 logger.warning(f"Error while iterating through datefinder.find_dates(): {e}")
         if len(timestamps) == 0:
-            self.log_top_lines(15, msg=f"{self.file_id}: No timestamps found", level=logging.INFO)
+            if not self.is_duplicate and VAST_HOUSE not in self.text:
+                self.log_top_lines(15, msg=f"No timestamps found", level=logging.INFO)
             return None
         elif len(timestamps) == 1:
             return timestamps[0]
@@ -113,5 +223,43 @@ class OtherFile(Document):
         timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
         if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
-            log_level = logging.DEBUG if VAST_HOUSE in self.text else logging.INFO
-            self.log_top_lines(15, msg=timestamps_log_msg, level=log_level)
+            self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
+    @staticmethod
+    def build_table(docs: list['OtherFile']) -> Table:
+        """Build a table of OtherFile documents."""
+        table = Table(header_style='bold', show_lines=True)
+        table.add_column('File', justify='center', width=FILENAME_LENGTH)
+        table.add_column('Date', justify='center')
+        table.add_column('Size', justify='center')
+        table.add_column('Type', justify='center')
+        table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
+        for doc in docs:
+            link_and_info = [doc.raw_document_link_txt()]
+            category = doc.category()
+            date_str = doc.date_str()
+            if doc.is_duplicate:
+                preview_text = doc.duplicate_file_txt()
+                row_style = ' dim'
+            else:
+                link_and_info += doc.hints()
+                preview_text = doc.highlighted_preview_text()
+                row_style = ''
+            if category:
+                category_txt = Text(category, get_style_for_category(category) or 'wheat4')
+            else:
+                category_txt = Text('')
+            table.add_row(
+                Group(*link_and_info),
+                Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
+                doc.file_size_str(),
+                category_txt,
+                preview_text,
+                style=row_style
+            )
+        return table

epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl