PyPI - epstein-files - Versions diffs - 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl - Mend

epstein-files 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

epstein_files/__init__.py +7 -9
epstein_files/documents/communication.py +2 -2
epstein_files/documents/document.py +94 -81
epstein_files/documents/email.py +47 -5
epstein_files/documents/imessage/text_message.py +4 -13
epstein_files/documents/json_file.py +13 -1
epstein_files/documents/messenger_log.py +32 -19
epstein_files/documents/other_file.py +67 -44
epstein_files/epstein_files.py +22 -15
epstein_files/util/constant/names.py +11 -10
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constants.py +98 -88
epstein_files/util/data.py +1 -1
epstein_files/util/doc_cfg.py +32 -62
epstein_files/util/env.py +29 -17
epstein_files/util/file_helper.py +12 -29
epstein_files/util/highlighted_group.py +34 -17
epstein_files/util/logging.py +1 -7
epstein_files/util/output.py +13 -8
epstein_files/util/rich.py +15 -10
epstein_files/util/word_count.py +65 -5
{epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/METADATA +1 -1
epstein_files-1.0.12.dist-info/RECORD +33 -0
epstein_files/count_words.py +0 -72
epstein_files-1.0.10.dist-info/RECORD +0 -34
{epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/LICENSE +0 -0
{epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/WHEEL +0 -0
{epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/entry_points.txt +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -1,9 +1,7 @@
 #!/usr/bin/env python
 """
 Reformat Epstein text message files for readability and count email senders.
-For use with iMessage log files from https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_
-Install: 'poetry install'
     Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
 """
 from sys import exit
@@ -15,7 +13,6 @@ from rich.padding import Padding
 from rich.panel import Panel
 from rich.text import Text
-from epstein_files.count_words import write_word_counts_html
 from epstein_files.epstein_files import EpsteinFiles, document_cls
 from epstein_files.documents.document import INFO_PADDING, Document
 from epstein_files.documents.email import Email
@@ -23,10 +20,11 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
-     print_text_messages, write_urls)
+from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
+     print_text_messages, write_json_metadata, write_urls)
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
+from epstein_files.util.word_count import write_word_counts_html
 def generate_html() -> None:
@@ -39,9 +37,9 @@ def generate_html() -> None:
     epstein_files = EpsteinFiles.get_files(timer)
     if args.json_metadata:
-        print_json_metadata(epstein_files)
+        write_json_metadata(epstein_files)
         exit()
-    elif args.output_json_files:
+    elif args.json_files:
         print_json_files(epstein_files)
         exit()
@@ -58,7 +56,7 @@ def generate_html() -> None:
         emails_printed = print_emails(epstein_files)
         timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
-    if args.output_other_files:
+    if args.output_other:
         files_printed = epstein_files.print_other_files_table()
         timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
@@ -96,7 +94,7 @@ def epstein_search():
                 console.print(search_result.document)
             else:
-                console.print(search_result.document.description_panel())
+                console.print(search_result.document.summary_panel())
                 for matching_line in search_result.lines:
                     line_txt = matching_line.__rich__()

epstein_files/documents/communication.py CHANGED Viewed

@@ -34,9 +34,9 @@ class Communication(Document):
     def is_attribution_uncertain(self) -> bool:
         return bool(self.config and self.config.is_attribution_uncertain)
-    def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
+    def external_links(self, _style: str = '', include_alt_link: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""
-        return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
+        return super().external_links(self.author_style, include_alt_link=include_alt_link)
     def summary(self) -> Text:
         return self._summary().append(CLOSE_PROPERTIES_CHAR)

epstein_files/documents/document.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import re
+from copy import deepcopy
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -15,13 +16,13 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
-from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, listify, patternize, without_falsey
-from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
-from epstein_files.util.env import args
-from epstein_files.util.file_helper import (DOCS_DIR, file_stem_for_id, extract_file_id, file_size,
+from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_time_from_timestamp_str, without_falsey
+from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
+from epstein_files.util.env import DOCS_DIR, args
+from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
      file_size_str, is_local_extract_file)
 from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
-from epstein_files.util.rich import SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
+from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
 from epstein_files.util.search_result import MatchedLine
 CLOSE_PROPERTIES_CHAR = ']'
@@ -30,7 +31,6 @@ INFO_INDENT = 2
 INFO_PADDING = (0, 0, 0, INFO_INDENT)
 MAX_TOP_LINES_LEN = 4000  # Only for logging
 MIN_DOCUMENT_ID = 10477
-LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
 MIN_TIMESTAMP = datetime(1991, 1, 1)
@@ -59,14 +59,27 @@ OCR_REPAIRS = {
 @dataclass
 class Document:
-    """Base class for all Epstein Files documents."""
+    """
+    Base class for all Epstein Files documents.
+    Attributes:
+        file_path (Path): Local path to file
+        author (str | None): Who is responsible for the text in the file
+        config (DocCfg): Information about this file
+        file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
+        filename (str): File's basename
+        length (int): Number of characters in the file after all the cleanup
+        lines (list[str]): Lines of the file after all the cleanup
+        text (str): Contents of the file
+        timestamp (datetime | None): When the file was originally created
+        url_slug (str): Version of the filename that works in links to epsteinify etc.
+    """
     file_path: Path
     # Optional fields
     author: str | None = None
     config: EmailCfg | DocCfg | TextCfg | None = None
     file_id: str = field(init=False)
     filename: str = field(init=False)
-    is_duplicate: bool = False
     length: int = field(init=False)
     lines: list[str] = field(init=False)
     num_lines: int = field(init=False)
@@ -74,22 +87,16 @@ class Document:
     timestamp: datetime | None = None
     url_slug: str = field(init=False)  # e.g. 'HOUSE_OVERSIGHT_123456
-    # Class variable overridden in JsonFile
-    strip_whitespace: ClassVar[bool] = True
+    # Class variables
+    include_description_in_summary_panel: ClassVar[bool] = False
+    strip_whitespace: ClassVar[bool] = True  # Overridden in JsonFile
     def __post_init__(self):
         self.filename = self.file_path.name
         self.file_id = extract_file_id(self.filename)
-        self.config = ALL_FILE_CONFIGS.get(self.file_id)
-        self.is_duplicate = bool(self.config.dupe_of_id) if self.config else False
+        self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
-        if self.is_local_extract_file():
-            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
-            # Coerce FileConfig for court docs etc. to MessageCfg for email files extracted from that document
-            if self.class_name() == EMAIL_CLASS and self.config and not isinstance(self.config, EmailCfg):
-                self.config = EmailCfg.from_doc_cfg(self.config)
-        else:
+        if 'url_slug' not in vars(self):
             self.url_slug = self.file_path.stem
         self._set_computed_fields(text=self.text or self._load_file())
@@ -97,11 +104,7 @@ class Document:
         self._extract_author()
         self.timestamp = self._extract_timestamp()
-    def class_name(self) -> str:
-        """Annoying workaround for circular import issues and isinstance()."""
-        return str(type(self).__name__)
-    def configured_description(self) -> str | None:
+    def config_description(self) -> str | None:
         """Overloaded in OtherFile."""
         if self.config and self.config.description:
             return f"({self.config.description})"
@@ -109,40 +112,51 @@ class Document:
     def date_str(self) -> str | None:
         return date_str(self.timestamp)
-    def description_panel(self, include_hints: bool = False) -> Panel:
-        """Panelized description() with info_txt(), used in search results."""
-        hints = [Text('', style='italic').append(h) for h in (self.hints() if include_hints else [])]
-        return Panel(Group(*([self.summary()] + hints)), border_style=self.document_type_style(), expand=False)
-    def document_type_style(self) -> str:
-        return DOC_TYPE_STYLES[self.class_name()]
     def duplicate_file_txt(self) -> Text:
         """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
-        if not self.config or not self.config.dupe_of_id:
+        if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
             raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
-        txt = Text(f"Not showing ", style='white dim italic').append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
-        txt.append(f" because it's {self.config.duplicate_reason()} ")
+        txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
+        txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
         return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
     def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
         """Create a Text obj link to this document on epsteinify.com."""
-        return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.url_slug, style)
+        return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
     def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
         """Create a Text obj link to this document on epstein.media."""
-        return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.url_slug, style)
+        return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
     def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
         """Create a Text obj link to this document on EpsteinWeb."""
-        return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.url_slug, style)
+        return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+    def external_links(self, style: str = '', include_alt_link: bool = False) -> Text:
+        """Returns colored links to epstein.media and epsteinweb in a Text object."""
+        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
+        if args.use_epstein_web:
+            txt.append(self.epstein_web_link(style=style))
+            if include_alt_link:
+                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+                txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
+        else:
+            txt.append(self.epstein_media_link(style=style))
+            if include_alt_link:
+                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+                txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
+        return txt
     def file_info_panel(self) -> Group:
-        """Panel with filename linking to raw file plus any hints/info about the file."""
-        panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
-        hints = [Padding(hint, INFO_PADDING) for hint in self.hints()]
-        return Group(*([panel] + hints))
+        """Panel with filename linking to raw file plus any additional info about the file."""
+        panel = Panel(self.external_links(include_alt_link=True), border_style=self._border_style(), expand=False)
+        padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
+        return Group(*([panel] + padded_info))
     def file_size(self) -> int:
         return file_size(self.file_path)
@@ -150,34 +164,33 @@ class Document:
     def file_size_str(self) -> str:
         return file_size_str(self.file_path)
-    def hints(self) -> list[Text]:
-        """Additional info about the Document (author, description, and so on) to be desplayed in doc header."""
-        hints = listify(self.info_txt())
-        hint_msg = self.configured_description()
-        if hint_msg:
-            hints.append(highlighter(Text(hint_msg, style='white dim italic')))
-        return without_falsey(hints)
+    def info(self) -> list[Text]:
+        """0 to 2 sentences containing the info_txt() as well as any configured description."""
+        return without_falsey([
+            self.info_txt(),
+            highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
+        ])
     def info_txt(self) -> Text | None:
         """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
         return None
+    def is_duplicate(self) -> bool:
+        return bool(self.config and self.config.dupe_of_id)
     def is_local_extract_file(self) -> bool:
         """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
         return is_local_extract_file(self.filename)
-    def log(self, msg: str, level: int = logging.WARNING):
+    def log(self, msg: str, level: int = logging.INFO):
         """Log with filename as a prefix."""
-        logger.log(level, f"{self.url_slug} {msg}")
+        logger.log(level, f"{self.file_path.stem} {msg}")
     def log_top_lines(self, n: int = 10, msg: str = '', level: int = logging.INFO) -> None:
         """Log first 'n' lines of self.text at 'level'. 'msg' can be optionally provided."""
         separator = '\n\n' if '\n' in msg else '. '
         msg = (msg + separator) if msg else ''
-        msg = f"{self.filename}: {msg}First {n} lines:"
-        logger.log(level, f"{msg}\n\n{self.top_lines(n)}\n")
+        self.log(f"{msg}First {n} lines:\n\n{self.top_lines(n)}\n", level)
     def matching_lines(self, _pattern: re.Pattern | str) -> list[MatchedLine]:
         """Return lines matching a regex as colored list[Text]."""
@@ -189,13 +202,13 @@ class Document:
         metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
         metadata['bytes'] = self.file_size()
         metadata['filename'] = f"{self.url_slug}.txt"
-        metadata['type'] = self.class_name()
+        metadata['type'] = self._class_name()
         if self.is_local_extract_file():
             metadata['extracted_file'] = {
-                'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
-                'extracted_from_file': self.url_slug + '.txt',
-                'extracted_file_url': extracted_file_url(self.filename),
+                'explanation': 'Manually extracted from one of the court filings.',
+                'extracted_from': self.url_slug + '.txt',
+                'url': extracted_file_url(self.filename),
             }
         return metadata
@@ -204,25 +217,6 @@ class Document:
         with open(self.file_path) as f:
             return f.read()
-    def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
-        """Returns colored links to epstein.media and and epsteinweb in a Text object."""
-        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
-        if args.use_epstein_web_links:
-            txt.append(self.epstein_web_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
-        else:
-            txt.append(self.epstein_media_link(style=style))
-            if include_alt_link:
-                txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
-                txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
-        return txt
     def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
         """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
         for k, v in repairs.items():
@@ -234,7 +228,7 @@ class Document:
         return text
     def sort_key(self) -> tuple[datetime, str, int]:
-        if self.config and self.config.dupe_of_id:
+        if self.is_duplicate():
             sort_id = self.config.dupe_of_id
             dupe_idx = 1
         else:
@@ -245,11 +239,11 @@ class Document:
     def summary(self) -> Text:
         """Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
-        txt = Text('').append(self.class_name(), style=self.document_type_style())
+        txt = Text('').append(self._class_name(), style=self._class_style())
         txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
         if self.timestamp:
-            timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
+            timestamp_str = remove_time_from_timestamp_str(self.timestamp)
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
@@ -261,13 +255,32 @@ class Document:
         return txt
+    def summary_panel(self) -> Panel:
+        """Panelized summary(), plus the info() sentences when enabled for the class; used in search results."""
+        sentences = [self.summary()]
+        if self.include_description_in_summary_panel:
+            sentences += [Text('', style='italic').append(h) for h in self.info()]
+        return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
     def top_lines(self, n: int = 10) -> str:
         return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
+    def warn(self, msg: str) -> None:
+        self.log(msg, level=logging.WARNING)
     def _border_style(self) -> str:
         """Should be overloaded in subclasses."""
         return 'white'
+    def _class_name(self) -> str:
+        """Annoying workaround for circular import issues and isinstance()."""
+        return str(type(self).__name__)
+    def _class_style(self) -> str:
+        return DOC_TYPE_STYLES[self._class_name()]
     def _extract_author(self) -> None:
         """Get author from config. Extended in Email subclass to also check headers."""
         if self.config and self.config.author:

epstein_files/documents/email.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import re
+from copy import deepcopy
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
 from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
      flatten, remove_timezone, uniquify)
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
+from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
 BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
 DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
 TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
+LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
 SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
 REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+APPEARS_IN = 'Appears in'
 MAX_CHARS_TO_PRINT = 4000
 MAX_NUM_HEADER_LINES = 14
 MAX_QUOTED_REPLIES = 2
@@ -248,6 +252,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
 # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
 USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
+    'Alan Dlugash',                            # CCed with Richard Kahn
     'Alan Rogers',                           # Random CC
     'Andrew Friendly',                       # Presumably some relation of Kelly Friendly
     'BS Stern',                              # A random fwd of email we have
@@ -264,6 +269,8 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Lyn Fontanilla',                        # Random CC
     'Mark Albert',                           # Random CC
     'Matthew Schafer',                       # Random CC
+    MICHAEL_BUCHHOLTZ,                       # Terry Kafka CC
+    'Nancy Dahl',                            # covered by Lawrence Krauss (her husband)
     'Michael Simmons',                       # Random CC
     'Nancy Portland',                        # Lawrence Krauss CC
     'Oliver Goodenough',                     # Robert Trivers CC
@@ -318,6 +325,17 @@ class Email(Communication):
     rewritten_header_ids: ClassVar[set[str]] = set([])
     def __post_init__(self):
+        self.filename = self.file_path.name
+        self.file_id = extract_file_id(self.filename)
+        # Special handling for copying properties out of the config for the document this one was extracted from
+        if self.is_local_extract_file():
+            self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
+            extracted_from_doc_id = self.url_slug.split('_')[-1]
+            if extracted_from_doc_id in ALL_FILE_CONFIGS:
+                self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
         super().__post_init__()
         try:
@@ -570,7 +588,7 @@ class Email(Communication):
             self._merge_lines(3)  # Merge 4th and 5th rows
         elif self.file_id in '026609 029402 032405 022695'.split():
             self._merge_lines(4)  # Merge 5th and 6th rows
-        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
+        elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
             self._merge_lines(2, 4)
         elif self.file_id in ['029154', '029163']:
             self._merge_lines(2, 5)
@@ -591,6 +609,10 @@ class Email(Communication):
             self._merge_lines(7, 9)
         elif self.file_id == '030299':
             self._merge_lines(7, 10)
+        elif self.file_id == '014860':
+            self._merge_lines(3)
+            self._merge_lines(4)
+            self._merge_lines(4)
         elif self.file_id == '029977':
             self._set_computed_fields(text=self.text.replace('Sent 9/28/2012 2:41:02 PM', 'Sent: 9/28/2012 2:41:02 PM'))
@@ -606,9 +628,8 @@ class Email(Communication):
             self._remove_line(3)
         if old_text != self.text:
-            self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n', logging.INFO)
-            self.log_top_lines(12, 'Result of modifications', logging.INFO)
-            self.log('', logging.INFO)
+            self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n')
+            self.log_top_lines(12, 'Result of modifications')
         lines = self.repair_ocr_text(OCR_REPAIRS, self.text).split('\n')
         new_lines = []
@@ -646,6 +667,27 @@ class Email(Communication):
             sent_from = sent_from_match.group(0)
             return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
+    def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
+        """Copy info from original config for file this document was extracted from."""
+        if self.file_id in ALL_FILE_CONFIGS:
+            self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
+            self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+        else:
+            self.config = EmailCfg(id=self.file_id)
+        extracted_from_description = extracted_from_doc_cfg.complete_description()
+        if extracted_from_description:
+            extracted_description = f"{APPEARS_IN} {extracted_from_description}"
+            if self.config.description:
+                self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+            self.config.description = extracted_description
+        self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
+        self.warn(f"Constructed synthetic config: {self.config}")
     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         logger.debug(f"Printing '{self.filename}'...")
         yield self.file_info_panel()
@@ -697,7 +739,7 @@ class Email(Communication):
         yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
         if should_rewrite_header:
-            self.log_top_lines(self.header.num_header_rows + 4, f'Original header:', logging.INFO)
+            self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
     @staticmethod
     def build_table(emails: list['Email'], _author: str | None) -> Table:

epstein_files/documents/imessage/text_message.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import re
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime
 from rich.text import Text
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, ANTHONY_SCARAMUCCI, CELINA_DUBIN, EVA, STEVE_BANNON, UNKNOWN
+from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
 from epstein_files.util.data import extract_last_name
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
@@ -19,15 +19,6 @@ DISPLAY_LAST_NAME_ONLY = [
     STEVE_BANNON,
 ]
-PHONE_NUMBER_MAPPING = {
-    '+19174393646': ANTHONY_SCARAMUCCI,
-    '+13109906526': STEVE_BANNON,
-    '+16463880059': EVA,
-    '+13108737937': CELINA_DUBIN,
-    '+13108802851': STEVE_BANNON,
-}
 TEXTER_MAPPING = {
     'e:': JEFFREY_EPSTEIN,
     'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
@@ -48,13 +39,13 @@ class TextMessage:
         if self.author is None:
             self.author_str = UNKNOWN
-        elif self.author in DISPLAY_LAST_NAME_ONLY:
+        elif self.author in DISPLAY_LAST_NAME_ONLY and not self.author_str:
             self.author_str = extract_last_name(self.author)
         else:
             self.author_str = self.author_str or self.author
         if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
-            self.author_str = self.author + ' (?)'
+            self.author_str += ' (?)'
     def timestamp(self) -> datetime:
         return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)

epstein_files/documents/json_file.py CHANGED Viewed

@@ -8,11 +8,23 @@ from rich.text import Text
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import JSON
+from epstein_files.util.rich import INFO_STYLE
+TEXT_FIELDS = [
+    'caption',
+    'standard',
+    'subtitle',
+    'text',
+    'title',
+    'to',
+]
 @dataclass
 class JsonFile(OtherFile):
     """File containing JSON data."""
+    include_description_in_summary_panel: ClassVar[bool] = False
     strip_whitespace: ClassVar[bool] = False
     def __post_init__(self):
@@ -27,7 +39,7 @@ class JsonFile(OtherFile):
         return JSON
     def info_txt(self) -> Text | None:
-        return Text(f"JSON file, possibly iMessage or similar app metadata", style='white dim italic')
+        return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
     def is_interesting(self):
         return False

epstein-files 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl

epstein-files 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl