epstein-files 1.0.12__py3-none-any.whl → 1.0.14__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- epstein_files/__init__.py +11 -6
- epstein_files/documents/communication.py +2 -2
- epstein_files/documents/document.py +60 -50
- epstein_files/documents/email.py +40 -34
- epstein_files/documents/imessage/text_message.py +4 -4
- epstein_files/documents/json_file.py +9 -3
- epstein_files/documents/messenger_log.py +22 -19
- epstein_files/documents/other_file.py +50 -71
- epstein_files/epstein_files.py +108 -71
- epstein_files/util/constant/names.py +5 -3
- epstein_files/util/constant/strings.py +1 -1
- epstein_files/util/constant/urls.py +13 -8
- epstein_files/util/constants.py +66 -46
- epstein_files/util/data.py +3 -1
- epstein_files/util/doc_cfg.py +9 -9
- epstein_files/util/env.py +2 -5
- epstein_files/util/highlighted_group.py +25 -31
- epstein_files/util/output.py +15 -30
- epstein_files/util/rich.py +40 -31
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.0.12.dist-info → epstein_files-1.0.14.dist-info}/METADATA +10 -3
- epstein_files-1.0.14.dist-info/RECORD +33 -0
- epstein_files-1.0.12.dist-info/RECORD +0 -33
- {epstein_files-1.0.12.dist-info → epstein_files-1.0.14.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.12.dist-info → epstein_files-1.0.14.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.12.dist-info → epstein_files-1.0.14.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
@@ -21,7 +21,7 @@ from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
 from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
-
+                                        write_json_metadata, write_urls)
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -49,7 +49,7 @@ def generate_html() -> None:
         exit()

     if args.output_texts:
-
+        epstein_files.print_text_messages_section()
         timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')

     if args.output_emails:
@@ -57,8 +57,13 @@ def generate_html() -> None:
         timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")

     if args.output_other:
-
-
+        if args.uninteresting:
+            files = [f for f in epstein_files.other_files if not f.is_interesting()]
+        else:
+            files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting()]
+
+        epstein_files.print_other_files_section(files)
+        timer.print_at_checkpoint(f"Printed {len(files)} other files (skipped {len(epstein_files.other_files) - len(files)})")

     # Save output
     write_html(ALL_EMAILS_PATH if args.all_emails else TEXT_MSGS_HTML_PATH)
@@ -90,7 +95,7 @@ def epstein_search():

     if args.whole_file:
         if isinstance(search_result.document, Email):
-            search_result.document.
+            search_result.document._truncation_allowed = False

         console.print(search_result.document)
     else:
@@ -111,7 +116,7 @@ def epstein_show():

     for doc in docs:
         if isinstance(doc, Email):
-            doc.
+            doc._truncation_allowed = False

         console.print('\n', doc, '\n')

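
The new branch in generate_html() treats --uninteresting as an inverse of the usual is_interesting() filter rather than a superset of it. A minimal standalone sketch of the selection logic (the uninteresting and all_other_files flag names come from the diff; the OtherFile stub and argparse wiring here are illustrative assumptions, not the package's real classes):

from argparse import Namespace
from dataclasses import dataclass

@dataclass
class OtherFile:
    """Stand-in with just enough surface to show the filter."""
    name: str
    interesting: bool

    def is_interesting(self) -> bool:
        return self.interesting

def select_other_files(files: list[OtherFile], args: Namespace) -> list[OtherFile]:
    """Mirror of the branch added to generate_html(): --uninteresting flips the filter."""
    if args.uninteresting:
        return [f for f in files if not f.is_interesting()]
    return [f for f in files if args.all_other_files or f.is_interesting()]

files = [OtherFile('a.txt', True), OtherFile('b.txt', False)]
print([f.name for f in select_other_files(files, Namespace(uninteresting=False, all_other_files=False))])  # ['a.txt']
print([f.name for f in select_other_files(files, Namespace(uninteresting=True, all_other_files=False))])   # ['b.txt']
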
epstein_files/documents/communication.py
CHANGED
@@ -34,9 +34,9 @@ class Communication(Document):
     def is_attribution_uncertain(self) -> bool:
        return bool(self.config and self.config.is_attribution_uncertain)

-    def
+    def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
         """Overrides super() method to apply self.author_style."""
-        return super().
+        return super().external_links_txt(self.author_style, include_alt_links=include_alt_links)

     def summary(self) -> Text:
         return self._summary().append(CLOSE_PROPERTIES_CHAR)

epstein_files/documents/document.py
CHANGED
@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from pathlib import Path
 from subprocess import run
-from typing import ClassVar, Sequence, TypeVar
+from typing import Callable, ClassVar, Sequence, TypeVar

 from rich.console import Console, ConsoleOptions, Group, RenderResult
 from rich.padding import Padding
@@ -16,15 +16,15 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
-from epstein_files.util.data import collapse_newlines, date_str, patternize,
+from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
 from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
 from epstein_files.util.env import DOCS_DIR, args
-from epstein_files.util.file_helper import
-    file_size_str, is_local_extract_file)
+from epstein_files.util.file_helper import extract_file_id, file_size, file_size_str, is_local_extract_file
 from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
-from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
+from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, join_texts, key_value_txt, link_text_obj, parenthesize
 from epstein_files.util.search_result import MatchedLine

+ALT_LINK_STYLE = 'white dim'
 CLOSE_PROPERTIES_CHAR = ']'
 HOUSE_OVERSIGHT = HOUSE_OVERSIGHT_PREFIX.replace('_', ' ').strip()
 INFO_INDENT = 2
@@ -46,7 +46,6 @@ FILENAME_MATCH_STYLES = [
 METADATA_FIELDS = [
     'author',
     'file_id',
-    'num_lines',
     'timestamp'
 ]

@@ -68,7 +67,6 @@ class Document:
         config (DocCfg): Information about this fil
         file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
         filename (str): File's basename
-        length (int): Number of characters in the file after all the cleanup
         lines (str): Number of lines in the file after all the cleanup
         text (str): Contents of the file
         timestamp (datetime | None): When the file was originally created
@@ -80,12 +78,10 @@ class Document:
     config: EmailCfg | DocCfg | TextCfg | None = None
     file_id: str = field(init=False)
     filename: str = field(init=False)
-
-    lines: list[str] = field(init=False)
-    num_lines: int = field(init=False)
+    lines: list[str] = field(default_factory=list)
     text: str = ''
     timestamp: datetime | None = None
-    url_slug: str =
+    url_slug: str = ''

     # Class variables
     include_description_in_summary_panel: ClassVar[bool] = False
@@ -94,12 +90,13 @@ class Document:
     def __post_init__(self):
         self.filename = self.file_path.name
         self.file_id = extract_file_id(self.filename)
+        # config and url_slug could have been pre-set in Email
         self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
+        self.url_slug = self.url_slug or self.filename.split('.')[0]

-        if
-            self.
+        if not self.text:
+            self._load_file()

-        self._set_computed_fields(text=self.text or self._load_file())
         self._repair()
         self._extract_author()
         self.timestamp = self._extract_timestamp()
@@ -114,47 +111,49 @@ class Document:

     def duplicate_file_txt(self) -> Text:
         """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
-        if not self.
+        if not self.is_duplicate():
             raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")

         txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
         txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
-        return txt.append(epstein_media_doc_link_txt(self.config.
+        return txt.append(epstein_media_doc_link_txt(self.config.duplicate_of_id, style='royal_blue1'))

     def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-
-        return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_link(epsteinify_doc_url, style, link_txt)

     def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-
-        return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_link(epstein_media_doc_url, style, link_txt)

     def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
-
-        return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+        return self.external_link(epstein_web_doc_url, style, link_txt)

-    def
-
-        txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
+    def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+        return self.external_link(rollcall_doc_url, style, link_txt)

-
-
+    def external_link(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+        return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)

-
-
-
-        else:
-            txt.append(self.epstein_media_link(style=style))
+    def external_links_txt(self, style: str = '', include_alt_links: bool = False) -> Text:
+        """Returns colored links to epstein.media and alternates in a Text object."""
+        links = [self.epstein_media_link(style=style)]

-
-
-
+        if include_alt_links:
+            links.append(self.epsteinify_link(style=ALT_LINK_STYLE, link_txt=EPSTEINIFY))
+            links.append(self.epstein_web_link(style=ALT_LINK_STYLE, link_txt=EPSTEIN_WEB))

-
+        if self._class_name() == 'Email':
+            links.append(self.rollcall_link(style=ALT_LINK_STYLE, link_txt=ROLLCALL))
+
+        links = [links[0]] + [parenthesize(link) for link in links[1:]]
+        base_txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
+        return base_txt.append(join_texts(links))
+
+    def file_id_debug_info(self) -> str:
+        return ', '.join([f"{prop}={getattr(self, prop)}" for prop in ['file_id', 'filename', 'url_slug']])

     def file_info_panel(self) -> Group:
         """Panel with filename linking to raw file plus any additional info about the file."""
-        panel = Panel(self.
+        panel = Panel(self.external_links_txt(include_alt_links=True), border_style=self._border_style(), expand=False)
         padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
         return Group(*([panel] + padded_info))

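
The Document changes above collapse four near-identical link helpers into one external_link() dispatcher that takes a URL-builder callable. A standalone sketch of that pattern (the build_*_url functions and the Doc stub are made-up stand-ins for the package's epstein_media_doc_url() and friends, not its real API):

from dataclasses import dataclass
from typing import Callable

def build_media_url(slug: str) -> str:   # stand-in for epstein_media_doc_url()
    return f"https://example.media/docs/{slug}"

def build_web_url(slug: str) -> str:     # stand-in for epstein_web_doc_url()
    return f"https://example.org/docs/{slug}"

@dataclass
class Doc:
    url_slug: str

    def external_link(self, url_builder: Callable[[str], str], label: str | None = None) -> str:
        """Single place that turns a URL-builder callable into a rendered link."""
        return f"{label or self.url_slug}: {url_builder(self.url_slug)}"

    # The per-site helpers collapse to one-liners, mirroring the diff above.
    def media_link(self) -> str:
        return self.external_link(build_media_url)

    def web_link(self) -> str:
        return self.external_link(build_web_url, label='alt')

print(Doc('HOUSE_OVERSIGHT_012345').media_link())
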
@@ -176,12 +175,15 @@ class Document:
         return None

     def is_duplicate(self) -> bool:
-        return bool(self.config and self.config.
+        return bool(self.config and self.config.duplicate_of_id)

     def is_local_extract_file(self) -> bool:
         """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
         return is_local_extract_file(self.filename)

+    def length(self) -> int:
+        return len(self.text)
+
     def log(self, msg: str, level: int = logging.INFO):
         """Log with filename as a prefix."""
         logger.log(level, f"{self.file_path.stem} {msg}")
@@ -202,17 +204,21 @@ class Document:
         metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
         metadata['bytes'] = self.file_size()
         metadata['filename'] = f"{self.url_slug}.txt"
+        metadata['num_lines'] = self.num_lines()
         metadata['type'] = self._class_name()

         if self.is_local_extract_file():
             metadata['extracted_file'] = {
-                'explanation': '
+                'explanation': 'manually extracted from one of the other files',
                 'extracted_from': self.url_slug + '.txt',
                 'url': extracted_file_url(self.filename),
             }

         return metadata

+    def num_lines(self) -> int:
+        return len(self.lines)
+
     def raw_text(self) -> str:
         with open(self.file_path) as f:
             return f.read()
@@ -229,7 +235,7 @@ class Document:

     def sort_key(self) -> tuple[datetime, str, int]:
         if self.is_duplicate():
-            sort_id = self.config.
+            sort_id = self.config.duplicate_of_id
             dupe_idx = 1
         else:
             sort_id = self.file_id
@@ -243,15 +249,15 @@ class Document:
         txt.append(f" {self.url_slug}", style=FILENAME_STYLE)

         if self.timestamp:
-            timestamp_str =
+            timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)

         txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
-        txt.append(", ").append(key_value_txt('lines', self.num_lines))
+        txt.append(", ").append(key_value_txt('lines', self.num_lines()))

-        if self.config and self.config.
-            txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.
+        if self.config and self.config.duplicate_of_id:
+            txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.duplicate_of_id, style='magenta')))

         return txt

@@ -290,13 +296,19 @@ class Document:
         """Should be implemented in subclasses."""
         pass

-    def _load_file(self) ->
+    def _load_file(self) -> None:
         """Remove BOM and HOUSE OVERSIGHT lines, strip whitespace."""
         text = self.raw_text()
         text = text[1:] if (len(text) > 0 and text[0] == '\ufeff') else text # remove BOM
         text = self.repair_ocr_text(OCR_REPAIRS, text.strip())
-
-
+
+        lines = [
+            line.strip() if self.strip_whitespace else line for line in text.split('\n')
+            if not line.startswith(HOUSE_OVERSIGHT)
+        ]
+
+        self.text = collapse_newlines('\n'.join(lines))
+        self.lines = self.text.split('\n')

     def _repair(self) -> None:
         """Can optionally be overloaded in subclasses to further improve self.text."""

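
The rewritten _load_file() above folds what _set_computed_fields() used to do into a single pass: strip the BOM, drop the HOUSE OVERSIGHT stamp lines, optionally strip per-line whitespace, then collapse runs of blank lines. A rough standalone sketch of that cleanup order (the regex-based collapse_newlines here is an assumption about what the util helper does, and clean_document_text is a hypothetical name):

import re

HOUSE_OVERSIGHT = 'HOUSE OVERSIGHT'

def collapse_newlines(text: str) -> str:
    """Assumed behavior of the collapse_newlines util: squeeze 3+ consecutive newlines down to 2."""
    return re.sub(r'\n{3,}', '\n\n', text)

def clean_document_text(raw: str, strip_whitespace: bool = True) -> tuple[str, list[str]]:
    text = raw[1:] if raw.startswith('\ufeff') else raw          # remove BOM
    lines = [
        line.strip() if strip_whitespace else line
        for line in text.strip().split('\n')
        if not line.startswith(HOUSE_OVERSIGHT)                  # drop the Bates-style stamp lines
    ]
    cleaned = collapse_newlines('\n'.join(lines))
    return cleaned, cleaned.split('\n')

text, lines = clean_document_text('\ufeffHOUSE OVERSIGHT 012345\nHello\n\n\n\nworld\n')
print(len(lines))  # 3
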
@@ -313,9 +325,7 @@ class Document:
         else:
             raise RuntimeError(f"[{self.filename}] Either 'lines' or 'text' arg must be provided (neither was)")

-        self.length = len(self.text)
         self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
-        self.num_lines = len(self.lines)

     def _write_clean_text(self, output_path: Path) -> None:
         """Write self.text to 'output_path'. Used only for diffing files."""
@@ -328,7 +338,7 @@ class Document:
         with open(output_path, 'w') as f:
             f.write(self.text)

-        logger.warning(f"Wrote {self.length} chars of cleaned {self.filename} to {output_path}.")
+        logger.warning(f"Wrote {self.length()} chars of cleaned {self.filename} to {output_path}.")

     def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
         yield self.file_info_panel()

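
With 1.0.14, length and num_lines stop being dataclass fields set once at init and become methods computed from the current text and lines, so the later _repair() pass cannot leave stale counts behind. A tiny illustration of the difference (a stand-in class, not the real Document):

from dataclasses import dataclass, field

@dataclass
class Doc:
    text: str = ''
    lines: list[str] = field(default_factory=list)

    def length(self) -> int:      # recomputed on every call, never stale
        return len(self.text)

    def num_lines(self) -> int:
        return len(self.lines)

doc = Doc(text='one\ntwo', lines=['one', 'two'])
doc.text += '\nthree'             # a later repair pass mutates the text
doc.lines = doc.text.split('\n')
print(doc.length(), doc.num_lines())  # 13 3
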
epstein_files/documents/email.py
CHANGED
@@ -131,13 +131,12 @@ JUNK_EMAILERS = [
     'editorialstaff@flipboard.com',
     'How To Academy',
     'Jokeland',
-    JP_MORGAN_USGIO,
-    'Saved by Internet Explorer 11',
 ]

 MAILING_LISTS = [
     INTELLIGENCE_SQUARED,
     'middle.east.update@hotmail.com',
+    JP_MORGAN_USGIO,
 ]

 TRUNCATE_ALL_EMAILS_FROM = JUNK_EMAILERS + MAILING_LISTS + [
@@ -274,11 +273,9 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
     'Michael Simmons', # Random CC
     'Nancy Portland', # Lawrence Krauss CC
     'Oliver Goodenough', # Robert Trivers CC
-    'Owen Blicksilver', # Landon Thomas CC
     'Peter Aldhous', # Lawrence Krauss CC
     'Sam Harris', # Lawrence Krauss CC
     SAMUEL_LEFF, # Random CC
-    "Saved by Internet Explorer 11",
     'Sean T Lehane', # Random CC
     'Stephen Rubin', # Random CC
     'Tim Kane', # Random CC
@@ -319,7 +316,7 @@ class Email(Communication):
     recipients: list[str | None] = field(default_factory=list)
     sent_from_device: str | None = None
     signature_substitution_counts: dict[str, int] = field(default_factory=dict) # defaultdict breaks asdict :(
-
+    _truncation_allowed: bool = True # Hacky way to get __rich_console__() not to truncate in epstein_show script

     # For logging how many headers we prettified while printing, kind of janky
     rewritten_header_ids: ClassVar[set[str]] = set([])
@@ -340,10 +337,10 @@ class Email(Communication):

         try:
             if self.config and self.config.recipients:
-                self.recipients =
+                self.recipients = self.config.recipients
             else:
                 for recipient in self.header.recipients():
-                    self.recipients.extend(self.
+                    self.recipients.extend(self._emailer_names(recipient))
         except Exception as e:
             console.print_exception()
             console.line(2)
@@ -358,8 +355,12 @@ class Email(Communication):
         self.actual_text = self._actual_text()
         self.sent_from_device = self._sent_from_device()

+    def attachments(self) -> list[str]:
+        return (self.header.attachments or '').split(';')
+
     def info_txt(self) -> Text:
-
+        email_type = 'fwded article' if self.is_fwded_article() else 'email'
+        txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
         return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))

     def is_fwded_article(self) -> bool:
@@ -401,8 +402,8 @@ class Email(Communication):
             return self.text

         reply_text_match = REPLY_TEXT_REGEX.search(text)
-
-
+        self.log_top_lines(20, "Raw text:", logging.DEBUG)
+        self.log(f"With header removed:\n{text[0:500]}\n\n", logging.DEBUG)

         if reply_text_match:
             actual_num_chars = len(reply_text_match.group(1))
@@ -438,12 +439,32 @@ class Email(Communication):

         return style.replace('bold', '').strip()

+    def _emailer_names(self, emailer_str: str) -> list[str]:
+        """Return a list of people's names found in 'emailer_str' (email author or recipients field)."""
+        emailer_str = EmailHeader.cleanup_str(emailer_str)
+
+        if len(emailer_str) == 0:
+            return []
+
+        names_found = [name for name, regex in EMAILER_REGEXES.items() if regex.search(emailer_str)]
+
+        if BAD_EMAILER_REGEX.match(emailer_str) or TIME_REGEX.match(emailer_str):
+            if len(names_found) == 0 and emailer_str not in SUPPRESS_LOGS_FOR_AUTHORS:
+                logger.warning(f"'{self.filename}': No emailer found in '{escape_single_quotes(emailer_str)}'")
+            else:
+                logger.info(f"Extracted {len(names_found)} names from semi-invalid '{emailer_str}': {names_found}...")
+
+            return names_found
+
+        names_found = names_found or [emailer_str]
+        return [_reverse_first_and_last_names(name) for name in names_found]
+
     def _extract_author(self) -> None:
         self._extract_header()
         super()._extract_author()

         if not self.author and self.header.author:
-            authors = self.
+            authors = self._emailer_names(self.header.author)
             self.author = authors[0] if (len(authors) > 0 and authors[0]) else None

     def _extract_header(self) -> None:

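
_emailer_names() (the old _get_names(), renamed and moved up in the class) matches a table of per-person regexes against a From:/To: header string and falls back to the cleaned string itself when nothing matches. A simplified standalone sketch of that lookup, with made-up patterns standing in for the package's EMAILER_REGEXES table and without the bad-emailer logging branch:

import re

# Hypothetical stand-in for EMAILER_REGEXES: canonical name -> pattern matching header variants
EMAILER_REGEXES = {
    'Jane Doe': re.compile(r'jane\.?doe|doe,\s*jane', re.IGNORECASE),
    'John Smith': re.compile(r'john\s+smith', re.IGNORECASE),
}

def emailer_names(emailer_str: str) -> list[str]:
    """Return canonical names found in an author/recipient field, else the cleaned string itself."""
    emailer_str = emailer_str.strip().strip('"<>')

    if not emailer_str:
        return []

    names_found = [name for name, regex in EMAILER_REGEXES.items() if regex.search(emailer_str)]
    return names_found or [emailer_str]

print(emailer_names('Doe, Jane <jane.doe@example.com>'))  # ['Jane Doe']
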
@@ -493,26 +514,6 @@ class Email(Communication):

         raise RuntimeError(f"No timestamp found in '{self.file_path.name}' top lines:\n{searchable_text}")

-    def _get_names(self, emailer_str: str) -> list[str]:
-        """Return a list of people's names found in 'emailer_str' (email author or recipients field)."""
-        emailer_str = EmailHeader.cleanup_str(emailer_str)
-
-        if len(emailer_str) == 0:
-            return []
-
-        names_found = [name for name, regex in EMAILER_REGEXES.items() if regex.search(emailer_str)]
-
-        if BAD_EMAILER_REGEX.match(emailer_str) or TIME_REGEX.match(emailer_str):
-            if len(names_found) == 0 and emailer_str not in SUPPRESS_LOGS_FOR_AUTHORS:
-                logger.warning(f"'{self.filename}': No emailer found in '{escape_single_quotes(emailer_str)}'")
-            else:
-                logger.info(f"Extracted {len(names_found)} names from semi-invalid '{emailer_str}': {names_found}...")
-
-            return names_found
-
-        names_found = names_found or [emailer_str]
-        return [_reverse_first_and_last_names(name) for name in names_found]
-
     def _idx_of_nth_quoted_reply(self, n: int = MAX_QUOTED_REPLIES, text: str | None = None) -> int | None:
         """Get position of the nth 'On June 12th, 1985 [SOMEONE] wrote:' style line in self.text."""
         for i, match in enumerate(QUOTED_REPLY_LINE_REGEX.finditer(text or self.text)):
@@ -584,7 +585,7 @@ class Email(Communication):
             self._merge_lines(2, 5)
         elif self.file_id in ['029498', '031428']:
             self._merge_lines(2, 4)
-        elif self.file_id in ['029976', '023067']:
+        elif self.file_id in ['029976', '023067', '033576']:
             self._merge_lines(3) # Merge 4th and 5th rows
         elif self.file_id in '026609 029402 032405 022695'.split():
             self._merge_lines(4) # Merge 5th and 6th rows
@@ -609,6 +610,8 @@ class Email(Communication):
             self._merge_lines(7, 9)
         elif self.file_id == '030299':
             self._merge_lines(7, 10)
+        elif self.file_id in ['022673', '022684']:
+            self._merge_lines(9)
         elif self.file_id == '014860':
             self._merge_lines(3)
             self._merge_lines(4)
@@ -680,6 +683,9 @@ class Email(Communication):
         if extracted_from_description:
             extracted_description = f"{APPEARS_IN} {extracted_from_description}"

+            if isinstance(extracted_from_doc_cfg, EmailCfg):
+                extracted_description += ' email'
+
         if self.config.description:
             self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")

@@ -705,10 +711,10 @@ class Email(Communication):
             num_chars = quote_cutoff

         # Truncate long emails but leave a note explaining what happened w/link to source document
-        if len(text) > num_chars and self.
+        if len(text) > num_chars and self._truncation_allowed:
             text = text[0:num_chars]
             doc_link_markup = epstein_media_doc_link_markup(self.url_slug, self.author_style)
-            trim_note = f"<...trimmed to {num_chars} characters of {self.length}, read the rest at {doc_link_markup}...>"
+            trim_note = f"<...trimmed to {num_chars} characters of {self.length()}, read the rest at {doc_link_markup}...>"
             trim_footer_txt = Text.from_markup(wrap_in_markup_style(trim_note, 'dim'))

         # Rewrite broken headers where the values are on separate lines from the field names

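
The truncation block now checks the per-instance _truncation_allowed switch that the epstein_show and epstein_search entry points flip off, and self.length becomes a method call. A rough sketch of that guard-plus-footer behavior (render_email and its arguments are hypothetical simplifications of the real __rich_console__ path):

def render_email(text: str, max_chars: int, truncation_allowed: bool = True, source_url: str = '') -> str:
    """Truncate long bodies unless truncation was disabled, appending a note about where the rest lives."""
    if truncation_allowed and len(text) > max_chars:
        note = f"<...trimmed to {max_chars} characters of {len(text)}, read the rest at {source_url}...>"
        return text[:max_chars] + '\n' + note
    return text

body = 'x' * 5000
print(render_email(body, 100, source_url='https://example.media/docs/some_slug').splitlines()[-1])
print(len(render_email(body, 100, truncation_allowed=False)))  # 5000
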
epstein_files/documents/imessage/text_message.py
CHANGED
@@ -5,6 +5,7 @@ from datetime import datetime
 from rich.text import Text

 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
+from epstein_files.util.constant.strings import TIMESTAMP_DIM
 from epstein_files.util.data import extract_last_name
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
@@ -12,7 +13,6 @@ from epstein_files.util.rich import TEXT_LINK, highlighter

 MSG_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
 PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
-TIMESTAMP_STYLE = 'turquoise4 dim'

 DISPLAY_LAST_NAME_ONLY = [
     JEFFREY_EPSTEIN,
@@ -29,7 +29,7 @@ TEXTER_MAPPING = {
 class TextMessage:
     """Class representing a single iMessage text message."""
     author: str | None
-    author_str: str
+    author_str: str = ''
     id_confirmed: bool = False
     text: str
     timestamp_str: str
@@ -37,7 +37,7 @@ class TextMessage:
     def __post_init__(self):
         self.author = TEXTER_MAPPING.get(self.author or UNKNOWN, self.author)

-        if self.author
+        if not self.author:
             self.author_str = UNKNOWN
         elif self.author in DISPLAY_LAST_NAME_ONLY and not self.author_str:
             self.author_str = extract_last_name(self.author)
@@ -77,5 +77,5 @@ class TextMessage:
     def __rich__(self) -> Text:
         author_style = get_style_for_name(self.author_str if self.author_str.startswith('+') else self.author)
         author_txt = Text(self.author_str, style=author_style)
-        timestamp_txt = Text(f"[{self.timestamp_str}]", style=
+        timestamp_txt = Text(f"[{self.timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
         return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(self._message())

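
The __rich__() change swaps the module-local TIMESTAMP_STYLE constant for the shared TIMESTAMP_DIM and appends the separating space explicitly. A small sketch of composing that line with rich's Text (the style strings, including the TIMESTAMP_DIM value, are assumptions rather than the package's constants):

from rich.console import Console
from rich.text import Text

TIMESTAMP_DIM = 'turquoise4 dim'  # assumed value, mirroring the removed TIMESTAMP_STYLE

def message_line(timestamp_str: str, author: str, body: str) -> Text:
    timestamp_txt = Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
    author_txt = Text(author, style='bold cyan')
    return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(body)

Console().print(message_line('01/02/19 3:04:05 PM', 'Jeffrey Epstein', 'call me'))
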
epstein_files/documents/json_file.py
CHANGED
@@ -6,10 +6,12 @@ from typing import ClassVar

 from rich.text import Text

-from epstein_files.documents.other_file import OtherFile
+from epstein_files.documents.other_file import Metadata, OtherFile
 from epstein_files.util.constant.strings import JSON
 from epstein_files.util.rich import INFO_STYLE

+DESCRIPTION = "JSON data containing preview info for links sent in a messaging app like iMessage"
+
 TEXT_FIELDS = [
     'caption',
     'standard',
@@ -23,7 +25,6 @@ TEXT_FIELDS = [
 @dataclass
 class JsonFile(OtherFile):
     """File containing JSON data."""
-
     include_description_in_summary_panel: ClassVar[bool] = False
     strip_whitespace: ClassVar[bool] = False

@@ -39,7 +40,7 @@ class JsonFile(OtherFile):
         return JSON

     def info_txt(self) -> Text | None:
-        return Text(
+        return Text(DESCRIPTION, style=INFO_STYLE)

     def is_interesting(self):
         return False
@@ -48,5 +49,10 @@ class JsonFile(OtherFile):
         with open(self.file_path, encoding='utf-8-sig') as f:
             return json.load(f)

+    def metadata(self) -> Metadata:
+        metadata = super().metadata()
+        metadata['description'] = DESCRIPTION
+        return metadata
+
     def json_str(self) -> str:
         return json.dumps(self.json_data(), indent=4)
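
JsonFile now stamps its module-level DESCRIPTION into the metadata dict built by its parent class. A minimal sketch of that override pattern (BaseFile and JsonPreviewFile are stand-ins, not the package's OtherFile and JsonFile):

from typing import Any

Metadata = dict[str, Any]

class BaseFile:
    def metadata(self) -> Metadata:
        return {'type': type(self).__name__}

class JsonPreviewFile(BaseFile):
    DESCRIPTION = 'JSON data containing preview info for links sent in a messaging app'

    def metadata(self) -> Metadata:
        metadata = super().metadata()          # extend, don't replace, the parent-built dict
        metadata['description'] = self.DESCRIPTION
        return metadata

print(JsonPreviewFile().metadata())  # {'type': 'JsonPreviewFile', 'description': 'JSON data ...'}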