epstein-files 1.0.12__py3-none-any.whl → 1.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,17 +9,17 @@ from rich.table import Table
 from rich.text import Text
 
 from epstein_files.documents.communication import Communication
-from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
+from epstein_files.documents.imessage.text_message import TextMessage
 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
-from epstein_files.util.constant.strings import AUTHOR
-from epstein_files.util.data import iso_timestamp, listify, sort_dict
+from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
+from epstein_files.util.data import days_between, days_between_str, iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
 
-CONFIRMED_MSG = 'Found confirmed counterparty'
-GUESSED_MSG = 'This is probably a conversation with'
+CONFIRMED_MSG = 'with confirmed counterparty'
+GUESSED_MSG = 'and is probably with'
 MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)
 REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
 
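Note: MSG_REGEX is unchanged in this release, but the _extract_timestamp() rewrite below now routes every regex match through _build_message(). A quick standalone check of what the groups capture (the sample log text is invented for illustration):

    import re

    MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)

    sample = (
        "Sender: +1 (555) 010-0000\n"
        "Time: 01/03/2019 9:41:07 AM\n"
        "Message: Running late, be there soon\n"
        "Sender: J\n"
        "Time: 01/03/2019 9:45:12 AM\n"
        "Message: ok"
    )

    for match in MSG_REGEX.finditer(sample):
        # group(1) = sender, group(2) = timestamp string, group(4) = message body
        print(match.group(1).strip(), '|', match.group(2).strip(), '|', match.group(4).strip())
    # +1 (555) 010-0000 | 01/03/2019 9:41:07 AM | Running late, be there soon
    # J | 01/03/2019 9:45:12 AM | ok
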
@@ -39,17 +39,20 @@ class MessengerLog(Communication):
         return self.messages_by(name)[0].timestamp()
 
     def info_txt(self) -> Text | None:
-        if self.author is None:
-            return None
+        num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
+        txt = Text(f"(Covers {num_days_str} starting ", style='dim')
+        txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
 
-        info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
-        author_txt = Text(self.author, style=self.author_style + ' bold')
-        txt = Text(f"({info_msg} ", style='dim').append(author_txt)
+        if not self.author:
+            txt.append('with unknown counterparty')
+        else:
+            txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
+            txt.append(Text(self.author, style=self.author_style + ' bold'))
 
         if self.phone_number:
-            txt.append(f" using the phone number {self.phone_number}")
+            txt.append(highlighter(f" using the phone number {self.phone_number}"))
 
-        return highlighter(txt.append(')'))
+        return txt.append(')')
 
     def last_message_at(self, name: str | None) -> datetime:
         return self.messages_by(name)[-1].timestamp()
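Note: the rewritten info_txt() builds one rich.text.Text and chains .append() calls, which works because rich's Text.append() returns the Text itself. A minimal sketch with invented values standing in for self.date_str(), self.author, and the styles:

    from rich.text import Text

    txt = Text("(Covers 3 days starting ", style='dim')
    txt.append("2019-01-03", style='bold').append(' ')  # append() returns the Text, so calls chain
    txt.append("and is probably with").append(' ')
    txt.append(Text("Jane Doe", style='cyan bold'))     # appending a styled Text keeps its own style
    print(txt.append(')').plain)
    # (Covers 3 days starting 2019-01-03 and is probably with Jane Doe)
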
@@ -82,7 +85,7 @@ class MessengerLog(Communication):
         # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
         return TextMessage(
             author=self.author if (is_phone_number or not author_str) else author_str,
-            author_str=author_str if is_phone_number else None,  # Preserve phone numbers
+            author_str=author_str if is_phone_number else '',  # Preserve phone numbers
             id_confirmed=not self.is_attribution_uncertain(),
             text=match.group(4).strip(),
             timestamp_str=match.group(2).strip(),
@@ -90,12 +93,12 @@
 
     def _extract_timestamp(self) -> datetime:
         for match in MSG_REGEX.finditer(self.text):
-            timestamp_str = match.group(2).strip()
+            message = self._build_message(match)
 
             try:
-                return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
+                return message.timestamp()
             except ValueError as e:
-                logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
+                logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
 
         raise RuntimeError(f"{self}: No timestamp found!")
 
@@ -130,7 +133,7 @@ class MessengerLog(Communication):
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
         counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
+        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
         counts_table.add_column('Days', justify='right', style='dim')
 
         for name, count in sort_dict(cls.count_authors(imessage_logs)):
@@ -144,7 +147,7 @@ class MessengerLog(Communication):
                 f"{count:,}",
                 iso_timestamp(first_at),
                 iso_timestamp(last_at),
-                str((last_at - first_at).days + 1),
+                str(days_between(first_at, last_at)),
             )
 
         return counts_table
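Note: several hunks in this release swap inline date arithmetic for days_between()/days_between_str() from epstein_files.util.data. The helpers themselves aren't shown in the diff; judging by the '(last_at - first_at).days + 1' expression replaced above, a plausible reconstruction is an inclusive day count:

    from datetime import datetime

    def days_between(first_at: datetime, last_at: datetime) -> int:
        """Hypothetical reconstruction: inclusive count, per the replaced '.days + 1' expression."""
        return (last_at - first_at).days + 1

    def days_between_str(first_at: datetime, last_at: datetime) -> str:
        """Hypothetical: human-readable form used by the new MessengerLog.info_txt()."""
        num_days = days_between(first_at, last_at)
        return '1 day' if num_days == 1 else f'{num_days} days'

    print(days_between(datetime(2019, 1, 3), datetime(2019, 1, 5)))      # 3
    print(days_between_str(datetime(2019, 1, 3), datetime(2019, 1, 3)))  # 1 day

One caveat: in _log_extracted_timestamps_info below, the replaced expression was '(timestamps[0] - timestamps[-1]).days' without the '+ 1', so if days_between is inclusive, the logged span grows by one day.
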
@@ -18,7 +18,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_R
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constants import *
 from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
-from epstein_files.util.data import escape_single_quotes, remove_timezone, sort_dict, uniquify
+from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import styled_category
@@ -36,94 +36,62 @@ TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
 VAST_HOUSE = 'vast house'  # Michael Wolff article draft about Epstein indicator
 VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
 
-UNINTERESTING_CATEGORES = [
+SKIP_TIMESTAMP_EXTRACT = [
+    PALM_BEACH_TSV,
+    PALM_BEACH_PROPERTY_INFO,
+]
+
+UNINTERESTING_CATEGORIES = [
+    ACADEMIA,
+    ARTICLE,
     ARTS,
     BOOK,
+    CONFERENCE,
     JUNK,
+    POLITICS,
     SKYPE_LOG,
-    SPEECH,
 ]
 
 # OtherFiles whose descriptions/info match these prefixes are not displayed unless --all-other-files is used
-UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
+UNINTERESTING_PREFIXES = [
     'article about',
-    ARTICLE_DRAFT,
-    'Aviation International',
-    BBC,
-    BLOOMBERG,
-    'Boston Globe',
     BROCKMAN_INC,
-    CHINA_DAILY,
-    CNN,
-    'completely redacted',
     CVRA,
-    DAILY_MAIL,
-    DAILY_TELEGRAPH,
-    CVRA_LEXIS_SEARCH[0:-12],  # Because date at end :(
     DERSH_GIUFFRE_TWEET,
-    'Financial Times',
-    'Forbes',
-    'Frontlines',
-    'Future Science',
-    'Globe and Mail',
     GORDON_GETTY,
     f"{HARVARD} Econ",
     HARVARD_POETRY,
-    'Inference',
     JASTA,
-    'JetGala',
-    JOHN_BOLTON_PRESS_CLIPPING,
-    'Journal of Criminal',
-    LA_TIMES,
-    'Litigation Daily',
-    LAWRENCE_KRAUSS,
-    LAWRENCE_KRAUSS_ASU_ORIGINS,
-    'MarketWatch',
-    MARTIN_NOWAK,
-    'Morning News',
+    LEXIS_NEXIS,
     NOBEL_CHARITABLE_TRUST,
-    'Nautilus',
-    'New Yorker',
-    NYT,
     PALM_BEACH_CODE_ENFORCEMENT,
-    PALM_BEACH_DAILY_NEWS,
-    PALM_BEACH_POST,
     PALM_BEACH_TSV,
     PALM_BEACH_WATER_COMMITTEE,
-    PAUL_KRASSNER,
-    PEGGY_SIEGAL,
-    'Politifact',
-    'Rafanelli',
-    ROBERT_LAWRENCE_KUHN,
-    ROBERT_TRIVERS,
-    'SCMP',
-    'SciencExpress',
-    'Scowcroft',
-    SHIMON_POST_ARTICLE,
-    SINGLE_PAGE,
-    STACEY_PLASKETT,
-    'Tatler',
-    TERJE_ROD_LARSEN,
-    TEXT_OF_US_LAW,
-    TRANSLATION,
     TWEET,
-    REAL_DEAL_ARTICLE,
-    TRUMP_DISCLOSURES,
-    UBS_CIO_REPORT,
     UN_GENERAL_ASSEMBLY,
-    'U.S. News',
     'US Office',
-    'Vanity Fair',
-    VI_DAILY_NEWS,
-    WAPO,
+]
+
+INTERESTING_AUTHORS = [
+    EDWARD_JAY_EPSTEIN,
+    EHUD_BARAK,
+    JOI_ITO,
+    NOAM_CHOMSKY,
+    MICHAEL_WOLFF,
+    SVETLANA_POZHIDAEVA,
 ]
 
 
 @dataclass
 class OtherFile(Document):
-    """File that is not an email, an iMessage log, or JSON data."""
+    """
+    File that is not an email, an iMessage log, or JSON data.
 
-    include_description_in_summary_panel: ClassVar[bool] = True
+    Attributes:
+        was_timestamp_extracted (bool): True if the timestamp was programmatically extracted (and could be wrong)
+    """
+    was_timestamp_extracted: bool = False
+    include_description_in_summary_panel: ClassVar[bool] = True  # Class var for logging output
 
     def __post_init__(self):
         super().__post_init__()
@@ -162,11 +130,13 @@ class OtherFile(Document):
         elif len(info_sentences) == 0:
             return True
         elif self.config:
-            if self.config.is_interesting:
+            if self.config.is_interesting is not None:
+                return self.config.is_interesting
+            elif self.config.author in INTERESTING_AUTHORS:
                 return True
             elif self.category() == FINANCE and self.author is not None:
                 return False
-            elif self.category() in UNINTERESTING_CATEGORES:
+            elif self.category() in UNINTERESTING_CATEGORIES:
                 return False
 
         for prefix in UNINTERESTING_PREFIXES:
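Note: the is_interesting change above is a truthiness fix worth calling out. The old 'if self.config.is_interesting:' treated an explicitly configured False the same as unset, so a file could never be force-hidden; testing 'is not None' turns the config field into a tri-state override. A minimal sketch of the pattern (names invented):

    from typing import Optional

    def effective_interest(configured: Optional[bool], heuristic: bool) -> bool:
        # Tri-state override: explicit True/False wins; None defers to the heuristic.
        if configured is not None:
            return configured
        return heuristic

    assert effective_interest(False, True) is False  # the old truthy check would have fallen through to True
    assert effective_interest(None, True) is True
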
@@ -178,6 +148,10 @@ class OtherFile(Document):
     def metadata(self) -> Metadata:
         metadata = super().metadata()
         metadata['is_interesting'] = self.is_interesting()
+
+        if self.was_timestamp_extracted:
+            metadata['was_timestamp_extracted'] = self.was_timestamp_extracted
+
         return metadata
 
     def preview_text(self) -> str:
@@ -191,6 +165,8 @@ class OtherFile(Document):
         """Return configured timestamp or value extracted by scanning text with datefinder."""
         if self.config and self.config.timestamp:
             return self.config.timestamp
+        elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
+            return None
 
         timestamps: list[datetime] = []
 
@@ -214,7 +190,10 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=f"No timestamps found")
 
             return None
-        elif len(timestamps) == 1:
+
+        self.was_timestamp_extracted = True
+
+        if len(timestamps) == 1:
             return timestamps[0]
         else:
             timestamps = sorted(uniquify(timestamps), reverse=True)
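Note: the docstring above says extraction scans the file's text with datefinder; this hunk just flags that path with was_timestamp_extracted so downstream metadata can mark the date as a guess. A minimal sketch of the overall shape, using datefinder's documented find_dates() entry point and invented sample text:

    import datefinder  # pip install datefinder

    text = "Deposition scheduled for March 3, 2010; exhibits filed 2/14/2010."
    timestamps = sorted(set(datefinder.find_dates(text)), reverse=True)

    if timestamps:
        # Mirrors 'sorted(uniquify(timestamps), reverse=True)' above: most recent date wins
        print(timestamps[0])  # 2010-03-03 00:00:00
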
@@ -222,7 +201,7 @@ class OtherFile(Document):
             return timestamps[0]  # Most recent timestamp appearing in text is usually the closest
 
     def _log_extracted_timestamps_info(self, timestamps: list[datetime]) -> None:
-        num_days_spanned = (timestamps[0] - timestamps[-1]).days
+        num_days_spanned = days_between(timestamps[-1], timestamps[0])
         timestamps_log_msg = f"Extracted {len(timestamps)} timestamps spanning {num_days_spanned} days{TIMESTAMP_LOG_INDENT}"
         timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
 
@@ -230,9 +209,9 @@ class OtherFile(Document):
         self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
 
     @staticmethod
-    def build_table(files: Sequence['OtherFile']) -> Table:
+    def files_preview_table(files: Sequence['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table(None, show_lines=True)
+        table = build_table('Other Files Details', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='center')
@@ -240,7 +219,7 @@ class OtherFile(Document):
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
 
         for file in files:
-            link_and_info = [file.external_links()]
+            link_and_info = [file.external_links_txt()]
             date_str = file.date_str()
 
             if file.is_duplicate():
@@ -272,10 +251,10 @@ class OtherFile(Document):
                 logger.warning(f"file {file.file_id} has no category")
 
             counts[file.category()] += 1
-            category_bytes[file.category()] += file.length
+            category_bytes[file.category()] += file.file_size()
 
-        table = build_table('Other Files Summary')
-        add_cols_to_table(table, ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[0].min_width = 14
         table.columns[-1].style = 'dim'
 
         for (category, count) in sort_dict(counts):
@@ -23,14 +23,14 @@ from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
     epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
+from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
-from epstein_files.util.env import DOCS_DIR, args, logger
+from epstein_files.util.env import DOCS_DIR, args, logger, specified_names
 from epstein_files.util.file_helper import file_size_str
-from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
-    build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
-    print_other_site_link, print_panel, print_section_header, vertically_pad)
+from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
+    print_all_files_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
+    print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
 
72
72
 
73
73
  # Read through and classify all the files
74
74
  for file_arg in self.all_files:
75
- doc_timer = Timer(decimals=4)
75
+ doc_timer = Timer(decimals=2)
76
76
  document = Document(file_arg)
77
77
  cls = document_cls(document)
78
78
 
79
- if document.length == 0:
79
+ if document.length() == 0:
80
80
  logger.warning(f"Skipping empty file: {document}]")
81
81
  continue
82
82
  elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
83
- logger.warning(f"Skipping {document.filename}...")
83
+ document.log(f"Skipping OtherFile...")
84
84
  continue
85
85
 
86
- documents.append(cls(file_arg, text=document.text))
86
+ documents.append(cls(file_arg, lines=document.lines, text=document.text))
87
87
  logger.info(str(documents[-1]))
88
88
  file_type_count[cls.__name__] += 1
89
89
 
@@ -104,16 +104,20 @@ class EpsteinFiles:
104
104
  if PICKLED_PATH.exists() and not args.overwrite_pickle:
105
105
  with gzip.open(PICKLED_PATH, 'rb') as file:
106
106
  epstein_files = pickle.load(file)
107
- timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
108
107
  epstein_files.timer = timer
108
+ timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
109
+ epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
109
110
  return epstein_files
110
111
 
111
112
  logger.warning(f"Building new cache file, this will take a few minutes...")
112
113
  epstein_files = EpsteinFiles(timer=timer)
113
114
 
114
- with gzip.open(PICKLED_PATH, 'wb') as file:
115
- pickle.dump(epstein_files, file)
116
- logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
115
+ if args.skip_other_files:
116
+ logger.warning(f"Not writing pickled data because --skip-other-files")
117
+ else:
118
+ with gzip.open(PICKLED_PATH, 'wb') as file:
119
+ pickle.dump(epstein_files, file)
120
+ logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
117
121
 
118
122
  timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
119
123
  return epstein_files
@@ -127,9 +131,6 @@ class EpsteinFiles:
127
131
  names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
128
132
  return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
129
133
 
130
- def attributed_email_count(self) -> int:
131
- return sum([i for author, i in self.email_author_counts.items() if author != UNKNOWN])
132
-
133
134
  def docs_matching(
134
135
  self,
135
136
  pattern: re.Pattern | str,
@@ -156,7 +157,7 @@ class EpsteinFiles:
156
157
  return self.emails_for(author)[-1].timestamp
157
158
 
158
159
  def email_conversation_length_in_days(self, author: str | None) -> int:
159
- return (self.last_email_at(author) - self.earliest_email_at(author)).days + 1
160
+ return days_between(self.earliest_email_at(author), self.last_email_at(author))
160
161
 
161
162
  def email_signature_substitution_counts(self) -> dict[str, int]:
162
163
  """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -172,7 +173,7 @@ class EpsteinFiles:
172
173
  return sorted(list(self.unknown_recipient_email_ids))
173
174
 
174
175
  def emails_by(self, author: str | None) -> list[Email]:
175
- return [e for e in self.emails if e.author == author]
176
+ return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
176
177
 
177
178
  def emails_for(self, author: str | None) -> list[Email]:
178
179
  """Returns emails to or from a given 'author' sorted chronologically."""
@@ -185,9 +186,11 @@ class EpsteinFiles:
185
186
 
186
187
  def emails_to(self, author: str | None) -> list[Email]:
187
188
  if author is None:
188
- return [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
+ emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
190
  else:
190
- return [e for e in self.emails if author in e.recipients]
191
+ emails = [e for e in self.emails if author in e.recipients]
192
+
193
+ return Document.sort_by_timestamp(emails)
191
194
 
192
195
  def get_documents_by_id(self, file_ids: str | list[str]) -> list[Document]:
193
196
  file_ids = listify(file_ids)
@@ -204,14 +207,26 @@ class EpsteinFiles:
204
207
  def json_metadata(self) -> str:
205
208
  """Create a JSON string containing metadata for all the files."""
206
209
  metadata = {
207
- Email.__name__: _sorted_metadata(self.emails),
208
- JsonFile.__name__: _sorted_metadata(self.json_files),
209
- MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
210
- OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
210
+ 'files': {
211
+ Email.__name__: _sorted_metadata(self.emails),
212
+ JsonFile.__name__: _sorted_metadata(self.json_files),
213
+ MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
214
+ OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
215
+ },
216
+ 'people': {
217
+ name: highlighted_group.get_info(name)
218
+ for highlighted_group in HIGHLIGHTED_NAMES
219
+ if isinstance(highlighted_group, HighlightedNames)
220
+ for name, description in highlighted_group.emailers.items()
221
+ if description
222
+ }
211
223
  }
212
224
 
213
225
  return json.dumps(metadata, indent=4, sort_keys=True)
214
226
 
227
+ def non_duplicate_emails(self) -> list[Email]:
228
+ return [email for email in self.emails if not email.is_duplicate()]
229
+
215
230
  def non_json_other_files(self) -> list[OtherFile]:
216
231
  return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
217
232
 
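Note: json_metadata() output is restructured, nesting the per-class file metadata under "files" and adding a "people" map drawn from the HighlightedNames groups. Illustrative shape only; real entries come from _sorted_metadata() and HighlightedNames.get_info():

    import json

    metadata = {
        'files': {
            'Email': ['...'],
            'JsonFile': ['...'],
            'MessengerLog': ['...'],
            'OtherFile': ['...'],
        },
        'people': {
            'Some Name': 'description from the highlighted group',
        },
    }

    print(json.dumps(metadata, indent=4, sort_keys=True))
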
@@ -230,8 +245,8 @@ class EpsteinFiles:
             f"{len([d for d in docs if d.is_duplicate()])}",
         )
 
-        add_row('iMessage Logs', self.imessage_logs)
         add_row('Emails', self.emails)
+        add_row('iMessage Logs', self.imessage_logs)
         add_row('JSON Data', self.json_files)
         add_row('Other', self.non_json_other_files())
         console.print(Align.center(table))
@@ -271,71 +286,93 @@ class EpsteinFiles:
         console.print(Align.center(Email.build_table(emails, author)), '\n')
 
     def print_email_device_info(self) -> None:
-        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
+        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
         console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
 
-    def print_emailer_counts_table(self) -> None:
-        footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
-        counts_table = build_table("Email Counts", caption=footer)
-        add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
-
-        emailer_counts = {
-            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
-            for emailer in self.all_emailers(True)
-        }
+    def print_other_files_section(self, files: list[OtherFile]) -> None:
+        """Returns the OtherFile objects that were interesting enough to print."""
+        category_table = OtherFile.count_by_category_table(files)
+        other_files_preview_table = OtherFile.files_preview_table(files)
+        header_pfx = '' if args.all_other_files else 'Selected '
+        print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
 
-        for p, count in sort_dict(emailer_counts):
-            style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)
+        if args.all_other_files:
+            console.line(1)
+        else:
+            print_all_files_page_link(self)
+            console.line(2)
 
-            counts_table.add_row(
-                Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
-                str(count),
-                str(self.email_author_counts[p]),
-                str(self.email_recipient_counts[p]),
-                '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
-                '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
-            )
+        for table in [category_table, other_files_preview_table]:
+            table.title = f"{header_pfx}{table.title}"
 
-        console.print(vertically_pad(counts_table, 2))
+        print_centered(category_table)
+        console.line(2)
+        console.print(other_files_preview_table)
 
-    def print_imessage_summary(self) -> None:
+    def print_text_messages_section(self) -> None:
         """Print summary table and stats for text messages."""
-        console.print(MessengerLog.summary_table(self.imessage_logs))
+        print_section_header('All of His Text Messages')
+        print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
+        authors: list[str | None] = specified_names if specified_names else [JEFFREY_EPSTEIN]
+        log_files = self.imessage_logs_for(authors)
+
+        for log_file in log_files:
+            console.print(Padding(log_file))
+            console.line(2)
+
+        print_centered(MessengerLog.summary_table(self.imessage_logs))
         text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
         text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
         console.print(text_summary_msg)
         imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
         console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
 
-    def print_other_files_table(self) -> list[OtherFile]:
-        """Returns the OtherFile objects that were interesting enough to print."""
-        interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
-        header_pfx = '' if args.all_other_files else 'Selected '
-        print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
+    def table_of_emailers(self) -> Table:
+        attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
+        footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
+        counts_table = build_table("Email Counts", caption=footer)
 
-        if not args.all_other_files:
-            print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
-            print_other_site_link(False)
-            console.line(2)
+        add_cols_to_table(counts_table, [
+            'Name',
+            'Num',
+            'Sent',
+            "Recv",
+            {'name': 'First', 'highlight': True},
+            {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
+            JMAIL,
+            'eMedia',
+            'eWeb',
+            'Twitter',
+        ])
 
-        console.print(OtherFile.build_table(interesting_files))
-        console.print(Padding(OtherFile.count_by_category_table(interesting_files), (2, 0, 2, 2)))
-        skipped_file_count = len(self.other_files) - len(interesting_files)
+        emailer_counts = {
+            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
+            for emailer in self.all_emailers(True)
+        }
+
+        for name, count in sort_dict(emailer_counts):
+            style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
+            emails = self.emails_for(name)
 
-        if skipped_file_count > 0:
-            logger.warning(f"Skipped {skipped_file_count} uninteresting other files...")
+            counts_table.add_row(
+                Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
+                str(count),
+                str(self.email_author_counts[name]),
+                str(self.email_recipient_counts[name]),
+                emails[0].timestamp_without_seconds(),
+                emails[-1].timestamp_without_seconds(),
+                link_text_obj(search_jmail_url(name), JMAIL) if name else '',
+                link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(search_twitter_url(name), 'search X') if name else '',
+            )
 
-        return interesting_files
+        return counts_table
 
     def _tally_email_data(self) -> None:
         """Tally up summary info about Email objects."""
-        for email in self.emails:
-            if email.is_duplicate():
-                continue
-
+        for email in self.non_duplicate_emails():
             self.email_author_counts[email.author] += 1
 
             if len(email.recipients) == 0:
@@ -365,7 +402,7 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
 def document_cls(doc: Document) -> Type[Document]:
     search_area = doc.text[0:5000]  # Limit search area to avoid pointless scans of huge files
 
-    if doc.length == 0:
+    if doc.length() == 0:
         return Document
     if doc.text[0] == '{':
         return JsonFile
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
 CECILIA_STEEN = 'Cecilia Steen'
 CELINA_DUBIN = 'Celina Dubin'
 CHRISTINA_GALBRAITH = 'Christina Galbraith'  # Works with Tyler Shears on reputation stuff
+DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
 DANIEL_SABBA = 'Daniel Sabba'
 DANIEL_SIAD = 'Daniel Siad'
 DANNY_FROST = 'Danny Frost'
@@ -233,13 +234,14 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     edmond elizabeth emily entwistle erik evelyn
     ferguson flachsbart francis franco frank frost
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
-    hancock harold harrison harry hay helen hirsch hofstadter horowitz hussein
+    hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
     ian isaac isaacson
-    jamie jane janet jason jen jim joe johnson jones josh julie justin
+    james jamie jane janet jason jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
-    laurie leo leonard lenny leslie lieberman louis lynch lynn
+    laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
     nancy nicole nussbaum
+    owen
     paulson philippe
     rafael ray richard richardson rob robin ron rubin rudolph ryan
     sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
@@ -22,7 +22,6 @@ PUBLICIST = 'publicist'
 REPUTATION = 'reputation'
 SKYPE_LOG = 'Skype log'
 SOCIAL = 'social'
-SPEECH = 'speech'
 
 # Locations
 PALM_BEACH = 'Palm Beach'
@@ -35,6 +34,7 @@ CHINA_DAILY = "China Daily"
 DAILY_MAIL = 'Daily Mail'
 DAILY_TELEGRAPH = "Daily Telegraph"
 LA_TIMES = 'LA Times'
+LEXIS_NEXIS = 'Lexis Nexis'
 MIAMI_HERALD = 'Miami Herald'
 NYT = "New York Times"
 PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'