PyPI - epstein-files - Versions diffs - 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

epstein-files 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

epstein_files/__init__.py +12 -21
epstein_files/documents/communication.py +0 -3
epstein_files/documents/document.py +68 -21
epstein_files/documents/email.py +54 -70
epstein_files/documents/emails/email_header.py +14 -4
epstein_files/documents/imessage/text_message.py +5 -4
epstein_files/documents/messenger_log.py +7 -7
epstein_files/documents/other_file.py +16 -34
epstein_files/epstein_files.py +133 -141
epstein_files/person.py +324 -0
epstein_files/util/constant/names.py +46 -15
epstein_files/util/constant/output_files.py +1 -0
epstein_files/util/constant/strings.py +3 -3
epstein_files/util/constant/urls.py +15 -2
epstein_files/util/constants.py +75 -21
epstein_files/util/data.py +1 -20
epstein_files/util/doc_cfg.py +27 -17
epstein_files/util/env.py +5 -3
epstein_files/util/highlighted_group.py +248 -203
epstein_files/util/logging.py +1 -1
epstein_files/util/output.py +113 -157
epstein_files/util/rich.py +20 -35
epstein_files/util/timer.py +14 -0
epstein_files/util/word_count.py +1 -1
{epstein_files-1.1.5.dist-info → epstein_files-1.2.1.dist-info}/METADATA +6 -2
epstein_files-1.2.1.dist-info/RECORD +34 -0
epstein_files-1.1.5.dist-info/RECORD +0 -33
{epstein_files-1.1.5.dist-info → epstein_files-1.2.1.dist-info}/LICENSE +0 -0
{epstein_files-1.1.5.dist-info → epstein_files-1.2.1.dist-info}/WHEEL +0 -0
{epstein_files-1.1.5.dist-info → epstein_files-1.2.1.dist-info}/entry_points.txt +0 -0

epstein_files/documents/messenger_log.py CHANGED Viewed

@@ -10,11 +10,11 @@ from rich.text import Text
 from epstein_files.documents.communication import Communication
 from epstein_files.documents.imessage.text_message import TextMessage
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
+from epstein_files.util.constant.names import JEFFREY_EPSTEIN, Name
 from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
 from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
-from epstein_files.util.highlighted_group import get_style_for_name, styled_name
+from epstein_files.util.highlighted_group import styled_name
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
@@ -35,7 +35,7 @@ class MessengerLog(Communication):
         super().__post_init__()
         self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
-    def first_message_at(self, name: str | None) -> datetime:
+    def first_message_at(self, name: Name) -> datetime:
         return self.messages_by(name)[0].parse_timestamp()
     def info_txt(self) -> Text | None:
@@ -54,10 +54,10 @@ class MessengerLog(Communication):
         return txt.append(')')
-    def last_message_at(self, name: str | None) -> datetime:
+    def last_message_at(self, name: Name) -> datetime:
         return self.messages_by(name)[-1].parse_timestamp()
-    def messages_by(self, name: str | None) -> list[TextMessage]:
+    def messages_by(self, name: Name) -> list[TextMessage]:
         """Return all messages by 'name'."""
         return [m for m in self.messages if m.author == name]
@@ -129,9 +129,9 @@ class MessengerLog(Communication):
             yield message
     @classmethod
-    def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
+    def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[Name, int]:
         """Count up how many texts were sent by each author."""
-        sender_counts: dict[str | None, int] = defaultdict(int)
+        sender_counts: dict[Name, int] = defaultdict(int)
         for message_log in imessage_logs:
             for message in message_log.messages:

epstein_files/documents/other_file.py CHANGED Viewed

@@ -22,7 +22,7 @@ from epstein_files.util.data import days_between, escape_single_quotes, remove_t
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import add_cols_to_table, build_table, highlighter
 from epstein_files.util.logging import logger
 FIRST_FEW_LINES = 'First Few Lines'
@@ -209,39 +209,8 @@ class OtherFile(Document):
         if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
-    @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
-        counts = defaultdict(int)
-        category_bytes = defaultdict(int)
-        for file in files:
-            if file.category() is None:
-                logger.warning(f"file {file.file_id} has no category")
-            counts[file.category()] += 1
-            category_bytes[file.category()] += file.file_size()
-        table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
-        table.columns[-1].justify = 'right'
-        table.columns[0].min_width = 14
-        table.columns[-1].style = 'dim'
-        for (category, count) in sort_dict(counts):
-            category_files = [f for f in files if f.category() == category]
-            known_author_count = Document.known_author_count(category_files)
-            table.add_row(
-                styled_category(category),
-                str(count),
-                str(known_author_count),
-                str(count - known_author_count),
-                file_size_to_str(category_bytes[category]),
-            )
-        return table
-    @staticmethod
-    def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    @classmethod
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         """Build a table of OtherFile documents."""
         table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
@@ -272,3 +241,16 @@ class OtherFile(Document):
             )
         return table
+    @classmethod
+    def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        categories = uniquify([f.category() for f in files])
+        categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
+        table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
+        for category in categories:
+            category_files = [f for f in files if f.category() == category]
+            table.add_row(styled_category(category), *cls.files_info_row(category_files))
+        table.columns = table.columns[:-2] + [table.columns[-1]]  # Remove unknown author col
+        return table

epstein_files/epstein_files.py CHANGED Viewed

@@ -3,39 +3,44 @@ import json
 import pickle
 import re
 from collections import defaultdict
+from copy import copy
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Sequence, Type
+from typing import Sequence, Type, cast
-from rich.padding import Padding
 from rich.table import Table
-from rich.text import Text
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
-from epstein_files.documents.emails.email_header import AUTHOR
+from epstein_files.documents.email import DETECT_EMAIL_REGEX, Email
 from epstein_files.documents.json_file import JsonFile
 from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
 from epstein_files.documents.other_file import OtherFile
+from epstein_files.person import Person
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
+from epstein_files.util.data import flatten, json_safe, listify, uniquify
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
-from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
-     print_author_panel, print_centered, print_subtitle_panel)
+from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
-DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
-DEVICE_SIGNATURE = 'Device Signature'
-DEVICE_SIGNATURE_PADDING = (1, 0)
+DUPLICATE_PROPS_TO_COPY = ['author', 'recipients', 'timestamp']
 PICKLED_PATH = Path("the_epstein_files.pkl.gz")
 SLOW_FILE_SECONDS = 1.0
+EMAILS_WITH_UNINTERESTING_CCS = [
+    '025329',    # Krassner
+    '024923',    # Krassner
+    '033568',    # Krassner
+]
+EMAILS_WITH_UNINTERESTING_BCCS = [
+    '014797_1',  # Ross Gow
+]
 @dataclass
 class EpsteinFiles:
@@ -45,19 +50,13 @@ class EpsteinFiles:
     json_files: list[JsonFile] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
     timer: Timer = field(default_factory=lambda: Timer())
-    # Analytics / calculations
-    email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
-    email_authors_to_device_signatures: dict[str, set] = field(default_factory=lambda: defaultdict(set))
-    email_device_signatures_to_authors: dict[str, set] = field(default_factory=lambda: defaultdict(set))
-    email_recipient_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
-    unknown_recipient_email_ids: set[str] = field(default_factory=set)
+    uninteresting_ccs: list[Name] = field(default_factory=list)
     def __post_init__(self):
         """Iterate through files and build appropriate objects."""
         self.all_files = sorted([f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')])
         documents = []
-        file_type_count = defaultdict(int)
+        file_type_count = defaultdict(int)  # Hack used by --skip-other-files option
         # Read through and classify all the files
         for file_arg in self.all_files:
@@ -83,23 +82,23 @@ class EpsteinFiles:
         self.imessage_logs = Document.sort_by_timestamp([d for d in documents if isinstance(d, MessengerLog)])
         self.other_files = Document.sort_by_timestamp([d for d in documents if isinstance(d, (JsonFile, OtherFile))])
         self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
-        self._tally_email_data()
+        self._set_uninteresting_ccs()
+        self._copy_duplicate_email_properties()
     @classmethod
     def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
         """Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
         timer = timer or Timer()
-        if PICKLED_PATH.exists() and not args.overwrite_pickle:
+        if PICKLED_PATH.exists() and not args.overwrite_pickle and not args.skip_other_files:
             with gzip.open(PICKLED_PATH, 'rb') as file:
                 epstein_files = pickle.load(file)
-                epstein_files.timer = timer
                 timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
-                epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
+                timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
                 return epstein_files
         logger.warning(f"Building new cache file, this will take a few minutes...")
-        epstein_files = EpsteinFiles(timer=timer)
+        epstein_files = EpsteinFiles()
         if args.skip_other_files:
             logger.warning(f"Not writing pickled data because --skip-other-files")
@@ -114,17 +113,7 @@ class EpsteinFiles:
     def all_documents(self) -> Sequence[Document]:
         return self.imessage_logs + self.emails + self.other_files
-    def all_emailers(self, include_useless: bool = False) -> list[str | None]:
-        """Returns all emailers USELESS_EMAILERS, sorted from least frequent to most."""
-        names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
-        names = names if include_useless else [e for e in names if e not in USELESS_EMAILERS]
-        return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
-    def docs_matching(
-            self,
-            pattern: re.Pattern | str,
-            names: list[str | None] | None = None
-        ) -> list[SearchResult]:
+    def docs_matching(self, pattern: re.Pattern | str, names: list[Name] | None = None) -> list[SearchResult]:
         """Find documents whose text matches a pattern (file_type and names args limit the documents searched)."""
         results: list[SearchResult] = []
@@ -139,14 +128,39 @@ class EpsteinFiles:
         return results
-    def earliest_email_at(self, author: str | None) -> datetime:
-        return self.emails_for(author)[0].timestamp
+    def earliest_email_at(self, name: Name) -> datetime:
+        return self.emails_for(name)[0].timestamp
+    def last_email_at(self, name: Name) -> datetime:
+        return self.emails_for(name)[-1].timestamp
+    def email_author_counts(self) -> dict[Name, int]:
+        return {
+            person.name: len(person.unique_emails_by())
+            for person in self.emailers() if len(person.unique_emails_by()) > 0
+        }
+    def email_authors_to_device_signatures(self) -> dict[str, set[str]]:
+        signatures = defaultdict(set)
+        for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
+            signatures[email.author_or_unknown()].add(email.sent_from_device)
-    def last_email_at(self, author: str | None) -> datetime:
-        return self.emails_for(author)[-1].timestamp
+        return signatures
-    def email_conversation_length_in_days(self, author: str | None) -> int:
-        return days_between(self.earliest_email_at(author), self.last_email_at(author))
+    def email_device_signatures_to_authors(self) -> dict[str, set[str]]:
+        signatures = defaultdict(set)
+        for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
+            signatures[email.sent_from_device].add(email.author_or_unknown())
+        return signatures
+    def email_recipient_counts(self) -> dict[Name, int]:
+        return {
+            person.name: len(person.unique_emails_to())
+            for person in self.emailers() if len(person.unique_emails_to()) > 0
+        }
     def email_signature_substitution_counts(self) -> dict[str, int]:
         """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -158,32 +172,40 @@ class EpsteinFiles:
         return substitution_counts
-    def email_unknown_recipient_file_ids(self) -> list[str]:
-        return sorted(list(self.unknown_recipient_email_ids))
+    def emailers(self) -> list[Person]:
+        """All the people who sent or received an email."""
+        authors = [email.author for email in self.emails]
+        recipients = flatten([email.recipients for email in self.emails])
+        return self.person_objs(uniquify(authors + recipients))
-    def emails_by(self, author: str | None) -> list[Email]:
+    def emails_by(self, author: Name) -> list[Email]:
         return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
-    def emails_for(self, author: str | None) -> list[Email]:
+    def emails_for(self, name: Name) -> list[Email]:
         """Returns emails to or from a given 'author' sorted chronologically."""
-        if author == JEFFREY_EPSTEIN:
-            emails = [e for e in self.emails_by(JEFFREY_EPSTEIN) if e.is_note_to_self()]
-        else:
-            emails = self.emails_by(author) + self.emails_to(author)
+        emails = self.emails_by(name) + self.emails_to(name)
         if len(emails) == 0:
-            raise RuntimeError(f"No emails found for '{author}'")
+            raise RuntimeError(f"No emails found for '{name}'")
         return Document.sort_by_timestamp(Document.uniquify(emails))
-    def emails_to(self, author: str | None) -> list[Email]:
-        if author is None:
+    def emails_to(self, name: Name) -> list[Email]:
+        if name is None:
             emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
         else:
-            emails = [e for e in self.emails if author in e.recipients]
+            emails = [e for e in self.emails if name in e.recipients]
         return Document.sort_by_timestamp(emails)
+    def email_for_id(self, file_id: str) -> Email:
+        docs = self.for_ids([file_id])
+        if docs and isinstance(docs[0], Email):
+            return docs[0]
+        else:
+            raise ValueError(f"No email found for {file_id}")
     def for_ids(self, file_ids: str | list[str]) -> list[Document]:
         file_ids = listify(file_ids)
         docs = [doc for doc in self.all_documents() if doc.file_id in file_ids]
@@ -193,6 +215,9 @@ class EpsteinFiles:
         return docs
+    def imessage_logs_for(self, name: Name) -> list[MessengerLog]:
+        return [log for log in self.imessage_logs if name == log.author]
     def json_metadata(self) -> str:
         """Create a JSON string containing metadata for all the files."""
         metadata = {
@@ -203,7 +228,7 @@ class EpsteinFiles:
                 OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
             },
             'people': {
-                name: highlighted_group.get_info(name)
+                name: highlighted_group.info_for(name, include_category=True)
                 for highlighted_group in HIGHLIGHTED_NAMES
                 if isinstance(highlighted_group, HighlightedNames)
                 for name, description in highlighted_group.emailers.items()
@@ -214,89 +239,71 @@ class EpsteinFiles:
         return json.dumps(metadata, indent=4, sort_keys=True)
     def non_duplicate_emails(self) -> list[Email]:
-        return [email for email in self.emails if not email.is_duplicate()]
+        return Document.without_dupes(self.emails)
     def non_json_other_files(self) -> list[OtherFile]:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
-    def print_files_summary(self) -> None:
-        table = build_table('File Overview')
-        add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
-        table.columns[1].justify = 'right'
-        def add_row(label: str, docs: list):
-            known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
-            table.add_row(
-                label,
-                f"{len(docs):,}",
-                f"{known:,}" if known is not None else NA_TXT,
-                f"{len(docs) - known:,}" if known is not None else NA_TXT,
-                f"{len([d for d in docs if d.is_duplicate()])}",
+    def person_objs(self, names: list[Name]) -> list[Person]:
+        """Construct Person objects for a list of names."""
+        return [
+            Person(
+                name=name,
+                emails=self.emails_for(name),
+                imessage_logs=self.imessage_logs_for(name),
+                is_uninteresting_cc=name in self.uninteresting_emailers(),
+                other_files=[f for f in self.other_files if name and name == f.author]
             )
+            for name in names
+        ]
+    def overview_table(self) -> Table:
+        table = Document.file_info_table('Files Overview', 'File Type')
+        table.add_row('Emails', *Document.files_info_row(self.emails))
+        table.add_row('iMessage Logs', *Document.files_info_row(self.imessage_logs))
+        table.add_row('JSON Data', *Document.files_info_row(self.json_files, True))
+        table.add_row('Other', *Document.files_info_row(self.non_json_other_files()))
+        return table
+    def unknown_recipient_ids(self) -> list[str]:
+        """IDs of emails whose recipient is not known."""
+        return sorted([e.file_id for e in self.emails if None in e.recipients or not e.recipients])
+    def uninteresting_emailers(self) -> list[Name]:
+        """Emailers whom we don't want to print a separate section for because they're just CCed."""
+        if '_uninteresting_emailers' not in vars(self):
+            self._uninteresting_emailers = sorted(uniquify(UNINTERESTING_EMAILERS + self.uninteresting_ccs))
+        return self._uninteresting_emailers
+    def _copy_duplicate_email_properties(self) -> None:
+        """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
+        for email in self.emails:
+            if not email.is_duplicate():
+                continue
-        add_row('Emails', self.emails)
-        add_row('iMessage Logs', self.imessage_logs)
-        add_row('JSON Data', self.json_files)
-        add_row('Other', self.non_json_other_files())
-        print_centered(table)
-        console.line()
-    def print_emails_for(self, _author: str | None) -> list[Email]:
-        """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
-        emails = self.emails_for(_author)
-        num_days = self.email_conversation_length_in_days(_author)
-        unique_emails = [email for email in emails if not email.is_duplicate()]
-        start_date = emails[0].timestamp.date()
-        author = _author or UNKNOWN
-        title = f"Found {len(unique_emails)} emails"
-        if author == JEFFREY_EPSTEIN:
-            title += f" sent by {JEFFREY_EPSTEIN} to himself"
-        else:
-            title += f" to/from {author} starting {start_date} covering {num_days:,} days"
-        print_author_panel(title, get_info_for_name(author), get_style_for_name(author))
-        self.print_emails_table_for(_author)
-        last_printed_email_was_duplicate = False
-        for email in emails:
-            if email.is_duplicate():
-                console.print(Padding(email.duplicate_file_txt().append('...'), (0, 0, 0, 4)))
-                last_printed_email_was_duplicate = True
-            else:
-                if last_printed_email_was_duplicate:
-                    console.line()
-                console.print(email)
-                last_printed_email_was_duplicate = False
+            original = self.email_for_id(email.duplicate_of_id())
-        return emails
+            for field_name in DUPLICATE_PROPS_TO_COPY:
+                original_prop = getattr(original, field_name)
+                duplicate_prop = getattr(email, field_name)
-    def print_emails_table_for(self, author: str | None) -> None:
-        emails = [email for email in self.emails_for(author) if not email.is_duplicate()]  # Remove dupes
-        print_centered(Padding(Email.build_emails_table(emails, author), (0, 5, 1, 5)))
+                if original_prop != duplicate_prop:
+                    email.warn(f"Replacing {field_name} {duplicate_prop} with {original_prop} from duplicated '{original.file_id}'")
+                    setattr(email, field_name, original_prop)
-    def print_email_device_info(self) -> None:
-        print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE)
-        console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
-        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
+        # Re-sort in case any timestamps were updated
+        self.emails = Document.sort_by_timestamp(self.emails)
-    def _tally_email_data(self) -> None:
-        """Tally up summary info about Email objects."""
-        for email in self.non_duplicate_emails():
-            self.email_author_counts[email.author] += 1
+    def _set_uninteresting_ccs(self) -> None:
+        for id in EMAILS_WITH_UNINTERESTING_BCCS:
+            self.uninteresting_ccs += copy(cast(list[Name], self.email_for_id(id).header.bcc))
-            if len(email.recipients) == 0:
-                self.unknown_recipient_email_ids.add(email.file_id)
-                self.email_recipient_counts[None] += 1
-            else:
-                for recipient in email.recipients:
-                    self.email_recipient_counts[recipient] += 1
+        for id in EMAILS_WITH_UNINTERESTING_CCS:
+            self.uninteresting_ccs += self.email_for_id(id).recipients
-            if email.sent_from_device:
-                self.email_authors_to_device_signatures[email.author_or_unknown()].add(email.sent_from_device)
-                self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
+        self.uninteresting_ccs = sorted(uniquify(self.uninteresting_ccs))
+        logger.info(f"Extracted uninteresting_ccs: {self.uninteresting_ccs}")
 def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
@@ -326,21 +333,6 @@ def document_cls(doc: Document) -> Type[Document]:
         return OtherFile
-def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
-    title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
-    table = build_table(title, header_style="bold reverse", show_lines=True)
-    for i, col in enumerate(cols):
-        table.add_column(col.title() + ('s' if i == 1 else ''))
-    new_dict = dict_sets_to_lists(keyed_sets)
-    for k in sorted(new_dict.keys()):
-        table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
-    return Padding(table, DEVICE_SIGNATURE_PADDING)
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
     docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
     return [json_safe(d.metadata()) for d in docs_sorted_by_id]

epstein-files 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl

epstein-files 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl