epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +59 -51
- epstein_files/documents/communication.py +9 -9
- epstein_files/documents/document.py +111 -87
- epstein_files/documents/email.py +154 -85
- epstein_files/documents/emails/email_header.py +7 -6
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/json_file.py +17 -0
- epstein_files/documents/messenger_log.py +62 -3
- epstein_files/documents/other_file.py +165 -17
- epstein_files/epstein_files.py +100 -143
- epstein_files/util/constant/names.py +6 -0
- epstein_files/util/constant/strings.py +27 -0
- epstein_files/util/constant/urls.py +22 -9
- epstein_files/util/constants.py +968 -1015
- epstein_files/util/data.py +14 -28
- epstein_files/util/{file_cfg.py → doc_cfg.py} +120 -34
- epstein_files/util/env.py +16 -18
- epstein_files/util/file_helper.py +56 -17
- epstein_files/util/highlighted_group.py +227 -175
- epstein_files/util/logging.py +57 -0
- epstein_files/util/rich.py +18 -13
- epstein_files/util/search_result.py +14 -6
- epstein_files/util/timer.py +24 -0
- epstein_files/util/word_count.py +2 -1
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/METADATA +3 -2
- epstein_files-1.0.1.dist-info/RECORD +30 -0
- epstein_files-1.0.0.dist-info/RECORD +0 -28
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/WHEEL +0 -0
epstein_files/epstein_files.py
CHANGED
@@ -1,14 +1,14 @@
 import gzip
+import json
 import pickle
 import re
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import
+from typing import Sequence, Type
 
 from rich.align import Align
-from rich.console import Group
 from rich.padding import Padding
 from rich.table import Table
 from rich.text import Text
@@ -23,20 +23,21 @@ from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import (EPSTEIN_WEB, JMAIL, epsteinify_name_url, epstein_web_person_url,
     search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import
+from epstein_files.util.data import dict_sets_to_lists, json_safe, sort_dict
+from epstein_files.util.doc_cfg import EmailCfg
 from epstein_files.util.env import args, logger
-from epstein_files.util.
-from epstein_files.util.file_helper import DOCS_DIR, FILENAME_LENGTH, PICKLED_PATH, file_size_str
+from epstein_files.util.file_helper import DOCS_DIR, PICKLED_PATH, file_size_str
 from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT,
-
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table, console, highlighter,
+    link_text_obj, link_markup, print_author_header, print_centered, print_other_site_link, print_panel,
     print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
+from epstein_files.util.timer import Timer
 
 DEVICE_SIGNATURE = 'Device Signature'
-FIRST_FEW_LINES = 'First Few Lines'
 DEVICE_SIGNATURE_PADDING = (1, 0)
 NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
+SLOW_FILE_SECONDS = 0.4
 
 INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
     'ACT for America',
@@ -59,51 +60,34 @@ class EpsteinFiles:
     email_authors_to_device_signatures: dict[str, set] = field(default_factory=lambda: defaultdict(set))
     email_device_signatures_to_authors: dict[str, set] = field(default_factory=lambda: defaultdict(set))
     email_recipient_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
-
+    unknown_recipient_email_ids: set[str] = field(default_factory=set)
 
     def __post_init__(self):
+        """Iterate through files and build appropriate objects."""
         self.all_files = [f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')]
+        documents = []
 
         # Read through and classify all the files
         for file_arg in self.all_files:
-
+            doc_timer = Timer(decimals=4)
             document = Document(file_arg)
 
             if document.length == 0:
-                logger.
-
-
-
-
-
-                # Handle iMessage log files
-                self.imessage_logs.append(MessengerLog(file_arg, text=document.text))
-                logger.info(self.imessage_logs[-1].description().plain)
-            elif DETECT_EMAIL_REGEX.match(document.text) or isinstance(document.config, MessageCfg):
-                # Handle emails
-                email = Email(file_arg, text=document.text)
-                logger.info(email.description().plain)
-                self.emails.append(email)
-                self.email_author_counts[email.author] += 1
-
-                if len(email.recipients) == 0:
-                    self._email_unknown_recipient_file_ids.add(email.file_id)
-                    self.email_recipient_counts[None] += 1
-                else:
-                    for recipient in email.recipients:
-                        self.email_recipient_counts[recipient] += 1
-
-                if email.sent_from_device:
-                    self.email_authors_to_device_signatures[email.author_or_unknown()].add(email.sent_from_device)
-                    self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
-            else:
-                # Handle OtherFiles
-                self.other_files.append(OtherFile(file_arg, text=document.text))
-                logger.info(self.other_files[-1].description().plain)
+                logger.warning(f"Skipping empty file: {document}")
+                continue
+
+            cls = document_cls(document)
+            documents.append(cls(file_arg, text=document.text))
+            logger.info(str(documents[-1]))
 
-
-
-
+            if doc_timer.seconds_since_start() > SLOW_FILE_SECONDS:
+                doc_timer.print_at_checkpoint(f"Slow file: {documents[-1]} processed")
+
+        self.emails = Document.sort_by_timestamp([d for d in documents if isinstance(d, Email)])
+        self.imessage_logs = Document.sort_by_timestamp([d for d in documents if isinstance(d, MessengerLog)])
+        self.other_files = Document.sort_by_timestamp([d for d in documents if isinstance(d, (JsonFile, OtherFile))])
+        self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
+        self._tally_email_data()
 
     @classmethod
     def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
@@ -141,18 +125,17 @@
     def docs_matching(
         self,
         pattern: re.Pattern | str,
-        file_type: Literal['all', 'other'] = 'all',
         names: list[str | None] | None = None
     ) -> list[SearchResult]:
         """Find documents whose text matches a pattern (file_type and names args limit the documents searched)."""
         results: list[SearchResult] = []
 
-        for doc in
-
-
-            if names and ((not isinstance(doc, (Email, MessengerLog))) or doc.author not in names):
+        for doc in self.all_documents():
+            if names and doc.author not in names:
                 continue
 
+            lines = doc.matching_lines(pattern)
+
             if len(lines) > 0:
                 results.append(SearchResult(doc, lines))
 
@@ -178,7 +161,7 @@ class EpsteinFiles:
         return substitution_counts
 
     def email_unknown_recipient_file_ids(self) -> list[str]:
-        return sorted(list(self.
+        return sorted(list(self.unknown_recipient_email_ids))
 
     def emails_by(self, author: str | None) -> list[Email]:
         return [e for e in self.emails if e.author == author]
@@ -198,33 +181,38 @@ class EpsteinFiles:
         else:
             return [e for e in self.emails if author in e.recipients]
 
-    def
-    if
-
+    def get_documents_by_id(self, file_ids: list[str]) -> list[Document]:
+        docs = [doc for doc in self.all_documents() if doc.file_id in file_ids]
+
+        if len(docs) != len(file_ids):
+            logger.warning(f"{len(file_ids)} file IDs provided but only {len(docs)} Epstein files found!")
+
+        return docs
 
-
-        return
+    def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
+        return MessengerLog.logs_for(author, self.imessage_logs)
 
     def identified_imessage_log_count(self) -> int:
         return len([log for log in self.imessage_logs if log.author])
 
-    def
-
-
-
-    for
-
+    def json_metadata(self) -> str:
+        metadata = {
+            EMAIL_CLASS: [json_safe(doc.metadata()) for doc in self.emails],
+            MESSENGER_LOG_CLASS: [json_safe(doc.metadata()) for doc in self.imessage_logs],
+            OTHER_FILE_CLASS: [json_safe(doc.metadata()) for doc in self.other_files],
+        }
 
-        return
+        return json.dumps(metadata, indent=4, sort_keys=True)
 
     def print_files_summary(self) -> None:
+        other_files = [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
         dupes = defaultdict(int)
 
         for doc in self.all_documents():
             if doc.is_duplicate:
-                dupes[doc.
+                dupes[doc.class_name()] += 1
 
-        table = Table()
+        table = Table(title='Summary of Document Types')
         add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
 
         def add_row(label: str, docs: list, known: int | None = None, dupes: int | None = None):
@@ -239,7 +227,7 @@ class EpsteinFiles:
         add_row('iMessage Logs', self.imessage_logs, self.identified_imessage_log_count())
         add_row('Emails', self.emails, len([e for e in self.emails if e.author]), dupes[EMAIL_CLASS])
         add_row('JSON Data', self.json_files, dupes=0)
-        add_row('Other',
+        add_row('Other', other_files, dupes=dupes[OTHER_FILE_CLASS])
         console.print(Align.center(table))
         console.line()
 
@@ -247,10 +235,11 @@ class EpsteinFiles:
         """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
         conversation_length = self.email_conversation_length_in_days(_author)
         emails = self.emails_for(_author)
+        unique_emails = [email for email in emails if not email.is_duplicate]
         author = _author or UNKNOWN
 
         print_author_header(
-            f"Found {len(
+            f"Found {len(unique_emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
             get_style_for_name(author),
             get_info_for_name(author)
         )
@@ -271,28 +260,9 @@ class EpsteinFiles:
 
         return emails
 
-    def print_emails_table_for(self,
-        emails = [email for email in self.emails_for(
-        author
-
-        table = Table(
-            title=f"Emails to/from {author} starting {emails[0].timestamp.date()}",
-            border_style=get_style_for_name(author, allow_bold=False),
-            header_style="bold"
-        )
-
-        table.add_column('From', justify='left')
-        table.add_column('Timestamp', justify='center')
-        table.add_column('Subject', justify='left', style='honeydew2', min_width=60)
-
-        for email in emails:
-            table.add_row(
-                email.author_txt,
-                email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
-                highlighter(email.subject())
-            )
-
-        console.print(Align.center(table), '\n')
+    def print_emails_table_for(self, author: str | None) -> None:
+        emails = [email for email in self.emails_for(author) if not email.is_duplicate]  # Remove dupes
+        console.print(Align.center(Email.build_table(emails, author)), '\n')
 
     def print_email_device_info(self) -> None:
         print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
@@ -300,13 +270,13 @@ class EpsteinFiles:
         console.print(build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
 
     def print_emailer_counts_table(self) -> None:
-        footer = f"Identified authors of {self.attributed_email_count()} emails out of {len(self.emails)}
+        footer = f"Identified authors of {self.attributed_email_count():,} emails out of {len(self.emails):,}."
         counts_table = Table(title=f"Email Counts", caption=footer, header_style="bold")
         add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_WEB, 'Twitter'])
 
         emailer_counts = {
-
-            for
+            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
+            for emailer in self.all_emailers(True)
         }
 
         for p, count in sort_dict(emailer_counts):
@@ -326,76 +296,50 @@ class EpsteinFiles:
 
     def print_imessage_summary(self) -> None:
         """Print summary table and stats for text messages."""
-
-        counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
-        counts_table.add_column('Files', justify='right', style='white')
-        counts_table.add_column("Msgs", justify='right')
-        counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
-        counts_table.add_column('Days', justify='right', style='dim')
-
-        for name, count in sort_dict(self.imessage_sender_counts()):
-            logs = self.imessage_logs_for(name)
-            first_at = logs[0].first_message_at(name)
-            last_at = logs[-1].first_message_at(name)
-
-            counts_table.add_row(
-                Text(name or UNKNOWN,
-                get_style_for_name(name)),
-                str(len(logs)),
-                f"{count:,}",
-                iso_timestamp(first_at),
-                iso_timestamp(last_at),
-                str((last_at - first_at).days + 1),
-            )
-
-        console.print(counts_table)
+        console.print(MessengerLog.summary_table(self.imessage_logs))
         text_summary_msg = f"\nDeanonymized {self.identified_imessage_log_count()} of "
-        text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files)} files."
+        text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
         console.print(text_summary_msg)
         imessage_msg_count = sum([len(log.messages()) for log in self.imessage_logs])
-        console.print(f"Found {imessage_msg_count}
-        console.print(f"(Last deploy found 4668 messages in 77 conversations)", style='dim')
+        console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
 
     def print_other_files_table(self) -> list[OtherFile]:
-        """Returns the
+        """Returns the OtherFile objects that were interesting enough to print."""
         interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
         header_pfx = '' if args.all_other_files else 'Selected '
         print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
 
         if not args.all_other_files:
-            print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and
+            print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
             print_other_site_link(False)
             console.line(2)
 
-
-
-        table.add_column('Date', justify='center')
-        table.add_column('Length', justify='center')
-        table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
+        console.print(OtherFile.build_table(interesting_files))
+        skipped_file_count = len(self.other_files) - len(interesting_files)
 
-
-
-            date_str = doc.date_str()
+        if skipped_file_count > 0:
+            logger.warning(f"Skipped {skipped_file_count} uninteresting files...")
 
-
-                preview_text = doc.duplicate_file_txt()
-                row_style = ' dim'
-            else:
-                preview_text = doc.highlighted_preview_text()
-                row_style = ''
+        return interesting_files
 
-
-
-
-
-
-                style=row_style
-            )
+    def _tally_email_data(self) -> None:
+        """Tally up summary info about Email objects."""
+        for email in self.emails:
+            if email.is_duplicate:
+                continue
 
-
-
-
+            self.email_author_counts[email.author] += 1
+
+            if len(email.recipients) == 0:
+                self.unknown_recipient_email_ids.add(email.file_id)
+                self.email_recipient_counts[None] += 1
+            else:
+                for recipient in email.recipients:
+                    self.email_recipient_counts[recipient] += 1
+
+            if email.sent_from_device:
+                self.email_authors_to_device_signatures[email.author_or_unknown()].add(email.sent_from_device)
+                self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
 
 
 def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
@@ -413,6 +357,19 @@ def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str]
     return Padding(table, DEVICE_SIGNATURE_PADDING)
 
 
+def document_cls(document: Document) -> Type[Document]:
+    search_area = document.text[0:5000]  # Limit search area to avoid pointless scans of huge files
+
+    if document.text[0] == '{':
+        return JsonFile
+    elif isinstance(document.config, EmailCfg) or DETECT_EMAIL_REGEX.match(search_area):
+        return Email
+    elif MSG_REGEX.search(search_area):
+        return MessengerLog
+    else:
+        return OtherFile
+
+
 def is_ok_for_epstein_web(name: str | None) -> bool:
     """Return True if it's likely that EpsteinWeb has a page for this name."""
    if name is None or ' ' not in name:
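
The 1.0.1 refactor above replaces the long if/elif classification chain that used to live in __post_init__ with the module-level document_cls() dispatcher: each file becomes a Document, document_cls() picks the subclass to re-instantiate it as, and the results are bucketed into emails, imessage_logs, other_files and json_files afterwards. Below is a minimal standalone sketch of that dispatch pattern only; the stand-in classes and regex patterns are illustrative assumptions (the real DETECT_EMAIL_REGEX and MSG_REGEX patterns are not shown in this diff, and the EmailCfg config check is omitted).

import re

# Illustrative stand-ins -- NOT the package's real patterns or classes
DETECT_EMAIL_REGEX = re.compile(r'^From:', re.MULTILINE)
MSG_REGEX = re.compile(r'^\[\d{4}-\d{2}-\d{2}', re.MULTILINE)

class Document:
    def __init__(self, text: str):
        self.text = text

class JsonFile(Document): pass
class Email(Document): pass
class MessengerLog(Document): pass
class OtherFile(Document): pass

def document_cls(document: Document) -> type:
    """Choose the Document subclass, scanning only the first 5000 chars of text."""
    search_area = document.text[0:5000]

    if document.text[0] == '{':          # JSON payloads start with a brace
        return JsonFile
    elif DETECT_EMAIL_REGEX.search(search_area):
        return Email
    elif MSG_REGEX.search(search_area):
        return MessengerLog
    else:
        return OtherFile

docs = [Document('{"id": 1}'), Document('From: someone@example.com\nhi'), Document('misc notes')]
print([document_cls(d).__name__ for d in docs])  # ['JsonFile', 'Email', 'OtherFile']

Centralizing the choice in one function is what lets __post_init__ shrink to a single loop and makes the per-type buckets trivial to build with isinstance() afterwards.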

epstein_files/util/constant/names.py
CHANGED
@@ -184,15 +184,21 @@ TULSI_GABBARD = 'Tulsi Gabbard'
 VIRGINIA_GIUFFRE = 'Virginia Giuffre'
 
 # Organizations
+BOFA = 'BofA'
 CNN = 'CNN'
 DEUTSCHE_BANK = 'Deutsche Bank'
+ELECTRON_CAPITAL_PARTNERS = 'Electron Capital Partners'
 GOLDMAN_SACHS = 'Goldman Sachs'
+GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
 HARVARD = 'Harvard'
 INSIGHTS_POD = f"InsightsPod"  # Zubair bots
+NEXT_MANAGEMENT = 'Next Management LLC'
 JP_MORGAN = 'JP Morgan'
 OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP"  # Ian Osborne's PR firm
+UBS = 'UBS'
 
 # Locations
+PALM_BEACH = 'Palm Beach'
 VIRGIN_ISLANDS = 'Virgin Islands'
 
 # First and last names that should be made part of a highlighting regex for emailers

epstein_files/util/constant/strings.py
CHANGED
@@ -9,6 +9,27 @@ JSON_FILE_CLASS = 'JsonFile'
 MESSENGER_LOG_CLASS = 'MessengerLog'
 OTHER_FILE_CLASS = 'OtherFile'
 
+# categories
+ACADEMIA = 'academia'
+ARTS = 'arts'
+ARTICLE = 'article'
+BOOK = 'book'
+BUSINESS = 'business'
+CONFERENCE = 'conference'
+ENTERTAINER = 'entertainer'
+FINANCE = 'finance'
+FLIGHT_LOGS = 'flight logs'
+JOURNALIST = 'journalist'
+JUNK = 'junk'
+LEGAL = 'legal'
+LOBBYIST = 'lobbyist'
+POLITICS = 'politics'
+PROPERTY = 'property'
+PUBLICIST = 'publicist'
+REPUTATION = 'reputation'
+SOCIAL = 'social'
+SPEECH = 'speech'
+
 # Publications
 BBC = 'BBC'
 BLOOMBERG = 'Bloomberg'
@@ -36,11 +57,17 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 AUTHOR = 'author'
 DEFAULT = 'default'
 EVERYONE = 'everyone'
+FIRST_FEW_LINES = 'First Few Lines'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
+JSON = 'json'
 NA = 'n/a'
 REDACTED = '<REDACTED>'
 URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
 QUESTION_MARKS = '(???)'
+
+# Regexes
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
+FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 
 
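
The two regexes added above pull a House Oversight document ID out of a file stem or filename; FILE_NAME_REGEX simply allows an optional .txt / .txt.json suffix on top of FILE_STEM_REGEX. A quick illustration of what capture group 1 returns, using hypothetical filenames in the HOUSE_OVERSIGHT_* naming scheme:

import re

HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")

# Hypothetical filenames, for illustration only
for name in ('HOUSE_OVERSIGHT_012345.txt', 'HOUSE_OVERSIGHT_012345_2.txt.json', 'HOUSE_OVERSIGHT_012345'):
    match = FILE_NAME_REGEX.match(name)
    print(f"{name} -> {match.group(1) if match else None}")
# HOUSE_OVERSIGHT_012345.txt -> 012345
# HOUSE_OVERSIGHT_012345_2.txt.json -> 012345_2
# HOUSE_OVERSIGHT_012345 -> 012345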

epstein_files/util/constant/urls.py
CHANGED
@@ -6,7 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
-from epstein_files.util.file_helper import
+from epstein_files.util.file_helper import JSON_METADATA_PATH, WORD_COUNT_HTML_PATH, coerce_file_stem
 
 # Style stuff
 ARCHIVE_LINK_COLOR = 'slate_blue3'
@@ -20,8 +20,27 @@ EPSTEIN_WEB = 'EpsteinWeb'
 EPSTEINIFY = 'epsteinify'
 JMAIL = 'Jmail'
 
-
-
+
+# Cryptadamus URLs
+GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
+TEXT_MSGS_BASE_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+JSON_METADATA_URL = f'{TEXT_MSGS_BASE_URL}/{JSON_METADATA_PATH.name}'
+WORD_COUNT_URL = f'{TEXT_MSGS_BASE_URL}/{WORD_COUNT_HTML_PATH.name}'
+
+SITE_URLS: dict[SiteType, str] = {
+    EMAIL: f'{GH_PAGES_BASE_URL}/epstein_emails_house_oversight/',  # TODO should just be same repo
+    TEXT_MESSAGE: TEXT_MSGS_BASE_URL,
+}
+
+GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
+GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
+ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
+EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+
+extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
+
+
+# External URLs
 COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
 COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
 EPSTEINIFY_URL = 'https://epsteinify.com'
@@ -31,12 +50,6 @@ JMAIL_URL = 'https://jmail.world'
 OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
 RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
 SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
-WORD_COUNT_URL = 'https://michelcrypt4d4mus.github.io/epstein_text_messages/epstein_emails_word_count.html'
-
-SITE_URLS: dict[SiteType, str] = {
-    EMAIL: 'https://michelcrypt4d4mus.github.io/epstein_emails_house_oversight/',
-    TEXT_MESSAGE: 'https://michelcrypt4d4mus.github.io/epstein_text_messages/',
-}
 
 DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
     EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",