PyPI - epstein-files - Versions diffs - 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl - Mend

epstein-files 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

epstein_files/__init__.py +4 -6
epstein_files/documents/document.py +92 -49
epstein_files/documents/email.py +7 -4
epstein_files/documents/imessage/text_message.py +3 -12
epstein_files/documents/json_file.py +13 -1
epstein_files/documents/messenger_log.py +32 -19
epstein_files/documents/other_file.py +66 -43
epstein_files/epstein_files.py +22 -15
epstein_files/util/constant/names.py +2 -2
epstein_files/util/constants.py +84 -78
epstein_files/util/doc_cfg.py +17 -25
epstein_files/util/env.py +29 -17
epstein_files/util/file_helper.py +13 -24
epstein_files/util/highlighted_group.py +22 -14
epstein_files/util/logging.py +0 -6
epstein_files/util/output.py +12 -7
epstein_files/util/rich.py +15 -10
epstein_files/util/word_count.py +65 -5
{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/METADATA +1 -1
epstein_files-1.0.11.dist-info/RECORD +33 -0
epstein_files/count_words.py +0 -72
epstein_files-1.0.10.dist-info/RECORD +0 -34
{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/LICENSE +0 -0
{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/WHEEL +0 -0
{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/entry_points.txt +0 -0

epstein_files/util/highlighted_group.py CHANGED Viewed

@@ -2,6 +2,7 @@ import re
 from dataclasses import dataclass, field
 from rich.highlighter import RegexHighlighter
+from rich.text import Text
 from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
@@ -21,7 +22,7 @@ EPSTEIN_ESTATE_EXECUTOR = f"Epstein {ESTATE_EXECUTOR}"
 REGEX_STYLE_PREFIX = 'regex'
 SIMPLE_NAME_REGEX = re.compile(r"^[-\w ]+$", re.IGNORECASE)
-CATEGORY_LABEL_MAPPING = {
+CATEGORY_STYLE_MAPPING = {
     ARTICLE: JOURNALIST,
     ARTS: ENTERTAINER,
     BOOK: JOURNALIST,
@@ -31,6 +32,12 @@ CATEGORY_LABEL_MAPPING = {
     REPUTATION: PUBLICIST,
 }
+CATEGORY_STYLES = {
+    JSON: 'dark_red',
+    JUNK: 'grey19',
+    'letter': 'medium_orchid1'
+}
 @dataclass(kw_only=True)
 class HighlightedText:
@@ -156,7 +163,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label=BUSINESS,
         style='spring_green4',
-        pattern=r'Gruterite|(John\s*)?Kluge|Marc Rich|(Mi(chael|ke)\s*)?Ovitz|(Steve\s+)?Wynn|(Les(lie)?\s+)?Wexner|SALSS|Swedish[-\s]*American\s*Life\s*Science\s*Summit|Valhi|(Yves\s*)?Bouvier',
+        pattern=r'Gruterite|(John\s*)?Kluge|Marc Rich|(Mi(chael|ke)\s*)?Ovitz|(Steve\s+)?Wynn|(Les(lie)?\s+)?Wexner|New Leaf Ventures|Park Partners|SALSS|Swedish[-\s]*American\s*Life\s*Science\s*Summit|Valhi|(Yves\s*)?Bouvier',
         emailers = {
             ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
             BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
@@ -305,7 +312,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label='finance',
         style='green',
-        pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
+        pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
         emailers={
             AMANDA_ENS: 'Citigroup',
             DANIEL_SABBA: 'UBS Investment Bank',
@@ -325,6 +332,7 @@ HIGHLIGHTED_NAMES = [
         style='deep_pink2',
         pattern=r'Cambridge|(Derek\s*)?Bok|Elisa(\s*New)?|Harvard(\s*(Business|Law|University)(\s*School)?)?|(Jonathan\s*)?Zittrain|(Stephen\s*)?Kosslyn',
         emailers = {
+            "Donald Rubin": f"Professor of Statistics",
             "Kelly Friendly": f"longtime aide and spokesperson of {LARRY_SUMMERS}",
             LARRY_SUMMERS: 'board of Digital Currency Group (DCG), Harvard president, Obama economic advisor',
             'Leah Reis-Dennis': 'producer for Lisa New\'s Poetry in America',
@@ -390,7 +398,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label='law enforcement',
         style='color(24) bold',
-        pattern=r'ag|(Alicia\s*)?Valle|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
+        pattern=r'ag|(Alicia\s*)?Valle|AML|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
         emailers = {
             ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
             DANNY_FROST: 'Director of Communications at Manhattan DA',
@@ -588,7 +596,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedText(
         label='phone_number',
         style='bright_green',
-        pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|\b[\d+]{10,12}\b",
+        pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|(\b|\+)[\d+]{10,12}\b",
     ),
 ]
@@ -648,18 +656,14 @@ def get_info_for_name(name: str) -> str | None:
 def get_style_for_category(category: str) -> str | None:
-    if category in [CONFERENCE, SPEECH]:
+    if category in CATEGORY_STYLES:
+        return CATEGORY_STYLES[category]
+    elif category in [CONFERENCE, SPEECH]:
         return f"{get_style_for_category(ACADEMIA)} dim"
-    elif category == JSON:
-        return 'dark_red'
-    elif category == JUNK:
-        return 'grey19'
-    elif category == 'letter':
-        return 'medium_orchid1'
     elif category == SOCIAL:
-        return f"{get_style_for_category(PUBLICIST)} dim"
+        return f"{get_style_for_category(PUBLICIST)}"
-    category = CATEGORY_LABEL_MAPPING.get(category, category)
+    category = CATEGORY_STYLE_MAPPING.get(category, category)
     for highlight_group in HIGHLIGHTED_NAMES:
         if highlight_group.label == category:
@@ -672,6 +676,10 @@ def get_style_for_name(name: str | None, default_style: str = DEFAULT, allow_bol
     return style if allow_bold else style.replace('bold', '').strip()
+def styled_category(category: str) -> Text:
+    return Text(category, get_style_for_category(category) or 'wheat4')
 def _get_highlight_group_for_name(name: str) -> HighlightedNames | None:
     for highlight_group in HIGHLIGHTED_NAMES:
         if highlight_group.regex.search(name):

epstein_files/util/logging.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import logging
 from os import environ
-from pathlib import Path
 import datefinder
 import rich_argparse_plus
@@ -10,7 +9,6 @@ from rich.logging import RichHandler
 from rich.theme import Theme
 from epstein_files.util.constant.strings import *
-from epstein_files.util.file_helper import file_size_str
 FILENAME_STYLE = 'gray27'
@@ -60,7 +58,3 @@ if env_log_level_str:
     logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
     logger.setLevel(env_log_level)
-def log_file_write(file_path: str | Path) -> None:
-    logger.warning(f"Wrote {file_size_str(file_path)} to '{file_path}'")

epstein_files/util/output.py CHANGED Viewed

@@ -11,7 +11,8 @@ from epstein_files.util.constant.names import *
 from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
 from epstein_files.util.data import dict_sets_to_lists
 from epstein_files.util.env import args, specified_names
-from epstein_files.util.logging import log_file_write, logger
+from epstein_files.util.file_helper import log_file_write
+from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
 PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
@@ -60,7 +61,6 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
     """Returns number of emails printed."""
     print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
     print_other_site_link(is_header=False)
     emailers_to_print: list[str | None]
     emailer_tables: list[str | None] = []
     already_printed_emails: list[Email] = []
@@ -106,8 +106,8 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
         _verify_all_emails_were_printed(epstein_files, already_printed_emails)
     fwded_articles = [e for e in already_printed_emails if e.config and e.config.is_fwded_article]
-    logger.warning(f"{len(fwded_articles)} of {len(already_printed_emails)} emails were forwarded articles.")
-    logger.warning(f"Rewrote {len(Email.rewritten_header_ids)} headers of {len(epstein_files.emails)} emails.")
+    log_msg = f"Rewrote {len(Email.rewritten_header_ids)} email headers (out of {len(already_printed_emails)})"
+    logger.warning(f"{log_msg}, {len(fwded_articles)} of the emails were forwarded articles.")
     return len(already_printed_emails)
@@ -121,7 +121,7 @@ def print_json_files(epstein_files: EpsteinFiles):
     else:
         for json_file in epstein_files.json_files:
             console.line(2)
-            console.print(json_file.description_panel())
+            console.print(json_file.summary_panel())
             console.print_json(json_file.json_str(), indent=4, sort_keys=False)
@@ -187,8 +187,13 @@ def write_urls() -> None:
 def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
     """Log warnings if some emails were never printed."""
     email_ids_that_were_printed = set([email.file_id for email in already_printed_emails])
-    logger.warning(f"Printed {len(already_printed_emails)} emails of {len(email_ids_that_were_printed)} unique file IDs.")
+    logger.warning(f"Printed {len(already_printed_emails):,} emails of {len(email_ids_that_were_printed):,} unique file IDs.")
+    missed_an_email = False
     for email in epstein_files.emails:
-        if email.file_id not in email_ids_that_were_printed and not email.is_duplicate:
+        if email.file_id not in email_ids_that_were_printed and not email.is_duplicate():
             logger.warning(f"Failed to print {email.summary()}")
+            missed_an_email = True
+    if not missed_an_email:
+        logger.warning(f"All {len(epstein_files.emails):,} emails printed at least once.")

epstein_files/util/rich.py CHANGED Viewed

@@ -20,8 +20,9 @@ from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import FALLBACK_TIMESTAMP, HEADER_ABBREVIATIONS
 from epstein_files.util.data import json_safe
 from epstein_files.util.env import args
+from epstein_files.util.file_helper import log_file_write
 from epstein_files.util.highlighted_group import ALL_HIGHLIGHTS, HIGHLIGHTED_NAMES, EpsteinHighlighter
-from epstein_files.util.logging import log_file_write, logger
+from epstein_files.util.logging import logger
 TITLE_WIDTH = 50
 NUM_COLOR_KEY_COLS = 4
@@ -30,6 +31,7 @@ QUESTION_MARK_TXT = Text(QUESTION_MARKS, style='dim')
 GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
 DEFAULT_NAME_STYLE = 'gray46'
+INFO_STYLE = 'white dim italic'
 KEY_STYLE='honeydew2 bold'
 SECTION_HEADER_STYLE = 'bold white on blue3'
 SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
@@ -239,23 +241,26 @@ def print_numbered_list_of_emailers(_list: list[str | None], epstein_files = Non
 def print_other_site_link(is_header: bool = True) -> None:
     """Print a link to the emails site if we're building text messages site and vice versa."""
     site_type: SiteType = EMAIL if args.all_emails else TEXT_MESSAGE
+    link_style = OTHER_SITE_LINK_STYLE if is_header else 'light_slate_grey bold'
     if is_header:
         print_starred_header(f"This is the Epstein {site_type.title()}s site", num_spaces=4, num_stars=14)
     other_site_type: SiteType = TEXT_MESSAGE if site_type == EMAIL else EMAIL
-    other_site_msg = "another site for" + (' all of' if other_site_type == EMAIL else '')
+    other_site_msg = "another site with" + (' all of' if other_site_type == EMAIL else '')
     other_site_msg += f" Epstein's {other_site_type}s also generated by this code"
-    markup_msg = link_markup(SITE_URLS[other_site_type], other_site_msg, OTHER_SITE_LINK_STYLE)
+    markup_msg = link_markup(SITE_URLS[other_site_type], other_site_msg, link_style)
     print_centered(parenthesize(Text.from_markup(markup_msg)), style='bold')
-    if is_header:
-        word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
-        print_centered(parenthesize(word_count_link))
-        metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
-        print_centered(parenthesize(metadata_link))
-        json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
-        print_centered(parenthesize(json_link))
+    if not is_header:
+        return
+    word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
+    print_centered(parenthesize(word_count_link))
+    metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
+    print_centered(parenthesize(metadata_link))
+    json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
+    print_centered(parenthesize(json_link))
 def print_page_title(expand: bool = True, width: int | None = None) -> None:

epstein_files/util/word_count.py CHANGED Viewed

@@ -9,18 +9,22 @@ from rich.padding import Padding
 from rich.text import Text
 from epstein_files.documents.emails.email_header import EmailHeader
-from epstein_files.util.constant.common_words import COMMON_WORDS, UNSINGULARIZABLE_WORDS
+from epstein_files.epstein_files import EpsteinFiles
+from epstein_files.util.constant.common_words import COMMON_WORDS_LIST, COMMON_WORDS, UNSINGULARIZABLE_WORDS
 from epstein_files.util.constant.names import OTHER_NAMES
+from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
 from epstein_files.util.data import ALL_NAMES, flatten, sort_dict
-from epstein_files.util.env import args
+from epstein_files.util.env import args, specified_names
 from epstein_files.util.logging import logger
-from epstein_files.util.rich import highlighter
-from epstein_files.util.search_result import SearchResult
+from epstein_files.util.rich import (console, highlighter, print_centered, print_color_key, print_page_title,
+     print_panel, print_starred_header, write_html)
+from epstein_files.util.search_result import MatchedLine, SearchResult
+from epstein_files.util.timer import Timer
 FIRST_AND_LAST_NAMES = flatten([n.split() for n in ALL_NAMES])
 FIRST_AND_LAST_NAMES = [n.lower() for n in FIRST_AND_LAST_NAMES] + OTHER_NAMES
-HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
+HTML_REGEX = re.compile(r"^http|#yiv|com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
 HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
 OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
 ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
@@ -187,6 +191,62 @@ class WordCount:
         yield f"Showing {len(word_txts):,} words appearing at least {MIN_COUNT_CUTOFF} times (out of {len(self.count):,} words)."
+def write_word_counts_html() -> None:
+    timer = Timer()
+    epstein_files = EpsteinFiles.get_files(timer)
+    email_subjects: set[str] = set()
+    word_count = WordCount()
+    # Remove dupes, junk mail, and fwded articles from emails
+    emails = [e for e in epstein_files.emails if not (e.is_duplicate() or e.is_junk_mail() or e.is_fwded_article())]
+    for email in emails:
+        if specified_names and email.author not in specified_names:
+            continue
+        logger.info(f"Counting words in {email}\n  [SUBJECT] {email.subject()}")
+        lines = email.actual_text.split('\n')
+        if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
+            email_subjects.add(email.subject())
+            lines.append(email.subject())
+        for i, line in enumerate(lines):
+            if HTML_REGEX.search(line):
+                continue
+            for word in line.split():
+                word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
+    # Add in iMessage conversation words
+    imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
+    for imessage_log in imessage_logs:
+        logger.info(f"Counting words in {imessage_log}")
+        for i, msg in enumerate(imessage_log.messages):
+            if specified_names and msg.author not in specified_names:
+                continue
+            elif HTML_REGEX.search(line):
+                continue
+            for word in msg.text.split():
+                word_count.tally_word(word, SearchResult(imessage_log, [MatchedLine(msg.text, i)]))
+    print_page_title(expand=False)
+    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
+    print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
+    console.line()
+    print_color_key()
+    console.line()
+    console.print(word_count)
+    console.line(2)
+    print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
+    console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
+    write_html(WORD_COUNT_HTML_PATH)
+    timer.print_at_checkpoint(f"Finished counting words")
 def _word_style(word: str | None) -> str:
     word = word or ''
     return 'bright_white' if word in FIRST_AND_LAST_NAMES else 'grey53'

{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.0.10
+Version: 1.0.11
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later

epstein_files-1.0.11.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,33 @@
+epstein_files/__init__.py,sha256=4zxX1tw-0xMwpM-Sbq7PezV0YNS9zN-P6gc9BQ1BqKU,4710
+epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
+epstein_files/documents/document.py,sha256=dECV0bSnOJzPfOIHyHeG5rNxKd6uwuiso35-sQZg9No,18353
+epstein_files/documents/email.py,sha256=yXiW7mB4myU8G9DY7PnnqazaCqeAR3dHr35NfBplfRU,38519
+epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
+epstein_files/documents/imessage/text_message.py,sha256=3HlNp75JIoMlWj7PaUWIFry3qlGEmpGu5OmdmsBYS34,2807
+epstein_files/documents/json_file.py,sha256=HsnVWPZXVxTF_DadL2YtJtsiXKXOd18PUs05O33tjNc,1317
+epstein_files/documents/messenger_log.py,sha256=uSPlg85jGTwod1cV9f7MtxSNqmMZ61JBFzoiRNqg52M,6263
+epstein_files/documents/other_file.py,sha256=S_Y-SxYYYXtx42JHmhFWl5BbTduNI7cwQjeYHBJA7sc,9950
+epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
+epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
+epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
+epstein_files/util/constant/names.py,sha256=KKJEYFpdOp4xDwXe5dhrqYgF12oJODvVSFpAB28Q76A,10153
+epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
+epstein_files/util/constant/strings.py,sha256=FDtksfH50PSxtSBw9XhmqxtrgRgGxdIvGiAR2bbPpu4,1899
+epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
+epstein_files/util/constants.py,sha256=LPSI6Z0n3ChFDnMGYVO80cGuSKZf0OoyUzLih_jlRKI,111434
+epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
+epstein_files/util/doc_cfg.py,sha256=OZlocAWldfR8Nomiad4FxQeyhNMbd0PQ-rumKn2nWBg,9641
+epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
+epstein_files/util/file_helper.py,sha256=-higKqc9J5IfNpzMzg-9j1ps3beV4N2cw8kdAxfm7NA,2835
+epstein_files/util/highlighted_group.py,sha256=fU-8ns50uUolzPEAxadF5AnPLjn383KpEeyRXfFbv_U,35971
+epstein_files/util/logging.py,sha256=8e22WaBfDAKEmkcr3Gb4TdqtFSkU4FQDpk3Z6hfSzbw,1977
+epstein_files/util/output.py,sha256=UzTU0mNHEmeJr3w2TXAp19X497GB6_-HyW0mfztI1jk,8120
+epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
+epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
+epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
+epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
+epstein_files-1.0.11.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+epstein_files-1.0.11.dist-info/METADATA,sha256=HBW3t1F9lkoN6GIR7ySV2kBYnJhNEF9otDZWnf03jUo,5480
+epstein_files-1.0.11.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+epstein_files-1.0.11.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
+epstein_files-1.0.11.dist-info/RECORD,,

epstein_files/count_words.py DELETED Viewed

@@ -1,72 +0,0 @@
-# Count word usage in emails and texts
-import re
-from epstein_files.epstein_files import EpsteinFiles
-from epstein_files.util.constant.common_words import COMMON_WORDS_LIST
-from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
-from epstein_files.util.env import args, specified_names
-from epstein_files.util.logging import logger
-from epstein_files.util.rich import (console, print_centered, print_color_key, print_page_title, print_panel,
-     print_starred_header, write_html)
-from epstein_files.util.search_result import MatchedLine, SearchResult
-from epstein_files.util.timer import Timer
-from epstein_files.util.word_count import WordCount
-HTML_REGEX = re.compile(r"^http|#yiv")
-def write_word_counts_html() -> None:
-    timer = Timer()
-    epstein_files = EpsteinFiles.get_files(timer)
-    email_subjects: set[str] = set()
-    word_count = WordCount()
-    # Remove dupes, junk mail, and fwded articles from emails
-    emails = [
-        e for e in epstein_files.emails
-        if not (e.is_duplicate or e.is_junk_mail() or (e.config and e.config.is_fwded_article)) \
-            and (len(specified_names) == 0 or e.author in specified_names)
-    ]
-    for email in emails:
-        logger.info(f"Counting words in {email}\n  [SUBJECT] {email.subject()}")
-        lines = email.actual_text.split('\n')
-        if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
-            email_subjects.add(email.subject())
-            lines.append(email.subject())
-        for i, line in enumerate(lines):
-            if HTML_REGEX.search(line):
-                continue
-            for word in line.split():
-                word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
-    # Add in iMessage conversation words
-    imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
-    for imessage_log in imessage_logs:
-        logger.info(f"Counting words in {imessage_log}")
-        for msg in imessage_log.messages():
-            if len(specified_names) > 0 and msg.author not in specified_names:
-                continue
-            elif HTML_REGEX.search(line):
-                continue
-            for word in msg.text.split():
-                word_count.tally_word(word, SearchResult(imessage_log, [msg.text]))
-    print_page_title(expand=False)
-    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
-    print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
-    console.line()
-    print_color_key()
-    console.line()
-    console.print(word_count)
-    console.line(2)
-    print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
-    console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
-    write_html(WORD_COUNT_HTML_PATH)
-    timer.print_at_checkpoint(f"Finished counting words")

epstein_files-1.0.10.dist-info/RECORD DELETED Viewed

@@ -1,34 +0,0 @@
-epstein_files/__init__.py,sha256=SfLLu9X7rfHdmZcl8JGmiIxZ_E1RVsmCrh8sLO4jNPU,4859
-epstein_files/count_words.py,sha256=i1pYaQzX7b9S3pyV3RM_8asbQJ1PEk8wJgLOG6Mf0D8,2966
-epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
-epstein_files/documents/document.py,sha256=BUaioSvOmfsR-ULa6hJy3WYg-hBDC-kqafUheMJ-jFY,16665
-epstein_files/documents/email.py,sha256=H34b2zt_TrPUgXHwZXybjmLE9-QNAtezs9NVSCPOSGM,38462
-epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
-epstein_files/documents/imessage/text_message.py,sha256=wfWPQhwGG5Yzyhbr1NAQAY0bzRjjqVZmh8SPl48XmAM,3025
-epstein_files/documents/json_file.py,sha256=1Cx_3uM38Dwgrbs8fU55TUZKSrCsmd4QpHKWtfWdudw,1089
-epstein_files/documents/messenger_log.py,sha256=DHlQpbLbMITMpMtCYk2vcRc7-CTvYvOXql-9nDUc3tQ,5887
-epstein_files/documents/other_file.py,sha256=NdVlCYcyzHvOInReqF-zvHJI1hwtzMWW9ekDojHIb4U,9091
-epstein_files/epstein_files.py,sha256=EEx8Auwv8z0FkRrCi7wE8iuuRQd6K1rQDMc2vdbrsh4,18298
-epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
-epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
-epstein_files/util/constant/names.py,sha256=uYhv9xa4NO5jCk9zrGpPKFkcVVaMY2qtBC7ZaKGK3J8,10135
-epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
-epstein_files/util/constant/strings.py,sha256=FDtksfH50PSxtSBw9XhmqxtrgRgGxdIvGiAR2bbPpu4,1899
-epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
-epstein_files/util/constants.py,sha256=gp5HWHt5FHd916r4UpjcJKslO5L-Wno6kjA4F3ZA4YU,110884
-epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
-epstein_files/util/doc_cfg.py,sha256=5Pb__bP00mKi9ACv33omZQA-TBzumc7D2Td_Mk4M5DY,9822
-epstein_files/util/env.py,sha256=PaPBi27-npU9egt9LHxr5qR65B2DPHwt7Xc9sx5VN-M,5225
-epstein_files/util/file_helper.py,sha256=v_bE10MHEcXti9DVJo4WqyOsG83Xrv05S3Vc70cYJkk,3082
-epstein_files/util/highlighted_group.py,sha256=dajLYuSbT69zMWf6XKUOZI6ZcgFy-Beq7Nsg9qlteck,35715
-epstein_files/util/logging.py,sha256=4hVl1Qw1qRMSVEYKXZxrvdQuSIMBgTPskzvNMNu8268,2185
-epstein_files/util/output.py,sha256=wLjFBGR5ffn4cLep12G3OmUR0H3WtEMXeVMOXtd-6ig,7909
-epstein_files/util/rich.py,sha256=rdHzn4XRB2erQSf2yYyPakRmd9ixqBUdS8-BVOUAXnE,14603
-epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
-epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
-epstein_files/util/word_count.py,sha256=eGzcsoAvMcutRUFOJnVuEp9_28H74to7T9jTdGUZnuI,6757
-epstein_files-1.0.10.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-epstein_files-1.0.10.dist-info/METADATA,sha256=zi10sSw5g5BZDRovIeWlpMYEgLbqFxSl7QII9jUuKdw,5480
-epstein_files-1.0.10.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
-epstein_files-1.0.10.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
-epstein_files-1.0.10.dist-info/RECORD,,

{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/LICENSE RENAMED Viewed

File without changes

{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{epstein_files-1.0.10.dist-info → epstein_files-1.0.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

epstein-files 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

epstein-files 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl