PyPI - epstein-files - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

epstein_files/__init__.py +31 -18
epstein_files/documents/communication.py +9 -5
epstein_files/documents/document.py +225 -136
epstein_files/documents/doj_file.py +242 -0
epstein_files/documents/doj_files/full_text.py +166 -0
epstein_files/documents/email.py +138 -163
epstein_files/documents/emails/email_header.py +21 -11
epstein_files/documents/emails/emailers.py +223 -0
epstein_files/documents/imessage/text_message.py +2 -3
epstein_files/documents/json_file.py +18 -14
epstein_files/documents/messenger_log.py +23 -39
epstein_files/documents/other_file.py +48 -44
epstein_files/epstein_files.py +54 -33
epstein_files/person.py +142 -110
epstein_files/util/constant/names.py +29 -6
epstein_files/util/constant/output_files.py +2 -0
epstein_files/util/constant/strings.py +12 -6
epstein_files/util/constant/urls.py +17 -0
epstein_files/util/constants.py +101 -174
epstein_files/util/data.py +2 -0
epstein_files/util/doc_cfg.py +20 -15
epstein_files/util/env.py +24 -16
epstein_files/util/file_helper.py +28 -6
epstein_files/util/helpers/debugging_helper.py +13 -0
epstein_files/util/helpers/env_helpers.py +21 -0
epstein_files/util/highlighted_group.py +57 -16
epstein_files/util/layout/left_bar_panel.py +26 -0
epstein_files/util/logging.py +28 -13
epstein_files/util/output.py +33 -10
epstein_files/util/rich.py +28 -2
epstein_files/util/word_count.py +7 -7
{epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
epstein_files-1.5.0.dist-info/RECORD +40 -0
epstein_files-1.4.1.dist-info/RECORD +0 -34
{epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
{epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
{epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0

epstein_files/util/constant/names.py CHANGED Viewed

@@ -177,6 +177,16 @@ ZUBAIR_KHAN = 'Zubair Khan'
 UNKNOWN = '(unknown)'
+# DOJ files emails
+ALISON_J_NATHAN = 'Alison J. Nathan'
+AMIR_TAAKI = 'Amir Taaki'
+BROCK_PIERCE = 'Brock Pierce'
+CHRISTIAN_EVERDELL = 'Christian Everdell'
+CHRISTOPHER_DILORIO = 'Christopher Dilorio'
+DOUGLAS_WIGDOR = 'Douglas Wigdor'
+KARYNA_SHULIAK = 'Karyna Shuliak'
+STACEY_RICHMAN = 'Stacey Richman'
 # No communications but name is in the files
 BILL_GATES = 'Bill Gates'
 DONALD_TRUMP = 'Donald Trump'
@@ -216,10 +226,10 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
-    al alain alan alfredo allen alex alexander amanda andres andrew anthony
+    al alain alan alison alfredo allen alex alexander amanda andres andrew anthony
     bard barrett barry bennet bernard bill black bob boris brad brenner bruce
-    cameron caroline carolyn chris christina cohen
-    dan daniel danny darren dave david debbie donald
+    cameron caroline carolyn chris christian christina cohen
+    dan daniel danny darren dave david debbie donald douglas
     ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
     faith fisher forget fred friendly frost fuller
     gates gerald george gold gordon
@@ -229,11 +239,11 @@ NAMES_TO_NOT_HIGHLIGHT = """
     kafka kahn karl kate katherine kelly ken kevin krassner
     larry larsen laurie lawrence leon lesley linda link lisa
     mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
-    nancy neal new nicole norman
+    nancy nathan neal new nicole norman
     owen
-    paul paula pen peter philip prince
+    paul paula pen peter philip pierce prince
     randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
-    scott sean skip smith stanley stern stephen steve steven stone susan
+    scott sean skip smith stacey stanley stern stephen steve steven stone susan
     terry the thomas tim tom tony tyler
     victor
     wade waters
@@ -304,7 +314,20 @@ def extract_last_name(name: str) -> str:
         return first_last_names[-1]
+def reverse_first_and_last_names(name: str) -> str:
+    """Reverse a 'Lastname, Firstname' style name to 'Firstname Lastname' (dropping the comma); names containing '@' are lowercased instead."""
+    if '@' in name:
+        return name.lower()
+    if ', ' in name:
+        names = name.split(', ')
+        return f"{names[1]} {names[0]}"
+    else:
+        return name
 def reversed_name(name: str) -> str:
+    """'Jeffrey Epstein' becomes 'Epstein Jeffrey'."""
     if ' ' not in name:
         return name

epstein_files/util/constant/output_files.py CHANGED Viewed

@@ -13,6 +13,7 @@ JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.
 TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
 WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
 # EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
+DOJ_2026_HTML_PATH = HTML_DIR.joinpath('doj_2026-01-30_files.html')
 URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
@@ -26,6 +27,7 @@ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
 JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
 WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
+DOJ_2026_URL = f"{TEXT_MSGS_URL}/{DOJ_2026_HTML_PATH.name}"
 SITE_URLS: dict[SiteType, str] = {
     EMAIL: ALL_EMAILS_URL,

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -57,25 +57,31 @@ TIMESTAMP_DIM = f"turquoise4 dim"
 # Misc
 AUTHOR = 'author'
 DEFAULT = 'default'
+EFTA_PREFIX = 'EFTA'
 HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
 JSON = 'json'
 NA = 'n/a'
 REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
-# Regexes
-ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
-FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
-FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
-QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 # Document subclass names (this sucks)
 DOCUMENT_CLASS = 'Document'
+DOJ_FILE_CLASS = 'DojFile'
 EMAIL_CLASS = 'Email'
 JSON_FILE_CLASS = 'JsonFile'
 MESSENGER_LOG_CLASS = 'MessengerLog'
 OTHER_FILE_CLASS = 'OtherFile'
+# Regexes
+DOJ_FILE_STEM_REGEX = re.compile(fr"{EFTA_PREFIX}\d{{8}}")
+DOJ_FILE_NAME_REGEX = re.compile(fr"{DOJ_FILE_STEM_REGEX.pattern}(\.txt)?")
+HOUSE_OVERSIGHT_NOV_2025_ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.pattern})")
+HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
+QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -12,6 +12,7 @@ from epstein_files.util.file_helper import coerce_file_stem
 # Style stuff
 ARCHIVE_LINK_COLOR = 'slate_blue3'
+ARCHIVE_ALT_LINK_STYLE = 'medium_purple4 italic'
 TEXT_LINK = 'text_link'
 # External site names
@@ -39,6 +40,9 @@ EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
 OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
 RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
 SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
+# DOJ docs
+DOJ_2026_URL = 'https://www.justice.gov/epstein/doj-disclosures'
+DOJ_SEARCH_URL = 'https://www.justice.gov/epstein/search'
 # Document source sites
 EPSTEINIFY_URL = 'https://epsteinify.com'
@@ -53,6 +57,9 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
     ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
 }
+# Example: https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf
+DOJ_2026_FILE_BASE_URL = "https://www.justice.gov/epstein/files/DataSet%20"
 epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
 epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
@@ -90,6 +97,16 @@ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower
     return f"{base_url}{file_stem}"
+def doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
+    """Link to justice.gov for a DOJ file."""
+    return f"{DOJ_2026_FILE_BASE_URL}{dataset_id}/{file_stem}.pdf"
+def jmail_doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
+    """Link to Jmail backup of DOJ file."""
+    return f"{JMAIL_URL}/drive/vol{dataset_id:05}-{file_stem.lower()}-pdf"
 def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
     url = build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id)
     return link_markup(url, coerce_file_stem(filename_or_id), style)

epstein_files/util/constants.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import cast
 from dateutil.parser import parse
+from epstein_files.documents.doj_files.full_text import EFTA00009622_TEXT
 from epstein_files.util.constant.names import *
 from epstein_files.util.constant.strings import *
 from epstein_files.util.doc_cfg import DocCfg, EmailCfg, TextCfg
@@ -56,172 +57,6 @@ HEADER_ABBREVIATIONS = {
 # Emailers Config Stuff #
 #########################
-# Emailers
-EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
-    ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
-    ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
-    AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
-    ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
-    ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
-    ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
-    ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
-    ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Rothsh?ch?ild|Ariane(?!\s+Dwyer)', re.IGNORECASE),
-    BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
-    BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
-    BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
-    BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
-    BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
-    BRAD_EDWARDS:  re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
-    BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
-    DANGENE_AND_JENNIE_ENTERPRISE: re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
-    DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
-    DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
-    DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
-    DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
-    DAVID_STERN: re.compile(r'David Stern?', re.IGNORECASE),
-    EDUARDO_ROBLES: re.compile(r'Ed(uardo)?\s*Robles', re.IGNORECASE),
-    EDWARD_JAY_EPSTEIN: re.compile(r'(?<!Jeffrey )Edward (Jay )?Epstein', re.IGNORECASE),
-    EHUD_BARAK: re.compile(r'(ehud|e?h)\s*barak|\behud', re.IGNORECASE),
-    FAITH_KATES: re.compile(r'faith kates?', re.IGNORECASE),
-    GERALD_BARTON: re.compile(r'Gerald.*Barton', re.IGNORECASE),
-    GERALD_LEFCOURT: re.compile(r'Gerald\s*(B\.?\s*)?Lefcourt', re.IGNORECASE),
-    GHISLAINE_MAXWELL: re.compile(r'g ?max(well)?|Ghislaine|Maxwell', re.IGNORECASE),
-    HEATHER_MANN: re.compile(r'Heather Mann?', re.IGNORECASE),
-    INTELLIGENCE_SQUARED: re.compile(r'intelligence\s*squared', re.IGNORECASE),
-    JACKIE_PERCZEK:  re.compile(r'jackie percze[kl]?', re.IGNORECASE),
-    JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
-    JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
-    JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
-    JEAN_HUGUEN: re.compile(r"Jean[\s.]Huguen", re.IGNORECASE),
-    JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
-    JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
-    JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeff(rey)? (Edward )?E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
-    JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
-    JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
-    JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
-    JONATHAN_FARKAS: re.compile(r'Jonathan Fark(a|u)(s|il)', re.IGNORECASE),
-    KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
-    KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
-    LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
-    LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|[Il]hsofficel?', re.IGNORECASE),
-    LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
-    LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
-    LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
-    LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
-    LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
-    MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
-    MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
-    MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
-    MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
-    MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
-    MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
-    "Matthew Schafer": re.compile(r"matthew\.?schafer?", re.IGNORECASE),
-    MELANIE_SPINELLA: re.compile(r'M?elanie Spine[Il]{2}a', re.IGNORECASE),
-    MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
-    MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
-    MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
-    MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
-    MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
-    MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
-    NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
-    NEAL_KASSELL: re.compile(r'Neal\s*Kassell?', re.IGNORECASE),
-    NICHOLAS_RIBIS: re.compile(r'Nic(holas|k)[\s._]Ribi?s?|Ribbis', re.IGNORECASE),
-    OLIVIER_COLOM: re.compile(fr'Colom, Olivier|{OLIVIER_COLOM}', re.IGNORECASE),
-    PAUL_BARRETT: re.compile(r'Paul Barre(d|tt)', re.IGNORECASE),
-    PAUL_KRASSNER: re.compile(r'Pa\s?ul Krassner', re.IGNORECASE),
-    PAUL_MORRIS: re.compile(r'morris, paul|Paul Morris', re.IGNORECASE),
-    PAULA: re.compile(r'^Paula( Heil Fisher)?$', re.IGNORECASE),
-    PEGGY_SIEGAL:  re.compile(r'Peggy Siegal?', re.IGNORECASE),
-    PETER_ATTIA: re.compile(r'Peter Attia?', re.IGNORECASE),
-    PETER_MANDELSON: re.compile(r"((Lord|Peter) )?Mandelson", re.IGNORECASE),
-    'pink@mc2mm.com': re.compile(r"^Pink$|pink@mc2mm\.com", re.IGNORECASE),
-    PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
-    REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
-    RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
-    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
-    ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
-    ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
-    ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
-    SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
-    SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
-    SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
-    SHAHER_ABDULHAK_BESHER: re.compile(r'\bShaher( Abdulhak Besher)?\b', re.IGNORECASE),
-    SOON_YI_PREVIN: re.compile(r'Soon[- ]Yi Previn?', re.IGNORECASE),
-    STEPHEN_HANSON: re.compile(r'ste(phen|ve) hanson?|Shanson900', re.IGNORECASE),
-    STEVE_BANNON: re.compile(r'steve banno[nr]?', re.IGNORECASE),
-    STEVEN_SINOFSKY: re.compile(r'Steven Sinofsky?', re.IGNORECASE),
-    SULTAN_BIN_SULAYEM: re.compile(r'Sultan (Ahmed )?bin Sulaye?m?', re.IGNORECASE),
-    TERJE_ROD_LARSEN: re.compile(r"Terje(( (R[øo]e?d[- ])?)?Lars[eo]n)?", re.IGNORECASE),
-    TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
-    THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
-    THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
-    TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
-    VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
-}
-# If found as substring consider them the author
-EMAILERS = [
-    'Anne Boyles',
-    AL_SECKEL,
-    'Ariane Dwyer',
-    AZIZA_ALAHMADI,
-    BILL_GATES,
-    BILL_SIEGEL,
-    BRAD_WECHSLER,
-    CHRISTINA_GALBRAITH,
-    DANIEL_SABBA,
-    'Danny Goldberg',
-    DAVID_SCHOEN,
-    DEBBIE_FEIN,
-    DEEPAK_CHOPRA,
-    GLENN_DUBIN,
-    GORDON_GETTY,
-    'Kevin Bright',
-    'Jack Lang',
-    JACK_SCAROLA,
-    JAY_LEFKOWITZ,
-    JES_STALEY,
-    JOHN_PAGE,
-    'Jokeland',
-    JOSCHA_BACH,
-    'Kathleen Ruderman',
-    KENNETH_E_MAPP,
-    'Larry Cohen',
-    LESLEY_GROFF,
-    'lorraine@mc2mm.com',
-    LINDA_STONE,
-    'Lyn Fontanilla',
-    MARK_TRAMO,
-    MELANIE_WALKER,
-    MERWIN_DELA_CRUZ,
-    'Michael Simmons',   # Not the only "To:"
-    'middle.east.update@hotmail.com',
-    'Nancy Cain',
-    'Nancy Dahl',
-    'Nancy Portland',
-    'Oliver Goodenough',
-    'Peter Aldhous',
-    'Peter Green',
-    ROGER_SCHANK,
-    'Roy Black',
-    STEVEN_PFEIFFER,
-    'Steven Victor MD',
-    'Susan Edelman',
-    TOM_BARRACK,
-    'Vahe Stepanian',
-    'Vladimir Yudashkin',
-]
-EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES)  # Keep a copy without the simple EMAILERS regexes
-# Add simple matching regexes for EMAILERS entries to EMAILER_REGEXES
-for emailer in EMAILERS:
-    if emailer in EMAILER_REGEXES:
-        raise RuntimeError(f"Can't overwrite emailer regex for '{emailer}'")
-    EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
 # Attribution reasons
 BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
 KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
@@ -459,7 +294,6 @@ EMAILS_CONFIG = [
     EmailCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
     EmailCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
     EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason='quoted reply has edwardjayepstein.com', is_fwded_article=True),
-    EmailCfg(id='031607', is_fwded_article=True, comment='Epstein reply to Edward Jay Epstein'),
     EmailCfg(
         id='030475',
         author=FAITH_KATES,
@@ -553,7 +387,12 @@ EMAILS_CONFIG = [
     EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
     EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
     EmailCfg(id='032563', author=MASHA_DROKOVA, attribution_reason='replied to in 033014'),
-    EmailCfg(id='032564', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
+    EmailCfg(
+        id='032564',
+        attribution_reason='follow up to 032563 about huffpo article with link',
+        author=MASHA_DROKOVA,
+        description='an archived version of the HuffPost link is here: https://archive.is/hJxT3 '
+    ),
     EmailCfg(id='031544', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
     EmailCfg(id='032605', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
     EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
@@ -657,7 +496,7 @@ EMAILS_CONFIG = [
     EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
     EmailCfg(id='032876', recipients=[CECILIA_STEEN], attribution_reason='unredacted in 032267'),
     EmailCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
-    EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason='quoted reply has edwardjayepstein.com'),
+    EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], is_fwded_article=True, attribution_reason='quoted reply has edwardjayepstein.com'),
     EmailCfg(
         id='030525',
         recipients=[FAITH_KATES],
@@ -1091,6 +930,40 @@ EMAILS_CONFIG = [
     EmailCfg(id='027028', truncate_to=1000, comment='Tom Pritzer penny pritzker'),
     EmailCfg(id='029910', truncate_to=NO_TRUNCATE, comment='Tom Pritzer Aspen'),
     EmailCfg(id='025163', truncate_to=NO_TRUNCATE, comment='Tom Pritzer'),
+    # DOJ files
+    EmailCfg(id='EFTA00935996', recipients=[RENATA_BOLOTOVA], attribution_reason='"sneaky dog"'),
+    EmailCfg(id='EFTA02731737', date='2023-06-30T16:05:00'),
+    EmailCfg(id='EFTA02731689', author=UNKNOWN, recipients=[None], date='2023-06-09 20:14:00'),
+    EmailCfg(id='EFTA02731475', date='2023-05-31T20:53:00'),
+    EmailCfg(id='EFTA02731732', date='2024-03-06T12:21:00'),
+    EmailCfg(id='EFTA02731485', date='2023-06-12T13:53:00'),
+    EmailCfg(id='EFTA02731617', date='2021-04-28T15:05:41'),
+    EmailCfg(id='EFTA02730483', date='2023-07-11T08:25:00'), # TODO: actually reply timestamp
+    EmailCfg(id='EFTA02730481', date='2023-07-07T11:01:00'), # TODO: actually reply timestamp
+    EmailCfg(id='EFTA02731754', date='2024-03-06T23:24:00'), # TODO: actually reply timestamp
+    EmailCfg(id='EFTA02731735', date='2024-03-04T05:04:00'), # TODO: actually reply timestamp
+    EmailCfg(id='EFTA02731577', date='2024-10-16T00:00:00'), # TODO: actually reply timestamp
+    EmailCfg(id='EFTA02730468', date='2019-07-11T08:25:00'), # TODO: This is just wrong
+    # Generated based on OtherFile extract_timestamp()
+    EmailCfg(id='EFTA02731783', date='2022-01-21 17:28:00'),
+    EmailCfg(id='EFTA02731587', date='2022-01-21 17:28:00'),
+    EmailCfg(id='EFTA02731729', date='2021-08-17 00:00:00'),
+    EmailCfg(id='EFTA02731578', date='2021-05-28 10:00:00'),
+    EmailCfg(id='EFTA02730473', date='2013-04-24 16:32:00'),
+    EmailCfg(id='EFTA02731699', date='2021-05-27 10:19:00'),
+    EmailCfg(id='EFTA02731583', date='2022-01-21 17:28:00'),
+    EmailCfg(id='EFTA02731552', date='2021-05-26 16:12:00'),
+    EmailCfg(id='EFTA00039888', date='2019-05-14 16:49:00'),
+    EmailCfg(id='EFTA02731684', date='2021-05-11 15:27:00'),
+    EmailCfg(id='EFTA02731697', date='2021-06-07 17:33:00'),
+    EmailCfg(id='EFTA02731733', date='2021-05-17 17:29:00'),
+    EmailCfg(id='EFTA00040145', date='2021-11-09 17:24:30'),
+    # EmailCfg(id='EFTA02730468', date='2004-02-03 00:00:00'),  # TODO: ???
+    EmailCfg(id='EFTA02731528', date='2021-05-06 09:39:15'),
+    EmailCfg(id='EFTA02730485', date='2021-12-03 00:00:00'),
+    EmailCfg(id='EFTA00039689', truncate_to=NO_TRUNCATE),
+    EmailCfg(id='EFTA00995559', author=RENATA_BOLOTOVA, attribution_reason='poorly redacted signature'),
 ]
 if args.constantize:
@@ -1163,7 +1036,7 @@ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
 OTHER_FILES_BOOKS = [
     DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
     DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
-    DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy'),
+    DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy', date='2013-09-19'),
     DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
     DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
     DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
@@ -1475,6 +1348,10 @@ OTHER_FILES_LEGAL = [
     DocCfg(id='028540', author='SCOTUS', description=f"decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
     DocCfg(id='012197', author='SDFL', description=f"response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
     DocCfg(id='022277', description=f"text of National Labour Relations Board (NLRB) law", is_interesting=False),
+    # DOJ files
+    DocCfg(id='EFTA00007157', description='victim list and police log'),
+    DocCfg(id='EFTA02730274', description='evidence inventory that appears to have since been deleted from the DOJ website'),
 ]
 OTHER_FILES_CONFERENCES = [
@@ -1585,7 +1462,12 @@ OTHER_FILES_FINANCE = [
     DocCfg(id='024132', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-03-15'),
     DocCfg(id='024194', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-10-22'),
     DocCfg(id='025296', author='Laffer Associates', description=f'report predicting Trump win', date='2016-07-06'),
-    DocCfg(id='020824', author='Mary Meeker', description=f"USA Inc: A Basic Summary of America's Financial Statements compiled", date='2011-02-01'),
+    DocCfg(
+        id='020824',
+        author='Mary Meeker',
+        date='2011-02-01',
+        description=f"USA Inc: A Basic Summary of America's Financial Statements compiled",
+    ),
     DocCfg(id='025551', author='Morgan Stanley', description=f'report about alternative asset managers', date='2018-01-30'),
     DocCfg(id='019856', author='Sadis Goldberg LLP', description=f"report on SCOTUS ruling about insider trading", is_interesting=True),
     DocCfg(id='025763', author='S&P', description=f"Economic Research: How Increasing Income Inequality Is Dampening U.S. Growth", date='2014-08-05'),
@@ -1594,12 +1476,20 @@ OTHER_FILES_FINANCE = [
     DocCfg(id='026584', description=f"article about tax implications of disregarded entities", date='2009-07-01', is_interesting=True),
     DocCfg(
         id='024271',
-        description=f"Blockchain Capital and Brock Pierce pitch deck",
         date='2015-10-01',
+        description=f"Blockchain Capital and Brock Pierce pitch deck",
         is_interesting=True,
     ),
-    DocCfg(id='024817', description=f"Cowen's Collective View of CBD / Cannabis report"),
-    DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
+    DocCfg(
+        id='024817',
+        date='2019-02-25',
+        description=f"Cowen's Collective View of CBD / Cannabis report",
+        is_interesting=True
+    ),
+    DocCfg(
+        id='012048',
+        description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"
+    ),
     # private placement memoranda
     DocCfg(
@@ -1668,6 +1558,11 @@ OTHER_FILES_PROPERTY = [
         description=f"{VIRGIN_ISLANDS} property deal pitch deck, building will be leased to the U.S. govt GSA",
         date='2014-06-01',
     ),
+    # DOJ files
+    DocCfg(id='EFTA00001884', date='2019-03-14', description='photo of letter from Virgin Islands DOJ to Epstein'),
+    DocCfg(id='EFTA00005783', date='2019-08-29', description='heavily redacted handwritten note and 30+ completely blacked out redacted pages'),
 ]
 OTHER_FILES_REPUTATION = [
@@ -1881,6 +1776,32 @@ OTHER_FILES_MISC = [
     DocCfg(id='033434', description=f"{SCREENSHOT} iPhone chat labeled 'Edwards' at the top"),
     DocCfg(id='029475', description=f'{VIRGIN_ISLANDS} Twin City Mobile Integrated Health Services (TCMIH) proposal/request for donation'),
     DocCfg(id='029448', description=f"weird short essay titled 'President Obama and Self-Deception'"),
+    # DOJ files
+    DocCfg(id='EFTA00007781', description='paychecks signed by Epstein deposited at Colonial Bank'),
+    DocCfg(id='EFTA00009622', description='handwritten note transcribed Claude AI', date='2006-07-19', replace_text_with=EFTA00009622_TEXT),
+    DocCfg(id='EFTA00039295', replace_text_with='Bureau of Prisons inmate telephone privileges Program Statement'),
+    DocCfg(
+        id='EFTA00004477',
+        replace_text_with='Epstein 50th birthday photo book 12 "THAIS, MOSCOW GIRLS, AFRICA, HAWAII, [REDACTED] [REDACTED], Zorro, [REDACTED] [REDACTED] [REDACTED], CRACK WHOLE PROPOSAL, BALI/THAILAND/ASIA, RUSSIA, [REDACTED], [REDACTED], NUDES, YOGAL GIRLS',
+    ),
+    DocCfg(id='EFTA00008120', replace_text_with='"Part II: The Art of Receiving a Massage"'),
+    DocCfg(id='EFTA00008020', replace_text_with='"Massage for Dummies"'),
+    DocCfg(id='EFTA00008220', replace_text_with='"Massage book: Chapter 11: Putting the Moves Together"'),
+    DocCfg(id='EFTA00008320', replace_text_with='"Massage for Dummies (???)"'),
+    DocCfg(id='EFTA00000476', replace_text_with='photo of JEFFREY EPSTEIN CASH DISBURSEMENTS for the month 2006-09'),
+    DocCfg(id='EFTA00039312', replace_text_with='Bureau of Prisons Program Statement / Memo about BOP Pharmacy Program'),
+    # Phone bills TODO: Some kind of special handling?
+    DocCfg(id='EFTA00006387', replace_text_with='T-Mobile phone bill covering 2006-06-15 to 2006-07-23'),
+    DocCfg(id='EFTA00007501', replace_text_with='T-Mobile phone bill from 2005'),
+    DocCfg(id='EFTA00006587', replace_text_with='T-Mobile phone bill from 2006-09-04 to 2016-10-15'),
+    DocCfg(id='EFTA00006687', replace_text_with='T-Mobile phone bill from 2006-10-31 to 2006-12-25'),
+    DocCfg(id='EFTA00007401', replace_text_with='T-Mobile phone bill from 2004-08-25 to 2005-07-13'),
+    DocCfg(id='EFTA00007301', replace_text_with='T-Mobile response to subpoena March 23, 2007 - Blackberry phone logs for 2005'),
+    DocCfg(id='EFTA00006487', replace_text_with='T-Mobile phone bill 2006-08-26'),
+    DocCfg(id='EFTA00006100', replace_text_with='Palm Beach Police fax machine activity log 2005-12-28 to 2006-01-04'),
+    DocCfg(id='EFTA00007253', replace_text_with='T-Mobile response to subpoena March 23, 2007 - phone bill '),
 ]
 OTHER_FILES_JUNK = [
@@ -1894,6 +1815,12 @@ OTHER_FILES_JUNK = [
     DocCfg(id='029351', description=OBAMA_JOKE, date='2013-07-26'),
     DocCfg(id='029354', description=OBAMA_JOKE, date='2013-07-26'),
     DocCfg(id='031293'),
+    # Completely redacted DOJ emails, no timestamp at all
+    DocCfg(id='EFTA02731726'),
+    DocCfg(id='EFTA02731728'),
+    # Almost no timestamp
+    DocCfg(id='EFTA00003154'),
 ]
 OTHER_FILES_CATEGORIES = [

epstein_files/util/data.py CHANGED Viewed

@@ -19,6 +19,8 @@ MULTINEWLINE_REGEX = re.compile(r"\n{2,}")
 CONSTANT_VAR_REGEX = re.compile(r"^[A-Z_]+$")
 ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTANT_VAR_REGEX.match(k)]
+AMERICAN_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
+AMERICAN_TIME_REGEX = re.compile(r"(\d{1,2}/\d{1,2}/\d{2,4}\s+\d{1,2}:\d{2}(?::\d{2})?\s*(?:AM|PM)?)")
 PACIFIC_TZ = tz.gettz("America/Los_Angeles")
 TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ}  # Suppresses annoying warnings from parse() calls

epstein_files/util/doc_cfg.py CHANGED Viewed

@@ -74,6 +74,7 @@ class DocCfg:
         duplicate_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
         is_interesting (bool | None): Override other considerations and always consider this file interesting (or not)
         is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
+        replace_text_with (str): Text that should replace the body of the document when printing (defaults to '').
     """
     id: str
     attached_to_email_id: str | None = None
@@ -88,11 +89,9 @@ class DocCfg:
     is_attribution_uncertain: bool = False
     is_interesting: bool | None = None
     is_synthetic: bool = False
+    replace_text_with: str = ''
-    def __post_init__(self):
-        if self.duplicate_of_id or self.duplicate_ids:
-            self.dupe_type = self.dupe_type or SAME
+    @property
     def complete_description(self) -> str | None:
         """String that summarizes what is known about this document."""
         description = ''
@@ -130,17 +129,7 @@ class DocCfg:
         return description
-    def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
-        """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
-        for id in self.duplicate_ids:
-            dupe_cfg = deepcopy(self)
-            dupe_cfg.id = id
-            dupe_cfg.duplicate_of_id = self.id
-            dupe_cfg.duplicate_ids = []
-            dupe_cfg.dupe_type = self.dupe_type
-            dupe_cfg.is_synthetic = True
-            yield dupe_cfg
+    @property
     def metadata(self) -> Metadata:
         metadata = {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
@@ -149,10 +138,26 @@ class DocCfg:
         return metadata
+    @property
     def timestamp(self) -> datetime | None:
         if self.date:
             return parse(self.date)
+    def __post_init__(self):
+        if self.duplicate_of_id or self.duplicate_ids:
+            self.dupe_type = self.dupe_type or SAME
+    def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
+        """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
+        for id in self.duplicate_ids:
+            dupe_cfg = deepcopy(self)
+            dupe_cfg.id = id
+            dupe_cfg.duplicate_of_id = self.id
+            dupe_cfg.duplicate_ids = []
+            dupe_cfg.dupe_type = self.dupe_type
+            dupe_cfg.is_synthetic = True
+            yield dupe_cfg
     def _props_strs(self) -> list[str]:
         props = []
         add_prop = lambda f, value: props.append(f"{f.name}={value}")

epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl