epstein-files 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +194 -0
- epstein_files/documents/communication.py +53 -0
- epstein_files/documents/document.py +357 -0
- epstein_files/documents/email.py +655 -0
- epstein_files/documents/emails/email_header.py +167 -0
- epstein_files/documents/imessage/text_message.py +93 -0
- epstein_files/documents/json_file.py +23 -0
- epstein_files/documents/messenger_log.py +73 -0
- epstein_files/documents/other_file.py +117 -0
- epstein_files/epstein_files.py +437 -0
- epstein_files/util/constant/common_words.py +94 -0
- epstein_files/util/constant/html.py +57 -0
- epstein_files/util/constant/names.py +261 -0
- epstein_files/util/constant/strings.py +47 -0
- epstein_files/util/constant/urls.py +103 -0
- epstein_files/util/constants.py +1552 -0
- epstein_files/util/data.py +131 -0
- epstein_files/util/env.py +80 -0
- epstein_files/util/file_cfg.py +172 -0
- epstein_files/util/file_helper.py +81 -0
- epstein_files/util/highlighted_group.py +620 -0
- epstein_files/util/rich.py +324 -0
- epstein_files/util/search_result.py +15 -0
- epstein_files/util/word_count.py +191 -0
- epstein_files-1.0.0.dist-info/LICENSE +674 -0
- epstein_files-1.0.0.dist-info/METADATA +60 -0
- epstein_files-1.0.0.dist-info/RECORD +28 -0
- epstein_files-1.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
from epstein_files.util.constant.strings import QUESTION_MARKS, remove_question_marks
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
UNKNOWN = '(unknown)'
|
|
5
|
+
|
|
6
|
+
# Texting Names
|
|
7
|
+
ANDRZEJ_DUDA = 'Andrzej Duda or entourage'
|
|
8
|
+
ANIL_AMBANI = "Anil Ambani"
|
|
9
|
+
ANTHONY_SCARAMUCCI = "Anthony Scaramucci"
|
|
10
|
+
ARDA_BESKARDES = 'Arda Beskardes'
|
|
11
|
+
EVA = 'Eva (Dubin?)'
|
|
12
|
+
JEFFREY_EPSTEIN = 'Jeffrey Epstein'
|
|
13
|
+
JOI_ITO = 'Joi Ito'
|
|
14
|
+
LARRY_SUMMERS = 'Larry Summers'
|
|
15
|
+
MELANIE_WALKER = 'Melanie Walker'
|
|
16
|
+
MIROSLAV_LAJCAK = 'Miroslav Lajčák'
|
|
17
|
+
STACEY_PLASKETT = 'Stacey Plaskett'
|
|
18
|
+
SOON_YI_PREVIN = 'Soon-Yi Previn'
|
|
19
|
+
STEVE_BANNON = 'Steve Bannon'
|
|
20
|
+
STEVEN_SINOFSKY = 'Steven Sinofsky'
|
|
21
|
+
TERJE_ROD_LARSEN = 'Terje Rød-Larsen'
|
|
22
|
+
|
|
23
|
+
# Email Names - no trailing periods! (messes up regexes)
|
|
24
|
+
AL_SECKEL = 'Al Seckel'
|
|
25
|
+
ALAN_DERSHOWITZ = 'Alan Dershowitz'
|
|
26
|
+
ALIREZA_ITTIHADIEH = 'Alireza Ittihadieh'
|
|
27
|
+
AMANDA_ENS = 'Amanda Ens'
|
|
28
|
+
ANDRES_SERRANO = 'Andres Serrano'
|
|
29
|
+
ANN_MARIE_VILLAFANA = 'Ann Marie Villafana'
|
|
30
|
+
ANAS_ALRASHEED = 'Anas Alrasheed'
|
|
31
|
+
ANTHONY_BARRETT = 'Anthony Barrett'
|
|
32
|
+
ARIANE_DE_ROTHSCHILD = 'Ariane de Rothschild'
|
|
33
|
+
AZIZA_ALAHMADI = 'Aziza Alahmadi'
|
|
34
|
+
BARBRO_C_EHNBOM = 'Barbro C. Ehnbom'
|
|
35
|
+
BARRY_J_COHEN = 'Barry J. Cohen'
|
|
36
|
+
BENNET_MOSKOWITZ = 'Bennet Moskowitz'
|
|
37
|
+
BILL_SIEGEL = 'Bill Siegel'
|
|
38
|
+
BRAD_EDWARDS = 'Brad Edwards'
|
|
39
|
+
BRAD_KARP = 'Brad Karp'
|
|
40
|
+
BRAD_WECHSLER = 'Brad Wechsler'
|
|
41
|
+
BORIS_NIKOLIC = 'Boris Nikolic'
|
|
42
|
+
CECILE_DE_JONGH = 'Cecile de Jongh'
|
|
43
|
+
CECILIA_STEEN = 'Cecilia Steen'
|
|
44
|
+
CELINA_DUBIN = 'Celina Dubin'
|
|
45
|
+
CHRISTINA_GALBRAITH = 'Christina Galbraith' # Works with Tyler Shears on reputation stuff
|
|
46
|
+
DANIEL_SABBA = 'Daniel Sabba'
|
|
47
|
+
DANIEL_SIAD = 'Daniel Siad'
|
|
48
|
+
DANNY_FROST = 'Danny Frost'
|
|
49
|
+
DARREN_INDYKE = 'Darren Indyke'
|
|
50
|
+
DAVID_BLAINE = 'David Blaine'
|
|
51
|
+
DAVID_FISZEL = 'David Fiszel'
|
|
52
|
+
DAVID_HAIG = 'David Haig'
|
|
53
|
+
DAVID_INGRAM = 'David Ingram'
|
|
54
|
+
DAVID_SCHOEN = 'David Schoen'
|
|
55
|
+
DAVID_STERN = 'David Stern'
|
|
56
|
+
DEBBIE_FEIN = 'Debbie Fein'
|
|
57
|
+
DEEPAK_CHOPRA = 'Deepak Chopra'
|
|
58
|
+
DIANE_ZIMAN = 'Diane Ziman'
|
|
59
|
+
DONALD_TRUMP = 'Donald Trump'
|
|
60
|
+
EDUARDO_ROBLES = 'Eduardo Robles'
|
|
61
|
+
EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
|
|
62
|
+
EHUD_BARAK = 'Ehud Barak'
|
|
63
|
+
ERIC_ROTH = 'Eric Roth'
|
|
64
|
+
FAITH_KATES = 'Faith Kates'
|
|
65
|
+
FRED_HADDAD = 'Fred Haddad'
|
|
66
|
+
GERALD_BARTON = 'Gerald Barton'
|
|
67
|
+
GERALD_LEFCOURT = 'Gerald Lefcourt'
|
|
68
|
+
GHISLAINE_MAXWELL = 'Ghislaine Maxwell'
|
|
69
|
+
GLENN_DUBIN = 'Glenn Dubin'
|
|
70
|
+
GORDON_GETTY = 'Gordon Getty'
|
|
71
|
+
GWENDOLYN_BECK = 'Gwendolyn Beck' # https://www.lbc.co.uk/article/who-gwendolyn-beck-epstein-andrew-5HjdN66_2/
|
|
72
|
+
HEATHER_MANN = 'Heather Mann'
|
|
73
|
+
IAN_OSBORNE = 'Ian Osborne'
|
|
74
|
+
INTELLIGENCE_SQUARED = 'Intelligence Squared'
|
|
75
|
+
JABOR_Y = 'Jabor Y' # mysterious "influential man in Qatar"
|
|
76
|
+
JACK_GOLDBERGER = 'Jack Goldberger'
|
|
77
|
+
JACK_SCAROLA = 'Jack Scarola'
|
|
78
|
+
JACKIE_PERCZEK = 'Jackie Perczek'
|
|
79
|
+
JAMES_HILL = 'James Hill'
|
|
80
|
+
JAY_LEFKOWITZ = 'Jay Lefkowitz'
|
|
81
|
+
JEAN_HUGUEN = 'Jean Huguen'
|
|
82
|
+
JEAN_LUC_BRUNEL = 'Jean Luc Brunel'
|
|
83
|
+
JEFF_FULLER = 'Jeff Fuller'
|
|
84
|
+
JENNIFER_JACQUET = 'Jennifer Jacquet'
|
|
85
|
+
JEREMY_RUBIN = 'Jeremy Rubin' # Bitcoin dev
|
|
86
|
+
JES_STALEY = 'Jes Staley'
|
|
87
|
+
JESSICA_CADWELL = 'Jessica Cadwell' # Paralegal?
|
|
88
|
+
JIDE_ZEITLIN = 'Jide Zeitlin'
|
|
89
|
+
JOHN_BROCKMAN = "John Brockman"
|
|
90
|
+
JOHN_PAGE = 'John Page'
|
|
91
|
+
JOHNNY_EL_HACHEM = 'Johnny el Hachem'
|
|
92
|
+
JONATHAN_FARKAS = 'Jonathan Farkas'
|
|
93
|
+
JOSCHA_BACH = 'Joscha Bach'
|
|
94
|
+
JP_MORGAN_USGIO = 'us.gio@jpmorgan.com'
|
|
95
|
+
KATHERINE_KEATING = 'Katherine Keating'
|
|
96
|
+
KATHRYN_RUEMMLER = 'Kathryn Ruemmler'
|
|
97
|
+
KEN_JENNE = 'Ken Jenne'
|
|
98
|
+
KEN_STARR = 'Ken Starr'
|
|
99
|
+
KENNETH_E_MAPP = 'Kenneth E. Mapp'
|
|
100
|
+
LANDON_THOMAS = 'Landon Thomas Jr'
|
|
101
|
+
LAWRANCE_VISOSKI = 'Lawrance Visoski'
|
|
102
|
+
LAWRENCE_KRAUSS = 'Lawrence Krauss'
|
|
103
|
+
LEON_BLACK = 'Leon Black'
|
|
104
|
+
LESLEY_GROFF = 'Lesley Groff'
|
|
105
|
+
LILLY_SANCHEZ = 'Lilly Sanchez'
|
|
106
|
+
LINDA_STONE = 'Linda Stone'
|
|
107
|
+
LISA_NEW = 'Lisa New'
|
|
108
|
+
MANUELA_MARTINEZ = 'Manuela Martinez'
|
|
109
|
+
MARC_LEON = 'Marc Leon'
|
|
110
|
+
MARIANA_IDZKOWSKA = 'Mariana Idźkowska'
|
|
111
|
+
MARK_EPSTEIN = 'Mark Epstein'
|
|
112
|
+
MARK_TRAMO = 'Mark Tramo'
|
|
113
|
+
MARTIN_NOWAK = 'Martin Nowak'
|
|
114
|
+
MARTIN_WEINBERG = "Martin Weinberg"
|
|
115
|
+
MASHA_DROKOVA = 'Masha Drokova'
|
|
116
|
+
MELANIE_SPINELLA = 'Melanie Spinella'
|
|
117
|
+
MERWIN_DELA_CRUZ = 'Merwin Dela Cruz'
|
|
118
|
+
MICHAEL_BUCHHOLTZ = 'Michael Buchholtz'
|
|
119
|
+
MICHAEL_MILLER = 'Michael Miller'
|
|
120
|
+
MICHAEL_SITRICK = 'Michael Sitrick'
|
|
121
|
+
MICHAEL_WOLFF = "Michael Wolff"
|
|
122
|
+
MOHAMED_WAHEED_HASSAN = 'Mohamed Waheed Hassan'
|
|
123
|
+
MORTIMER_ZUCKERMAN = 'Mortimer Zuckerman'
|
|
124
|
+
MOSHE_HOFFMAN = 'Moshe Hoffman'
|
|
125
|
+
NADIA_MARCINKO = 'Nadia Marcinko'
|
|
126
|
+
NEAL_KASSELL = 'Neal Kassell'
|
|
127
|
+
NICHOLAS_RIBIS = 'Nicholas Ribis'
|
|
128
|
+
NOAM_CHOMSKY = 'Noam Chomsky'
|
|
129
|
+
NORMAN_D_RAU = 'Norman D. Rau'
|
|
130
|
+
OLIVIER_COLOM = 'Olivier Colom'
|
|
131
|
+
PAUL_BARRETT = 'Paul Barrett'
|
|
132
|
+
PAUL_KRASSNER = 'Paul Krassner'
|
|
133
|
+
PAUL_MORRIS = 'Paul Morris'
|
|
134
|
+
PAUL_PROSPERI = 'Paul Prosperi'
|
|
135
|
+
PAULA = f"Paula Heil Fisher {QUESTION_MARKS}" # the last email about opera lines up but if Fisher was supposedly w/Epstein at Bear Stearns the timeline is a bit weird for her to call him "Unc"
|
|
136
|
+
PEGGY_SIEGAL = 'Peggy Siegal'
|
|
137
|
+
PETER_ATTIA = 'Peter Attia'
|
|
138
|
+
PETER_MANDELSON = 'Peter Mandelson'
|
|
139
|
+
PETER_THIEL = 'Peter Thiel'
|
|
140
|
+
PRINCE_ANDREW = 'Prince Andrew'
|
|
141
|
+
PUREVSUREN_LUNDEG = 'Purevsuren Lundeg'
|
|
142
|
+
RAAFAT_ALSABBAGH = 'Raafat Alsabbagh'
|
|
143
|
+
REID_HOFFMAN = 'Reid Hoffman'
|
|
144
|
+
REID_WEINGARTEN = 'Reid Weingarten'
|
|
145
|
+
RENATA_BOLOTOVA = 'Renata Bolotova'
|
|
146
|
+
RICHARD_KAHN = 'Richard Kahn'
|
|
147
|
+
ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
|
|
148
|
+
ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
|
|
149
|
+
ROBERT_TRIVERS = 'Robert Trivers'
|
|
150
|
+
ROGER_SCHANK = 'Roger Schank'
|
|
151
|
+
ROSS_GOW = 'Ross Gow'
|
|
152
|
+
SAMUEL_LEFF = 'Samuel Leff'
|
|
153
|
+
SCOTT_J_LINK = 'Scott J. Link'
|
|
154
|
+
SEAN_BANNON = 'Sean Bannon'
|
|
155
|
+
SHAHER_ABDULHAK_BESHER = f'Shaher Abdulhak Besher {QUESTION_MARKS}'
|
|
156
|
+
STEPHEN_HANSON = 'Stephen Hanson'
|
|
157
|
+
STEVEN_HOFFENBERG = 'Steven Hoffenberg'
|
|
158
|
+
STEVEN_PFEIFFER = 'Steven Pfeiffer'
|
|
159
|
+
SULTAN_BIN_SULAYEM = 'Sultan Ahmed Bin Sulayem'
|
|
160
|
+
SVETLANA_POZHIDAEVA = 'Svetlana Pozhidaeva'
|
|
161
|
+
TERRY_KAFKA = 'Terry Kafka'
|
|
162
|
+
THANU_BOONYAWATANA = 'Thanu Boonyawatana'
|
|
163
|
+
THORBJORN_JAGLAND = 'Thorbjørn Jagland'
|
|
164
|
+
TOM_BARRACK = 'Tom Barrack'
|
|
165
|
+
TOM_PRITZKER = 'Tom Pritzker'
|
|
166
|
+
TONJA_HADDAD_COLEMAN = 'Tonja Haddad Coleman'
|
|
167
|
+
TYLER_SHEARS = 'Tyler Shears' # Reputation manager, like Al Seckel
|
|
168
|
+
VINIT_SAHNI = 'Vinit Sahni'
|
|
169
|
+
ZUBAIR_KHAN = 'Zubair Khan'
|
|
170
|
+
|
|
171
|
+
# No communications but name is in the files
|
|
172
|
+
BILL_GATES = 'Bill Gates'
|
|
173
|
+
ELON_MUSK = 'Elon Musk'
|
|
174
|
+
HENRY_HOLT = 'Henry Holt' # Actually a company?
|
|
175
|
+
IVANKA = 'Ivanka'
|
|
176
|
+
JAMES_PATTERSON = 'James Patterson'
|
|
177
|
+
JARED_KUSHNER = 'Jared Kushner'
|
|
178
|
+
JULIE_K_BROWN = 'Julie K. Brown'
|
|
179
|
+
KARIM_SADJADPOUR = 'Karim Sadjadpour'
|
|
180
|
+
MICHAEL_J_BOCCIO = 'Michael J. Boccio'
|
|
181
|
+
PAUL_G_CASSELL = 'Paul G. Cassell'
|
|
182
|
+
RUDY_GIULIANI = 'Rudy Giuliani'
|
|
183
|
+
TULSI_GABBARD = 'Tulsi Gabbard'
|
|
184
|
+
VIRGINIA_GIUFFRE = 'Virginia Giuffre'
|
|
185
|
+
|
|
186
|
+
# Organizations
|
|
187
|
+
CNN = 'CNN'
|
|
188
|
+
DEUTSCHE_BANK = 'Deutsche Bank'
|
|
189
|
+
GOLDMAN_SACHS = 'Goldman Sachs'
|
|
190
|
+
HARVARD = 'Harvard'
|
|
191
|
+
INSIGHTS_POD = 'InsightsPod' # Zubair bots
|
|
192
|
+
JP_MORGAN = 'JP Morgan'
|
|
193
|
+
OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
|
|
194
|
+
|
|
195
|
+
# Locations
|
|
196
|
+
VIRGIN_ISLANDS = 'Virgin Islands'
|
|
197
|
+
|
|
198
|
+
# First and last names that should NOT be made part of a highlighting regex for emailers
|
|
199
|
+
NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
200
|
+
'Al', 'Alfredo', 'Allen', 'Alex', 'Alexander', 'Amanda', 'Andres', 'Andrew',
|
|
201
|
+
'Bard', 'Barry', 'Bill', 'Black', 'Brad', 'Bruce',
|
|
202
|
+
'Carolyn', 'Chris', 'Christina',
|
|
203
|
+
'Dan', 'Daniel', 'Danny', 'Darren', 'Dave', 'David',
|
|
204
|
+
'Ed', 'Edward', 'Edwards', 'Epstein', 'Eric', 'Erika', 'Etienne',
|
|
205
|
+
'Faith', 'Fred', 'Frost', 'Fuller',
|
|
206
|
+
'George',
|
|
207
|
+
'Heather', 'Henry', 'Hill', 'Hoffman',
|
|
208
|
+
'Ian',
|
|
209
|
+
'Jack', 'James', 'Jay', 'Jean', 'Jeff', 'Jeffrey', 'Jennifer', 'Jeremy', 'jessica', 'Joel', 'John', 'Jon', 'Jonathan', 'Joseph', 'Jr',
|
|
210
|
+
'Kahn', 'Katherine', 'Ken', 'Kevin',
|
|
211
|
+
'Leon', 'Lesley', 'Linda', 'Link', 'Lisa',
|
|
212
|
+
'Mann', 'Marc', 'Marie', 'Mark', 'Martin', 'Melanie', 'Michael', 'Mike', 'Miller', 'Mitchell', 'Miles', 'Morris', 'Moskowitz',
|
|
213
|
+
'Nancy', 'Neal', 'New',
|
|
214
|
+
'Paul', 'Paula', 'Pen', 'Peter', 'Philip',
|
|
215
|
+
'Randall', 'Reid', 'Richard', 'Robert', 'Rodriguez', 'Roger', 'Rosenberg', 'Ross', 'Roth', 'Rubin',
|
|
216
|
+
'Scott', 'Sean', 'Stanley', 'Stern', 'Stephen', 'Steve', 'Steven', 'Stone', 'Susan',
|
|
217
|
+
'The', 'Thomas', 'Tim', 'Tom', 'Tyler',
|
|
218
|
+
'Victor',
|
|
219
|
+
'Wade',
|
|
220
|
+
"Y",
|
|
221
|
+
]]
|
|
222
|
+
|
|
223
|
+
# Names to color white in the word counts
|
|
224
|
+
OTHER_NAMES = """
|
|
225
|
+
aaron albert alberto alec alex alexandra alice allen anderson andre andres ann anna anne ariana arthur
|
|
226
|
+
baldwin barack barbro barry ben benjamin berger bert binant bob bonner boyden brad bradley brady branson bruce bruno burton
|
|
227
|
+
chapman charles charlie chris christopher clint cohen colin collins conway
|
|
228
|
+
dave davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
229
|
+
ed edmond elizabeth emily entwistle erik erika etienne evelyn
|
|
230
|
+
ferguson flachsbart francis franco frank
|
|
231
|
+
gardner gary geoff geoffrey george gilbert goldberg gonzalez gould graham greene guarino gwyneth
|
|
232
|
+
hancock harold harrison harry helen hirsch hofstadter horowitz hussein
|
|
233
|
+
isaac isaacson
|
|
234
|
+
jamie james jane janet jason jen jennifer jim joe joel johnson jones julie justin
|
|
235
|
+
kate kathy kelly kevin kim kruger kyle
|
|
236
|
+
leonard lenny lieberman louis lynch lynn
|
|
237
|
+
marcus marianne matt matthew melissa michele michelle mike mitchell moore moscowitz
|
|
238
|
+
nicole nussbaum
|
|
239
|
+
paul paula paulson philip philippe
|
|
240
|
+
rafael ray richardson rob robin rodriguez ron rudolph ryan
|
|
241
|
+
sara sarah seligman serge sergey silverman sloman smith snowden sorkin stanley steele stevie stewart susan
|
|
242
|
+
ted theresa thompson tiffany tim timothy tom
|
|
243
|
+
valeria
|
|
244
|
+
walter warren weinstein weiss william
|
|
245
|
+
zach zack
|
|
246
|
+
""".strip().split()
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def constantize_name(name: str) -> str:
    """
    Map a person's display name to the name of the module-level constant that holds it.

    Returns the constant's identifier (e.g. 'TERJE_ROD_LARSEN') when a string constant with
    that identifier exists in this module, otherwise returns `name` wrapped in single quotes.
    """
    import unicodedata  # local import so this block carries its own dependency

    # This display name does not follow the FIRST_LAST pattern of its constant.
    if name == 'Andrzej Duda or entourage':
        return 'ANDRZEJ_DUDA'

    variable_name = remove_question_marks(name)
    variable_name = variable_name.removesuffix('.').removesuffix('Jr').strip()

    # 'ø' has no Unicode decomposition so it is mapped by hand; every other accented letter
    # (ź, č, á, ...) is handled by decomposing and dropping the combining marks.
    variable_name = variable_name.replace('ø', 'o').replace('Ø', 'O')
    decomposed = unicodedata.normalize('NFKD', variable_name)
    variable_name = ''.join(char for char in decomposed if not unicodedata.combining(char))

    variable_name = variable_name.upper().replace('-', '_').replace(' ', '_').replace('.', '')

    # Only string constants count as names; this skips unrelated globals like OTHER_NAMES.
    if isinstance(globals().get(variable_name), str):
        return variable_name

    return f"'{name}'"
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Document subclass names (this sucks)
|
|
6
|
+
DOCUMENT_CLASS = 'Document'
|
|
7
|
+
EMAIL_CLASS = 'Email'
|
|
8
|
+
JSON_FILE_CLASS = 'JsonFile'
|
|
9
|
+
MESSENGER_LOG_CLASS = 'MessengerLog'
|
|
10
|
+
OTHER_FILE_CLASS = 'OtherFile'
|
|
11
|
+
|
|
12
|
+
# Publications
|
|
13
|
+
BBC = 'BBC'
|
|
14
|
+
BLOOMBERG = 'Bloomberg'
|
|
15
|
+
CHINA_DAILY = "China Daily"
|
|
16
|
+
DAILY_MAIL = 'Daily Mail'
|
|
17
|
+
DAILY_TELEGRAPH = "Daily Telegraph"
|
|
18
|
+
LA_TIMES = 'LA Times'
|
|
19
|
+
MIAMI_HERALD = 'Miami Herald'
|
|
20
|
+
NYT_ARTICLE = 'NYT article about'
|
|
21
|
+
NYT_COLUMN = 'NYT column about'
|
|
22
|
+
THE_REAL_DEAL = 'The Real Deal'
|
|
23
|
+
WAPO = 'WaPo'
|
|
24
|
+
|
|
25
|
+
# Site types
|
|
26
|
+
EMAIL = 'email'
|
|
27
|
+
TEXT_MESSAGE = 'text message'
|
|
28
|
+
SiteType = Literal['email', 'text message']
|
|
29
|
+
|
|
30
|
+
# Styles
|
|
31
|
+
OTHER_SITE_LINK_STYLE = 'dark_goldenrod'
|
|
32
|
+
TIMESTAMP_STYLE = 'turquoise4'
# Dim variant is derived from TIMESTAMP_STYLE so the two cannot drift apart.
TIMESTAMP_DIM = f"{TIMESTAMP_STYLE} dim"
|
|
34
|
+
|
|
35
|
+
# Misc
|
|
36
|
+
AUTHOR = 'author'
|
|
37
|
+
DEFAULT = 'default'
|
|
38
|
+
EVERYONE = 'everyone'
|
|
39
|
+
HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
|
|
40
|
+
NA = 'n/a'
|
|
41
|
+
REDACTED = '<REDACTED>'
|
|
42
|
+
URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
|
|
43
|
+
QUESTION_MARKS = '(???)'
# Matches the marker only at the very end of a string, together with the space before it.
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')


def remove_question_marks(name: str) -> str:
    """Return `name` with a trailing ' (???)' marker removed; other strings are returned unchanged."""
    return QUESTION_MARKS_REGEX.sub('', name)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import urllib.parse
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
from inflection import parameterize
|
|
6
|
+
from rich.text import Text
|
|
7
|
+
|
|
8
|
+
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
9
|
+
from epstein_files.util.file_helper import coerce_file_stem, filename_for_id
|
|
10
|
+
|
|
11
|
+
# Style stuff
|
|
12
|
+
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
13
|
+
TEXT_LINK = 'text_link'
|
|
14
|
+
|
|
15
|
+
# External site names
|
|
16
|
+
ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
|
|
17
|
+
|
|
18
|
+
EPSTEIN_MEDIA = 'epstein.media'
|
|
19
|
+
EPSTEIN_WEB = 'EpsteinWeb'
|
|
20
|
+
EPSTEINIFY = 'epsteinify'
|
|
21
|
+
JMAIL = 'Jmail'
|
|
22
|
+
|
|
23
|
+
# URLs
|
|
24
|
+
ATTRIBUTIONS_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages/blob/master/epstein_files/util/constants.py'
|
|
25
|
+
COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
|
|
26
|
+
COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
|
|
27
|
+
EPSTEINIFY_URL = 'https://epsteinify.com'
|
|
28
|
+
EPSTEIN_MEDIA_URL = 'https://www.epstein.media'
|
|
29
|
+
EPSTEIN_WEB_URL = 'https://epsteinweb.org'
|
|
30
|
+
JMAIL_URL = 'https://jmail.world'
|
|
31
|
+
OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
|
|
32
|
+
RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
|
|
33
|
+
SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
|
|
34
|
+
WORD_COUNT_URL = 'https://michelcrypt4d4mus.github.io/epstein_text_messages/epstein_emails_word_count.html'
|
|
35
|
+
|
|
36
|
+
SITE_URLS: dict[SiteType, str] = {
|
|
37
|
+
EMAIL: 'https://michelcrypt4d4mus.github.io/epstein_emails_house_oversight/',
|
|
38
|
+
TEXT_MESSAGE: 'https://michelcrypt4d4mus.github.io/epstein_text_messages/',
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
|
|
42
|
+
EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
|
|
43
|
+
EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
|
|
44
|
+
EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# TODO: epsteinify.com seems to be down as of 2025-12-30, switched to epstein.web for links
|
|
49
|
+
def epsteinify_api_url(file_id):
    """Return the epsteinify API URL for the HOUSE_OVERSIGHT document with the given ID."""
    return f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"


def epsteinify_doc_link_markup(filename_or_id, style = TEXT_LINK):
    """Return rich markup linking to a document on epsteinify."""
    return external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
|
|
51
|
+
def epsteinify_doc_link_txt(filename_or_id, style = TEXT_LINK):
    """Return a rich Text object linking to a document on epsteinify."""
    # external_doc_link_markup() takes the site as its first argument. Without it the
    # filename was used as the site key and the style as the filename.
    return Text.from_markup(external_doc_link_markup(EPSTEINIFY, filename_or_id, style))
|
|
52
|
+
def epsteinify_doc_url(file_stem):
    """Return the epsteinify document page URL for a file stem."""
    base_url = DOC_LINK_BASE_URLS[EPSTEINIFY]
    return build_doc_url(base_url, file_stem)


def epsteinify_name_url(name):
    """Return the epsteinify URL that filters by a (URL-quoted) person name."""
    quoted_name = urllib.parse.quote(name)
    return f"{EPSTEINIFY_URL}/?name={quoted_name}"
|
|
54
|
+
|
|
55
|
+
def epstein_media_doc_url(file_stem):
    """Return the epstein.media URL for a file stem; the stem is lowercased in the URL."""
    return build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, lowercase=True)


def epstein_media_doc_link_markup(filename_or_id, style = TEXT_LINK):
    """Return rich markup linking to a document on epstein.media."""
    return external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)


def epstein_media_doc_link_txt(filename_or_id, style = TEXT_LINK):
    """Return a rich Text object linking to a document on epstein.media."""
    markup = epstein_media_doc_link_markup(filename_or_id, style)
    return Text.from_markup(markup)
|
|
58
|
+
|
|
59
|
+
def epstein_web_doc_url(file_stem):
    """Return the EpsteinWeb image URL (.jpg) for a file stem."""
    images_url = DOC_LINK_BASE_URLS[EPSTEIN_WEB]
    return f"{images_url}/{file_stem}.jpg"


def epstein_web_person_url(person):
    """Return the EpsteinWeb page URL for a person, using the parameterized form of the name."""
    slug = parameterize(person)
    return f"{EPSTEIN_WEB_URL}/{slug}"


def epstein_web_search_url(s):
    """Return the EpsteinWeb file search URL for the (URL-quoted) query `s`."""
    query = urllib.parse.quote(s)
    return f"{EPSTEIN_WEB_URL}/?ewmfileq={query}&ewmfilepp=20"
|
|
62
|
+
|
|
63
|
+
def search_archive_url(txt):
    """Return a search URL for `txt` in the Courier Newsroom Pinpoint archive."""
    query = urllib.parse.quote(txt)
    return f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={query}&p=1"


def search_coffeezilla_url(txt):
    """Return a search URL for `txt` in the Coffeezilla Pinpoint archive."""
    query = urllib.parse.quote(txt)
    return f"{COFFEEZILLA_ARCHIVE_URL}&q={query}&p=1"


def search_jmail_url(txt):
    """Return a Jmail search URL for `txt`."""
    query = urllib.parse.quote(txt)
    return f"{JMAIL_URL}/search?q={query}"


def search_twitter_url(txt):
    """Return an x.com live search URL for `txt`."""
    query = urllib.parse.quote(txt)
    return f"https://x.com/search?q={query}&src=typed_query&f=live"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def build_doc_url(base_url: str, filename_or_id: int | str, lowercase: bool = False) -> str:
    """
    Join `base_url` and the file stem derived from `filename_or_id` with a '/'.

    The stem comes from coerce_file_stem(); it is lowercased only when `lowercase` is True.
    """
    stem = coerce_file_stem(filename_or_id)

    if lowercase:
        stem = stem.lower()

    return f"{base_url}/{stem}"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
    """
    Return rich markup linking to a document on one of the external sites.

    The link text is the file stem from coerce_file_stem(); the URL is built from the
    site's entry in DOC_LINK_BASE_URLS.
    """
    # epstein_media_doc_url() lowercases the stem for epstein.media, so do the same here to
    # keep both helpers pointing at the same URL. The link text keeps its original case.
    # NOTE(review): assumes epstein.media needs lowercase paths; confirm against the site.
    url = build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id, site == EPSTEIN_MEDIA)
    return link_markup(url, coerce_file_stem(filename_or_id), style)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> Text:
    """Return a rich Text object built from external_doc_link_markup() for the same arguments."""
    markup = external_doc_link_markup(site, filename_or_id, style)
    return Text.from_markup(markup)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def link_markup(
    url: str,
    link_text: str | None = None,
    style: str | None = ARCHIVE_LINK_COLOR,
    underline: bool = True
) -> str:
    """
    Return rich console markup for a hyperlink.

    Args:
        url: Link target.
        link_text: Visible text; defaults to `url` without its 'https://' prefix.
        style: Rich style name wrapped around the link; None or '' means no style.
        underline: Whether to add 'underline' to the style.
    """
    link_text = link_text or url.removeprefix('https://')
    link = f"[link={url}]{link_text}[/link]"
    style = ((style or '') + (' underline' if underline else '')).strip()

    # With no style and no underline the wrapper would be "[]...[/]", which rich cannot
    # parse ("[]" is literal text and "[/]" has nothing to close), so emit only the link.
    if not style:
        return link

    return f"[{style}]{link}[/{style}]"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def link_text_obj(url: str, link_text: str | None = None, style: str = ARCHIVE_LINK_COLOR) -> Text:
    """Return a rich Text object built from link_markup() for the same url, text and style."""
    markup = link_markup(url, link_text, style)
    return Text.from_markup(markup)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def search_coffeezilla_link(text: str, link_txt: str | None = None, style: str = ARCHIVE_LINK_COLOR) -> Text:
    """
    Return a rich Text link that searches the Coffeezilla archive for `text`.

    `link_txt` is the visible link text; when it is omitted or empty, `text` is shown instead.
    """
    return link_text_obj(search_coffeezilla_url(text), link_txt or text, style)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
|