epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +31 -6
- epstein_files/documents/document.py +5 -1
- epstein_files/documents/email.py +192 -203
- epstein_files/epstein_files.py +2 -3
- epstein_files/person.py +29 -9
- epstein_files/util/constant/names.py +9 -7
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constant/urls.py +13 -2
- epstein_files/util/constants.py +35 -11
- epstein_files/util/data.py +1 -0
- epstein_files/util/env.py +9 -6
- epstein_files/util/highlighted_group.py +263 -117
- epstein_files/util/output.py +18 -9
- epstein_files/util/rich.py +7 -2
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/METADATA +1 -1
- epstein_files-1.2.5.dist-info/RECORD +34 -0
- epstein_files-1.2.1.dist-info/RECORD +0 -34
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0
epstein_files/person.py
CHANGED
|
@@ -120,7 +120,7 @@ class Person:
|
|
|
120
120
|
else:
|
|
121
121
|
email_count = len(self.unique_emails())
|
|
122
122
|
num_days = self.email_conversation_length_in_days()
|
|
123
|
-
title_suffix = f"
|
|
123
|
+
title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
|
|
124
124
|
|
|
125
125
|
title = f"Found {email_count} emails {title_suffix}"
|
|
126
126
|
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
|
|
@@ -136,8 +136,12 @@ class Person:
|
|
|
136
136
|
highlight_group = self.highlight_group()
|
|
137
137
|
|
|
138
138
|
if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
info = highlight_group.info_for(self.name)
|
|
140
|
+
|
|
141
|
+
if info:
|
|
142
|
+
return info
|
|
143
|
+
|
|
144
|
+
if self.is_uninteresting_cc:
|
|
141
145
|
if self.has_any_epstein_emails():
|
|
142
146
|
return UNINTERESTING_CC_INFO
|
|
143
147
|
else:
|
|
@@ -152,7 +156,7 @@ class Person:
|
|
|
152
156
|
elif self.name is None:
|
|
153
157
|
return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
|
|
154
158
|
elif self.category() == JUNK:
|
|
155
|
-
return Text(f"({JUNK} mail)", style='
|
|
159
|
+
return Text(f"({JUNK} mail)", style='bright_black dim')
|
|
156
160
|
elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
|
|
157
161
|
if self.info_str() == UNINTERESTING_CC_INFO:
|
|
158
162
|
return Text(f"({self.info_str()})", style='wheat4 dim')
|
|
@@ -168,7 +172,11 @@ class Person:
|
|
|
168
172
|
else:
|
|
169
173
|
return None
|
|
170
174
|
else:
|
|
171
|
-
return Text(self.info_str())
|
|
175
|
+
return Text(self.info_str(), style=self.style())
|
|
176
|
+
|
|
177
|
+
def internal_link(self) -> Text:
|
|
178
|
+
"""Kind of like an anchor link to the section of the page containing these emails."""
|
|
179
|
+
return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
|
|
172
180
|
|
|
173
181
|
def is_a_mystery(self) -> bool:
|
|
174
182
|
"""Return True if this is someone we theroetically could know more about."""
|
|
@@ -237,7 +245,13 @@ class Person:
|
|
|
237
245
|
console.line()
|
|
238
246
|
|
|
239
247
|
def sort_key(self) -> list[int | str]:
|
|
240
|
-
counts = [
|
|
248
|
+
counts = [
|
|
249
|
+
len(self.unique_emails()),
|
|
250
|
+
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
|
|
251
|
+
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
|
|
252
|
+
int(self.has_any_epstein_emails()),
|
|
253
|
+
]
|
|
254
|
+
|
|
241
255
|
counts = [-1 * count for count in counts]
|
|
242
256
|
|
|
243
257
|
if args.sort_alphabetical:
|
|
@@ -276,6 +290,11 @@ class Person:
|
|
|
276
290
|
highlighted = highlighted or people
|
|
277
291
|
highlighted_names = [p.name for p in highlighted]
|
|
278
292
|
is_selection = len(people) != len(highlighted) or args.emailers_info
|
|
293
|
+
all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
|
|
294
|
+
email_authors = [p for p in people if p.emails_by() and p.name]
|
|
295
|
+
attributed_emails = [email for email in all_emails if email.author]
|
|
296
|
+
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
|
|
297
|
+
f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
|
|
279
298
|
|
|
280
299
|
if is_selection:
|
|
281
300
|
title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
|
|
@@ -283,7 +302,7 @@ class Person:
|
|
|
283
302
|
else:
|
|
284
303
|
title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
|
|
285
304
|
|
|
286
|
-
table = build_table(title)
|
|
305
|
+
table = build_table(title, caption=footer)
|
|
287
306
|
table.add_column('First')
|
|
288
307
|
table.add_column('Name', max_width=24, no_wrap=True)
|
|
289
308
|
table.add_column('Category', justify='left', style='dim italic')
|
|
@@ -298,6 +317,7 @@ class Person:
|
|
|
298
317
|
|
|
299
318
|
for person in people:
|
|
300
319
|
earliest_email_date = person.earliest_email_date()
|
|
320
|
+
is_on_page = False if show_epstein_total else person.name in highlighted_names
|
|
301
321
|
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
302
322
|
|
|
303
323
|
# Color year rollovers more brightly
|
|
@@ -311,14 +331,14 @@ class Person:
|
|
|
311
331
|
|
|
312
332
|
table.add_row(
|
|
313
333
|
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
|
|
314
|
-
person.
|
|
334
|
+
person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
|
|
315
335
|
person.category_txt(),
|
|
316
336
|
f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
|
|
317
337
|
Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
|
|
318
338
|
Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
|
|
319
339
|
f"{person.email_conversation_length_in_days()}",
|
|
320
340
|
person.info_txt() or '',
|
|
321
|
-
style='' if
|
|
341
|
+
style='' if show_epstein_total or is_on_page else 'dim',
|
|
322
342
|
)
|
|
323
343
|
|
|
324
344
|
return table
|
|
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
|
|
|
61
61
|
DONALD_TRUMP = 'Donald Trump'
|
|
62
62
|
EDUARDO_ROBLES = 'Eduardo Robles'
|
|
63
63
|
EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
|
|
64
|
+
EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
|
|
64
65
|
EHUD_BARAK = 'Ehud Barak'
|
|
65
66
|
ERIC_ROTH = 'Eric Roth'
|
|
66
67
|
FAITH_KATES = 'Faith Kates'
|
|
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
|
|
|
129
130
|
NADIA_MARCINKO = 'Nadia Marcinko'
|
|
130
131
|
NEAL_KASSELL = 'Neal Kassell'
|
|
131
132
|
NICHOLAS_RIBIS = 'Nicholas Ribis'
|
|
133
|
+
NILI_PRIELL_BARAK = 'Nili Priell Barak'
|
|
132
134
|
NOAM_CHOMSKY = 'Noam Chomsky'
|
|
133
135
|
NORMAN_D_RAU = 'Norman D. Rau'
|
|
134
136
|
OLIVIER_COLOM = 'Olivier Colom'
|
|
@@ -215,13 +217,13 @@ UBS = 'UBS'
|
|
|
215
217
|
# First and last names that should be made part of a highlighting regex for emailers
|
|
216
218
|
NAMES_TO_NOT_HIGHLIGHT = """
|
|
217
219
|
al alain alan alfredo allen alex alexander amanda andres andrew anthony
|
|
218
|
-
bard barrett barry bennet bill black bob boris brad bruce
|
|
220
|
+
bard barrett barry bennet bernard bill black bob boris brad brenner bruce
|
|
219
221
|
caroline carolyn chris christina cohen
|
|
220
|
-
dan daniel danny darren dave david donald
|
|
222
|
+
dan daniel danny darren dave david debbie donald
|
|
221
223
|
ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
|
|
222
|
-
faith forget fred friendly frost fuller
|
|
223
|
-
gerald george gold gordon
|
|
224
|
-
haddad harry hay heather henry hill hoffman
|
|
224
|
+
faith fisher forget fred friendly frost fuller
|
|
225
|
+
gates gerald george gold gordon
|
|
226
|
+
haddad harry hay heather henry hill hoffman howard
|
|
225
227
|
ian ivan
|
|
226
228
|
jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
|
|
227
229
|
kahn karl kate katherine kelly ken kevin krassner
|
|
@@ -230,7 +232,7 @@ NAMES_TO_NOT_HIGHLIGHT = """
|
|
|
230
232
|
nancy neal new nicole norman
|
|
231
233
|
owen
|
|
232
234
|
paul paula pen peter philip prince
|
|
233
|
-
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
|
|
235
|
+
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
|
|
234
236
|
scott sean skip stanley stern stephen steve steven stone susan
|
|
235
237
|
the thomas tim tom tony tyler
|
|
236
238
|
victor
|
|
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
|
243
245
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
244
246
|
baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
245
247
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
246
|
-
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
248
|
+
davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
247
249
|
edmond elizabeth emily entwistle erik evelyn
|
|
248
250
|
ferguson flachsbart francis franco frank
|
|
249
251
|
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
3
|
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
4
|
+
from epstein_files.util.logging import logger
|
|
4
5
|
|
|
5
6
|
# Files output by the code
|
|
6
7
|
HTML_DIR = Path('docs')
|
|
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
|
|
|
16
17
|
EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
|
|
17
18
|
|
|
18
19
|
# Deployment URLS
|
|
19
|
-
# NOTE: don't rename these variables without changing deploy.sh
|
|
20
|
+
# NOTE: don't rename these variables without changing deploy.sh
|
|
21
|
+
GH_REPO_NAME = 'epstein_text_messages'
|
|
20
22
|
GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
|
|
21
|
-
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/
|
|
23
|
+
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
|
|
22
24
|
ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
|
|
23
25
|
CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
|
|
24
26
|
JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
|
|
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
|
|
|
44
46
|
def make_clean() -> None:
|
|
45
47
|
"""Delete all build artifacts."""
|
|
46
48
|
for build_file in BUILD_ARTIFACTS:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
49
|
+
for file in [build_file, Path(f"{build_file}.txt")]:
|
|
50
|
+
if file.exists():
|
|
51
|
+
logger.warning(f"Removing build file '{file}'...")
|
|
52
|
+
file.unlink()
|
|
@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
|
|
|
64
64
|
QUESTION_MARKS = '(???)'
|
|
65
65
|
|
|
66
66
|
# Regexes
|
|
67
|
-
|
|
67
|
+
ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
|
|
68
|
+
FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
|
|
68
69
|
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
69
70
|
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
70
71
|
|
|
@@ -6,6 +6,7 @@ from inflection import parameterize
|
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
8
|
from epstein_files.util.constant.output_files import *
|
|
9
|
+
from epstein_files.util.constant.strings import remove_question_marks
|
|
9
10
|
from epstein_files.util.env import args
|
|
10
11
|
from epstein_files.util.file_helper import coerce_file_stem
|
|
11
12
|
|
|
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
|
|
|
22
23
|
ROLLCALL = 'RollCall'
|
|
23
24
|
TWITTER = 'search X'
|
|
24
25
|
|
|
25
|
-
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/
|
|
26
|
+
GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
|
|
26
27
|
GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
|
|
27
28
|
ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
|
|
28
29
|
EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
|
|
30
|
+
TO_FROM = 'to/from'
|
|
29
31
|
|
|
30
32
|
extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
|
|
31
33
|
|
|
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
|
|
|
72
74
|
search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
|
|
73
75
|
search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
|
|
74
76
|
|
|
75
|
-
|
|
76
77
|
PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
|
|
77
78
|
EPSTEIN_MEDIA: epstein_media_person_url,
|
|
78
79
|
EPSTEIN_WEB: epstein_web_person_url,
|
|
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
|
|
|
98
99
|
return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
|
|
99
100
|
|
|
100
101
|
|
|
102
|
+
def internal_link_to_emails(name: str) -> str:
|
|
103
|
+
"""e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
|
|
104
|
+
search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
|
|
105
|
+
return f"{this_site_url()}#:~:text={search_term}"
|
|
106
|
+
|
|
107
|
+
|
|
101
108
|
def link_markup(
|
|
102
109
|
url: str,
|
|
103
110
|
link_text: str | None = None,
|
|
@@ -121,6 +128,10 @@ def other_site_url() -> str:
|
|
|
121
128
|
return SITE_URLS[other_site_type()]
|
|
122
129
|
|
|
123
130
|
|
|
131
|
+
def this_site_url() -> str:
|
|
132
|
+
return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
|
|
133
|
+
|
|
134
|
+
|
|
124
135
|
CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
|
|
125
136
|
THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
|
|
126
137
|
THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)
|
epstein_files/util/constants.py
CHANGED
|
@@ -39,6 +39,7 @@ HEADER_ABBREVIATIONS = {
|
|
|
39
39
|
'MBZ': "Mohamed bin Zayed Al Nahyan (Emirates sheikh)",
|
|
40
40
|
"Miro": MIROSLAV_LAJCAK,
|
|
41
41
|
"Mooch": "Anthony 'The Mooch' Scaramucci (Skybridge crypto bro)",
|
|
42
|
+
"NPA": 'non-prosecution agreement',
|
|
42
43
|
"Terje": TERJE_ROD_LARSEN,
|
|
43
44
|
"VI": f"U.S. {VIRGIN_ISLANDS}",
|
|
44
45
|
"Woody": "Woody Allen",
|
|
@@ -52,14 +53,14 @@ HEADER_ABBREVIATIONS = {
|
|
|
52
53
|
|
|
53
54
|
# Emailers
|
|
54
55
|
EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
55
|
-
ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|
|
|
56
|
+
ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
|
|
56
57
|
ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
|
|
57
58
|
AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
|
|
58
59
|
ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
|
|
59
60
|
ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
|
|
60
61
|
ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
|
|
61
62
|
ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
|
|
62
|
-
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?
|
|
63
|
+
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Roths(ch|hc)?ild|Ariane', re.IGNORECASE),
|
|
63
64
|
BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
|
|
64
65
|
BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
|
|
65
66
|
BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
|
|
@@ -86,7 +87,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
86
87
|
JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
|
|
87
88
|
JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
|
|
88
89
|
JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
|
|
89
|
-
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel
|
|
90
|
+
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
|
|
90
91
|
JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
|
|
91
92
|
JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
|
|
92
93
|
JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
|
|
@@ -104,7 +105,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
104
105
|
LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
|
|
105
106
|
MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
|
|
106
107
|
MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
|
|
107
|
-
MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
|
|
108
|
+
MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
|
|
108
109
|
MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
|
|
109
110
|
MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
|
|
110
111
|
MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
|
|
@@ -113,7 +114,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
113
114
|
MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
|
|
114
115
|
MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
|
|
115
116
|
MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
|
|
116
|
-
MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]
|
|
117
|
+
MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
|
|
117
118
|
MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
|
|
118
119
|
MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
|
|
119
120
|
NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
|
|
@@ -195,6 +196,7 @@ EMAILERS = [
|
|
|
195
196
|
'Peter Aldhous',
|
|
196
197
|
'Peter Green',
|
|
197
198
|
ROGER_SCHANK,
|
|
199
|
+
'Roy Black',
|
|
198
200
|
STEVEN_PFEIFFER,
|
|
199
201
|
'Steven Victor MD',
|
|
200
202
|
'Susan Edelman',
|
|
@@ -513,7 +515,7 @@ EMAILS_CONFIG = [
|
|
|
513
515
|
recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
|
|
514
516
|
duplicate_ids=['031973']
|
|
515
517
|
),
|
|
516
|
-
EmailCfg(id='032457', author=PAUL_KRASSNER), # Bad OCR (nofix)
|
|
518
|
+
EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, 'Nancy Cain']), # Bad OCR (nofix)
|
|
517
519
|
EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
|
|
518
520
|
EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
|
|
519
521
|
EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
|
|
@@ -643,7 +645,16 @@ EMAILS_CONFIG = [
|
|
|
643
645
|
EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
|
|
644
646
|
EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
|
|
645
647
|
EmailCfg(id='033050', actual_text='schwartman'),
|
|
648
|
+
EmailCfg(id='031036', description=f"{BARBRO_C_EHNBOM} related donation and Swedish girls discussion"),
|
|
646
649
|
EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
|
|
650
|
+
EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
651
|
+
EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
652
|
+
EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
653
|
+
EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
654
|
+
EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
655
|
+
EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
656
|
+
EmailCfg(id='031320', description=f"Epstein and {RICHARD_KAHN} appear to be discussing routing donatings through {PEGGY_SIEGAL}"),
|
|
657
|
+
EmailCfg(id='016693', description='signed "MM"'),
|
|
647
658
|
EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
|
|
648
659
|
EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
|
|
649
660
|
EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
|
|
@@ -661,6 +672,9 @@ EMAILS_CONFIG = [
|
|
|
661
672
|
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
|
|
662
673
|
EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
|
|
663
674
|
EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
|
|
675
|
+
EmailCfg(id='026829', is_fwded_article=True), # Taxes
|
|
676
|
+
EmailCfg(id='020443', is_fwded_article=True), # WSJ Deplorables Bannon
|
|
677
|
+
EmailCfg(id='030372', is_fwded_article=True), # Bannon China Iran
|
|
664
678
|
EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
|
|
665
679
|
EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
666
680
|
EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
@@ -711,6 +725,8 @@ EMAILS_CONFIG = [
|
|
|
711
725
|
EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
|
|
712
726
|
EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
|
|
713
727
|
EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
|
|
728
|
+
EmailCfg(id='031990', is_fwded_article=True), # newsmax on ken starr
|
|
729
|
+
EmailCfg(id='029433', is_fwded_article=True), # Estate Planning After the Enactment of the Tax Cuts and Jobs Act
|
|
714
730
|
EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
|
|
715
731
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
716
732
|
|
|
@@ -733,12 +749,12 @@ EMAILS_CONFIG = [
|
|
|
733
749
|
EmailCfg(id='033512', duplicate_ids=['033361']),
|
|
734
750
|
EmailCfg(id='030299', duplicate_ids=['021794']),
|
|
735
751
|
EmailCfg(id='033575', duplicate_ids=['012898']),
|
|
736
|
-
EmailCfg(id='031428', duplicate_ids=['031388']),
|
|
752
|
+
EmailCfg(id='031428', is_fwded_article=True, duplicate_ids=['031388']),
|
|
737
753
|
EmailCfg(id='031980', duplicate_ids=['019409']),
|
|
738
754
|
EmailCfg(id='033486', duplicate_ids=['033156']),
|
|
739
755
|
EmailCfg(id='025790', duplicate_ids=['031994']),
|
|
740
756
|
EmailCfg(id='028497', duplicate_ids=['026228']),
|
|
741
|
-
EmailCfg(id='033528', duplicate_ids=['033517']),
|
|
757
|
+
EmailCfg(id='033528', is_fwded_article=True, duplicate_ids=['033517']),
|
|
742
758
|
EmailCfg(id='019412', duplicate_ids=['028621']),
|
|
743
759
|
EmailCfg(id='027053', duplicate_ids=['028765']),
|
|
744
760
|
EmailCfg(id='027049', duplicate_ids=['028773']),
|
|
@@ -1355,7 +1371,12 @@ OTHER_FILES_FINANCE = [
|
|
|
1355
1371
|
DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
|
|
1356
1372
|
|
|
1357
1373
|
# private placement memoranda
|
|
1358
|
-
DocCfg(
|
|
1374
|
+
DocCfg(
|
|
1375
|
+
id='024432',
|
|
1376
|
+
date='2006-09-27',
|
|
1377
|
+
description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
|
|
1378
|
+
),
|
|
1379
|
+
|
|
1359
1380
|
DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
|
|
1360
1381
|
]
|
|
1361
1382
|
|
|
@@ -1689,13 +1710,16 @@ for cfg in ALL_CONFIGS:
|
|
|
1689
1710
|
|
|
1690
1711
|
# Email related regexes (have to be here for circular dependencies reasons)
|
|
1691
1712
|
FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
|
|
1713
|
+
FRENCH_REPLY_PATTERN = r"Le .* a ecrit:"
|
|
1714
|
+
GERMAN_REPLY_PATTERN = r"Am \d\d\.\d\d\..*schrieb.*"
|
|
1715
|
+
NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
|
|
1692
1716
|
REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
|
|
1693
1717
|
REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
|
|
1694
1718
|
REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
|
|
1695
1719
|
REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
|
|
1696
|
-
REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1720
|
+
REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1697
1721
|
REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
|
|
1698
|
-
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
|
|
1722
|
+
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
|
|
1699
1723
|
|
|
1700
1724
|
|
|
1701
1725
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
epstein_files/util/data.py
CHANGED
|
@@ -22,6 +22,7 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
|
|
|
22
22
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
23
23
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
24
24
|
|
|
25
|
+
all_elements_same = lambda _list: len(_list) == 0 or all(x == _list[0] for x in _list)
|
|
25
26
|
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
26
27
|
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
27
28
|
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
epstein_files/util/env.py
CHANGED
|
@@ -38,7 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
|
|
|
38
38
|
output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
|
|
39
39
|
parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
|
|
40
40
|
output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
|
|
41
|
-
output.add_argument('--emailers-info', action='store_true', help='write a .png of the eeailers info table')
|
|
41
|
+
output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the eeailers info table')
|
|
42
42
|
output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
|
|
43
43
|
output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
|
|
44
44
|
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
@@ -61,31 +61,34 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
|
|
|
61
61
|
debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
|
|
62
62
|
debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
|
|
63
63
|
debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
64
|
+
debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
|
|
65
|
+
debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plain text version of output')
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
# Parse args
|
|
67
69
|
args = parser.parse_args()
|
|
68
70
|
is_html_script = parser.prog in HTML_SCRIPTS
|
|
69
71
|
|
|
70
|
-
args.build = args.build
|
|
71
72
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
72
73
|
args.names = [None if n == 'None' else n.strip() for n in (args.names or [])]
|
|
73
74
|
args.output_emails = args.output_emails or args.all_emails
|
|
74
75
|
args.output_other = args.output_other or args.all_other_files or args.uninteresting
|
|
75
76
|
args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
|
|
76
77
|
args.width = args.width if is_html_script else None
|
|
78
|
+
args.any_output_selected = any([is_output_arg(arg) and val for arg, val in vars(args).items()])
|
|
79
|
+
|
|
80
|
+
if not (args.any_output_selected or args.email_timeline or args.emailers_info):
|
|
81
|
+
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
82
|
+
args.output_emails = args.output_other = args.output_texts = True
|
|
77
83
|
|
|
78
84
|
if is_html_script:
|
|
79
85
|
if args.positional_args:
|
|
80
86
|
exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
|
|
81
87
|
|
|
82
88
|
if parser.prog == EPSTEIN_GENERATE:
|
|
83
|
-
if
|
|
89
|
+
if args.any_output_selected:
|
|
84
90
|
if args.email_timeline:
|
|
85
91
|
exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
|
|
86
|
-
elif not args.email_timeline and not args.emailers_info:
|
|
87
|
-
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
88
|
-
args.output_texts = args.output_emails = args.output_other = True
|
|
89
92
|
|
|
90
93
|
if args.build == DEFAULT_FILE:
|
|
91
94
|
if args.all_emails:
|