epstein-files 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +55 -23
- epstein_files/documents/communication.py +9 -5
- epstein_files/documents/document.py +231 -135
- epstein_files/documents/doj_file.py +242 -0
- epstein_files/documents/doj_files/full_text.py +166 -0
- epstein_files/documents/email.py +289 -232
- epstein_files/documents/emails/email_header.py +35 -16
- epstein_files/documents/emails/emailers.py +223 -0
- epstein_files/documents/imessage/text_message.py +2 -3
- epstein_files/documents/json_file.py +18 -14
- epstein_files/documents/messenger_log.py +23 -39
- epstein_files/documents/other_file.py +54 -48
- epstein_files/epstein_files.py +65 -29
- epstein_files/person.py +151 -94
- epstein_files/util/constant/names.py +37 -10
- epstein_files/util/constant/output_files.py +2 -0
- epstein_files/util/constant/strings.py +14 -7
- epstein_files/util/constant/urls.py +17 -0
- epstein_files/util/constants.py +556 -391
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +44 -33
- epstein_files/util/env.py +34 -19
- epstein_files/util/file_helper.py +30 -6
- epstein_files/util/helpers/debugging_helper.py +13 -0
- epstein_files/util/helpers/env_helpers.py +21 -0
- epstein_files/util/highlighted_group.py +121 -37
- epstein_files/util/layout/left_bar_panel.py +26 -0
- epstein_files/util/logging.py +28 -13
- epstein_files/util/output.py +49 -40
- epstein_files/util/rich.py +30 -3
- epstein_files/util/word_count.py +7 -7
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/METADATA +16 -3
- epstein_files-1.5.0.dist-info/RECORD +40 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.5.dist-info/RECORD +0 -34
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
epstein_files/person.py
CHANGED
|
@@ -9,17 +9,18 @@ from rich.table import Table
|
|
|
9
9
|
from rich.text import Text
|
|
10
10
|
|
|
11
11
|
from epstein_files.documents.document import Document
|
|
12
|
-
from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
|
|
12
|
+
from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
|
|
13
13
|
from epstein_files.documents.messenger_log import MessengerLog
|
|
14
14
|
from epstein_files.documents.other_file import OtherFile
|
|
15
15
|
from epstein_files.util.constant.strings import *
|
|
16
16
|
from epstein_files.util.constant.urls import *
|
|
17
17
|
from epstein_files.util.constants import *
|
|
18
|
-
from epstein_files.util.data import days_between, flatten, without_falsey
|
|
18
|
+
from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
|
|
19
19
|
from epstein_files.util.env import args
|
|
20
20
|
from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
|
|
21
21
|
get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
|
|
22
|
-
from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table,
|
|
22
|
+
from epstein_files.util.rich import (GREY_NUMBERS, SKIPPED_FILE_MSG_PADDING, TABLE_TITLE_STYLE, build_table,
|
|
23
|
+
console, join_texts, print_centered)
|
|
23
24
|
|
|
24
25
|
ALT_INFO_STYLE = 'medium_purple4'
|
|
25
26
|
CC = 'cc:'
|
|
@@ -42,14 +43,15 @@ class Person:
|
|
|
42
43
|
emails: list[Email] = field(default_factory=list)
|
|
43
44
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
44
45
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
45
|
-
|
|
46
|
+
is_uninteresting: bool = False
|
|
46
47
|
|
|
47
48
|
def __post_init__(self):
    """Replace `emails` and `imessage_logs` with copies ordered by `Document.sort_by_timestamp()`."""
    for attr_name in ('emails', 'imessage_logs'):
        setattr(self, attr_name, Document.sort_by_timestamp(getattr(self, attr_name)))
|
|
50
51
|
|
|
52
|
+
@property
|
|
51
53
|
def category(self) -> str | None:
|
|
52
|
-
highlight_group = self.highlight_group
|
|
54
|
+
highlight_group = self.highlight_group
|
|
53
55
|
|
|
54
56
|
if highlight_group and isinstance(highlight_group, HighlightedNames):
|
|
55
57
|
category = highlight_group.category or highlight_group.label
|
|
@@ -57,60 +59,63 @@ class Person:
|
|
|
57
59
|
if category != self.name and category != 'paula': # TODO: this sucks
|
|
58
60
|
return category
|
|
59
61
|
|
|
62
|
+
@property
|
|
60
63
|
def category_txt(self) -> Text | None:
|
|
61
64
|
if self.name is None:
|
|
62
65
|
return None
|
|
63
|
-
elif self.category
|
|
64
|
-
return styled_category(self.category
|
|
65
|
-
elif self.is_a_mystery
|
|
66
|
+
elif self.category:
|
|
67
|
+
return styled_category(self.category)
|
|
68
|
+
elif self.is_a_mystery or self.is_uninteresting:
|
|
66
69
|
return QUESTION_MARKS_TXT
|
|
67
70
|
|
|
71
|
+
@property
def email_conversation_length_in_days(self) -> int:
    """Result of `days_between()` for the timestamps of the first and last entries of `self.emails`."""
    first_email, last_email = self.emails[0], self.emails[-1]
    return days_between(first_email.timestamp, last_email.timestamp)
|
|
70
74
|
|
|
75
|
+
@property
def earliest_email_at(self) -> datetime:
    """Timestamp of the first entry in `self.emails` (the list is sorted in `__post_init__`)."""
    first_email = self.emails[0]
    return first_email.timestamp
|
|
73
78
|
|
|
79
|
+
@property
|
|
74
80
|
def earliest_email_date(self) -> date:
|
|
75
|
-
return self.earliest_email_at
|
|
81
|
+
return self.earliest_email_at.date()
|
|
76
82
|
|
|
83
|
+
@property
def last_email_at(self) -> datetime:
    """Timestamp of the final entry in `self.emails` (the list is sorted in `__post_init__`)."""
    final_email = self.emails[-1]
    return final_email.timestamp
|
|
79
86
|
|
|
87
|
+
@property
|
|
80
88
|
def last_email_date(self) -> date:
|
|
81
|
-
return self.last_email_at
|
|
89
|
+
return self.last_email_at.date()
|
|
82
90
|
|
|
91
|
+
@property
def emails_by(self) -> list[Email]:
    """Emails in `self.emails` whose `author` equals this person's `name`."""
    return list(filter(lambda email: self.name == email.author, self.emails))
|
|
85
94
|
|
|
95
|
+
@property
def emails_to(self) -> list[Email]:
    """
    Emails in `self.emails` that list this person's `name` among their `recipients`.
    When `name` is None, emails with no recipients at all are included as well.
    """
    def was_sent_to_me(email: Email) -> bool:
        if self.name in email.recipients:
            return True

        return self.name is None and len(email.recipients) == 0

    return [email for email in self.emails if was_sent_to_me(email)]
|
|
91
101
|
|
|
92
|
-
|
|
93
|
-
return PERSON_LINK_BUILDERS[site](self.name_str())
|
|
94
|
-
|
|
95
|
-
def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
|
|
96
|
-
if self.name is None:
|
|
97
|
-
return Text('')
|
|
98
|
-
|
|
99
|
-
return link_text_obj(self.external_link(site), link_str or site, style=self.style())
|
|
100
|
-
|
|
102
|
+
@property
def external_links_line(self) -> Text:
    """Centered, dim `Text` holding one link per site in `PERSON_LINK_BUILDERS`, joined by ' / '."""
    site_links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
    line = Text('', justify='center', style='dim')
    return line.append(join_texts(site_links, join=' / '))
|
|
104
106
|
|
|
107
|
+
@property
def has_any_epstein_emails(self) -> bool:
    """True if `JEFFREY_EPSTEIN` is the author or a recipient of any email in `self.emails`."""
    authors = [email.author for email in self.emails]
    recipients = flatten([email.recipients for email in self.emails])
    return JEFFREY_EPSTEIN in authors or JEFFREY_EPSTEIN in recipients
|
|
108
111
|
|
|
112
|
+
@property
def highlight_group(self) -> HighlightedNames | None:
    """Whatever `get_highlight_group_for_name()` returns for this person's `name`."""
    person_name = self.name
    return get_highlight_group_for_name(person_name)
|
|
111
115
|
|
|
116
|
+
@property
|
|
112
117
|
def info_panel(self) -> Padding:
|
|
113
|
-
"""
|
|
118
|
+
"""Return a `Panel` with the name of an emailer and a few tidbits of information about them."""
|
|
114
119
|
style = 'white' if (not self.style() or self.style() == DEFAULT) else self.style()
|
|
115
120
|
panel_style = f"black on {style} bold"
|
|
116
121
|
|
|
@@ -118,22 +123,23 @@ class Person:
|
|
|
118
123
|
email_count = len(self._printable_emails())
|
|
119
124
|
title_suffix = f"sent by {JEFFREY_EPSTEIN} to himself"
|
|
120
125
|
else:
|
|
121
|
-
email_count = len(self.unique_emails
|
|
122
|
-
num_days = self.email_conversation_length_in_days
|
|
123
|
-
title_suffix = f"{TO_FROM} {self.name_str
|
|
126
|
+
email_count = len(self.unique_emails)
|
|
127
|
+
num_days = self.email_conversation_length_in_days
|
|
128
|
+
title_suffix = f"{TO_FROM} {self.name_str} starting {self.earliest_email_date} covering {num_days:,} days"
|
|
124
129
|
|
|
125
130
|
title = f"Found {email_count} emails {title_suffix}"
|
|
126
|
-
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category
|
|
131
|
+
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category) + 8)
|
|
127
132
|
panel = Panel(Text(title, justify='center'), width=width, style=panel_style)
|
|
128
133
|
elements: list[RenderableType] = [panel]
|
|
129
134
|
|
|
130
|
-
if self.info_with_category
|
|
131
|
-
elements.append(Text(f"({self.info_with_category
|
|
135
|
+
if self.info_with_category:
|
|
136
|
+
elements.append(Text(f"({self.info_with_category})", justify='center', style=f"{style} italic"))
|
|
132
137
|
|
|
133
138
|
return Padding(Group(*elements), (2, 0, 1, 0))
|
|
134
139
|
|
|
140
|
+
@property
|
|
135
141
|
def info_str(self) -> str | None:
|
|
136
|
-
highlight_group = self.highlight_group
|
|
142
|
+
highlight_group = self.highlight_group
|
|
137
143
|
|
|
138
144
|
if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
|
|
139
145
|
info = highlight_group.info_for(self.name)
|
|
@@ -141,90 +147,161 @@ class Person:
|
|
|
141
147
|
if info:
|
|
142
148
|
return info
|
|
143
149
|
|
|
144
|
-
if self.
|
|
145
|
-
if self.has_any_epstein_emails
|
|
150
|
+
if self.is_uninteresting and len(self.emails_by) == 0:
|
|
151
|
+
if self.has_any_epstein_emails:
|
|
146
152
|
return UNINTERESTING_CC_INFO
|
|
147
153
|
else:
|
|
148
154
|
return UNINTERESTING_CC_INFO_NO_CONTACT
|
|
149
155
|
|
|
150
|
-
|
|
151
|
-
return ', '.join(without_falsey([self.category(), self.info_str()]))
|
|
152
|
-
|
|
156
|
+
@property
|
|
153
157
|
def info_txt(self) -> Text | None:
|
|
154
158
|
if self.name == JEFFREY_EPSTEIN:
|
|
155
159
|
return Text('(emails sent by Epstein to himself are here)', style=ALT_INFO_STYLE)
|
|
156
160
|
elif self.name is None:
|
|
157
161
|
return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
|
|
158
|
-
elif self.category
|
|
162
|
+
elif self.category == JUNK:
|
|
159
163
|
return Text(f"({JUNK} mail)", style='bright_black dim')
|
|
160
|
-
elif self.
|
|
161
|
-
if self.
|
|
162
|
-
return Text(f"({self.
|
|
164
|
+
elif self.is_uninteresting and (self.info_str or '').startswith(UNINTERESTING_CC_INFO):
|
|
165
|
+
if self.sole_cc:
|
|
166
|
+
return Text(f"(cc: from {self.sole_cc} only)", style='wheat4 dim')
|
|
167
|
+
elif self.info_str == UNINTERESTING_CC_INFO:
|
|
168
|
+
return Text(f"({self.info_str})", style='wheat4 dim')
|
|
163
169
|
else:
|
|
164
|
-
return Text(f"({self.info_str
|
|
165
|
-
elif self.is_a_mystery
|
|
170
|
+
return Text(f"({self.info_str})", style='plum4 dim')
|
|
171
|
+
elif self.is_a_mystery:
|
|
166
172
|
return Text(QUESTION_MARKS, style='honeydew2 bold')
|
|
167
|
-
elif self.info_str
|
|
173
|
+
elif self.info_str is None:
|
|
168
174
|
if self.name in MAILING_LISTS:
|
|
169
175
|
return Text('(mailing list)', style=f"pale_turquoise4 dim")
|
|
170
|
-
elif self.category
|
|
176
|
+
elif self.category:
|
|
171
177
|
return Text(QUESTION_MARKS, style=self.style())
|
|
172
178
|
else:
|
|
173
179
|
return None
|
|
174
180
|
else:
|
|
175
|
-
return Text(self.info_str
|
|
181
|
+
return Text(self.info_str, style=self.style(allow_bold=False))
|
|
182
|
+
|
|
183
|
+
@property
def info_with_category(self) -> str:
    """`category` and `info_str` joined by ', ' after `without_falsey()` filters the pair."""
    parts = without_falsey([self.category, self.info_str])
    return ', '.join(parts)
|
|
176
186
|
|
|
187
|
+
@property
|
|
177
188
|
def internal_link(self) -> Text:
|
|
178
189
|
"""Kind of like an anchor link to the section of the page containing these emails."""
|
|
179
|
-
return link_text_obj(internal_link_to_emails(self.name_str
|
|
190
|
+
return link_text_obj(internal_link_to_emails(self.name_str), self.name_str, style=self.style())
|
|
180
191
|
|
|
192
|
+
@property
|
|
181
193
|
def is_a_mystery(self) -> bool:
|
|
182
194
|
"""Return True if this is someone we theroetically could know more about."""
|
|
183
|
-
return self.is_unstyled
|
|
195
|
+
return self.is_unstyled and not (self.is_email_address or self.info_str or self.is_uninteresting)
|
|
184
196
|
|
|
197
|
+
@property
def is_email_address(self) -> bool:
    """True if this person's `name` is set and contains an '@'."""
    return bool(self.name) and '@' in self.name
|
|
187
200
|
|
|
201
|
+
@property
|
|
188
202
|
def is_linkable(self) -> bool:
|
|
189
203
|
"""Return True if it's likely that EpsteinWeb has a page for this name."""
|
|
190
204
|
if self.name is None or ' ' not in self.name:
|
|
191
205
|
return False
|
|
192
|
-
elif self.is_email_address
|
|
206
|
+
elif self.is_email_address or '/' in self.name or QUESTION_MARKS in self.name:
|
|
193
207
|
return False
|
|
194
208
|
elif self.name in INVALID_FOR_EPSTEIN_WEB:
|
|
195
209
|
return False
|
|
196
210
|
|
|
197
211
|
return True
|
|
198
212
|
|
|
213
|
+
@property
def is_unstyled(self) -> bool:
    """True if `style()` yields `DEFAULT_NAME_STYLE`, i.e. there's no highlight group for this name."""
    current_style = self.style()
    return current_style == DEFAULT_NAME_STYLE
|
|
202
217
|
|
|
203
|
-
|
|
204
|
-
return self.name or UNKNOWN
|
|
205
|
-
|
|
218
|
+
@property
|
|
206
219
|
def name_link(self) -> Text:
|
|
207
220
|
"""Will only link if it's worth linking, otherwise just a Text object."""
|
|
208
|
-
if not self.is_linkable
|
|
209
|
-
return self.name_txt
|
|
221
|
+
if not self.is_linkable:
|
|
222
|
+
return self.name_txt
|
|
210
223
|
else:
|
|
211
|
-
return Text.from_markup(link_markup(self.external_link(), self.name_str
|
|
224
|
+
return Text.from_markup(link_markup(self.external_link(), self.name_str, self.style()))
|
|
225
|
+
|
|
226
|
+
@property
def name_str(self) -> str:
    """This person's `name`, or `UNKNOWN` when the name is None or empty."""
    if self.name:
        return self.name

    return UNKNOWN
|
|
212
229
|
|
|
230
|
+
@property
def name_txt(self) -> Text:
    """This person's `name` passed through `styled_name()`."""
    person_name = self.name
    return styled_name(person_name)
|
|
215
233
|
|
|
234
|
+
@property  # TODO: unused?
def should_always_truncate(self) -> bool:
    """True if all emails to/from this person should be truncated."""
    if self.name in TRUNCATE_EMAILS_FROM:
        return True

    return self.is_uninteresting
|
|
238
|
+
|
|
239
|
+
@property
def sole_cc(self) -> str | None:
    """
    Return the single author of the emails in `emails_to`, if this person authored no emails.

    Returns None when this person authored at least one email, or when the emails in
    `emails_to` have zero or more than one distinct author.
    """
    email_authors = uniquify([e.author for e in self.emails_to])
    # `email_authors` is empty when `emails_to` is empty; indexing it unconditionally
    # for the log message below used to raise an IndexError.
    first_author = email_authors[0] if len(email_authors) > 0 else None

    if len(self.unique_emails) == 1 and len(email_authors) > 0:
        logger.info(f"sole author of email to '{self.name}' is '{first_author}'")
    else:
        logger.info(f"'{self.name}' email_authors '{first_author}'")

    if len(self.unique_emails_by) > 0:
        return None

    if len(email_authors) == 1:
        return email_authors[0]

    return None
|
|
254
|
+
|
|
255
|
+
@property
def sort_key(self) -> list[int | str]:
    """
    Key used to sort `Person` objects by the number of emails sent/received.
    The name comes first when `args.sort_alphabetical` is set, otherwise it comes last.
    """
    info = self.info_str or ''

    # Counts are stored negated so that larger counts sort first in an ascending sort;
    # the two "uninteresting" flags are left positive so those people sort later.
    negated_counts = [
        -len(self.unique_emails),
        int(info == UNINTERESTING_CC_INFO_NO_CONTACT),
        int(info == UNINTERESTING_CC_INFO),
        -int(self.has_any_epstein_emails),
    ]

    if args.sort_alphabetical:
        return [self.name_str, *negated_counts]

    return [*negated_counts, self.name_str]
|
|
271
|
+
|
|
272
|
+
@property
def unique_emails(self) -> Sequence[Email]:
    """`self.emails` passed through `Document.without_dupes()`."""
    all_emails = self.emails
    return Document.without_dupes(all_emails)
|
|
275
|
+
|
|
276
|
+
@property
def unique_emails_by(self) -> list[Email]:
    """`self.emails_by` passed through `Document.without_dupes()`."""
    authored = self.emails_by
    return Document.without_dupes(authored)
|
|
279
|
+
|
|
280
|
+
@property
def unique_emails_to(self) -> list[Email]:
    """`self.emails_to` passed through `Document.without_dupes()`."""
    received = self.emails_to
    return Document.without_dupes(received)
|
|
283
|
+
|
|
284
|
+
def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
    """Build the URL for this person on `site` using that site's entry in `PERSON_LINK_BUILDERS`."""
    build_link = PERSON_LINK_BUILDERS[site]
    return build_link(self.name_str)
|
|
286
|
+
|
|
287
|
+
def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
    """
    Link to this person's page on `site`, labelled `link_str` (or the site name if not given)
    and styled with `self.style()`. Returns an empty `Text` when `name` is None.
    """
    if self.name is None:
        return Text('')

    url = self.external_link(site)
    label = link_str or site
    return link_text_obj(url, label, style=self.style())
|
|
292
|
+
|
|
216
293
|
def print_emails(self) -> list[Email]:
|
|
217
294
|
"""Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
|
|
218
|
-
print_centered(self.info_panel
|
|
295
|
+
print_centered(self.info_panel)
|
|
219
296
|
self.print_emails_table()
|
|
220
297
|
last_printed_email_was_duplicate = False
|
|
221
298
|
|
|
222
|
-
if self.category
|
|
299
|
+
if self.category == JUNK:
|
|
223
300
|
logger.warning(f"Not printing junk emailer '{self.name}'")
|
|
224
301
|
else:
|
|
225
302
|
for email in self._printable_emails():
|
|
226
|
-
if email.is_duplicate
|
|
227
|
-
console.print(Padding(email.duplicate_file_txt
|
|
303
|
+
if email.is_duplicate:
|
|
304
|
+
console.print(Padding(email.duplicate_file_txt.append('...'), SKIPPED_FILE_MSG_PADDING))
|
|
228
305
|
last_printed_email_was_duplicate = True
|
|
229
306
|
else:
|
|
230
307
|
if last_printed_email_was_duplicate:
|
|
@@ -239,42 +316,18 @@ class Person:
|
|
|
239
316
|
table = Email.build_emails_table(self._unique_printable_emails(), self.name)
|
|
240
317
|
print_centered(Padding(table, (0, 5, 0, 5)))
|
|
241
318
|
|
|
242
|
-
if self.is_linkable
|
|
243
|
-
print_centered(self.external_links_line
|
|
319
|
+
if self.is_linkable:
|
|
320
|
+
print_centered(self.external_links_line)
|
|
244
321
|
|
|
245
322
|
console.line()
|
|
246
323
|
|
|
247
|
-
def
|
|
248
|
-
|
|
249
|
-
len(self.unique_emails()),
|
|
250
|
-
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
|
|
251
|
-
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
|
|
252
|
-
int(self.has_any_epstein_emails()),
|
|
253
|
-
]
|
|
254
|
-
|
|
255
|
-
counts = [-1 * count for count in counts]
|
|
256
|
-
|
|
257
|
-
if args.sort_alphabetical:
|
|
258
|
-
return [self.name_str()] + counts
|
|
259
|
-
else:
|
|
260
|
-
return counts + [self.name_str()]
|
|
261
|
-
|
|
262
|
-
def style(self) -> str:
|
|
263
|
-
return get_style_for_name(self.name)
|
|
264
|
-
|
|
265
|
-
def unique_emails(self) -> Sequence[Email]:
|
|
266
|
-
return Document.without_dupes(self.emails)
|
|
267
|
-
|
|
268
|
-
def unique_emails_by(self) -> list[Email]:
|
|
269
|
-
return Document.without_dupes(self.emails_by())
|
|
270
|
-
|
|
271
|
-
def unique_emails_to(self) -> list[Email]:
|
|
272
|
-
return Document.without_dupes(self.emails_to())
|
|
324
|
+
def style(self, allow_bold: bool = True) -> str:
    """Style string from `get_style_for_name()` for this person's `name`; `allow_bold` is forwarded."""
    person_name = self.name
    return get_style_for_name(person_name, allow_bold=allow_bold)
|
|
273
326
|
|
|
274
327
|
def _printable_emails(self):
|
|
275
328
|
"""For Epstein we only want to print emails he sent to himself."""
|
|
276
329
|
if self.name == JEFFREY_EPSTEIN:
|
|
277
|
-
return [e for e in self.emails if e.is_note_to_self
|
|
330
|
+
return [e for e in self.emails if e.is_note_to_self]
|
|
278
331
|
else:
|
|
279
332
|
return self.emails
|
|
280
333
|
|
|
@@ -282,7 +335,7 @@ class Person:
|
|
|
282
335
|
return Document.without_dupes(self._printable_emails())
|
|
283
336
|
|
|
284
337
|
def __str__(self):
|
|
285
|
-
return f"{self.name_str
|
|
338
|
+
return f"{self.name_str}"
|
|
286
339
|
|
|
287
340
|
@staticmethod
|
|
288
341
|
def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
|
|
@@ -290,8 +343,8 @@ class Person:
|
|
|
290
343
|
highlighted = highlighted or people
|
|
291
344
|
highlighted_names = [p.name for p in highlighted]
|
|
292
345
|
is_selection = len(people) != len(highlighted) or args.emailers_info
|
|
293
|
-
all_emails =
|
|
294
|
-
email_authors = [p for p in people if p.emails_by
|
|
346
|
+
all_emails = Person.emails_from_people(people)
|
|
347
|
+
email_authors = [p for p in people if p.emails_by and p.name]
|
|
295
348
|
attributed_emails = [email for email in all_emails if email.author]
|
|
296
349
|
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
|
|
297
350
|
f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
|
|
@@ -316,7 +369,7 @@ class Person:
|
|
|
316
369
|
grey_idx = 0
|
|
317
370
|
|
|
318
371
|
for person in people:
|
|
319
|
-
earliest_email_date = person.earliest_email_date
|
|
372
|
+
earliest_email_date = person.earliest_email_date
|
|
320
373
|
is_on_page = False if show_epstein_total else person.name in highlighted_names
|
|
321
374
|
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
322
375
|
|
|
@@ -331,14 +384,18 @@ class Person:
|
|
|
331
384
|
|
|
332
385
|
table.add_row(
|
|
333
386
|
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
|
|
334
|
-
person.internal_link
|
|
335
|
-
person.category_txt
|
|
336
|
-
f"{len(person.unique_emails
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
f"{person.email_conversation_length_in_days
|
|
340
|
-
person.info_txt
|
|
387
|
+
person.internal_link if is_on_page and not person.is_uninteresting else person.name_txt,
|
|
388
|
+
person.category_txt,
|
|
389
|
+
f"{len(person.unique_emails if show_epstein_total else person._unique_printable_emails())}",
|
|
390
|
+
str(len(person.unique_emails_by)) if len(person.unique_emails_by) > 0 else '',
|
|
391
|
+
str(len(person.unique_emails_to)) if len(person.unique_emails_to) > 0 else '',
|
|
392
|
+
f"{person.email_conversation_length_in_days}",
|
|
393
|
+
person.info_txt or '',
|
|
341
394
|
style='' if show_epstein_total or is_on_page else 'dim',
|
|
342
395
|
)
|
|
343
396
|
|
|
344
397
|
return table
|
|
398
|
+
|
|
399
|
+
@staticmethod
def emails_from_people(people: list['Person']) -> Sequence[Email]:
    """Gather every person's `unique_emails` into one flat list and run it through `Document.uniquify()`."""
    emails_per_person = [list(person.unique_emails) for person in people]
    return Document.uniquify(flatten(emails_per_person))
|
|
@@ -177,6 +177,16 @@ ZUBAIR_KHAN = 'Zubair Khan'
|
|
|
177
177
|
|
|
178
178
|
UNKNOWN = '(unknown)'
|
|
179
179
|
|
|
180
|
+
# DOJ files emails
|
|
181
|
+
ALISON_J_NATHAN = 'Alison J. Nathan'
|
|
182
|
+
AMIR_TAAKI = 'Amir Taaki'
|
|
183
|
+
BROCK_PIERCE = 'Brock Pierce'
|
|
184
|
+
CHRISTIAN_EVERDELL = 'Christian Everdell'
|
|
185
|
+
CHRISTOPHER_DILORIO = 'Christopher Dilorio'
|
|
186
|
+
DOUGLAS_WIGDOR = 'Douglas Wigdor'
|
|
187
|
+
KARYNA_SHULIAK = 'Karyna Shuliak'
|
|
188
|
+
STACEY_RICHMAN = 'Stacey Richman'
|
|
189
|
+
|
|
180
190
|
# No communications but name is in the files
|
|
181
191
|
BILL_GATES = 'Bill Gates'
|
|
182
192
|
DONALD_TRUMP = 'Donald Trump'
|
|
@@ -216,25 +226,25 @@ UBS = 'UBS'
|
|
|
216
226
|
|
|
217
227
|
# First and last names that should be made part of a highlighting regex for emailers
|
|
218
228
|
NAMES_TO_NOT_HIGHLIGHT = """
|
|
219
|
-
al alain alan alfredo allen alex alexander amanda andres andrew anthony
|
|
229
|
+
al alain alan alison alfredo allen alex alexander amanda andres andrew anthony
|
|
220
230
|
bard barrett barry bennet bernard bill black bob boris brad brenner bruce
|
|
221
|
-
caroline carolyn chris christina cohen
|
|
222
|
-
dan daniel danny darren dave david debbie donald
|
|
231
|
+
cameron caroline carolyn chris christian christina cohen
|
|
232
|
+
dan daniel danny darren dave david debbie donald douglas
|
|
223
233
|
ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
|
|
224
234
|
faith fisher forget fred friendly frost fuller
|
|
225
235
|
gates gerald george gold gordon
|
|
226
|
-
haddad harry hay heather henry hill hoffman howard
|
|
236
|
+
haddad hanson harry hay heather henry hill hoffman howard
|
|
227
237
|
ian ivan
|
|
228
238
|
jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
|
|
229
|
-
kahn karl kate katherine kelly ken kevin krassner
|
|
230
|
-
larry laurie lawrence leon lesley linda link lisa
|
|
239
|
+
kafka kahn karl kate katherine kelly ken kevin krassner
|
|
240
|
+
larry larsen laurie lawrence leon lesley linda link lisa
|
|
231
241
|
mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
|
|
232
|
-
nancy neal new nicole norman
|
|
242
|
+
nancy nathan neal new nicole norman
|
|
233
243
|
owen
|
|
234
|
-
paul paula pen peter philip prince
|
|
244
|
+
paul paula pen peter philip pierce prince
|
|
235
245
|
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
|
|
236
|
-
scott sean skip stanley stern stephen steve steven stone susan
|
|
237
|
-
the thomas tim tom tony tyler
|
|
246
|
+
scott sean skip smith stacey stanley stern stephen steve steven stone susan
|
|
247
|
+
terry the thomas tim tom tony tyler
|
|
238
248
|
victor
|
|
239
249
|
wade waters
|
|
240
250
|
y
|
|
@@ -269,6 +279,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
|
269
279
|
def constantize_name(name: str) -> str:
|
|
270
280
|
if name == 'Andrzej Duda or entourage':
|
|
271
281
|
return 'ANDRZEJ_DUDA'
|
|
282
|
+
elif name == MIROSLAV_LAJCAK:
|
|
283
|
+
return 'MIROSLAV_LAJCAK'
|
|
284
|
+
elif name == 'Paula Heil Fisher (???)':
|
|
285
|
+
return 'PAULA'
|
|
272
286
|
|
|
273
287
|
variable_name = remove_question_marks(name)
|
|
274
288
|
variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
|
|
@@ -300,7 +314,20 @@ def extract_last_name(name: str) -> str:
|
|
|
300
314
|
return first_last_names[-1]
|
|
301
315
|
|
|
302
316
|
|
|
317
|
+
def reverse_first_and_last_names(name: str) -> str:
    """
    Turn 'Lastname, Firstname' into 'Firstname Lastname'.

    Anything containing '@' is treated as an email address and returned lowercased instead.
    Names without ', ' are returned untouched. Only the first two comma separated parts
    are used; any further parts are dropped.
    """
    if '@' in name:
        return name.lower()

    if ', ' not in name:
        return name

    last_name, first_name, *_ = name.split(', ')
    return f"{first_name} {last_name}"
|
|
327
|
+
|
|
328
|
+
|
|
303
329
|
def reversed_name(name: str) -> str:
|
|
330
|
+
"""'Jeffrey Epstein' becomes 'Epstein Jeffrey'."""
|
|
304
331
|
if ' ' not in name:
|
|
305
332
|
return name
|
|
306
333
|
|
|
@@ -13,6 +13,7 @@ JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.
|
|
|
13
13
|
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
14
14
|
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
|
|
15
15
|
# EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
16
|
+
DOJ_2026_HTML_PATH = HTML_DIR.joinpath('doj_2026-01-30_files.html')
|
|
16
17
|
URLS_ENV = '.urls.env'
|
|
17
18
|
EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
|
|
18
19
|
|
|
@@ -26,6 +27,7 @@ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
|
|
|
26
27
|
JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
|
|
27
28
|
JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
|
|
28
29
|
WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
|
|
30
|
+
DOJ_2026_URL = f"{TEXT_MSGS_URL}/{DOJ_2026_HTML_PATH.name}"
|
|
29
31
|
|
|
30
32
|
SITE_URLS: dict[SiteType, str] = {
|
|
31
33
|
EMAIL: ALL_EMAILS_URL,
|
|
@@ -57,29 +57,36 @@ TIMESTAMP_DIM = f"turquoise4 dim"
|
|
|
57
57
|
# Misc
|
|
58
58
|
AUTHOR = 'author'
|
|
59
59
|
DEFAULT = 'default'
|
|
60
|
+
EFTA_PREFIX = 'EFTA'
|
|
60
61
|
HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
|
|
61
62
|
JSON = 'json'
|
|
62
63
|
NA = 'n/a'
|
|
63
64
|
REDACTED = '<REDACTED>'
|
|
64
65
|
QUESTION_MARKS = '(???)'
|
|
65
66
|
|
|
66
|
-
# Regexes
|
|
67
|
-
ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
|
|
68
|
-
FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
|
|
69
|
-
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
70
|
-
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
71
|
-
|
|
72
67
|
# Document subclass names (this sucks)
|
|
73
68
|
DOCUMENT_CLASS = 'Document'
|
|
69
|
+
DOJ_FILE_CLASS = 'DojFile'
|
|
74
70
|
EMAIL_CLASS = 'Email'
|
|
75
71
|
JSON_FILE_CLASS = 'JsonFile'
|
|
76
72
|
MESSENGER_LOG_CLASS = 'MessengerLog'
|
|
77
73
|
OTHER_FILE_CLASS = 'OtherFile'
|
|
78
74
|
|
|
75
|
+
# Regexes
# DOJ file stem: EFTA_PREFIX followed by exactly 8 digits (the doubled braces escape the f-string).
DOJ_FILE_STEM_REGEX = re.compile(fr"{EFTA_PREFIX}\d{{8}}")
# DOJ file name: the stem with an optional '.txt' extension.
DOJ_FILE_NAME_REGEX = re.compile(fr"{DOJ_FILE_STEM_REGEX.pattern}(\.txt)?")

# House Oversight ID: 6 digits with an optional '_' + 1 or 2 digit suffix.
HOUSE_OVERSIGHT_NOV_2025_ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
# House Oversight file stem: HOUSE_OVERSIGHT_PREFIX followed by the ID (captured as group 1).
HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.pattern})")
# House Oversight file name: the stem with an optional '.txt' or '.txt.json' extension.
HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")

# Matches QUESTION_MARKS preceded by a single space at the very end of a string.
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
84
|
+
|
|
79
85
|
|
|
80
86
|
remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
|
|
81
87
|
|
|
82
88
|
|
|
83
|
-
def indented(s: str, spaces: int = 4) -> str:
|
|
89
|
+
def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
    """Prepend `spaces` spaces followed by `prefix` to every line of `s` (lines are split on '\\n')."""
    line_start = (' ' * spaces) + prefix
    return '\n'.join(line_start + line for line in s.split('\n'))
|
|
@@ -12,6 +12,7 @@ from epstein_files.util.file_helper import coerce_file_stem
|
|
|
12
12
|
|
|
13
13
|
# Style stuff
|
|
14
14
|
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
15
|
+
ARCHIVE_ALT_LINK_STYLE = 'medium_purple4 italic'
|
|
15
16
|
TEXT_LINK = 'text_link'
|
|
16
17
|
|
|
17
18
|
# External site names
|
|
@@ -39,6 +40,9 @@ EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
|
|
|
39
40
|
OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
|
|
40
41
|
RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
|
|
41
42
|
SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
|
|
43
|
+
# DOJ docs
|
|
44
|
+
DOJ_2026_URL = 'https://www.justice.gov/epstein/doj-disclosures'
|
|
45
|
+
DOJ_SEARCH_URL = 'https://www.justice.gov/epstein/search'
|
|
42
46
|
|
|
43
47
|
# Document source sites
|
|
44
48
|
EPSTEINIFY_URL = 'https://epsteinify.com'
|
|
@@ -53,6 +57,9 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
|
|
|
53
57
|
ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
|
|
54
58
|
}
|
|
55
59
|
|
|
60
|
+
# Example: https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf
|
|
61
|
+
DOJ_2026_FILE_BASE_URL = "https://www.justice.gov/epstein/files/DataSet%20"
|
|
62
|
+
|
|
56
63
|
|
|
57
64
|
epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
|
|
58
65
|
epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
|
|
@@ -90,6 +97,16 @@ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower
|
|
|
90
97
|
return f"{base_url}{file_stem}"
|
|
91
98
|
|
|
92
99
|
|
|
100
|
+
def doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
    """Link to the justice.gov PDF for a DOJ file: base URL + dataset ID + '/' + stem + '.pdf'."""
    pdf_name = f"{file_stem}.pdf"
    return f"{DOJ_2026_FILE_BASE_URL}{dataset_id}/{pdf_name}"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def jmail_doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
    """Link to the Jmail backup of a DOJ file (dataset ID zero padded to 5 digits, stem lowercased)."""
    volume = f"vol{dataset_id:05}"
    slug = f"{file_stem.lower()}-pdf"
    return f"{JMAIL_URL}/drive/{volume}-{slug}"
|
|
108
|
+
|
|
109
|
+
|
|
93
110
|
def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
|
|
94
111
|
url = build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id)
|
|
95
112
|
return link_markup(url, coerce_file_stem(filename_or_id), style)
|