epstein-files 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. epstein_files/__init__.py +55 -23
  2. epstein_files/documents/communication.py +9 -5
  3. epstein_files/documents/document.py +231 -135
  4. epstein_files/documents/doj_file.py +242 -0
  5. epstein_files/documents/doj_files/full_text.py +166 -0
  6. epstein_files/documents/email.py +289 -232
  7. epstein_files/documents/emails/email_header.py +35 -16
  8. epstein_files/documents/emails/emailers.py +223 -0
  9. epstein_files/documents/imessage/text_message.py +2 -3
  10. epstein_files/documents/json_file.py +18 -14
  11. epstein_files/documents/messenger_log.py +23 -39
  12. epstein_files/documents/other_file.py +54 -48
  13. epstein_files/epstein_files.py +65 -29
  14. epstein_files/person.py +151 -94
  15. epstein_files/util/constant/names.py +37 -10
  16. epstein_files/util/constant/output_files.py +2 -0
  17. epstein_files/util/constant/strings.py +14 -7
  18. epstein_files/util/constant/urls.py +17 -0
  19. epstein_files/util/constants.py +556 -391
  20. epstein_files/util/data.py +2 -0
  21. epstein_files/util/doc_cfg.py +44 -33
  22. epstein_files/util/env.py +34 -19
  23. epstein_files/util/file_helper.py +30 -6
  24. epstein_files/util/helpers/debugging_helper.py +13 -0
  25. epstein_files/util/helpers/env_helpers.py +21 -0
  26. epstein_files/util/highlighted_group.py +121 -37
  27. epstein_files/util/layout/left_bar_panel.py +26 -0
  28. epstein_files/util/logging.py +28 -13
  29. epstein_files/util/output.py +49 -40
  30. epstein_files/util/rich.py +30 -3
  31. epstein_files/util/word_count.py +7 -7
  32. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/METADATA +16 -3
  33. epstein_files-1.5.0.dist-info/RECORD +40 -0
  34. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +1 -1
  35. epstein_files-1.2.5.dist-info/RECORD +0 -34
  36. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
  37. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
epstein_files/person.py CHANGED
@@ -9,17 +9,18 @@ from rich.table import Table
9
9
  from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.document import Document
12
- from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
12
+ from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
13
13
  from epstein_files.documents.messenger_log import MessengerLog
14
14
  from epstein_files.documents.other_file import OtherFile
15
15
  from epstein_files.util.constant.strings import *
16
16
  from epstein_files.util.constant.urls import *
17
17
  from epstein_files.util.constants import *
18
- from epstein_files.util.data import days_between, flatten, without_falsey
18
+ from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
19
19
  from epstein_files.util.env import args
20
20
  from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
21
21
  get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
22
- from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
22
+ from epstein_files.util.rich import (GREY_NUMBERS, SKIPPED_FILE_MSG_PADDING, TABLE_TITLE_STYLE, build_table,
23
+ console, join_texts, print_centered)
23
24
 
24
25
  ALT_INFO_STYLE = 'medium_purple4'
25
26
  CC = 'cc:'
@@ -42,14 +43,15 @@ class Person:
42
43
  emails: list[Email] = field(default_factory=list)
43
44
  imessage_logs: list[MessengerLog] = field(default_factory=list)
44
45
  other_files: list[OtherFile] = field(default_factory=list)
45
- is_uninteresting_cc: bool = False
46
+ is_uninteresting: bool = False
46
47
 
47
48
  def __post_init__(self):
48
49
  self.emails = Document.sort_by_timestamp(self.emails)
49
50
  self.imessage_logs = Document.sort_by_timestamp(self.imessage_logs)
50
51
 
52
+ @property
51
53
  def category(self) -> str | None:
52
- highlight_group = self.highlight_group()
54
+ highlight_group = self.highlight_group
53
55
 
54
56
  if highlight_group and isinstance(highlight_group, HighlightedNames):
55
57
  category = highlight_group.category or highlight_group.label
@@ -57,60 +59,63 @@ class Person:
57
59
  if category != self.name and category != 'paula': # TODO: this sucks
58
60
  return category
59
61
 
62
+ @property
60
63
  def category_txt(self) -> Text | None:
61
64
  if self.name is None:
62
65
  return None
63
- elif self.category():
64
- return styled_category(self.category())
65
- elif self.is_a_mystery() or self.is_uninteresting_cc:
66
+ elif self.category:
67
+ return styled_category(self.category)
68
+ elif self.is_a_mystery or self.is_uninteresting:
66
69
  return QUESTION_MARKS_TXT
67
70
 
71
+ @property
68
72
  def email_conversation_length_in_days(self) -> int:
69
73
  return days_between(self.emails[0].timestamp, self.emails[-1].timestamp)
70
74
 
75
+ @property
71
76
  def earliest_email_at(self) -> datetime:
72
77
  return self.emails[0].timestamp
73
78
 
79
+ @property
74
80
  def earliest_email_date(self) -> date:
75
- return self.earliest_email_at().date()
81
+ return self.earliest_email_at.date()
76
82
 
83
+ @property
77
84
  def last_email_at(self) -> datetime:
78
85
  return self.emails[-1].timestamp
79
86
 
87
+ @property
80
88
  def last_email_date(self) -> date:
81
- return self.last_email_at().date()
89
+ return self.last_email_at.date()
82
90
 
91
+ @property
83
92
  def emails_by(self) -> list[Email]:
84
93
  return [e for e in self.emails if self.name == e.author]
85
94
 
95
+ @property
86
96
  def emails_to(self) -> list[Email]:
87
97
  return [
88
98
  e for e in self.emails
89
99
  if self.name in e.recipients or (self.name is None and len(e.recipients) == 0)
90
100
  ]
91
101
 
92
- def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
93
- return PERSON_LINK_BUILDERS[site](self.name_str())
94
-
95
- def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
96
- if self.name is None:
97
- return Text('')
98
-
99
- return link_text_obj(self.external_link(site), link_str or site, style=self.style())
100
-
102
+ @property
101
103
  def external_links_line(self) -> Text:
102
104
  links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
103
105
  return Text('', justify='center', style='dim').append(join_texts(links, join=' / ')) #, encloser='()'))#, encloser='‹›'))
104
106
 
107
+ @property
105
108
  def has_any_epstein_emails(self) -> bool:
106
109
  contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
107
110
  return JEFFREY_EPSTEIN in contacts
108
111
 
112
+ @property
109
113
  def highlight_group(self) -> HighlightedNames | None:
110
114
  return get_highlight_group_for_name(self.name)
111
115
 
116
+ @property
112
117
  def info_panel(self) -> Padding:
113
- """Print a panel with the name of an emailer and a few tidbits of information about them."""
118
+ """Return a `Padding` wrapping a `Panel` with the name of an emailer and a few tidbits of information about them."""
114
119
  style = 'white' if (not self.style() or self.style() == DEFAULT) else self.style()
115
120
  panel_style = f"black on {style} bold"
116
121
 
@@ -118,22 +123,23 @@ class Person:
118
123
  email_count = len(self._printable_emails())
119
124
  title_suffix = f"sent by {JEFFREY_EPSTEIN} to himself"
120
125
  else:
121
- email_count = len(self.unique_emails())
122
- num_days = self.email_conversation_length_in_days()
123
- title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
126
+ email_count = len(self.unique_emails)
127
+ num_days = self.email_conversation_length_in_days
128
+ title_suffix = f"{TO_FROM} {self.name_str} starting {self.earliest_email_date} covering {num_days:,} days"
124
129
 
125
130
  title = f"Found {email_count} emails {title_suffix}"
126
- width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
131
+ width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category) + 8)
127
132
  panel = Panel(Text(title, justify='center'), width=width, style=panel_style)
128
133
  elements: list[RenderableType] = [panel]
129
134
 
130
- if self.info_with_category():
131
- elements.append(Text(f"({self.info_with_category()})", justify='center', style=f"{style} italic"))
135
+ if self.info_with_category:
136
+ elements.append(Text(f"({self.info_with_category})", justify='center', style=f"{style} italic"))
132
137
 
133
138
  return Padding(Group(*elements), (2, 0, 1, 0))
134
139
 
140
+ @property
135
141
  def info_str(self) -> str | None:
136
- highlight_group = self.highlight_group()
142
+ highlight_group = self.highlight_group
137
143
 
138
144
  if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
139
145
  info = highlight_group.info_for(self.name)
@@ -141,90 +147,161 @@ class Person:
141
147
  if info:
142
148
  return info
143
149
 
144
- if self.is_uninteresting_cc:
145
- if self.has_any_epstein_emails():
150
+ if self.is_uninteresting and len(self.emails_by) == 0:
151
+ if self.has_any_epstein_emails:
146
152
  return UNINTERESTING_CC_INFO
147
153
  else:
148
154
  return UNINTERESTING_CC_INFO_NO_CONTACT
149
155
 
150
- def info_with_category(self) -> str:
151
- return ', '.join(without_falsey([self.category(), self.info_str()]))
152
-
156
+ @property
153
157
  def info_txt(self) -> Text | None:
154
158
  if self.name == JEFFREY_EPSTEIN:
155
159
  return Text('(emails sent by Epstein to himself are here)', style=ALT_INFO_STYLE)
156
160
  elif self.name is None:
157
161
  return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
158
- elif self.category() == JUNK:
162
+ elif self.category == JUNK:
159
163
  return Text(f"({JUNK} mail)", style='bright_black dim')
160
- elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
161
- if self.info_str() == UNINTERESTING_CC_INFO:
162
- return Text(f"({self.info_str()})", style='wheat4 dim')
164
+ elif self.is_uninteresting and (self.info_str or '').startswith(UNINTERESTING_CC_INFO):
165
+ if self.sole_cc:
166
+ return Text(f"(cc: from {self.sole_cc} only)", style='wheat4 dim')
167
+ elif self.info_str == UNINTERESTING_CC_INFO:
168
+ return Text(f"({self.info_str})", style='wheat4 dim')
163
169
  else:
164
- return Text(f"({self.info_str()})", style='plum4 dim')
165
- elif self.is_a_mystery():
170
+ return Text(f"({self.info_str})", style='plum4 dim')
171
+ elif self.is_a_mystery:
166
172
  return Text(QUESTION_MARKS, style='honeydew2 bold')
167
- elif self.info_str() is None:
173
+ elif self.info_str is None:
168
174
  if self.name in MAILING_LISTS:
169
175
  return Text('(mailing list)', style=f"pale_turquoise4 dim")
170
- elif self.category():
176
+ elif self.category:
171
177
  return Text(QUESTION_MARKS, style=self.style())
172
178
  else:
173
179
  return None
174
180
  else:
175
- return Text(self.info_str(), style=self.style())
181
+ return Text(self.info_str, style=self.style(allow_bold=False))
182
+
183
+ @property
184
+ def info_with_category(self) -> str:
185
+ return ', '.join(without_falsey([self.category, self.info_str]))
176
186
 
187
+ @property
177
188
  def internal_link(self) -> Text:
178
189
  """Kind of like an anchor link to the section of the page containing these emails."""
179
- return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
190
+ return link_text_obj(internal_link_to_emails(self.name_str), self.name_str, style=self.style())
180
191
 
192
+ @property
181
193
  def is_a_mystery(self) -> bool:
182
194
  """Return True if this is someone we theoretically could know more about."""
183
- return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting_cc)
195
+ return self.is_unstyled and not (self.is_email_address or self.info_str or self.is_uninteresting)
184
196
 
197
+ @property
185
198
  def is_email_address(self) -> bool:
186
199
  return '@' in (self.name or '')
187
200
 
201
+ @property
188
202
  def is_linkable(self) -> bool:
189
203
  """Return True if it's likely that EpsteinWeb has a page for this name."""
190
204
  if self.name is None or ' ' not in self.name:
191
205
  return False
192
- elif self.is_email_address() or '/' in self.name or QUESTION_MARKS in self.name:
206
+ elif self.is_email_address or '/' in self.name or QUESTION_MARKS in self.name:
193
207
  return False
194
208
  elif self.name in INVALID_FOR_EPSTEIN_WEB:
195
209
  return False
196
210
 
197
211
  return True
198
212
 
213
+ @property
199
214
  def is_unstyled(self) -> bool:
200
215
  """True if there's no highlight group for this name."""
201
216
  return self.style() == DEFAULT_NAME_STYLE
202
217
 
203
- def name_str(self) -> str:
204
- return self.name or UNKNOWN
205
-
218
+ @property
206
219
  def name_link(self) -> Text:
207
220
  """Will only link if it's worth linking, otherwise just a Text object."""
208
- if not self.is_linkable():
209
- return self.name_txt()
221
+ if not self.is_linkable:
222
+ return self.name_txt
210
223
  else:
211
- return Text.from_markup(link_markup(self.external_link(), self.name_str(), self.style()))
224
+ return Text.from_markup(link_markup(self.external_link(), self.name_str, self.style()))
225
+
226
+ @property
227
+ def name_str(self) -> str:
228
+ return self.name or UNKNOWN
212
229
 
230
+ @property
213
231
  def name_txt(self) -> Text:
214
232
  return styled_name(self.name)
215
233
 
234
+ @property # TODO: unused?
235
+ def should_always_truncate(self) -> bool:
236
+ """True if we want to truncate all emails to/from this user."""
237
+ return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
238
+
239
+ @property
240
+ def sole_cc(self) -> str | None:
241
+ """Return the author's name if this person sent 0 emails and only received emails from that one author."""
242
+ email_authors = uniquify([e.author for e in self.emails_to])
243
+
244
+ if len(self.unique_emails) == 1 and len(email_authors) > 0:
245
+ logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
246
+ else:
247
+ logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
248
+
249
+ if len(self.unique_emails_by) > 0:
250
+ return None
251
+
252
+ if len(email_authors) == 1:
253
+ return email_authors[0]
254
+
255
+ @property
256
+ def sort_key(self) -> list[int | str]:
257
+ """Key used to sort `Person` objects by the number of emails sent/received."""
258
+ counts = [
259
+ len(self.unique_emails),
260
+ -1 * int((self.info_str or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
261
+ -1 * int((self.info_str or '') == UNINTERESTING_CC_INFO),
262
+ int(self.has_any_epstein_emails),
263
+ ]
264
+
265
+ counts = [-1 * count for count in counts]
266
+
267
+ if args.sort_alphabetical:
268
+ return [self.name_str] + counts
269
+ else:
270
+ return counts + [self.name_str]
271
+
272
+ @property
273
+ def unique_emails(self) -> Sequence[Email]:
274
+ return Document.without_dupes(self.emails)
275
+
276
+ @property
277
+ def unique_emails_by(self) -> list[Email]:
278
+ return Document.without_dupes(self.emails_by)
279
+
280
+ @property
281
+ def unique_emails_to(self) -> list[Email]:
282
+ return Document.without_dupes(self.emails_to)
283
+
284
+ def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
285
+ return PERSON_LINK_BUILDERS[site](self.name_str)
286
+
287
+ def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
288
+ if self.name is None:
289
+ return Text('')
290
+
291
+ return link_text_obj(self.external_link(site), link_str or site, style=self.style())
292
+
216
293
  def print_emails(self) -> list[Email]:
217
294
  """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
218
- print_centered(self.info_panel())
295
+ print_centered(self.info_panel)
219
296
  self.print_emails_table()
220
297
  last_printed_email_was_duplicate = False
221
298
 
222
- if self.category() == JUNK:
299
+ if self.category == JUNK:
223
300
  logger.warning(f"Not printing junk emailer '{self.name}'")
224
301
  else:
225
302
  for email in self._printable_emails():
226
- if email.is_duplicate():
227
- console.print(Padding(email.duplicate_file_txt().append('...'), (0, 0, 0, 4)))
303
+ if email.is_duplicate:
304
+ console.print(Padding(email.duplicate_file_txt.append('...'), SKIPPED_FILE_MSG_PADDING))
228
305
  last_printed_email_was_duplicate = True
229
306
  else:
230
307
  if last_printed_email_was_duplicate:
@@ -239,42 +316,18 @@ class Person:
239
316
  table = Email.build_emails_table(self._unique_printable_emails(), self.name)
240
317
  print_centered(Padding(table, (0, 5, 0, 5)))
241
318
 
242
- if self.is_linkable():
243
- print_centered(self.external_links_line())
319
+ if self.is_linkable:
320
+ print_centered(self.external_links_line)
244
321
 
245
322
  console.line()
246
323
 
247
- def sort_key(self) -> list[int | str]:
248
- counts = [
249
- len(self.unique_emails()),
250
- -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
251
- -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
252
- int(self.has_any_epstein_emails()),
253
- ]
254
-
255
- counts = [-1 * count for count in counts]
256
-
257
- if args.sort_alphabetical:
258
- return [self.name_str()] + counts
259
- else:
260
- return counts + [self.name_str()]
261
-
262
- def style(self) -> str:
263
- return get_style_for_name(self.name)
264
-
265
- def unique_emails(self) -> Sequence[Email]:
266
- return Document.without_dupes(self.emails)
267
-
268
- def unique_emails_by(self) -> list[Email]:
269
- return Document.without_dupes(self.emails_by())
270
-
271
- def unique_emails_to(self) -> list[Email]:
272
- return Document.without_dupes(self.emails_to())
324
+ def style(self, allow_bold: bool = True) -> str:
325
+ return get_style_for_name(self.name, allow_bold=allow_bold)
273
326
 
274
327
  def _printable_emails(self):
275
328
  """For Epstein we only want to print emails he sent to himself."""
276
329
  if self.name == JEFFREY_EPSTEIN:
277
- return [e for e in self.emails if e.is_note_to_self()]
330
+ return [e for e in self.emails if e.is_note_to_self]
278
331
  else:
279
332
  return self.emails
280
333
 
@@ -282,7 +335,7 @@ class Person:
282
335
  return Document.without_dupes(self._printable_emails())
283
336
 
284
337
  def __str__(self):
285
- return f"{self.name_str()}"
338
+ return f"{self.name_str}"
286
339
 
287
340
  @staticmethod
288
341
  def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
@@ -290,8 +343,8 @@ class Person:
290
343
  highlighted = highlighted or people
291
344
  highlighted_names = [p.name for p in highlighted]
292
345
  is_selection = len(people) != len(highlighted) or args.emailers_info
293
- all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
294
- email_authors = [p for p in people if p.emails_by() and p.name]
346
+ all_emails = Person.emails_from_people(people)
347
+ email_authors = [p for p in people if p.emails_by and p.name]
295
348
  attributed_emails = [email for email in all_emails if email.author]
296
349
  footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
297
350
  f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
@@ -316,7 +369,7 @@ class Person:
316
369
  grey_idx = 0
317
370
 
318
371
  for person in people:
319
- earliest_email_date = person.earliest_email_date()
372
+ earliest_email_date = person.earliest_email_date
320
373
  is_on_page = False if show_epstein_total else person.name in highlighted_names
321
374
  year_months = (earliest_email_date.year * 12) + earliest_email_date.month
322
375
 
@@ -331,14 +384,18 @@ class Person:
331
384
 
332
385
  table.add_row(
333
386
  Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
334
- person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
335
- person.category_txt(),
336
- f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
337
- Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
338
- Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
339
- f"{person.email_conversation_length_in_days()}",
340
- person.info_txt() or '',
387
+ person.internal_link if is_on_page and not person.is_uninteresting else person.name_txt,
388
+ person.category_txt,
389
+ f"{len(person.unique_emails if show_epstein_total else person._unique_printable_emails())}",
390
+ str(len(person.unique_emails_by)) if len(person.unique_emails_by) > 0 else '',
391
+ str(len(person.unique_emails_to)) if len(person.unique_emails_to) > 0 else '',
392
+ f"{person.email_conversation_length_in_days}",
393
+ person.info_txt or '',
341
394
  style='' if show_epstein_total or is_on_page else 'dim',
342
395
  )
343
396
 
344
397
  return table
398
+
399
+ @staticmethod
400
+ def emails_from_people(people: list['Person']) -> Sequence[Email]:
401
+ return Document.uniquify(flatten([list(p.unique_emails) for p in people]))
@@ -177,6 +177,16 @@ ZUBAIR_KHAN = 'Zubair Khan'
177
177
 
178
178
  UNKNOWN = '(unknown)'
179
179
 
180
+ # DOJ files emails
181
+ ALISON_J_NATHAN = 'Alison J. Nathan'
182
+ AMIR_TAAKI = 'Amir Taaki'
183
+ BROCK_PIERCE = 'Brock Pierce'
184
+ CHRISTIAN_EVERDELL = 'Christian Everdell'
185
+ CHRISTOPHER_DILORIO = 'Christopher Dilorio'
186
+ DOUGLAS_WIGDOR = 'Douglas Wigdor'
187
+ KARYNA_SHULIAK = 'Karyna Shuliak'
188
+ STACEY_RICHMAN = 'Stacey Richman'
189
+
180
190
  # No communications but name is in the files
181
191
  BILL_GATES = 'Bill Gates'
182
192
  DONALD_TRUMP = 'Donald Trump'
@@ -216,25 +226,25 @@ UBS = 'UBS'
216
226
 
217
227
  # First and last names that should be made part of a highlighting regex for emailers
218
228
  NAMES_TO_NOT_HIGHLIGHT = """
219
- al alain alan alfredo allen alex alexander amanda andres andrew anthony
229
+ al alain alan alison alfredo allen alex alexander amanda andres andrew anthony
220
230
  bard barrett barry bennet bernard bill black bob boris brad brenner bruce
221
- caroline carolyn chris christina cohen
222
- dan daniel danny darren dave david debbie donald
231
+ cameron caroline carolyn chris christian christina cohen
232
+ dan daniel danny darren dave david debbie donald douglas
223
233
  ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
224
234
  faith fisher forget fred friendly frost fuller
225
235
  gates gerald george gold gordon
226
- haddad harry hay heather henry hill hoffman howard
236
+ haddad hanson harry hay heather henry hill hoffman howard
227
237
  ian ivan
228
238
  jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
229
- kahn karl kate katherine kelly ken kevin krassner
230
- larry laurie lawrence leon lesley linda link lisa
239
+ kafka kahn karl kate katherine kelly ken kevin krassner
240
+ larry larsen laurie lawrence leon lesley linda link lisa
231
241
  mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
232
- nancy neal new nicole norman
242
+ nancy nathan neal new nicole norman
233
243
  owen
234
- paul paula pen peter philip prince
244
+ paul paula pen peter philip pierce prince
235
245
  randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
236
- scott sean skip stanley stern stephen steve steven stone susan
237
- the thomas tim tom tony tyler
246
+ scott sean skip smith stacey stanley stern stephen steve steven stone susan
247
+ terry the thomas tim tom tony tyler
238
248
  victor
239
249
  wade waters
240
250
  y
@@ -269,6 +279,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
269
279
  def constantize_name(name: str) -> str:
270
280
  if name == 'Andrzej Duda or entourage':
271
281
  return 'ANDRZEJ_DUDA'
282
+ elif name == MIROSLAV_LAJCAK:
283
+ return 'MIROSLAV_LAJCAK'
284
+ elif name == 'Paula Heil Fisher (???)':
285
+ return 'PAULA'
272
286
 
273
287
  variable_name = remove_question_marks(name)
274
288
  variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
@@ -300,7 +314,20 @@ def extract_last_name(name: str) -> str:
300
314
  return first_last_names[-1]
301
315
 
302
316
 
317
+ def reverse_first_and_last_names(name: str) -> str:
318
+ """If there's a comma in the name in the style 'Lastname, Firstname', reverse it and remove comma."""
319
+ if '@' in name:
320
+ return name.lower()
321
+
322
+ if ', ' in name:
323
+ names = name.split(', ')
324
+ return f"{names[1]} {names[0]}"
325
+ else:
326
+ return name
327
+
328
+
303
329
  def reversed_name(name: str) -> str:
330
+ """'Jeffrey Epstein' becomes 'Epstein Jeffrey'."""
304
331
  if ' ' not in name:
305
332
  return name
306
333
 
@@ -13,6 +13,7 @@ JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.
13
13
  TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
14
14
  WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
15
15
  # EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
16
+ DOJ_2026_HTML_PATH = HTML_DIR.joinpath('doj_2026-01-30_files.html')
16
17
  URLS_ENV = '.urls.env'
17
18
  EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
18
19
 
@@ -26,6 +27,7 @@ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
26
27
  JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
27
28
  JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
28
29
  WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
30
+ DOJ_2026_URL = f"{TEXT_MSGS_URL}/{DOJ_2026_HTML_PATH.name}"
29
31
 
30
32
  SITE_URLS: dict[SiteType, str] = {
31
33
  EMAIL: ALL_EMAILS_URL,
@@ -57,29 +57,36 @@ TIMESTAMP_DIM = f"turquoise4 dim"
57
57
  # Misc
58
58
  AUTHOR = 'author'
59
59
  DEFAULT = 'default'
60
+ EFTA_PREFIX = 'EFTA'
60
61
  HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
61
62
  JSON = 'json'
62
63
  NA = 'n/a'
63
64
  REDACTED = '<REDACTED>'
64
65
  QUESTION_MARKS = '(???)'
65
66
 
66
- # Regexes
67
- ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
68
- FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
69
- FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
70
- QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
71
-
72
67
  # Document subclass names (this sucks)
73
68
  DOCUMENT_CLASS = 'Document'
69
+ DOJ_FILE_CLASS = 'DojFile'
74
70
  EMAIL_CLASS = 'Email'
75
71
  JSON_FILE_CLASS = 'JsonFile'
76
72
  MESSENGER_LOG_CLASS = 'MessengerLog'
77
73
  OTHER_FILE_CLASS = 'OtherFile'
78
74
 
75
+ # Regexes
76
+ DOJ_FILE_STEM_REGEX = re.compile(fr"{EFTA_PREFIX}\d{{8}}")
77
+ DOJ_FILE_NAME_REGEX = re.compile(fr"{DOJ_FILE_STEM_REGEX.pattern}(\.txt)?")
78
+
79
+ HOUSE_OVERSIGHT_NOV_2025_ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
80
+ HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.pattern})")
81
+ HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
82
+
83
+ QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
84
+
79
85
 
80
86
  remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
81
87
 
82
88
 
83
- def indented(s: str, spaces: int = 4) -> str:
89
+ def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
84
90
  indent = ' ' * spaces
91
+ indent += prefix
85
92
  return indent + f"\n{indent}".join(s.split('\n'))
@@ -12,6 +12,7 @@ from epstein_files.util.file_helper import coerce_file_stem
12
12
 
13
13
  # Style stuff
14
14
  ARCHIVE_LINK_COLOR = 'slate_blue3'
15
+ ARCHIVE_ALT_LINK_STYLE = 'medium_purple4 italic'
15
16
  TEXT_LINK = 'text_link'
16
17
 
17
18
  # External site names
@@ -39,6 +40,9 @@ EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
39
40
  OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
40
41
  RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
41
42
  SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
43
+ # DOJ docs
44
+ DOJ_2026_URL = 'https://www.justice.gov/epstein/doj-disclosures'
45
+ DOJ_SEARCH_URL = 'https://www.justice.gov/epstein/search'
42
46
 
43
47
  # Document source sites
44
48
  EPSTEINIFY_URL = 'https://epsteinify.com'
@@ -53,6 +57,9 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
53
57
  ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
54
58
  }
55
59
 
60
+ # Example: https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf
61
+ DOJ_2026_FILE_BASE_URL = "https://www.justice.gov/epstein/files/DataSet%20"
62
+
56
63
 
57
64
  epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
58
65
  epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
@@ -90,6 +97,16 @@ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower
90
97
  return f"{base_url}{file_stem}"
91
98
 
92
99
 
100
+ def doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
101
+ """Link to justice.gov for a DOJ file."""
102
+ return f"{DOJ_2026_FILE_BASE_URL}{dataset_id}/{file_stem}.pdf"
103
+
104
+
105
+ def jmail_doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
106
+ """Link to Jmail backup of DOJ file."""
107
+ return f"{JMAIL_URL}/drive/vol{dataset_id:05}-{file_stem.lower()}-pdf"
108
+
109
+
93
110
  def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
94
111
  url = build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id)
95
112
  return link_markup(url, coerce_file_stem(filename_or_id), style)