epstein-files 1.2.0-py3-none-any.whl → 1.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ from datetime import datetime
 
 from rich.text import Text
 
-from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
+from epstein_files.util.constant.names import ANTHONY_SCARAMUCCI, JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
 from epstein_files.util.constant.strings import TIMESTAMP_DIM
 from epstein_files.util.data import iso_timestamp
 from epstein_files.util.highlighted_group import get_style_for_name
@@ -17,6 +17,7 @@ PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
 UNCERTAIN_SUFFIX = ' (?)'
 
 DISPLAY_LAST_NAME_ONLY = [
+    ANTHONY_SCARAMUCCI,
     JEFFREY_EPSTEIN,
     STEVE_BANNON,
 ]
@@ -59,7 +60,7 @@ class TextMessage:
         try:
             timestamp_str = iso_timestamp(self.parse_timestamp())
         except Exception as e:
-            logger.warning(f"Failed to parse timestamp for {self}")
+            logger.info(f"Failed to parse timestamp for {self}")
             timestamp_str = self.timestamp_str
 
         return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
@@ -22,7 +22,7 @@ from epstein_files.util.data import days_between, escape_single_quotes, remove_t
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import add_cols_to_table, build_table, highlighter
 from epstein_files.util.logging import logger
 
 FIRST_FEW_LINES = 'First Few Lines'
@@ -209,39 +209,8 @@ class OtherFile(Document):
         if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
             self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
 
-    @staticmethod
-    def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
-        counts = defaultdict(int)
-        category_bytes = defaultdict(int)
-
-        for file in files:
-            if file.category() is None:
-                logger.warning(f"file {file.file_id} has no category")
-
-            counts[file.category()] += 1
-            category_bytes[file.category()] += file.file_size()
-
-        table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
-        table.columns[-1].justify = 'right'
-        table.columns[0].min_width = 14
-        table.columns[-1].style = 'dim'
-
-        for (category, count) in sort_dict(counts):
-            category_files = [f for f in files if f.category() == category]
-            known_author_count = Document.known_author_count(category_files)
-
-            table.add_row(
-                styled_category(category),
-                str(count),
-                str(known_author_count),
-                str(count - known_author_count),
-                file_size_to_str(category_bytes[category]),
-            )
-
-        return table
-
-    @staticmethod
-    def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    @classmethod
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
         """Build a table of OtherFile documents."""
         table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
@@ -272,3 +241,16 @@ class OtherFile(Document):
         )
 
         return table
+
+    @classmethod
+    def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        categories = uniquify([f.category() for f in files])
+        categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
+        table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
+
+        for category in categories:
+            category_files = [f for f in files if f.category() == category]
+            table.add_row(styled_category(category), *cls.files_info_row(category_files))
+
+        table.columns = table.columns[:-2] + [table.columns[-1]]  # Remove unknown author col
+        return table
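The new summary_table() orders categories by descending file count before emitting one row per category. A minimal standalone sketch of that ordering idea (plain Python with made-up category data; uniquify(), file_info_table() and files_info_row() are package helpers not shown in this diff):

    from collections import Counter

    # Stand-in data; in the package each element would be an OtherFile's category().
    file_categories = ['article', 'book', 'article', 'finance', 'article', 'book']

    # Same ordering idea as summary_table(): most common category first.
    counts = Counter(file_categories)
    categories = sorted(counts, key=lambda c: -counts[c])
    print(categories)  # ['article', 'book', 'finance']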
@@ -9,6 +9,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Sequence, Type, cast
 
+from rich.table import Table
+
 from epstein_files.documents.document import Document
 from epstein_files.documents.email import DETECT_EMAIL_REGEX, Email
 from epstein_files.documents.json_file import JsonFile
@@ -22,7 +24,6 @@ from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import DOCS_DIR, args, logger
 from epstein_files.util.file_helper import file_size_str
 from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
-from epstein_files.util.rich import NA_TXT, add_cols_to_table, build_table, console, print_centered
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
 
@@ -31,9 +32,13 @@ PICKLED_PATH = Path("the_epstein_files.pkl.gz")
 SLOW_FILE_SECONDS = 1.0
 
 EMAILS_WITH_UNINTERESTING_CCS = [
-    '025329',  # Krassner
-    '024923',  # Krassner
-    '033568',  # Krassner
+    '025329',  # Krassner
+    '024923',  # Krassner
+    '033568',  # Krassner
+]
+
+EMAILS_WITH_UNINTERESTING_BCCS = [
+    '014797_1',  # Ross Gow
 ]
 
 
@@ -45,7 +50,7 @@ class EpsteinFiles:
     json_files: list[JsonFile] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
     timer: Timer = field(default_factory=lambda: Timer())
-    uninteresting_ccs: list[Name] = field(init=False)
+    uninteresting_ccs: list[Name] = field(default_factory=list)
 
     def __post_init__(self):
         """Iterate through files and build appropriate objects."""
@@ -88,13 +93,12 @@ class EpsteinFiles:
         if PICKLED_PATH.exists() and not args.overwrite_pickle and not args.skip_other_files:
             with gzip.open(PICKLED_PATH, 'rb') as file:
                 epstein_files = pickle.load(file)
-                epstein_files.timer = timer
                 timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
-                epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
+                timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
                 return epstein_files
 
         logger.warning(f"Building new cache file, this will take a few minutes...")
-        epstein_files = EpsteinFiles(timer=timer)
+        epstein_files = EpsteinFiles()
 
         if args.skip_other_files:
             logger.warning(f"Not writing pickled data because --skip-other-files")
@@ -235,7 +239,7 @@ class EpsteinFiles:
         return json.dumps(metadata, indent=4, sort_keys=True)
 
     def non_duplicate_emails(self) -> list[Email]:
-        return [email for email in self.emails if not email.is_duplicate()]
+        return Document.without_dupes(self.emails)
 
     def non_json_other_files(self) -> list[OtherFile]:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
@@ -253,34 +257,20 @@ class EpsteinFiles:
             for name in names
         ]
 
-    def print_files_summary(self) -> None:
-        table = build_table('File Overview')
-        add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
-        table.columns[1].justify = 'right'
-
-        def add_row(label: str, docs: list):
-            known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
-
-            table.add_row(
-                label,
-                f"{len(docs):,}",
-                f"{known:,}" if known is not None else NA_TXT,
-                f"{len(docs) - known:,}" if known is not None else NA_TXT,
-                f"{len([d for d in docs if d.is_duplicate()])}",
-            )
-
-        add_row('Emails', self.emails)
-        add_row('iMessage Logs', self.imessage_logs)
-        add_row('JSON Data', self.json_files)
-        add_row('Other', self.non_json_other_files())
-        print_centered(table)
-        console.line()
+    def overview_table(self) -> Table:
+        table = Document.file_info_table('Files Overview', 'File Type')
+        table.add_row('Emails', *Document.files_info_row(self.emails))
+        table.add_row('iMessage Logs', *Document.files_info_row(self.imessage_logs))
+        table.add_row('JSON Data', *Document.files_info_row(self.json_files, True))
+        table.add_row('Other', *Document.files_info_row(self.non_json_other_files()))
+        return table
 
     def unknown_recipient_ids(self) -> list[str]:
         """IDs of emails whose recipient is not known."""
         return sorted([e.file_id for e in self.emails if None in e.recipients or not e.recipients])
 
     def uninteresting_emailers(self) -> list[Name]:
+        """Emailers whom we don't want to print a separate section for because they're just CCed."""
         if '_uninteresting_emailers' not in vars(self):
             self._uninteresting_emailers = sorted(uniquify(UNINTERESTING_EMAILERS + self.uninteresting_ccs))
 
@@ -306,8 +296,8 @@ class EpsteinFiles:
         self.emails = Document.sort_by_timestamp(self.emails)
 
     def _set_uninteresting_ccs(self) -> None:
-        ross_gow_email = self.email_for_id('014797_1')
-        self.uninteresting_ccs = copy(cast(list[Name], ross_gow_email.header.bcc))
+        for id in EMAILS_WITH_UNINTERESTING_BCCS:
+            self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
 
         for id in EMAILS_WITH_UNINTERESTING_CCS:
             self.uninteresting_ccs += self.email_for_id(id).recipients
@@ -344,5 +334,4 @@ def document_cls(doc: Document) -> Type[Document]:
 
 
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
-    docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
-    return [json_safe(d.metadata()) for d in docs_sorted_by_id]
+    return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]
epstein_files/person.py CHANGED
@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from datetime import datetime, date
+from typing import Sequence
 
 from rich.console import Group, RenderableType
 from rich.padding import Padding
@@ -18,13 +19,14 @@ from epstein_files.util.data import days_between, flatten, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
     get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
-from epstein_files.util.rich import GREY_NUMBERS, LAST_TIMESTAMP_STYLE, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
+from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
 
 ALT_INFO_STYLE = 'medium_purple4'
 CC = 'cc:'
 MIN_AUTHOR_PANEL_WIDTH = 80
 EMAILER_INFO_TITLE = 'Email Conversations Will Appear'
-UNINTERESTING_CC_INFO = "CC: or BCC: recipient only"
+UNINTERESTING_CC_INFO = "cc: or bcc: recipient only"
+UNINTERESTING_CC_INFO_NO_CONTACT = f"{UNINTERESTING_CC_INFO}, no direct contact with Epstein"
 
 INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + MAILING_LISTS + [
     'ACT for America',
@@ -100,6 +102,10 @@ class Person:
         links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
         return Text('', justify='center', style='dim').append(join_texts(links, join=' / '))  #, encloser='()'))#, encloser='‹›'))
 
+    def has_any_epstein_emails(self) -> bool:
+        contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
+        return JEFFREY_EPSTEIN in contacts
+
     def highlight_group(self) -> HighlightedNames | None:
         return get_highlight_group_for_name(self.name)
@@ -114,7 +120,7 @@ class Person:
         else:
             email_count = len(self.unique_emails())
             num_days = self.email_conversation_length_in_days()
-            title_suffix = f"to/from {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
+            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
 
         title = f"Found {email_count} emails {title_suffix}"
         width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -130,9 +136,16 @@ class Person:
         highlight_group = self.highlight_group()
 
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
-            return highlight_group.info_for(self.name)
-        elif self.is_uninteresting_cc:
-            return UNINTERESTING_CC_INFO
+            info = highlight_group.info_for(self.name)
+
+            if info:
+                return info
+
+        if self.is_uninteresting_cc:
+            if self.has_any_epstein_emails():
+                return UNINTERESTING_CC_INFO
+            else:
+                return UNINTERESTING_CC_INFO_NO_CONTACT
 
     def info_with_category(self) -> str:
         return ', '.join(without_falsey([self.category(), self.info_str()]))
@@ -143,18 +156,27 @@ class Person:
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
-            return Text(f"({JUNK} mail)", style='tan dim')
+            return Text(f"({JUNK} mail)", style='bright_black dim')
+        elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
+            if self.info_str() == UNINTERESTING_CC_INFO:
+                return Text(f"({self.info_str()})", style='wheat4 dim')
+            else:
+                return Text(f"({self.info_str()})", style='plum4 dim')
         elif self.is_a_mystery():
-            return Text(QUESTION_MARKS, style='magenta dim')
-        elif self.is_uninteresting_cc and self.info_str() == UNINTERESTING_CC_INFO:
-            return Text(f"({self.info_str()})", style='wheat4 dim')
+            return Text(QUESTION_MARKS, style='honeydew2 bold')
         elif self.info_str() is None:
             if self.name in MAILING_LISTS:
-                return Text('(mailing list)', style=f"{self.style()} dim")
+                return Text('(mailing list)', style=f"pale_turquoise4 dim")
+            elif self.category():
+                return Text(QUESTION_MARKS, style=self.style())
             else:
                 return None
         else:
-            return Text(self.info_str())
+            return Text(self.info_str(), style=self.style())
+
+    def internal_link(self) -> Text:
+        """Kind of like an anchor link to the section of the page containing these emails."""
+        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
 
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theoretically could know more about."""
@@ -214,8 +236,8 @@ class Person:
         return self._printable_emails()
 
     def print_emails_table(self) -> None:
-        emails = [email for email in self._printable_emails() if not email.is_duplicate()]  # Remove dupes
-        print_centered(Padding(Email.build_emails_table(emails, self.name), (0, 5, 0, 5)))
+        table = Email.build_emails_table(self._unique_printable_emails(), self.name)
+        print_centered(Padding(table, (0, 5, 0, 5)))
 
         if self.is_linkable():
             print_centered(self.external_links_line())
@@ -223,7 +245,13 @@ class Person:
         console.line()
 
     def sort_key(self) -> list[int | str]:
-        counts = [len(self.unique_emails())]
+        counts = [
+            len(self.unique_emails()),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails()),
+        ]
+
         counts = [-1 * count for count in counts]
 
         if args.sort_alphabetical:
@@ -234,14 +262,14 @@ class Person:
     def style(self) -> str:
         return get_style_for_name(self.name)
 
-    def unique_emails(self) -> list[Email]:
-        return [email for email in self.emails if not email.is_duplicate()]
+    def unique_emails(self) -> Sequence[Email]:
+        return Document.without_dupes(self.emails)
 
     def unique_emails_by(self) -> list[Email]:
-        return [email for email in self.emails_by() if not email.is_duplicate()]
+        return Document.without_dupes(self.emails_by())
 
     def unique_emails_to(self) -> list[Email]:
-        return [email for email in self.emails_to() if not email.is_duplicate()]
+        return Document.without_dupes(self.emails_to())
 
     def _printable_emails(self):
         """For Epstein we only want to print emails he sent to himself."""
@@ -250,24 +278,32 @@ class Person:
         else:
            return self.emails
 
+    def _unique_printable_emails(self):
+        return Document.without_dupes(self._printable_emails())
+
     def __str__(self):
         return f"{self.name_str()}"
 
     @staticmethod
-    def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None) -> Table:
+    def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
         """Table of info about emailers."""
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
-        is_selection = len(people) != len(highlighted) or args.emailers_info_png
+        is_selection = len(people) != len(highlighted) or args.emailers_info
+        all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
+        email_authors = [p for p in people if p.emails_by() and p.name]
+        attributed_emails = [email for email in all_emails if email.author]
+        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
+                 f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
 
         if is_selection:
-            title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (see ", style=TABLE_TITLE_STYLE)
-            title.append(THE_OTHER_PAGE_TXT).append(" for the rest)")
+            title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
+            title.append(THE_OTHER_PAGE_TXT).append(" has the rest)")
        else:
            title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
 
-        table = build_table(title)
-        table.add_column('Start')
+        table = build_table(title, caption=footer)
+        table.add_column('First')
         table.add_column('Name', max_width=24, no_wrap=True)
         table.add_column('Category', justify='left', style='dim italic')
         table.add_column('Num', justify='right', style='white')
@@ -281,6 +317,7 @@
 
         for person in people:
             earliest_email_date = person.earliest_email_date()
+            is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
 
             # Color year rollovers more brightly
@@ -294,57 +331,14 @@
 
            table.add_row(
                Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.name_txt(),  # TODO: make link?
+                person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
                person.category_txt(),
-                f"{len(person._printable_emails())}",
-                f"{len(person.unique_emails_by())}",
-                f"{len(person.unique_emails_to())}",
+                f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
+                Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
+                Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
                f"{person.email_conversation_length_in_days()}",
                person.info_txt() or '',
-                style='' if person.name in highlighted_names else 'dim',
+                style='' if show_epstein_total or is_on_page else 'dim',
            )
 
        return table
-
-    @staticmethod
-    def emailer_stats_table(people: list['Person']) -> Table:
-        email_authors = [p for p in people if p.emails_by() and p.name]
-        all_emails = Document.uniquify(flatten([p.unique_emails() for p in people]))
-        attributed_emails = [email for email in all_emails if email.author]
-        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}"
-        footer = f"{footer} out of {len(attributed_emails):,} emails)"
-
-        counts_table = build_table(
-            f"All {len(email_authors)} People Who Sent or Received an Email in the Files",
-            caption=footer,
-            cols=[
-                'Name',
-                {'name': 'Count', 'justify': 'right', 'style': 'bold bright_white'},
-                {'name': 'Sent', 'justify': 'right', 'style': 'gray74'},
-                {'name': 'Recv', 'justify': 'right', 'style': 'gray74'},
-                {'name': 'First', 'style': TIMESTAMP_STYLE},
-                {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
-                {'name': 'Days', 'justify': 'right', 'style': 'dim'},
-                JMAIL,
-                EPSTEIN_MEDIA,
-                EPSTEIN_WEB,
-                'Twitter',
-            ]
-        )
-
-        for person in sorted(people, key=lambda person: person.sort_key()):
-            counts_table.add_row(
-                person.name_link(),
-                f"{len(person.unique_emails()):,}",
-                f"{len(person.unique_emails_by()):,}",
-                f"{len(person.unique_emails_to()):,}",
-                str(person.earliest_email_date()),
-                str(person.last_email_date()),
-                f"{person.email_conversation_length_in_days()}",
-                person.external_link_txt(JMAIL),
-                person.external_link_txt(EPSTEIN_MEDIA) if person.is_linkable() else '',
-                person.external_link_txt(EPSTEIN_WEB) if person.is_linkable() else '',
-                person.external_link_txt(TWITTER),
-            )
-
-        return counts_table
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
 DONALD_TRUMP = 'Donald Trump'
 EDUARDO_ROBLES = 'Eduardo Robles'
 EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
+EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
 EHUD_BARAK = 'Ehud Barak'
 ERIC_ROTH = 'Eric Roth'
 FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
 NADIA_MARCINKO = 'Nadia Marcinko'
 NEAL_KASSELL = 'Neal Kassell'
 NICHOLAS_RIBIS = 'Nicholas Ribis'
+NILI_PRIELL_BARAK = 'Nili Priell Barak'
 NOAM_CHOMSKY = 'Noam Chomsky'
 NORMAN_D_RAU = 'Norman D. Rau'
 OLIVIER_COLOM = 'Olivier Colom'
@@ -214,23 +216,23 @@ UBS = 'UBS'
 
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
-    al alain alan alfredo allen alex alexander amanda andres andrew
-    bard barrett barry bill black bob boris brad bruce
-    carolyn chris christina
-    dan daniel danny darren dave david donald
-    ed edward edwards enterprise enterprises entourage epstein eric erika etienne
-    faith forget fred friendly frost fuller
-    gerald george gold gordon
-    haddad harry hay heather henry hill hoffman
+    al alain alan alfredo allen alex alexander amanda andres andrew anthony
+    bard barrett barry bennet bernard bill black bob boris brad brenner bruce
+    caroline carolyn chris christina cohen
+    dan daniel danny darren dave david debbie donald
+    ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
+    faith fisher forget fred friendly frost fuller
+    gates gerald george gold gordon
+    haddad harry hay heather henry hill hoffman howard
     ian ivan
     jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
     kahn karl kate katherine kelly ken kevin krassner
     larry laurie lawrence leon lesley linda link lisa
-    mann marc marie mark martin melanie michael mike miller mitchell miles morris moskowitz
-    nancy neal new nicole
+    mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
+    nancy neal new nicole norman
     owen
     paul paula pen peter philip prince
-    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
+    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
     scott sean skip stanley stern stephen steve steven stone susan
     the thomas tim tom tony tyler
     victor
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
     baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
-    davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+    davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
    edmond elizabeth emily entwistle erik evelyn
    ferguson flachsbart francis franco frank
    gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -296,3 +298,10 @@ def extract_last_name(name: str) -> str:
         return ' '.join(first_last_names[-2:])
     else:
         return first_last_names[-1]
+
+
+def reversed_name(name: str) -> str:
+    if ' ' not in name:
+        return name
+
+    return f"{extract_last_name(name)}, {extract_first_name(name)}"
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
+from epstein_files.util.logging import logger
 
 # Files output by the code
 HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
 
 # Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh!
+# NOTE: don't rename these variables without changing deploy.sh
+GH_REPO_NAME = 'epstein_text_messages'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
 CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
 def make_clean() -> None:
     """Delete all build artifacts."""
     for build_file in BUILD_ARTIFACTS:
-        if build_file.exists():
-            print(f"Removing build file '{build_file}'...")
-            build_file.unlink()
+        for file in [build_file, Path(f"{build_file}.txt")]:
+            if file.exists():
+                logger.warning(f"Removing build file '{file}'...")
+                file.unlink()
@@ -9,7 +9,6 @@ ARTICLE = 'article'
 BOOK = 'book'
 BUSINESS = 'business'
 CONFERENCE = 'conference'
-ENTERTAINER = 'entertainer'
 FINANCE = 'finance'
 FRIEND = 'friend'
 FLIGHT_LOG = 'flight log'
@@ -65,7 +64,8 @@ REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
 
 # Regexes
-FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
+ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
 FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
@@ -6,6 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.output_files import *
+from epstein_files.util.constant.strings import remove_question_marks
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_stem
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
 ROLLCALL = 'RollCall'
 TWITTER = 'search X'
 
-GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
+GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
 EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+TO_FROM = 'to/from'
 
 extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
@@ -33,6 +35,7 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 # External URLs
 COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
 COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
+EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
 OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
 RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
 SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
@@ -71,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
 
-
 PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
     EPSTEIN_MEDIA: epstein_media_person_url,
     EPSTEIN_WEB: epstein_web_person_url,
@@ -97,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
     return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
 
 
+def internal_link_to_emails(name: str) -> str:
+    """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
+    search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
+    return f"{this_site_url()}#:~:text={search_term}"
+
+
 def link_markup(
     url: str,
     link_text: str | None = None,
@@ -120,6 +128,10 @@ def other_site_url() -> str:
     return SITE_URLS[other_site_type()]
 
 
+def this_site_url() -> str:
+    return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
+
+
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
 THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
 THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)