epstein-files 1.2.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import json
2
2
  import re
3
3
  from dataclasses import asdict, dataclass, field
4
4
 
5
- from epstein_files.util.constant.strings import AUTHOR, REDACTED
5
+ from epstein_files.util.constant.strings import AUTHOR, REDACTED, indented
6
6
  from epstein_files.util.constants import ALL_CONFIGS
7
7
  from epstein_files.util.doc_cfg import EmailCfg
8
8
  from epstein_files.util.logging import logger
@@ -13,7 +13,10 @@ ON_BEHALF_OF = 'on behalf of'
13
13
  TO_FIELDS = ['bcc', 'cc', 'to']
14
14
  EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
15
15
 
16
- HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments|Classification|Flag):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
16
+ FIELD_PATTERNS = ['Date', 'From', 'Sent', 'To', r"C[cC]", r"B[cC][cC]", 'Importance', 'Subject', 'Attachments', 'Classification', 'Flag', 'Reply-To']
17
+ FIELDS_PATTERN = '|'.join(FIELD_PATTERNS)
18
+ FIELDS_COLON_PATTERN = fr"^({FIELDS_PATTERN}):"
19
+ HEADER_REGEX_STR = fr"(((?:(?:{FIELDS_PATTERN}|Bee):|on behalf of ?)(?! +(by |from my|via )).*\n){{3,}})"
17
20
  EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
18
21
  EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
19
22
  EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL) # Match up to the next email header section
@@ -53,6 +56,7 @@ class EmailHeader:
53
56
  importance: str | None = None
54
57
  attachments: str | None = None
55
58
  to: list[str] | None = None
59
+ reply_to: str | None = None
56
60
 
57
61
  def __post_init__(self):
58
62
  self.num_header_rows = len(self.field_names)
@@ -95,13 +99,10 @@ class EmailHeader:
95
99
  logger.info(f"{log_prefix}, trying next line...")
96
100
  num_headers += 1
97
101
  value = email_lines[i + num_headers]
98
- elif BAD_EMAILER_REGEX.match(value):
102
+ elif BAD_EMAILER_REGEX.match(value) or value.startswith('http'):
99
103
  logger.info(f"{log_prefix}, decrementing num_headers and skipping...")
100
104
  num_headers -= 1
101
105
  continue
102
- elif value.startswith('http'):
103
- logger.info(f"{log_prefix}, using empty string instead...")
104
- value = ''
105
106
 
106
107
  value = [v.strip() for v in value.split(';') if len(v.strip()) > 0]
107
108
 
@@ -110,7 +111,12 @@ class EmailHeader:
110
111
  self.num_header_rows = len(self.field_names) + num_headers
111
112
  self.header_chars = '\n'.join(email_lines[0:self.num_header_rows])
112
113
  log_msg = f"Corrected empty header using {self.num_header_rows} lines to:\n"
113
- logger.debug(f"{log_msg}{self}\n\nTop lines:\n\n%s", '\n'.join(email_lines[0:(num_headers + 1) * 2]))
114
+
115
+ logger.warning(
116
+ f"{log_msg}{self}\n\n[top lines]:\n\n%s\n\n[body_lines]:\n\n%s\n\n",
117
+ indented('\n'.join(email_lines[0:(num_headers + 1) * 2]), prefix='> '),
118
+ indented('\n'.join(email_lines[self.num_header_rows:self.num_header_rows + 5]), prefix='> '),
119
+ )
114
120
 
115
121
  def rewrite_header(self) -> str:
116
122
  header_fields = {}
@@ -151,7 +157,7 @@ class EmailHeader:
151
157
  #logger.debug(f"extracting header line: '{line}'")
152
158
  key, value = [element.strip() for element in line.split(':', 1)]
153
159
  value = value.rstrip('_')
154
- key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower())
160
+ key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower().replace('-', '_'))
155
161
  key = 'bcc' if key == 'bee' else key
156
162
 
157
163
  if kw_args.get(key):
@@ -161,6 +167,9 @@ class EmailHeader:
161
167
 
162
168
  field_names.append(key)
163
169
 
170
+ if key == 'reply_to':
171
+ logger.warning(f"Found value for Reply-To field: '{value}'")
172
+
164
173
  if key in TO_FIELDS:
165
174
  recipients = [element.strip() for element in value.split(';')]
166
175
  recipients = [r for r in recipients if len(r) > 0]
@@ -122,8 +122,8 @@ class OtherFile(Document):
122
122
 
123
123
  return Text(escape(self.preview_text()))
124
124
 
125
- def is_interesting(self):
126
- """False for lame prefixes, duplicates, and other boring files."""
125
+ def is_interesting(self) -> bool:
126
+ """Overloaded. False for lame prefixes, duplicates, and other boring files."""
127
127
  info_sentences = self.info()
128
128
 
129
129
  if self.is_duplicate():
@@ -164,8 +164,8 @@ class OtherFile(Document):
164
164
 
165
165
  def _extract_timestamp(self) -> datetime | None:
166
166
  """Return configured timestamp or value extracted by scanning text with datefinder."""
167
- if self.config and self.config.timestamp:
168
- return self.config.timestamp
167
+ if self.config and self.config.timestamp():
168
+ return self.config.timestamp()
169
169
  elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
170
170
  return None
171
171
 
@@ -210,9 +210,10 @@ class OtherFile(Document):
210
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
211
211
 
212
212
  @classmethod
213
- def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
213
+ def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '', title: str = '') -> Table:
214
214
  """Build a table of OtherFile documents."""
215
- table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
215
+ title = title or f'{title_pfx}Other Files Details in Chronological Order'
216
+ table = build_table(title, show_lines=True, title_justify='left' if title else 'center')
216
217
  table.add_column('File', justify='center', width=FILENAME_LENGTH)
217
218
  table.add_column('Date', justify='center')
218
219
  table.add_column('Size', justify='right', style='dim')
@@ -244,6 +245,7 @@ class OtherFile(Document):
244
245
 
245
246
  @classmethod
246
247
  def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
248
+ """Table showing file count by category."""
247
249
  categories = uniquify([f.category() for f in files])
248
250
  categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
249
251
  table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
@@ -84,6 +84,7 @@ class EpsteinFiles:
84
84
  self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
85
85
  self._set_uninteresting_ccs()
86
86
  self._copy_duplicate_email_properties()
87
+ self._find_email_attachments_and_set_is_first_for_user()
87
88
 
88
89
  @classmethod
89
90
  def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
@@ -123,6 +124,9 @@ class EpsteinFiles:
123
124
 
124
125
  lines = doc.matching_lines(pattern)
125
126
 
127
+ if args.min_line_length:
128
+ lines = [line for line in lines if len(line.line) > args.min_line_length]
129
+
126
130
  if len(lines) > 0:
127
131
  results.append(SearchResult(doc, lines))
128
132
 
@@ -251,7 +255,7 @@ class EpsteinFiles:
251
255
  name=name,
252
256
  emails=self.emails_for(name),
253
257
  imessage_logs=self.imessage_logs_for(name),
254
- is_uninteresting_cc=name in self.uninteresting_emailers(),
258
+ is_uninteresting=name in self.uninteresting_emailers(),
255
259
  other_files=[f for f in self.other_files if name and name == f.author]
256
260
  )
257
261
  for name in names
@@ -276,6 +280,17 @@ class EpsteinFiles:
276
280
 
277
281
  return self._uninteresting_emailers
278
282
 
283
+ def _find_email_attachments_and_set_is_first_for_user(self) -> None:
284
+ for file in self.other_files:
285
+ if file.config and file.config.attached_to_email_id:
286
+ email = self.email_for_id(file.config.attached_to_email_id)
287
+ file.warn(f"Attaching to {email}")
288
+ email.attached_docs.append(file)
289
+
290
+ for emailer in self.emailers():
291
+ first_email = emailer.emails[0]
292
+ first_email._is_first_for_user = True
293
+
279
294
  def _copy_duplicate_email_properties(self) -> None:
280
295
  """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
281
296
  for email in self.emails:
@@ -297,7 +312,7 @@ class EpsteinFiles:
297
312
 
298
313
  def _set_uninteresting_ccs(self) -> None:
299
314
  for id in EMAILS_WITH_UNINTERESTING_BCCS:
300
- self.uninteresting_ccs += copy(cast(list[Name], self.email_for_id(id).header.bcc))
315
+ self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
301
316
 
302
317
  for id in EMAILS_WITH_UNINTERESTING_CCS:
303
318
  self.uninteresting_ccs += self.email_for_id(id).recipients
@@ -334,5 +349,4 @@ def document_cls(doc: Document) -> Type[Document]:
334
349
 
335
350
 
336
351
  def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
337
- docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
338
- return [json_safe(d.metadata()) for d in docs_sorted_by_id]
352
+ return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]
epstein_files/person.py CHANGED
@@ -9,13 +9,13 @@ from rich.table import Table
9
9
  from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.document import Document
12
- from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
12
+ from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
13
13
  from epstein_files.documents.messenger_log import MessengerLog
14
14
  from epstein_files.documents.other_file import OtherFile
15
15
  from epstein_files.util.constant.strings import *
16
16
  from epstein_files.util.constant.urls import *
17
17
  from epstein_files.util.constants import *
18
- from epstein_files.util.data import days_between, flatten, without_falsey
18
+ from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
19
19
  from epstein_files.util.env import args
20
20
  from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
21
21
  get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
@@ -42,7 +42,7 @@ class Person:
42
42
  emails: list[Email] = field(default_factory=list)
43
43
  imessage_logs: list[MessengerLog] = field(default_factory=list)
44
44
  other_files: list[OtherFile] = field(default_factory=list)
45
- is_uninteresting_cc: bool = False
45
+ is_uninteresting: bool = False
46
46
 
47
47
  def __post_init__(self):
48
48
  self.emails = Document.sort_by_timestamp(self.emails)
@@ -62,7 +62,7 @@ class Person:
62
62
  return None
63
63
  elif self.category():
64
64
  return styled_category(self.category())
65
- elif self.is_a_mystery() or self.is_uninteresting_cc:
65
+ elif self.is_a_mystery() or self.is_uninteresting:
66
66
  return QUESTION_MARKS_TXT
67
67
 
68
68
  def email_conversation_length_in_days(self) -> int:
@@ -120,7 +120,7 @@ class Person:
120
120
  else:
121
121
  email_count = len(self.unique_emails())
122
122
  num_days = self.email_conversation_length_in_days()
123
- title_suffix = f"to/from {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
123
+ title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
124
124
 
125
125
  title = f"Found {email_count} emails {title_suffix}"
126
126
  width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -136,8 +136,12 @@ class Person:
136
136
  highlight_group = self.highlight_group()
137
137
 
138
138
  if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
139
- return highlight_group.info_for(self.name)
140
- elif self.is_uninteresting_cc:
139
+ info = highlight_group.info_for(self.name)
140
+
141
+ if info:
142
+ return info
143
+
144
+ if self.is_uninteresting and len(self.emails_by()) == 0:
141
145
  if self.has_any_epstein_emails():
142
146
  return UNINTERESTING_CC_INFO
143
147
  else:
@@ -152,9 +156,11 @@ class Person:
152
156
  elif self.name is None:
153
157
  return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
154
158
  elif self.category() == JUNK:
155
- return Text(f"({JUNK} mail)", style='tan dim')
156
- elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
157
- if self.info_str() == UNINTERESTING_CC_INFO:
159
+ return Text(f"({JUNK} mail)", style='bright_black dim')
160
+ elif self.is_uninteresting and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
161
+ if self.sole_cc():
162
+ return Text(f"(cc: from {self.sole_cc()} only)", style='wheat4 dim')
163
+ elif self.info_str() == UNINTERESTING_CC_INFO:
158
164
  return Text(f"({self.info_str()})", style='wheat4 dim')
159
165
  else:
160
166
  return Text(f"({self.info_str()})", style='plum4 dim')
@@ -168,11 +174,30 @@ class Person:
168
174
  else:
169
175
  return None
170
176
  else:
171
- return Text(self.info_str())
177
+ return Text(self.info_str(), style=self.style(allow_bold=False))
178
+
179
+ def internal_link(self) -> Text:
180
+ """Kind of like an anchor link to the section of the page containing these emails."""
181
+ return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
172
182
 
173
183
  def is_a_mystery(self) -> bool:
174
184
  """Return True if this is someone we theroetically could know more about."""
175
- return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting_cc)
185
+ return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting)
186
+
187
+ def sole_cc(self) -> str | None:
188
+ """Return name if this person sent 0 emails and received CC from only one that name."""
189
+ email_authors = uniquify([e.author for e in self.emails_to()])
190
+
191
+ if len(self.unique_emails()) == 1 and len(email_authors) > 0:
192
+ logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
193
+ else:
194
+ logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
195
+
196
+ if len(self.unique_emails_by()) > 0:
197
+ return None
198
+
199
+ if len(email_authors) == 1:
200
+ return email_authors[0]
176
201
 
177
202
  def is_email_address(self) -> bool:
178
203
  return '@' in (self.name or '')
@@ -188,6 +213,10 @@ class Person:
188
213
 
189
214
  return True
190
215
 
216
+ def should_always_truncate(self) -> bool:
217
+ """True if we want to truncate all emails to/from this user."""
218
+ return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
219
+
191
220
  def is_unstyled(self) -> bool:
192
221
  """True if there's no highlight group for this name."""
193
222
  return self.style() == DEFAULT_NAME_STYLE
@@ -237,7 +266,13 @@ class Person:
237
266
  console.line()
238
267
 
239
268
  def sort_key(self) -> list[int | str]:
240
- counts = [len(self.unique_emails()), int(self.has_any_epstein_emails())]
269
+ counts = [
270
+ len(self.unique_emails()),
271
+ -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
272
+ -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
273
+ int(self.has_any_epstein_emails()),
274
+ ]
275
+
241
276
  counts = [-1 * count for count in counts]
242
277
 
243
278
  if args.sort_alphabetical:
@@ -245,8 +280,8 @@ class Person:
245
280
  else:
246
281
  return counts + [self.name_str()]
247
282
 
248
- def style(self) -> str:
249
- return get_style_for_name(self.name)
283
+ def style(self, allow_bold: bool = True) -> str:
284
+ return get_style_for_name(self.name, allow_bold=allow_bold)
250
285
 
251
286
  def unique_emails(self) -> Sequence[Email]:
252
287
  return Document.without_dupes(self.emails)
@@ -276,6 +311,11 @@ class Person:
276
311
  highlighted = highlighted or people
277
312
  highlighted_names = [p.name for p in highlighted]
278
313
  is_selection = len(people) != len(highlighted) or args.emailers_info
314
+ all_emails = Person.emails_from_people(people)
315
+ email_authors = [p for p in people if p.emails_by() and p.name]
316
+ attributed_emails = [email for email in all_emails if email.author]
317
+ footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
318
+ f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
279
319
 
280
320
  if is_selection:
281
321
  title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
@@ -283,7 +323,7 @@ class Person:
283
323
  else:
284
324
  title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
285
325
 
286
- table = build_table(title)
326
+ table = build_table(title, caption=footer)
287
327
  table.add_column('First')
288
328
  table.add_column('Name', max_width=24, no_wrap=True)
289
329
  table.add_column('Category', justify='left', style='dim italic')
@@ -298,6 +338,7 @@ class Person:
298
338
 
299
339
  for person in people:
300
340
  earliest_email_date = person.earliest_email_date()
341
+ is_on_page = False if show_epstein_total else person.name in highlighted_names
301
342
  year_months = (earliest_email_date.year * 12) + earliest_email_date.month
302
343
 
303
344
  # Color year rollovers more brightly
@@ -311,14 +352,18 @@ class Person:
311
352
 
312
353
  table.add_row(
313
354
  Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
314
- person.name_txt(), # TODO: make link?
355
+ person.internal_link() if is_on_page and not person.is_uninteresting else person.name_txt(),
315
356
  person.category_txt(),
316
357
  f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
317
- Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
318
- Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
358
+ str(len(person.unique_emails_by())) if len(person.unique_emails_by()) > 0 else '',
359
+ str(len(person.unique_emails_to())) if len(person.unique_emails_to()) > 0 else '',
319
360
  f"{person.email_conversation_length_in_days()}",
320
361
  person.info_txt() or '',
321
- style='' if person.name in highlighted_names else 'dim',
362
+ style='' if show_epstein_total or is_on_page else 'dim',
322
363
  )
323
364
 
324
365
  return table
366
+
367
+ @staticmethod
368
+ def emails_from_people(people: list['Person']) -> Sequence[Email]:
369
+ return Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
61
61
  DONALD_TRUMP = 'Donald Trump'
62
62
  EDUARDO_ROBLES = 'Eduardo Robles'
63
63
  EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
64
+ EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
64
65
  EHUD_BARAK = 'Ehud Barak'
65
66
  ERIC_ROTH = 'Eric Roth'
66
67
  FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
129
130
  NADIA_MARCINKO = 'Nadia Marcinko'
130
131
  NEAL_KASSELL = 'Neal Kassell'
131
132
  NICHOLAS_RIBIS = 'Nicholas Ribis'
133
+ NILI_PRIELL_BARAK = 'Nili Priell Barak'
132
134
  NOAM_CHOMSKY = 'Noam Chomsky'
133
135
  NORMAN_D_RAU = 'Norman D. Rau'
134
136
  OLIVIER_COLOM = 'Olivier Colom'
@@ -215,24 +217,24 @@ UBS = 'UBS'
215
217
  # First and last names that should be made part of a highlighting regex for emailers
216
218
  NAMES_TO_NOT_HIGHLIGHT = """
217
219
  al alain alan alfredo allen alex alexander amanda andres andrew anthony
218
- bard barrett barry bennet bill black bob boris brad bruce
219
- caroline carolyn chris christina cohen
220
- dan daniel danny darren dave david donald
220
+ bard barrett barry bennet bernard bill black bob boris brad brenner bruce
221
+ cameron caroline carolyn chris christina cohen
222
+ dan daniel danny darren dave david debbie donald
221
223
  ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
222
- faith forget fred friendly frost fuller
223
- gerald george gold gordon
224
- haddad harry hay heather henry hill hoffman
224
+ faith fisher forget fred friendly frost fuller
225
+ gates gerald george gold gordon
226
+ haddad hanson harry hay heather henry hill hoffman howard
225
227
  ian ivan
226
228
  jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
227
- kahn karl kate katherine kelly ken kevin krassner
228
- larry laurie lawrence leon lesley linda link lisa
229
+ kafka kahn karl kate katherine kelly ken kevin krassner
230
+ larry larsen laurie lawrence leon lesley linda link lisa
229
231
  mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
230
232
  nancy neal new nicole norman
231
233
  owen
232
234
  paul paula pen peter philip prince
233
- randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
234
- scott sean skip stanley stern stephen steve steven stone susan
235
- the thomas tim tom tony tyler
235
+ randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
236
+ scott sean skip smith stanley stern stephen steve steven stone susan
237
+ terry the thomas tim tom tony tyler
236
238
  victor
237
239
  wade waters
238
240
  y
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
243
245
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
244
246
  baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
245
247
  chapman charles charlie christopher clint cohen colin collins conway
246
- davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
248
+ davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
247
249
  edmond elizabeth emily entwistle erik evelyn
248
250
  ferguson flachsbart francis franco frank
249
251
  gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -267,6 +269,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
267
269
  def constantize_name(name: str) -> str:
268
270
  if name == 'Andrzej Duda or entourage':
269
271
  return 'ANDRZEJ_DUDA'
272
+ elif name == MIROSLAV_LAJCAK:
273
+ return 'MIROSLAV_LAJCAK'
274
+ elif name == 'Paula Heil Fisher (???)':
275
+ return 'PAULA'
270
276
 
271
277
  variable_name = remove_question_marks(name)
272
278
  variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
@@ -1,6 +1,7 @@
1
1
  from pathlib import Path
2
2
 
3
3
  from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
4
+ from epstein_files.util.logging import logger
4
5
 
5
6
  # Files output by the code
6
7
  HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
16
17
  EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
17
18
 
18
19
  # Deployment URLS
19
- # NOTE: don't rename these variables without changing deploy.sh!
20
+ # NOTE: don't rename these variables without changing deploy.sh
21
+ GH_REPO_NAME = 'epstein_text_messages'
20
22
  GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
21
- TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
23
+ TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
22
24
  ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
23
25
  CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
24
26
  JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
44
46
  def make_clean() -> None:
45
47
  """Delete all build artifacts."""
46
48
  for build_file in BUILD_ARTIFACTS:
47
- if build_file.exists():
48
- print(f"Removing build file '{build_file}'...")
49
- build_file.unlink()
49
+ for file in [build_file, Path(f"{build_file}.txt")]:
50
+ if file.exists():
51
+ logger.warning(f"Removing build file '{file}'...")
52
+ file.unlink()
@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
64
64
  QUESTION_MARKS = '(???)'
65
65
 
66
66
  # Regexes
67
- FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
67
+ ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
68
+ FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
68
69
  FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
69
70
  QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
70
71
 
@@ -79,6 +80,7 @@ OTHER_FILE_CLASS = 'OtherFile'
79
80
  remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
80
81
 
81
82
 
82
- def indented(s: str, spaces: int = 4) -> str:
83
+ def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
83
84
  indent = ' ' * spaces
85
+ indent += prefix
84
86
  return indent + f"\n{indent}".join(s.split('\n'))
@@ -6,6 +6,7 @@ from inflection import parameterize
6
6
  from rich.text import Text
7
7
 
8
8
  from epstein_files.util.constant.output_files import *
9
+ from epstein_files.util.constant.strings import remove_question_marks
9
10
  from epstein_files.util.env import args
10
11
  from epstein_files.util.file_helper import coerce_file_stem
11
12
 
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
22
23
  ROLLCALL = 'RollCall'
23
24
  TWITTER = 'search X'
24
25
 
25
- GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
26
+ GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
26
27
  GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
27
28
  ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
28
29
  EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
30
+ TO_FROM = 'to/from'
29
31
 
30
32
  extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
31
33
 
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
72
74
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
73
75
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
74
76
 
75
-
76
77
  PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
77
78
  EPSTEIN_MEDIA: epstein_media_person_url,
78
79
  EPSTEIN_WEB: epstein_web_person_url,
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
98
99
  return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
99
100
 
100
101
 
102
+ def internal_link_to_emails(name: str) -> str:
103
+ """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
104
+ search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
105
+ return f"{this_site_url()}#:~:text={search_term}"
106
+
107
+
101
108
  def link_markup(
102
109
  url: str,
103
110
  link_text: str | None = None,
@@ -121,6 +128,10 @@ def other_site_url() -> str:
121
128
  return SITE_URLS[other_site_type()]
122
129
 
123
130
 
131
+ def this_site_url() -> str:
132
+ return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
133
+
134
+
124
135
  CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
125
136
  THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
126
137
  THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)