epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2,70 +2,101 @@ import re
2
2
  from dataclasses import dataclass, field
3
3
 
4
4
  from rich.highlighter import RegexHighlighter
5
+ from rich.text import Text
5
6
 
6
7
  from epstein_files.util.constant.names import *
7
- from epstein_files.util.constant.strings import DEFAULT, REDACTED, TIMESTAMP_STYLE, remove_question_marks
8
+ from epstein_files.util.constant.strings import *
8
9
  from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
9
- from epstein_files.util.constants import (EMAILER_ID_REGEXES, HEADER_ABBREVIATIONS, OSBORNE_LLP, REPLY_REGEX,
10
- REPUTATION_MGMT, SENT_FROM_REGEX, VIRGIN_ISLANDS)
10
+ from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS, HEADER_ABBREVIATIONS,
11
+ OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX, VIRGIN_ISLANDS)
12
+ from epstein_files.util.doc_cfg import *
11
13
  from epstein_files.util.data import extract_last_name, listify
12
- from epstein_files.util.env import args, logger
13
14
 
14
- ESTATE_EXECUTOR = 'Epstein estate executor'
15
+ CIVIL_ATTORNEY = 'civil attorney'
16
+ CRIMINAL_DEFENSE_ATTORNEY = 'criminal defense attorney'
17
+ CRIMINAL_DEFENSE_2008 = f"{CRIMINAL_DEFENSE_ATTORNEY} on 2008 case"
18
+ EPSTEIN_LAWYER = 'epstein_lawyer'
19
+ EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY = f"{CIVIL_ATTORNEY} working on {EPSTEIN_V_ROTHSTEIN_EDWARDS}"
20
+ ESTATE_EXECUTOR = 'estate executor'
21
+ EPSTEIN_ESTATE_EXECUTOR = f"Epstein {ESTATE_EXECUTOR}"
15
22
  REGEX_STYLE_PREFIX = 'regex'
16
- NO_CATEGORY_LABELS = [BILL_GATES, STEVE_BANNON]
17
23
  SIMPLE_NAME_REGEX = re.compile(r"^[-\w ]+$", re.IGNORECASE)
18
24
 
25
+ CATEGORY_LABEL_MAPPING = {
26
+ ARTICLE: JOURNALIST,
27
+ ARTS: ENTERTAINER,
28
+ BOOK: JOURNALIST,
29
+ LEGAL: EPSTEIN_LAWYER,
30
+ POLITICS: LOBBYIST,
31
+ PROPERTY: BUSINESS,
32
+ REPUTATION: PUBLICIST,
33
+ }
34
+
19
35
 
20
36
  @dataclass(kw_only=True)
21
- class HighlightedGroup:
37
+ class HighlightedText:
22
38
  """
23
- Encapsulates info about people, places, and other strings we want to highlight with RegexHighlighter.
24
- Constructor must be called with either an 'emailers' arg or a 'pattern' arg (or both).
39
+ Color highlighting for things other than people's names (e.g. phone numbers, email headers).
25
40
 
26
41
  Attributes:
27
- category (str): optional string to use as an override for self.label in some contexts
28
- emailers (dict[str, str | None]): optional names to construct regexes for (values are descriptions)
29
- is_multiline (bool): True if this pattern is only used by RegexHighlighter and this highlight group has no other info
30
42
  label (str): RegexHighlighter match group name; required here (the HighlightedNames subclass defaults it to the sole 'emailers' key when exactly 1 emailer is provided)
31
- pattern (str): optional regex pattern identifying strings matching this group
32
- regex (re.Pattern): matches self.pattern + all first and last names (and pluralizations) in self.emailers
43
+ pattern (str): regex pattern identifying strings matching this group
44
+ regex (re.Pattern): matches self.pattern
33
45
  style (str): Rich style to apply to text matching this group
34
- _capture_group_label (str): regex capture group variable name for matches of this HighlightedGroup's 'regex'
46
+ theme_style_name (str): The style name that must be a part of the rich.Console's theme
35
47
  """
36
- category: str = ''
37
- emailers: dict[str, str | None] = field(default_factory=dict)
38
- is_multiline: bool = False
39
48
  label: str = ''
40
49
  pattern: str = ''
41
50
  style: str
42
51
  # Computed fields
43
52
  regex: re.Pattern = field(init=False)
53
+ theme_style_name: str = field(init=False)
44
54
  _capture_group_label: str = field(init=False)
55
+ _match_group_var: str = field(init=False)
56
+
57
+ def __post_init__(self):
58
+ if not self.label:
59
+ raise ValueError(f"No label provided for {repr(self)}")
60
+
61
+ self._capture_group_label = self.label.lower().replace(' ', '_').replace('-', '_')
62
+ self._match_group_var = fr"?P<{self._capture_group_label}>"
63
+ self.theme_style_name = f"{REGEX_STYLE_PREFIX}.{self._capture_group_label}"
64
+ self.regex = re.compile(fr"({self._match_group_var}{self.pattern})", re.IGNORECASE | re.MULTILINE)
65
+
66
+ def __str__(self) -> str:
67
+ return f"{type(self).__name__}(label='{self.label}')"
68
+
69
+
70
+ @dataclass(kw_only=True)
71
+ class HighlightedNames(HighlightedText):
72
+ """
73
+ Encapsulates info about people, places, and other strings we want to highlight with RegexHighlighter.
74
+ Constructor must be called with either an 'emailers' arg or a 'pattern' arg (or both).
75
+
76
+ Attributes:
77
+ category (str): optional string to use as an override for self.label in some contexts
78
+ emailers (dict[str, str | None]): optional names to construct regexes for (values are descriptions)
79
+ _pattern (str): complete regex pattern that combines 'pattern' with 'emailers'
80
+ """
81
+ category: str = ''
82
+ emailers: dict[str, str | None] = field(default_factory=dict)
83
+ _pattern: str = field(init=False)
45
84
 
46
85
  def __post_init__(self):
47
86
  if not (self.emailers or self.pattern):
48
87
  raise ValueError(f"Must provide either 'emailers' or 'pattern' arg.")
49
- elif self.is_multiline and self.emailers:
50
- raise ValueError(f"'is_multiline' cannot be True when there are 'emailers'.")
51
88
  elif not self.label:
52
89
  if len(self.emailers) == 1:
53
90
  self.label = [k for k in self.emailers.keys()][0]
54
91
  else:
55
92
  raise ValueError(f"No label provided for {repr(self)}")
56
93
 
57
- pattern = '|'.join([self._emailer_pattern(e) for e in self.emailers] + listify(self.pattern))
58
- self._capture_group_label = self.label.lower().replace(' ', '_').replace('-', '_')
59
- self.theme_style_name = f"{REGEX_STYLE_PREFIX}.{self._capture_group_label}"
60
- match_group_var = fr"?P<{self._capture_group_label}>"
61
-
62
- if self.is_multiline:
63
- self.regex = re.compile(fr"({match_group_var}{pattern})", re.IGNORECASE | re.MULTILINE)
64
- else:
65
- self.regex = re.compile(fr"\b({match_group_var}({pattern})s?)\b", re.IGNORECASE)
94
+ super().__post_init__()
95
+ self._pattern = '|'.join([self._emailer_pattern(e) for e in self.emailers] + listify(self.pattern))
96
+ self.regex = re.compile(fr"\b({self._match_group_var}({self._pattern})s?)\b", re.IGNORECASE)
66
97
 
67
98
  def get_info(self, name: str) -> str | None:
68
- """Label for people in this group with the additional info for 'name' if 'name' is in self.emailers."""
99
+ """Label and additional info for 'name' if 'name' is in self.emailers."""
69
100
  info_pieces = [
70
101
  None if len(self.emailers) == 1 else (self.category or self.label.title()),
71
102
  self.emailers.get(name),
@@ -74,30 +105,37 @@ class HighlightedGroup:
74
105
  info_pieces = [p for p in info_pieces if p is not None]
75
106
  return ', '.join(info_pieces) if info_pieces else None
76
107
 
77
- # TODO: handle word boundary issue for names that end in symbols
78
108
  def _emailer_pattern(self, name: str) -> str:
79
109
  """Pattern matching 'name'. Extends value in EMAILER_ID_REGEXES with last name if it exists."""
80
110
  name = remove_question_marks(name)
81
111
  last_name = extract_last_name(name)
112
+ first_name = name.removesuffix(f" {last_name}")
82
113
 
83
114
  if name in EMAILER_ID_REGEXES:
84
115
  pattern = EMAILER_ID_REGEXES[name].pattern
85
116
 
117
+ # Include regex for last name
118
+ # TODO: handle word boundary issue for names that end in symbols
86
119
  if SIMPLE_NAME_REGEX.match(last_name) and last_name.lower() not in NAMES_TO_NOT_HIGHLIGHT:
87
- pattern += fr"|{last_name}" # Include regex for last name
120
+ pattern += fr"|{last_name}"
88
121
 
89
122
  return pattern
90
123
  elif ' ' not in name:
91
124
  return name
92
125
 
93
- first_name = name.removesuffix(f" {last_name}")
94
- name_patterns = [name.replace(' ', r"\s+"), first_name.replace(' ', r"\s+"), last_name.replace(' ', r"\s+")]
95
- name_regex_parts = [n for n in name_patterns if n.lower() not in NAMES_TO_NOT_HIGHLIGHT]
96
- return '|'.join(name_regex_parts)
126
+ name_patterns = [
127
+ n.replace(' ', r"\s+") for n in [name, first_name, last_name]
128
+ if n.lower() not in NAMES_TO_NOT_HIGHLIGHT
129
+ ]
97
130
 
131
+ return '|'.join(name_patterns)
98
132
 
99
- HIGHLIGHTED_GROUPS = [
100
- HighlightedGroup(
133
+ def __str__(self) -> str:
134
+ return f"{type(self).__name__}(label='{self.label}')"
135
+
136
+
137
+ HIGHLIGHTED_NAMES = [
138
+ HighlightedNames(
101
139
  label='africa',
102
140
  style='light_pink4',
103
141
  pattern=r'Econet(\s*Wireless)|Ghana(ian)?|Johannesburg|Kenya|Nigerian?|Senegal(ese)?|Serengeti|(South\s*)?African?|(Strive\s*)?Masiyiwa|Tanzania|Ugandan?|Zimbabwe(an)?',
@@ -108,7 +146,7 @@ HIGHLIGHTED_GROUPS = [
108
146
  'Macky Sall': 'prime minister of Senegal, defeated Abdoulaye Wade',
109
147
  },
110
148
  ),
111
- HighlightedGroup(
149
+ HighlightedNames(
112
150
  label='bitcoin',
113
151
  style='orange1 bold',
114
152
  pattern=r'Balaji|bitcoin|block ?chain(\s*capital)?|Brock(\s*Pierce)?|coins?|cr[iy]?pto(currenc(y|ies))?|e-currency|(Gavin )?Andressen|(Howard\s+)?Lutnic?k|(jeffrey\s+)?wernick|Libra|Madars|(Patrick\s*)?Murck|(Ross\s*)?Ulbricht|Silk\s*Road|SpanCash|Tether|virtual\s*currenc(ies|y)|(zero\s+knowledge\s+|zk)pro(of|tocols?)',
@@ -117,25 +155,15 @@ HIGHLIGHTED_GROUPS = [
117
155
  ANTHONY_SCARAMUCCI: 'Skybridge Capital, FTX investor',
118
156
  },
119
157
  ),
120
- HighlightedGroup(
121
- label='bro',
122
- style='tan',
123
- pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
124
- emailers = {
125
- JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
126
- 'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
127
- STEPHEN_HANSON: None,
128
- TOM_BARRACK: 'long time friend of Trump',
129
- }
130
- ),
131
- HighlightedGroup(
132
- label='business',
158
+ HighlightedNames(
159
+ label=BUSINESS,
133
160
  style='spring_green4',
134
- pattern=r'Gruterite|(John\s*)?Kluge|Marc Rich|(Mi(chael|ke)\s*)?Ovitz|(Steve\s+)?Wynn|(Leslie\s+)?Wexner|SALSS|Swedish[-\s]*American\s*Life\s*Science\s*Summit|Valhi|(Yves\s*)?Bouvier',
161
+ pattern=r'Gruterite|(John\s*)?Kluge|Marc Rich|(Mi(chael|ke)\s*)?Ovitz|(Steve\s+)?Wynn|(Les(lie)?\s+)?Wexner|SALSS|Swedish[-\s]*American\s*Life\s*Science\s*Summit|Valhi|(Yves\s*)?Bouvier',
135
162
  emailers = {
136
163
  ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
137
164
  BARBRO_C_EHNBOM: 'Swedish pharmaceuticals',
138
165
  FRED_HADDAD: "co-founder of Heck's in West Virginia",
166
+ GERALD_BARTON: "Maryland property developer, fan of Trump's Irish golf course",
139
167
  GORDON_GETTY: 'heir of oil tycoon J. Paul Getty',
140
168
  NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
141
169
  'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
@@ -144,17 +172,17 @@ HIGHLIGHTED_GROUPS = [
144
172
  TOM_PRITZKER: 'brother of J.B. Pritzker',
145
173
  }
146
174
  ),
147
- HighlightedGroup(
175
+ HighlightedNames(
148
176
  label='cannabis',
149
177
  style='chartreuse2',
150
178
  pattern=r"CBD|cannabis|marijuana|THC|WEED(guide|maps)?[^s]?",
151
179
  ),
152
- HighlightedGroup(
180
+ HighlightedNames(
153
181
  label='china',
154
182
  style='bright_red',
155
183
  pattern=r"Ali.?baba|Beijing|CCP|Chin(a|e?se)(?! Daily)|DPRK|Gino\s+Yu|Global Times|Guo|Hong|Huaw[ae]i|Kim\s*Jong\s*Un|Kong|Jack\s+Ma|Kwok|Ministry\sof\sState\sSecurity|Mongolian?|MSS|North\s*Korea|Peking|PRC|SCMP|Tai(pei|wan)|Xi(aomi)?|Jinping",
156
184
  ),
157
- HighlightedGroup(
185
+ HighlightedNames(
158
186
  label='deepak_chopra',
159
187
  style='dark_sea_green4',
160
188
  emailers = {
@@ -162,12 +190,12 @@ HIGHLIGHTED_GROUPS = [
162
190
  DEEPAK_CHOPRA: 'woo woo',
163
191
  }
164
192
  ),
165
- HighlightedGroup(
193
+ HighlightedNames(
166
194
  label='democrats',
167
195
  style='sky_blue1',
168
196
  pattern=r'(Al\s*)?Franken|((Bill|Hillart?y)\s*)?Clinton|((Chuck|Charles)\s*)?S(ch|hc)umer|(Diana\s*)?DeGette|DNC|Elena\s*Kagan|(Eliott?\s*)?Spitzer(, Eliot)?|George\s*Mitchell|(George\s*)?Soros|Hill?ary|Dem(ocrat(ic)?)?|(Jo(e|seph)\s*)?Biden|(John\s*)?Kerry|Lisa Monaco|(Matteo\s*)?Salvini|Maxine\s*Waters|(Barac?k )?Obama|(Nancy )?Pelosi|Ron\s*Dellums|Schumer|(Tim\s*)?Geithner|Vernon\s*Jordan',
169
197
  ),
170
- HighlightedGroup(
198
+ HighlightedNames(
171
199
  label='Dubin family',
172
200
  style='medium_orchid1',
173
201
  pattern=r'((Celina|Eva( Anderss?on)?|Glenn) )?Dubin',
@@ -176,7 +204,7 @@ HIGHLIGHTED_GROUPS = [
176
204
  EVA: "possibly Epstein's ex-girlfriend (?)",
177
205
  },
178
206
  ),
179
- HighlightedGroup(
207
+ HighlightedNames(
180
208
  label='employee',
181
209
  style='deep_sky_blue4',
182
210
  pattern=r'Merwin',
@@ -192,8 +220,8 @@ HIGHLIGHTED_GROUPS = [
192
220
  NADIA_MARCINKO: 'pilot',
193
221
  }
194
222
  ),
195
- HighlightedGroup(
196
- label='entertainer',
223
+ HighlightedNames(
224
+ label=ENTERTAINER,
197
225
  style='light_steel_blue3',
198
226
  pattern=r'(Art )?Spiegelman|Bobby slayton|bono\s*mick|Errol(\s*Morris)?|Etienne Binant|(Frank\s)?Gehry|Jagger|(Jeffrey\s*)?Katzenberg|(Johnny\s*)?Depp|Kid Rock|Lena\s*Dunham|Madonna|Mark\s*Burnett|Ramsey Elkholy|shirley maclaine|Steven Gaydos?|Woody( Allen)?|Zach Braff',
199
227
  emailers={
@@ -206,16 +234,42 @@ HIGHLIGHTED_GROUPS = [
206
234
  STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
207
235
  },
208
236
  ),
209
- HighlightedGroup(
210
- label='estate_executor',
237
+ HighlightedNames(
238
+ label=EPSTEIN_LAWYER,
239
+ style='purple',
240
+ pattern=r'(Barry (E. )?)?Krischer|Kate Kelly|Kirkland\s*&\s*Ellis|(Leon\s*)?Jaworski|Michael J. Pike|Paul,?\s*Weiss|Steptoe|Wein(berg|garten)',
241
+ emailers = {
242
+ 'Alan S Halperin': 'parnter at Paul, Weiss',
243
+ ARDA_BESKARDES: 'NYC immigration attorney allegedly involved in sex-trafficking operations',
244
+ BENNET_MOSKOWITZ: f'represented the {EPSTEIN_ESTATE_EXECUTOR}s',
245
+ BRAD_KARP: 'head of the law firm Paul Weiss',
246
+ DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
247
+ DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
248
+ 'Erika Kellerhals': 'attorney in St. Thomas',
249
+ GERALD_LEFCOURT: f'friend of {ALAN_DERSHOWITZ}',
250
+ JACK_GOLDBERGER: CRIMINAL_DEFENSE_2008,
251
+ JACKIE_PERCZEK: CRIMINAL_DEFENSE_2008,
252
+ JAY_LEFKOWITZ: f"Kirkland & Ellis partner, {CRIMINAL_DEFENSE_2008}",
253
+ JESSICA_CADWELL: 'paralegal', # paralegal, see https://x.com/ImDrinknWyn/status/1993765348898927022
254
+ LILLY_SANCHEZ: CRIMINAL_DEFENSE_ATTORNEY,
255
+ MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
256
+ MICHAEL_MILLER: 'Steptoe LLP partner',
257
+ REID_WEINGARTEN: 'Steptoe LLP partner',
258
+ 'Roy Black': CRIMINAL_DEFENSE_2008,
259
+ SCOTT_J_LINK: None,
260
+ TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
261
+ }
262
+ ),
263
+ HighlightedNames(
264
+ label=ESTATE_EXECUTOR,
211
265
  style='purple3 bold',
212
- category='lawyer',
266
+ category='epstein lawyer',
213
267
  emailers = {
214
- DARREN_INDYKE: ESTATE_EXECUTOR,
215
- RICHARD_KAHN: ESTATE_EXECUTOR,
268
+ DARREN_INDYKE: EPSTEIN_ESTATE_EXECUTOR,
269
+ RICHARD_KAHN: EPSTEIN_ESTATE_EXECUTOR,
216
270
  }
217
271
  ),
218
- HighlightedGroup(
272
+ HighlightedNames(
219
273
  label='europe',
220
274
  style='light_sky_blue3',
221
275
  pattern=r'(Angela )?Merk(el|le)|Austria|(Benjamin\s*)?Harnwell|Berlin|Brexit(eers?)?|Brit(ain|ish)|Brussels|Cannes|(Caroline|Jack)?\s*Lang(, Caroline)?|Cypr(iot|us)|Davos|ECB|EU|Europe(an)?(\s*Union)?|France|Geneva|Germany?|Gillard|Gree(ce|k)|Ital(ian|y)|Jacques|(Kevin\s*)?Rudd|Le\s*Pen|London|Macron|Melusine|Munich|(Natalia\s*)?Veselnitskaya|(Nicholas\s*)?Sarkozy|Nigel(\s*Farage)?|Oslo|Paris|Polish|(Sebastian )?Kurz|(Vi(c|k)tor\s+)?Orbah?n|Edward Rod Larsen|Strasbourg|Strauss[- ]?Kahn|Swed(en|ish)(?![-\s]+America)|Switzerland|(Tony\s)?Blair|Ukrain(e|ian)|Vienna|(Vitaly\s*)?Churkin|Zug',
@@ -227,7 +281,7 @@ HIGHLIGHTED_GROUPS = [
227
281
  THORBJORN_JAGLAND: 'former prime minister of Norway and head of the Nobel Peace Prize Committee',
228
282
  }
229
283
  ),
230
- HighlightedGroup(
284
+ HighlightedNames(
231
285
  label='famous_lawyer',
232
286
  style='medium_purple3',
233
287
  category='famous_lawyer',
@@ -237,7 +291,19 @@ HIGHLIGHTED_GROUPS = [
237
291
  KEN_STARR: 'head of the Monica Lewinsky investigation against Bill Clinton',
238
292
  }
239
293
  ),
240
- HighlightedGroup(
294
+ HighlightedNames(
295
+ label='friend',
296
+ style='tan',
297
+ pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
298
+ emailers = {
299
+ DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
300
+ JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
301
+ 'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
302
+ STEPHEN_HANSON: None,
303
+ TOM_BARRACK: 'long time friend of Trump',
304
+ }
305
+ ),
306
+ HighlightedNames(
241
307
  label='finance',
242
308
  style='green',
243
309
  pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|BofA|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
@@ -255,11 +321,12 @@ HIGHLIGHTED_GROUPS = [
255
321
  PAUL_MORRIS: 'Deutsche Bank',
256
322
  }
257
323
  ),
258
- HighlightedGroup(
324
+ HighlightedNames(
259
325
  label=HARVARD.lower(),
260
326
  style='deep_pink2',
261
327
  pattern=r'Cambridge|(Derek\s*)?Bok|Elisa(\s*New)?|Harvard(\s*(Business|Law|University)(\s*School)?)?|(Jonathan\s*)?Zittrain|(Stephen\s*)?Kosslyn',
262
328
  emailers = {
329
+ "Kelly Friendly": f"longtime aide and spokesperson of {LARRY_SUMMERS}",
263
330
  LARRY_SUMMERS: 'board of Digital Currency Group (DCG), Harvard president, Obama economic advisor',
264
331
  'Leah Reis-Dennis': 'producer for Lisa New\'s Poetry in America',
265
332
  LISA_NEW: f'professor of poetry, wife of {LARRY_SUMMERS}, AKA "Elisa New"',
@@ -268,7 +335,7 @@ HIGHLIGHTED_GROUPS = [
268
335
  MOSHE_HOFFMAN: 'lecturer and research scholar in behavioral and evolutionary economics',
269
336
  }
270
337
  ),
271
- HighlightedGroup(
338
+ HighlightedNames(
272
339
  label='india',
273
340
  style='bright_green',
274
341
  pattern=r'Abraaj|Anna\s*Hazare|(Arif\s*)?Naqvi|(Arvind\s*)?Kejriwal|Hardeep( Pur[ei]e)?|Indian?|InsightsPod|Modi|Mumbai|Tranchulas',
@@ -278,7 +345,7 @@ HIGHLIGHTED_GROUPS = [
278
345
  ZUBAIR_KHAN: 'Tranchulas CEO, InsightsPod founder',
279
346
  }
280
347
  ),
281
- HighlightedGroup(
348
+ HighlightedNames(
282
349
  label='israel',
283
350
  style='dodger_blue2',
284
351
  pattern=r"AIPAC|Bibi|(eh|(Ehud|Nili Priell) )?barak|Ehud\s*Barack|Israeli?|Jerusalem|J\s*Street|Mossad|Netanyahu|(Sheldon\s*)?Adelson|Tel\s*Aviv|(The\s*)?Shimon\s*Post|Yitzhak|Rabin|YIVO|zionist",
@@ -288,12 +355,12 @@ HIGHLIGHTED_GROUPS = [
288
355
  'Nili Priell Barak': f'wife of {EHUD_BARAK}',
289
356
  }
290
357
  ),
291
- HighlightedGroup(
358
+ HighlightedNames(
292
359
  label='japan',
293
360
  style='color(168)',
294
361
  pattern=r'BOJ|(Bank\s+of\s+)?Japan(ese)?|jpy?(?! Morgan)|SG|Singapore|Toky[op]',
295
362
  ),
296
- HighlightedGroup(
363
+ HighlightedNames(
297
364
  label='javanka',
298
365
  style='medium_violet_red',
299
366
  emailers = {
@@ -301,10 +368,10 @@ HIGHLIGHTED_GROUPS = [
301
368
  JARED_KUSHNER: None,
302
369
  }
303
370
  ),
304
- HighlightedGroup(
305
- label='journalist',
371
+ HighlightedNames(
372
+ label=JOURNALIST,
306
373
  style='bright_yellow',
307
- pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
374
+ pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
308
375
  emailers = {
309
376
  EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
310
377
  'James Hill': 'ABC News',
@@ -316,12 +383,12 @@ HIGHLIGHTED_GROUPS = [
316
383
  'Tim Zagat': 'Zagat restaurant guide CEO',
317
384
  }
318
385
  ),
319
- HighlightedGroup(
386
+ HighlightedNames(
320
387
  label='latin america',
321
388
  style='yellow',
322
389
  pattern=r'Argentin(a|ian)|Bolsonar[aio]|Bra[sz]il(ian)?|Bukele|Caracas|Castro|Colombian?|Cuban?|El\s*Salvador|((Enrique )?Pena )?Nieto|LatAm|Lula|Mexic(an|o)|(Nicolas\s+)?Maduro|Panama( Papers)?|Peru|Venezuelan?|Zambrano',
323
390
  ),
324
- HighlightedGroup(
391
+ HighlightedNames(
325
392
  label='law enforcement',
326
393
  style='color(24) bold',
327
394
  pattern=r'ag|(Alicia\s*)?Valle|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
@@ -330,34 +397,8 @@ HIGHLIGHTED_GROUPS = [
330
397
  DANNY_FROST: 'Director of Communications at Manhattan DA',
331
398
  }
332
399
  ),
333
- HighlightedGroup(
334
- label='epstein lawyer',
335
- style='purple',
336
- pattern=r'(Barry (E. )?)?Krischer|Kate Kelly|Kirkland\s*&\s*Ellis|(Leon\s*)?Jaworski|Michael J. Pike|Paul,?\s*Weiss|Steptoe|Wein(berg|garten)',
337
- emailers = {
338
- ARDA_BESKARDES: 'NYC immigration attorney allegedly involved in sex-trafficking operations',
339
- BENNET_MOSKOWITZ: None,
340
- BRAD_KARP: 'head of the law firm Paul Weiss',
341
- DAVID_STERN: None,
342
- DAVID_SCHOEN: None,
343
- DEBBIE_FEIN: None,
344
- 'Erika Kellerhals': 'attorney in St. Thomas',
345
- GERALD_LEFCOURT: f'friend of {ALAN_DERSHOWITZ}',
346
- JACK_GOLDBERGER: None,
347
- JACKIE_PERCZEK: None,
348
- JAY_LEFKOWITZ: None,
349
- JESSICA_CADWELL: 'paralegal (?)', # paralegal, see https://x.com/ImDrinknWyn/status/1993765348898927022
350
- LILLY_SANCHEZ: 'criminal defense attorney',
351
- MARTIN_WEINBERG: 'criminal defense attorney',
352
- MICHAEL_MILLER: 'Steptoe LLP partner',
353
- REID_WEINGARTEN: 'Steptoe LLP partner',
354
- 'Roy Black': 'criminal defense attorney',
355
- SCOTT_J_LINK: None,
356
- TONJA_HADDAD_COLEMAN: 'maybe daughter of Fred Haddad?',
357
- }
358
- ),
359
- HighlightedGroup(
360
- label='lobbyist',
400
+ HighlightedNames(
401
+ label=LOBBYIST,
361
402
  style='light_coral',
362
403
  pattern=r'[BR]ob Crowe|Stanley Rosenberg',
363
404
  emailers = {
@@ -370,10 +411,11 @@ HIGHLIGHTED_GROUPS = [
370
411
  'Stanley Rosenberg': 'former President of the Massachusetts Senate',
371
412
  }
372
413
  ),
373
- HighlightedGroup(
414
+ HighlightedNames(
374
415
  label='mideast',
375
416
  style='dark_sea_green4',
376
- pattern=r"[-\s]9/11[\s.]|Abdulmalik Al-Makhlafi|Abdullah|Abu\s+Dhabi|Afghanistan|Al[-\s]?Qa[ei]da|Ahmadinejad|Arab|Aramco|Assad|Bahrain|Basiji?|Benghazi|Cairo|Chagoury|Dj[iu]bo?uti|Doha|Dubai|Egypt(ian)?|Emir(at(es?|i))?|Erdogan|Fashi|Gaddafi|(Hamid\s*)?Karzai|Hamad\s*bin\s*Jassim|HBJ|Houthi|Imran\s+Khan|Iran(ian)?|Isi[ls]|Islam(abad|ic|ist)?|Istanbul|Kh?ashoggi|(Kairat\s*)?Kelimbetov|kasshohgi|Kaz(akh|ich)stan|Kazakh?|Kh[ao]menei|Khalid\s*Sheikh\s*Mohammed|KSA|Leban(ese|on)|Libyan?|Mahmoud|Marra[hk]e[cs]h|MB(N|S|Z)|Mohammed\s+bin\s+Salman|Morocco|Mubarak|Muslim|Nayaf|Pakistani?|Omar|(Osama\s*)?Bin\s*Laden|Osama(?! al)|Palestin(e|ian)|Persian?|Riya(dh|nd)|Saddam|Salman|Saudi(\s+Arabian?)?|Shariah?|SHC|sheikh|shia|(Sultan\s*)?Yacoub|Syrian?|(Tarek\s*)?El\s*Sayed|Tehran|Tunisian?|Turk(ey|ish)|UAE|((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
417
+ # this won't match ever because of word boundary: [-\s]9/11[\s.]
418
+ pattern=r"Abdulmalik Al-Makhlafi|Abdullah|Abu\s+Dhabi|Afghanistan|Al[-\s]?Qa[ei]da|Ahmadinejad|Arab|Aramco|Assad|Bahrain|Basiji?|Benghazi|Cairo|Chagoury|Dj[iu]bo?uti|Doha|Dubai|Egypt(ian)?|Emir(at(es?|i))?|Erdogan|Fashi|Gaddafi|(Hamid\s*)?Karzai|Hamad\s*bin\s*Jassim|HBJ|Houthi|Imran\s+Khan|Iran(ian)?|Isi[ls]|Islam(abad|ic|ist)?|Istanbul|Kh?ashoggi|(Kairat\s*)?Kelimbetov|kasshohgi|Kaz(akh|ich)stan|Kazakh?|Kh[ao]menei|Khalid\s*Sheikh\s*Mohammed|KSA|Leban(ese|on)|Libyan?|Mahmoud|Marra[hk]e[cs]h|MB(N|S|Z)|Mohammed\s+bin\s+Salman|Morocco|Mubarak|Muslim|Nayaf|Pakistani?|Omar|(Osama\s*)?Bin\s*Laden|Osama(?! al)|Palestin(e|ian)|Persian?|Riya(dh|nd)|Saddam|Salman|Saudi(\s+Arabian?)?|Shariah?|SHC|sheikh|shia|(Sultan\s*)?Yacoub|Syrian?|(Tarek\s*)?El\s*Sayed|Tehran|Tunisian?|Turk(ey|ish)|UAE|((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
377
419
  emailers = {
378
420
  ANAS_ALRASHEED: f'former information minister of Kuwait {QUESTION_MARKS}',
379
421
  AZIZA_ALAHMADI: 'Abu Dhabi Department of Culture & Tourism',
@@ -381,7 +423,7 @@ HIGHLIGHTED_GROUPS = [
381
423
  SHAHER_ABDULHAK_BESHER: 'Yemeni billionaire',
382
424
  }
383
425
  ),
384
- HighlightedGroup(
426
+ HighlightedNames(
385
427
  label='modeling',
386
428
  style='pale_violet_red1',
387
429
  pattern=r'\w+@mc2mm.com|(Nicole\s*)?Junkerman',
@@ -398,22 +440,22 @@ HIGHLIGHTED_GROUPS = [
398
440
  'Michael Sanka': 'MC2 Model Management (?)',
399
441
  }
400
442
  ),
401
- HighlightedGroup(
402
- label='publicist',
443
+ HighlightedNames(
444
+ label=PUBLICIST,
403
445
  style='orange_red1',
404
- pattern=fr"(Matt(hew)? )?Hiltzi[gk]|{REPUTATION_MGMT.rstrip(':')}",
446
+ pattern=fr"(Matt(hew)? )?Hiltzi[gk]|{REPUTATION_MGMT}",
405
447
  emailers = {
406
448
  AL_SECKEL: 'husband of Isabel Maxwell, Mindshift conference organizer who fell off a cliff',
407
449
  'Barnaby Marsh': 'co-founder of Saint Partners, a philanthropy services company',
408
- CHRISTINA_GALBRAITH: None,
450
+ CHRISTINA_GALBRAITH: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {TYLER_SHEARS}",
409
451
  IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
410
- MICHAEL_SITRICK: None,
452
+ MICHAEL_SITRICK: 'crisis PR',
411
453
  PEGGY_SIEGAL: 'socialite',
412
454
  ROSS_GOW: 'Acuity Reputation Management',
413
- TYLER_SHEARS: None,
455
+ TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
414
456
  }
415
457
  ),
416
- HighlightedGroup(
458
+ HighlightedNames(
417
459
  label='republicans',
418
460
  style='bold dark_red',
419
461
  pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?Manafort|(Peter\s)?Navarro|Pompeo|Reagan|Republican|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|(Rex\s*)?Tillerson',
@@ -422,7 +464,7 @@ HIGHLIGHTED_GROUPS = [
422
464
  TULSI_GABBARD: None,
423
465
  },
424
466
  ),
425
- HighlightedGroup(
467
+ HighlightedNames(
426
468
  label='Rothschild family',
427
469
  style='indian_red',
428
470
  emailers={
@@ -430,18 +472,18 @@ HIGHLIGHTED_GROUPS = [
430
472
  JOHNNY_EL_HACHEM: f'Works with {ARIANE_DE_ROTHSCHILD}',
431
473
  },
432
474
  ),
433
- HighlightedGroup(
475
+ HighlightedNames(
434
476
  label='russia',
435
477
  style='red bold',
436
- pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|(Vladimir\s*)?(Putin|Yudashkin)|Xitrans',
478
+ pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
437
479
  emailers = {
438
480
  MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
439
481
  RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
440
482
  SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
441
483
  }
442
484
  ),
443
- HighlightedGroup(
444
- label='scholar',
485
+ HighlightedNames(
486
+ label=ACADEMIA,
445
487
  style='light_goldenrod2',
446
488
  pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
447
489
  emailers = {
@@ -457,12 +499,12 @@ HIGHLIGHTED_GROUPS = [
457
499
  ROGER_SCHANK: 'Teachers College, Columbia University',
458
500
  },
459
501
  ),
460
- HighlightedGroup(
502
+ HighlightedNames(
461
503
  label='southeast_asia',
462
504
  style='light_salmon3 bold',
463
505
  pattern=r'Bangkok|Burm(a|ese)|Cambodian?|Laos|Malaysian?|Myan?mar|Thai(land)?|Vietnam(ese)?',
464
506
  ),
465
- HighlightedGroup(
507
+ HighlightedNames(
466
508
  label='tech bro',
467
509
  style='bright_cyan',
468
510
  pattern=r"AG?I|Chamath|Palihapitiya|Danny\s*Hillis|Drew\s*Houston|Eric\s*Schmidt|Greylock(\s*Partners)?|(?<!(ustin|Moshe)\s)Hoffmand?|LinkedIn|(Mark\s*)?Zuckerberg|Masa(yoshi)?(\sSon)?|Najeev|Nathan\s*Myhrvold|Palantir|(Peter\s)?Th(ie|ei)l|Pierre\s*Omidyar|Sergey\s*Brin|Silicon\s*Valley|Softbank|SpaceX|Tim\s*Ferriss?|WikiLeak(ed|s)",
@@ -474,7 +516,7 @@ HIGHLIGHTED_GROUPS = [
474
516
  STEVEN_SINOFSKY: 'ex-Microsoft, loves bitcoin',
475
517
  },
476
518
  ),
477
- HighlightedGroup(
519
+ HighlightedNames(
478
520
  label='trump',
479
521
  style='red3 bold',
480
522
  pattern=r"@?realDonaldTrump|(Alan\s*)?Weiss?elberg|\bDJ?T\b|Donald J. Tramp|(Donald\s+(J\.\s+)?)?Trump(ism|\s*Properties)?|Don(ald| *Jr)(?! Rubin)|Ivana|(Madeleine\s*)?Westerhout|Mar[-\s]*a[-\s]*Lago|(Marla\s*)?Maples|(Matt(hew)? )?Calamari|\bMatt C\b|Melania|(Michael (J.? )?)?Boccio|Roger\s+Stone|rona|(The\s*)?Art\s*of\s*the\s*Deal",
@@ -482,13 +524,13 @@ HIGHLIGHTED_GROUPS = [
482
524
  'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
483
525
  },
484
526
  ),
485
- HighlightedGroup(
527
+ HighlightedNames(
486
528
  label='victim',
487
529
  style='orchid1',
488
530
  pattern=r'BVI|(Jane|Tiffany)\s*Doe|Katie\s*Johnson|(Virginia\s+((L\.?|Roberts)\s+)?)?Giuffre|Virginia\s+Roberts',
489
531
  ),
490
- HighlightedGroup(
491
- label='victim lawyer',
532
+ HighlightedNames(
533
+ label='victim_lawyer',
492
534
  style='dark_magenta bold',
493
535
  pattern=r'(Alan(\s*P.)?|MINTZ)\s*FRAADE|Paul\s*(G.\s*)?Cassell|Rothstein\s*Rosenfeldt\s*Adler|(Scott\s*)?Rothstein|(J\.?\s*)?(Stan(ley)?\s*)?Pottinger',
494
536
  emailers = {
@@ -496,10 +538,10 @@ HIGHLIGHTED_GROUPS = [
496
538
  JACK_SCAROLA: 'Searcy Denney Scarola Barnhart & Shipley',
497
539
  }
498
540
  ),
499
- HighlightedGroup(
541
+ HighlightedNames(
500
542
  label=VIRGIN_ISLANDS,
501
543
  style='sea_green1',
502
- pattern=r'Bahamas|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News xists
544
+ pattern=r'Bahamas|Caribb?ean|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News xists
503
545
  emailers = {
504
546
  CECILE_DE_JONGH: f'First lady 2007-2015',
505
547
  STACEY_PLASKETT: 'non-voting member of Congress',
@@ -508,113 +550,123 @@ HIGHLIGHTED_GROUPS = [
508
550
  ),
509
551
 
510
552
  # Individuals
511
- HighlightedGroup(
553
+ HighlightedNames(
512
554
  label=BILL_GATES,
513
555
  style='turquoise4',
514
556
  pattern=r'BG|b?g?C3|(Bill\s*((and|or)\s*Melinda\s*)?)?Gates|Melinda(\s*Gates)?|Microsoft|MSFT',
515
557
  emailers = {
516
- BORIS_NIKOLIC: f'biotech VC partner of {BILL_GATES}, {ESTATE_EXECUTOR}',
558
+ BORIS_NIKOLIC: f'biotech VC partner of {BILL_GATES}, {EPSTEIN_ESTATE_EXECUTOR}',
517
559
  },
518
560
  ),
519
- HighlightedGroup(
561
+ HighlightedNames(
520
562
  label=STEVE_BANNON,
521
563
  style='color(58)',
522
564
  pattern=r'((Steve|Sean)\s*)?Bannon?',
523
565
  ),
524
- HighlightedGroup(
566
+ HighlightedNames(
525
567
  emailers={STEVEN_HOFFENBERG: HEADER_ABBREVIATIONS['Hoffenberg']},
526
568
  pattern=r'(steven?\s*)?hoffenberg?w?',
527
569
  style='gold3'
528
570
  ),
529
- HighlightedGroup(emailers={GHISLAINE_MAXWELL: None}, pattern='gmax(1@ellmax.com)?|TerraMar', style='deep_pink3'),
530
- HighlightedGroup(emailers={JABOR_Y: HEADER_ABBREVIATIONS['Jabor']}, style='spring_green1'),
531
- HighlightedGroup(emailers={JEFFREY_EPSTEIN: None}, pattern='JEGE|LSJ|Mark (L. )?Epstein', style='blue1'),
532
- HighlightedGroup(emailers={JOI_ITO: 'former head of MIT Media Lab'}, style='gold1'),
533
- HighlightedGroup(emailers={KATHRYN_RUEMMLER: 'former Obama legal counsel'}, style='magenta2'),
534
- HighlightedGroup(emailers={MELANIE_WALKER: 'doctor'}, style='pale_violet_red1'),
535
- HighlightedGroup(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera"}, label='paula_heil_fisher', style='pink1'),
536
- HighlightedGroup(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
537
- HighlightedGroup(emailers={SOON_YI_PREVIN: "wife of Woody Allen"}, style='hot_pink'),
538
- HighlightedGroup(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
539
-
540
- # Highlight regexes for things other than names, only used by RegexHighlighter pattern matching
541
- HighlightedGroup(
571
+ HighlightedNames(emailers={GHISLAINE_MAXWELL: None}, pattern='gmax(1@ellmax.com)?|TerraMar', style='deep_pink3'),
572
+ HighlightedNames(emailers={JABOR_Y: HEADER_ABBREVIATIONS['Jabor']}, style='spring_green1'),
573
+ HighlightedNames(emailers={JEFFREY_EPSTEIN: None}, pattern='JEGE|LSJ|Mark (L. )?Epstein', style='blue1'),
574
+ HighlightedNames(emailers={JOI_ITO: 'former head of MIT Media Lab'}, style='gold1'),
575
+ HighlightedNames(emailers={KATHRYN_RUEMMLER: 'former Obama legal counsel'}, style='magenta2'),
576
+ HighlightedNames(emailers={MELANIE_WALKER: 'doctor'}, style='pale_violet_red1'),
577
+ HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera"}, label='paula_heil_fisher', style='pink1'),
578
+ HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
579
+ HighlightedNames(emailers={SOON_YI_PREVIN: "wife of Woody Allen"}, style='hot_pink'),
580
+ HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
581
+ HighlightedText(label='unknown', style='cyan', pattern=r'\(unknown\)'), # HighlightedText bc of word boundary issue
582
+ ]
583
+
584
+ # Highlight regexes for things other than names, only used by RegexHighlighter pattern matching
585
+ HIGHLIGHTED_TEXTS = [
586
+ HighlightedText(
542
587
  label='header_field',
543
588
  style='plum4',
544
- pattern='^(Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):',
545
- is_multiline=True,
589
+ pattern=r'^(Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):',
546
590
  ),
547
- HighlightedGroup(
591
+ HighlightedText(
548
592
  label='http_links',
549
593
  style=f'{ARCHIVE_LINK_COLOR} underline',
550
594
  pattern=r"https?:[^\s]+",
551
- is_multiline=True,
552
595
  ),
553
- HighlightedGroup(
596
+ HighlightedText(
554
597
  label='phone_number',
555
598
  style='bright_green',
556
599
  pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|[\d+]{10,12}",
557
- is_multiline=True,
558
600
  ),
559
- HighlightedGroup(
601
+ HighlightedText(
560
602
  label='quoted_reply_line',
561
603
  style='dim',
562
604
  pattern=REPLY_REGEX.pattern,
563
- is_multiline=True,
564
605
  ),
565
- HighlightedGroup(
606
+ HighlightedText(
566
607
  label='redacted',
567
608
  style='grey58',
568
- pattern=REDACTED,
569
- is_multiline=True,
609
+ pattern=fr"{REDACTED}|Privileged - Redacted",
570
610
  ),
571
- HighlightedGroup(
611
+ HighlightedText(
572
612
  label='sent_from',
573
613
  style='gray42 italic',
574
614
  pattern=SENT_FROM_REGEX.pattern,
575
- is_multiline=True,
576
615
  ),
577
- HighlightedGroup(
616
+ HighlightedText(
578
617
  label='snipped_signature',
579
618
  style='gray19',
580
619
  pattern=r'<\.\.\.(snipped|trimmed).*\.\.\.>',
581
- is_multiline=True,
582
620
  ),
583
- HighlightedGroup(
621
+ HighlightedText(
584
622
  label='timestamp_2',
585
623
  style=TIMESTAMP_STYLE,
586
624
  pattern=r"\d{1,4}[-/]\d{1,2}[-/]\d{2,4} \d{1,2}:\d{2}:\d{2}( [AP]M)?",
587
- is_multiline=True,
588
- ),
589
- HighlightedGroup(
590
- label='unknown',
591
- style='cyan',
592
- pattern=r'\(unknown\)',
593
- is_multiline=True,
594
625
  ),
595
626
  ]
596
627
 
628
+ ALL_HIGHLIGHTS = HIGHLIGHTED_NAMES + HIGHLIGHTED_TEXTS
597
629
 
598
- class InterestingNamesHighlighter(RegexHighlighter):
630
+
631
+ class EpsteinHighlighter(RegexHighlighter):
599
632
  """rich.highlighter that finds and colors interesting keywords based on the above config."""
600
633
  base_style = f"{REGEX_STYLE_PREFIX}."
601
- highlights = [highlight_group.regex for highlight_group in HIGHLIGHTED_GROUPS]
634
+ highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
602
635
 
603
636
 
604
637
  def get_info_for_name(name: str) -> str | None:
605
638
  highlight_group = _get_highlight_group_for_name(name)
606
639
 
607
- if highlight_group:
640
+ if highlight_group and isinstance(highlight_group, HighlightedNames):
608
641
  return highlight_group.get_info(name)
609
642
 
610
643
 
644
+ def get_style_for_category(category: str) -> str | None:
645
+ if category in [CONFERENCE, SPEECH]:
646
+ return f"{get_style_for_category(ACADEMIA)} dim"
647
+ elif category == JSON:
648
+ return 'dark_red'
649
+ elif category == JUNK:
650
+ return 'grey19'
651
+ elif category == 'letter':
652
+ return 'medium_orchid1'
653
+ elif category == SOCIAL:
654
+ return f"{get_style_for_category(PUBLICIST)} dim"
655
+
656
+ category = CATEGORY_LABEL_MAPPING.get(category, category)
657
+
658
+ for highlight_group in HIGHLIGHTED_NAMES:
659
+ if highlight_group.label == category:
660
+ return highlight_group.style
661
+
662
+
611
663
  def get_style_for_name(name: str | None, default_style: str = DEFAULT, allow_bold: bool = True) -> str:
612
664
  highlight_group = _get_highlight_group_for_name(name or UNKNOWN)
613
665
  style = highlight_group.style if highlight_group else default_style
614
666
  return style if allow_bold else style.replace('bold', '').strip()
615
667
 
616
668
 
617
- def _get_highlight_group_for_name(name: str) -> HighlightedGroup | None:
618
- for highlight_group in HIGHLIGHTED_GROUPS:
669
+ def _get_highlight_group_for_name(name: str) -> HighlightedNames | None:
670
+ for highlight_group in HIGHLIGHTED_NAMES:
619
671
  if highlight_group.regex.search(name):
620
672
  return highlight_group