epstein-files 1.1.2__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +17 -20
- epstein_files/documents/communication.py +3 -3
- epstein_files/documents/document.py +3 -0
- epstein_files/documents/email.py +75 -64
- epstein_files/documents/imessage/text_message.py +5 -9
- epstein_files/documents/messenger_log.py +2 -2
- epstein_files/epstein_files.py +17 -15
- epstein_files/util/constant/names.py +39 -38
- epstein_files/util/constant/strings.py +1 -0
- epstein_files/util/constants.py +65 -9
- epstein_files/util/data.py +9 -1
- epstein_files/util/doc_cfg.py +8 -2
- epstein_files/util/env.py +11 -1
- epstein_files/util/file_helper.py +4 -1
- epstein_files/util/highlighted_group.py +99 -52
- epstein_files/util/output.py +112 -94
- epstein_files/util/rich.py +28 -35
- epstein_files/util/word_count.py +1 -2
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/METADATA +4 -1
- epstein_files-1.1.5.dist-info/RECORD +33 -0
- epstein_files-1.1.2.dist-info/RECORD +0 -33
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
|
-
from abc import ABC
|
|
4
3
|
from dataclasses import dataclass, field
|
|
5
4
|
|
|
6
5
|
from rich.highlighter import RegexHighlighter
|
|
@@ -12,7 +11,7 @@ from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
|
|
|
12
11
|
from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
|
|
13
12
|
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX, VIRGIN_ISLANDS)
|
|
14
13
|
from epstein_files.util.doc_cfg import *
|
|
15
|
-
from epstein_files.util.data import extract_last_name, without_falsey
|
|
14
|
+
from epstein_files.util.data import extract_first_name, extract_last_name, without_falsey
|
|
16
15
|
from epstein_files.util.logging import logger
|
|
17
16
|
|
|
18
17
|
CIVIL_ATTORNEY = 'civil attorney'
|
|
@@ -38,7 +37,6 @@ CATEGORY_STYLE_MAPPING = {
|
|
|
38
37
|
|
|
39
38
|
CATEGORY_STYLES = {
|
|
40
39
|
JSON: 'dark_red',
|
|
41
|
-
JUNK: 'grey19',
|
|
42
40
|
'letter': 'medium_orchid1'
|
|
43
41
|
}
|
|
44
42
|
|
|
@@ -46,12 +44,11 @@ CATEGORY_STYLES = {
|
|
|
46
44
|
@dataclass(kw_only=True)
|
|
47
45
|
class BaseHighlight:
|
|
48
46
|
"""
|
|
49
|
-
Regex and style information.
|
|
47
|
+
Regex and style information for things we want to highlight.
|
|
50
48
|
|
|
51
49
|
Attributes:
|
|
52
|
-
label (str): RegexHighlighter match group name
|
|
50
|
+
label (str): RegexHighlighter match group name
|
|
53
51
|
pattern (str): regex pattern identifying strings matching this group
|
|
54
|
-
regex (re.Pattern): matches self.pattern
|
|
55
52
|
style (str): Rich style to apply to text matching this group
|
|
56
53
|
theme_style_name (str): The style name that must be a part of the rich.Console's theme
|
|
57
54
|
"""
|
|
@@ -79,9 +76,6 @@ class HighlightedText(BaseHighlight):
|
|
|
79
76
|
Attributes:
|
|
80
77
|
label (str): RegexHighlighter match group name, defaults to 1st 'emailers' key if only 1 emailer provided
|
|
81
78
|
patterns (list[str]): regex patterns identifying strings matching this group
|
|
82
|
-
regex (re.Pattern): matches self.pattern
|
|
83
|
-
style (str): Rich style to apply to text matching this group
|
|
84
|
-
theme_style_name (str): The style name that must be a part of the rich.Console's theme
|
|
85
79
|
"""
|
|
86
80
|
patterns: list[str] = field(default_factory=list)
|
|
87
81
|
_pattern: str = field(init=False)
|
|
@@ -112,6 +106,7 @@ class HighlightedNames(HighlightedText):
|
|
|
112
106
|
"""
|
|
113
107
|
category: str = ''
|
|
114
108
|
emailers: dict[str, str | None] = field(default_factory=dict)
|
|
109
|
+
should_match_first_last_name: bool = True
|
|
115
110
|
|
|
116
111
|
def __post_init__(self):
|
|
117
112
|
if not (self.emailers or self.patterns):
|
|
@@ -138,9 +133,12 @@ class HighlightedNames(HighlightedText):
|
|
|
138
133
|
|
|
139
134
|
def _emailer_pattern(self, name: str) -> str:
|
|
140
135
|
"""Pattern matching 'name'. Extends value in EMAILER_ID_REGEXES with first/last name if it exists."""
|
|
136
|
+
if not self.should_match_first_last_name:
|
|
137
|
+
return name
|
|
138
|
+
|
|
141
139
|
name = remove_question_marks(name)
|
|
140
|
+
first_name = extract_first_name(name)
|
|
142
141
|
last_name = extract_last_name(name)
|
|
143
|
-
first_name = name.removesuffix(f" {last_name}")
|
|
144
142
|
|
|
145
143
|
if name in EMAILER_ID_REGEXES:
|
|
146
144
|
pattern = EMAILER_ID_REGEXES[name].pattern
|
|
@@ -221,10 +219,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
221
219
|
JOSCHA_BACH: 'cognitive science / AI research',
|
|
222
220
|
'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
|
|
223
221
|
'Ed Boyden': f'Associate Professor, {MIT_MEDIA_LAB} neurobiology',
|
|
222
|
+
'Harry Fisch': "men's health expert at New York-Presbyterian / Weill Cornell (?)",
|
|
224
223
|
LAWRENCE_KRAUSS: 'theoretical physicist',
|
|
225
224
|
LINDA_STONE: f'ex-Microsoft, {MIT_MEDIA_LAB}',
|
|
226
225
|
MARK_TRAMO: 'professor of neurology at UCLA',
|
|
227
|
-
'Nancy Dahl': 'wife of
|
|
226
|
+
'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
|
|
228
227
|
NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
|
|
229
228
|
PETER_ATTIA: 'longevity medicine',
|
|
230
229
|
ROBERT_TRIVERS: 'evolutionary biology',
|
|
@@ -235,20 +234,23 @@ HIGHLIGHTED_NAMES = [
|
|
|
235
234
|
r"Brotherton",
|
|
236
235
|
r"Carl\s*Sagan",
|
|
237
236
|
r"Columbia",
|
|
237
|
+
r"Dan(iel|ny) Kahneman",
|
|
238
238
|
r"David Grosof",
|
|
239
239
|
r"J(ames|im)\s*Watson",
|
|
240
240
|
r"(Lord\s*)?Martin\s*Rees",
|
|
241
241
|
r"Massachusetts\s*Institute\s*of\s*Technology",
|
|
242
242
|
r"Media\s*Lab",
|
|
243
|
-
r"Minsky",
|
|
243
|
+
r"(Marvin\s*)?Minsky",
|
|
244
244
|
r"MIT(\s*Media\s*Lab)?",
|
|
245
245
|
r"((Noam|Valeria)\s*)?Chomsky",
|
|
246
246
|
r"Norman\s*Finkelstein",
|
|
247
|
+
r"Oxford(?! Analytica)",
|
|
247
248
|
r"Praluent",
|
|
249
|
+
r"Princeton(\s*University)?",
|
|
248
250
|
r"Regeneron",
|
|
249
251
|
r"(Richard\s*)?Dawkins",
|
|
250
252
|
r"Sanofi",
|
|
251
|
-
r"Stanford",
|
|
253
|
+
r"Stanford(\s*University)?(\s*Hospital)?",
|
|
252
254
|
r"(Stephen\s*)?Hawking",
|
|
253
255
|
r"(Steven?\s*)?Pinker",
|
|
254
256
|
r"UCLA",
|
|
@@ -287,18 +289,19 @@ HIGHLIGHTED_NAMES = [
|
|
|
287
289
|
label='bitcoin',
|
|
288
290
|
style='orange1 bold',
|
|
289
291
|
emailers={
|
|
290
|
-
|
|
292
|
+
JEFFREY_WERNICK: 'former COO of Parler, involved in numerous crypto companies like Bitforex',
|
|
291
293
|
JEREMY_RUBIN: 'developer/researcher',
|
|
292
294
|
JOI_ITO: f"former head of {MIT_MEDIA_LAB} and MIT Digital Currency Initiative",
|
|
293
295
|
ANTHONY_SCARAMUCCI: 'Skybridge Capital, FTX investor',
|
|
294
296
|
},
|
|
295
297
|
patterns=[
|
|
296
298
|
r"Balaji",
|
|
297
|
-
r"bitcoin",
|
|
299
|
+
r"bitcoin(\s*Foundation)?",
|
|
298
300
|
r"block ?chain(\s*capital)?",
|
|
299
301
|
r"Brock(\s*Pierce)?",
|
|
300
302
|
r"coins?",
|
|
301
303
|
r"cr[iy]?pto(currenc(y|ies))?",
|
|
304
|
+
r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
|
|
302
305
|
r"e-currency",
|
|
303
306
|
r"(Gavin )?Andressen",
|
|
304
307
|
r"(Howard\s+)?Lutnic?k",
|
|
@@ -379,12 +382,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
379
382
|
r"Ministry\sof\sState\sSecurity",
|
|
380
383
|
r"Mongolian?",
|
|
381
384
|
r"MSS",
|
|
382
|
-
r"North\s*
|
|
385
|
+
r"North\s*Korean?",
|
|
383
386
|
r"Peking",
|
|
384
387
|
r"PRC",
|
|
385
388
|
r"Pyongyang",
|
|
386
389
|
r"SCMP",
|
|
387
|
-
r"Tai(pei|wan)",
|
|
388
390
|
r"Xi(aomi)?",
|
|
389
391
|
r"Jinping",
|
|
390
392
|
],
|
|
@@ -400,6 +402,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
400
402
|
HighlightedNames(
|
|
401
403
|
label='Democrat',
|
|
402
404
|
style='sky_blue1',
|
|
405
|
+
emailers={
|
|
406
|
+
PAUL_PROSPERI: 'friend of Bill Clinton',
|
|
407
|
+
},
|
|
403
408
|
patterns=[
|
|
404
409
|
r"(Al\s*)?Franken",
|
|
405
410
|
r"(Barac?k )?Obama",
|
|
@@ -413,6 +418,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
413
418
|
r"Eric Holder",
|
|
414
419
|
r"George\s*Mitchell",
|
|
415
420
|
r"(George\s*)?Soros",
|
|
421
|
+
r"Hakeem\s*Jeffries",
|
|
416
422
|
r"Hill?ary",
|
|
417
423
|
r"HRC",
|
|
418
424
|
r"(Jo(e|seph)\s*)?Biden",
|
|
@@ -438,7 +444,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
438
444
|
),
|
|
439
445
|
HighlightedNames(
|
|
440
446
|
label='employee',
|
|
441
|
-
style='
|
|
447
|
+
style='medium_purple4',
|
|
442
448
|
emailers={
|
|
443
449
|
'Alfredo Rodriguez': "Epstein's butler, stole the journal",
|
|
444
450
|
ERIC_ROTH: 'jet decorator',
|
|
@@ -451,7 +457,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
451
457
|
NADIA_MARCINKO: 'pilot',
|
|
452
458
|
'Sean J. Lancaster': 'airplane reseller',
|
|
453
459
|
},
|
|
454
|
-
patterns=[
|
|
460
|
+
patterns=[
|
|
461
|
+
r"Adriana\s*Ross",
|
|
462
|
+
r"Merwin",
|
|
463
|
+
r"(Sarah\s*)?Kellen", r"Vickers", # Married name is Metiers
|
|
464
|
+
],
|
|
455
465
|
),
|
|
456
466
|
HighlightedNames(
|
|
457
467
|
label=ENTERTAINER,
|
|
@@ -495,6 +505,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
495
505
|
style='purple',
|
|
496
506
|
emailers={
|
|
497
507
|
'Alan S Halperin': 'partner at Paul, Weiss',
|
|
508
|
+
ALAN_DERSHOWITZ: 'Harvard Law School professor and all around (in)famous American lawyer',
|
|
498
509
|
ARDA_BESKARDES: 'NYC immigration attorney allegedly involved in sex-trafficking operations',
|
|
499
510
|
BENNET_MOSKOWITZ: f'represented the {EPSTEIN_ESTATE_EXECUTOR}s',
|
|
500
511
|
BRAD_KARP: 'head of the law firm Paul Weiss',
|
|
@@ -506,6 +517,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
506
517
|
JACKIE_PERCZEK: CRIMINAL_DEFENSE_2008,
|
|
507
518
|
JAY_LEFKOWITZ: f"Kirkland & Ellis partner, {CRIMINAL_DEFENSE_2008}",
|
|
508
519
|
JESSICA_CADWELL: f'paralegal to {ROBERT_D_CRITTON_JR}', # house_oversight_030464
|
|
520
|
+
KEN_STARR: 'head of the Monica Lewinsky investigation against Bill Clinton',
|
|
509
521
|
LILLY_SANCHEZ: CRIMINAL_DEFENSE_ATTORNEY,
|
|
510
522
|
MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
|
|
511
523
|
MICHAEL_MILLER: 'Steptoe LLP partner',
|
|
@@ -518,6 +530,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
518
530
|
},
|
|
519
531
|
patterns=[
|
|
520
532
|
r"(Barry (E. )?)?Krischer",
|
|
533
|
+
r"dersh",
|
|
521
534
|
r"Kate Kelly",
|
|
522
535
|
r"Kirkland\s*&\s*Ellis",
|
|
523
536
|
r"(Leon\s*)?Jaworski",
|
|
@@ -541,6 +554,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
541
554
|
style='light_sky_blue3',
|
|
542
555
|
emailers={
|
|
543
556
|
ANDRZEJ_DUDA: 'former president of Poland',
|
|
557
|
+
'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
|
|
544
558
|
MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
|
|
545
559
|
PETER_MANDELSON: 'UK politics',
|
|
546
560
|
TERJE_ROD_LARSEN: 'Norwegian diplomat',
|
|
@@ -592,27 +606,13 @@ HIGHLIGHTED_NAMES = [
|
|
|
592
606
|
r"Strasbourg",
|
|
593
607
|
r"Strauss[- ]?Kahn",
|
|
594
608
|
r"Swed(en|ish)(?![-\s]+American Life Scienc)",
|
|
595
|
-
r"
|
|
609
|
+
r"Swi(ss|tzerland)",
|
|
596
610
|
r"(Tony\s)?Blair",
|
|
597
611
|
r"U\.K\.",
|
|
598
612
|
r"Ukrain(e|ian)",
|
|
599
613
|
r"Vienna",
|
|
600
614
|
r"Zug",
|
|
601
|
-
|
|
602
|
-
),
|
|
603
|
-
HighlightedNames(
|
|
604
|
-
label='famous lawyer',
|
|
605
|
-
style='medium_purple3',
|
|
606
|
-
emailers={
|
|
607
|
-
ALAN_DERSHOWITZ: 'Harvard Law School professor and all around (in)famous American lawyer',
|
|
608
|
-
'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
|
|
609
|
-
KEN_STARR: 'head of the Monica Lewinsky investigation against Bill Clinton',
|
|
610
|
-
},
|
|
611
|
-
patterns=[
|
|
612
|
-
r"(David\s*)?Bo[il]es",
|
|
613
|
-
r"dersh",
|
|
614
|
-
r"(Gloria\s*)?Allred",
|
|
615
|
-
r"(Mi(chael|ke)\s*)?Avenatti",
|
|
615
|
+
r"Zurich",
|
|
616
616
|
],
|
|
617
617
|
),
|
|
618
618
|
HighlightedNames(
|
|
@@ -630,10 +630,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
630
630
|
MARC_LEON: 'Luxury Properties Sari Morrocco',
|
|
631
631
|
MELANIE_SPINELLA: 'representative of Leon Black',
|
|
632
632
|
MORTIMER_ZUCKERMAN: 'business partner of Epstein, newspaper publisher',
|
|
633
|
+
NORMAN_D_RAU: 'managing director at Morgan Stanley',
|
|
633
634
|
PAUL_BARRETT: None,
|
|
634
635
|
PAUL_MORRIS: DEUTSCHE_BANK,
|
|
635
636
|
'Skip Rimer': 'Milken Institute (Michael Milken)',
|
|
636
637
|
'Steven Elkman': DEUTSCHE_BANK,
|
|
638
|
+
'Vahe Stepanian': 'Cetera Financial Group',
|
|
637
639
|
},
|
|
638
640
|
patterns=[
|
|
639
641
|
r"((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?",
|
|
@@ -662,7 +664,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
662
664
|
r"j\.?p\.?\s*morgan(\.?com|\s*Chase)?",
|
|
663
665
|
r"Madoff",
|
|
664
666
|
r"Merrill(\s*Lynch)?",
|
|
665
|
-
r"(Michael\s*)?
|
|
667
|
+
r"(Michael\s*)?Cembalest",
|
|
668
|
+
r"(Mi(chael|ke)\s*)?Milken(\s*Conference)?",
|
|
666
669
|
r"Mizrahi\s*Bank",
|
|
667
670
|
r"MLPF&S",
|
|
668
671
|
r"Morgan Stanley",
|
|
@@ -689,6 +692,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
689
692
|
},
|
|
690
693
|
patterns=[
|
|
691
694
|
r"Andrew Farkas",
|
|
695
|
+
r"Jonanthan and Kimberly Farkus",
|
|
692
696
|
r"Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
|
|
693
697
|
],
|
|
694
698
|
),
|
|
@@ -795,23 +799,26 @@ HIGHLIGHTED_NAMES = [
|
|
|
795
799
|
r"Arianna(\s*Huffington)?",
|
|
796
800
|
r"(Arthur\s*)?Kretchmer",
|
|
797
801
|
r'Associated\s*Press',
|
|
802
|
+
r"Axios",
|
|
798
803
|
r"BBC",
|
|
799
|
-
r"Bloomberg",
|
|
800
804
|
r"Breitbart",
|
|
801
805
|
r"Charlie\s*Rose",
|
|
802
806
|
r"China\s*Daily",
|
|
803
807
|
r"CNBC",
|
|
804
808
|
r"CNN(politics?)?",
|
|
805
|
-
r"Con[cs]hita",
|
|
806
|
-
r"
|
|
809
|
+
r"Con[cs]hita", r"Sarnoff",
|
|
810
|
+
r"Daily Business Review",
|
|
807
811
|
r"(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)",
|
|
808
812
|
r"(David\s*)?(Pecker|Pegg)",
|
|
809
813
|
r"David\s*Brooks",
|
|
810
814
|
r"Ed\s*Krassenstein",
|
|
811
815
|
r"(Emily\s*)?Michot",
|
|
812
816
|
r"Ezra\s*Klein",
|
|
817
|
+
r"Fox\s*News(\.com)?",
|
|
818
|
+
r"FrontPage Magazine",
|
|
813
819
|
r"FT",
|
|
814
820
|
r"(George\s*)?Stephanopoulus",
|
|
821
|
+
r"Ger(ald|ry)\s*Baker",
|
|
815
822
|
r"Globe\s*and\s*Mail",
|
|
816
823
|
r"Good\s*Morning\s*America",
|
|
817
824
|
r"Graydon(\s*Carter)?",
|
|
@@ -823,9 +830,14 @@ HIGHLIGHTED_NAMES = [
|
|
|
823
830
|
r"(Katie\s*)?Couric",
|
|
824
831
|
r"Keith\s*Larsen",
|
|
825
832
|
r"L\.?A\.?\s*Times",
|
|
833
|
+
r"Law(360|\.com|fare)",
|
|
834
|
+
r"MarketWatch",
|
|
826
835
|
r"Miami\s*Herald",
|
|
836
|
+
r"(Mi(chael|ke)\s*)?Bloomberg",
|
|
827
837
|
r"(Michele\s*)?Dargan",
|
|
838
|
+
r"Morning News USA",
|
|
828
839
|
r"(National\s*)?Enquirer",
|
|
840
|
+
r"Newsweek",
|
|
829
841
|
r"NYer",
|
|
830
842
|
r"Palm\s*Beach\s*(Daily\s*News|Post)",
|
|
831
843
|
r"PERVERSION\s*OF\s*JUSTICE",
|
|
@@ -837,6 +849,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
837
849
|
r"Susan Edelman",
|
|
838
850
|
r"(The\s*)?Financial\s*Times",
|
|
839
851
|
r"The\s*Guardian",
|
|
852
|
+
r"TheHill",
|
|
853
|
+
r"(The\s*)?Mail\s*On\s*Sunday",
|
|
840
854
|
r"(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)",
|
|
841
855
|
r"(The\s*)?New\s*Yorker",
|
|
842
856
|
r"(The\s*)?Wall\s*Street\s*Journal",
|
|
@@ -851,6 +865,18 @@ HIGHLIGHTED_NAMES = [
|
|
|
851
865
|
r"[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk",
|
|
852
866
|
],
|
|
853
867
|
),
|
|
868
|
+
HighlightedNames(
|
|
869
|
+
label=JUNK,
|
|
870
|
+
style='gray46',
|
|
871
|
+
emailers={
|
|
872
|
+
'asmallworld@travel.asmallworld.net': None,
|
|
873
|
+
"digest-noreply@quora.com": None,
|
|
874
|
+
'editorialstaff@flipboard.com': None,
|
|
875
|
+
'How To Academy': None,
|
|
876
|
+
'Jokeland': None,
|
|
877
|
+
},
|
|
878
|
+
should_match_first_last_name=False,
|
|
879
|
+
),
|
|
854
880
|
HighlightedNames(
|
|
855
881
|
label='Latin America',
|
|
856
882
|
style='yellow',
|
|
@@ -860,11 +886,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
860
886
|
r"Bra[sz]il(ian)?",
|
|
861
887
|
r"Caracas",
|
|
862
888
|
r"Castro",
|
|
889
|
+
r"Chile",
|
|
863
890
|
r"Colombian?",
|
|
864
891
|
r"Cuban?",
|
|
865
892
|
r"El\s*Salvador",
|
|
866
893
|
r"((Enrique )?Pena )?Nieto",
|
|
867
|
-
r"Lat(in)?\s*Am(
|
|
894
|
+
r"Lat(in)?\s*Am(erican?)?",
|
|
868
895
|
r"Lula",
|
|
869
896
|
r"Mexic(an|o)",
|
|
870
897
|
r"(Nicolas\s+)?Maduro",
|
|
@@ -984,6 +1011,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
984
1011
|
r"Basiji?",
|
|
985
1012
|
r"Beirut",
|
|
986
1013
|
r"Benghazi",
|
|
1014
|
+
r"Brunei",
|
|
987
1015
|
r"Cairo",
|
|
988
1016
|
r"Chagoury",
|
|
989
1017
|
r"Damascus",
|
|
@@ -1004,7 +1032,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1004
1032
|
r"HBJ",
|
|
1005
1033
|
r"Houthi",
|
|
1006
1034
|
r"Imran\s+Khan",
|
|
1007
|
-
r"Iran(ian)?",
|
|
1035
|
+
r"Iran(ian)?([-\s]Contra)?",
|
|
1008
1036
|
r"Isi[ls]",
|
|
1009
1037
|
r"Islam(abad|ic|ist)?",
|
|
1010
1038
|
r"Istanbul",
|
|
@@ -1047,6 +1075,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1047
1075
|
r"Syrian?",
|
|
1048
1076
|
r"(Tarek\s*)?El\s*Sayed",
|
|
1049
1077
|
r"Tehran",
|
|
1078
|
+
r"Tripoli",
|
|
1050
1079
|
r"Tunisian?",
|
|
1051
1080
|
r"Turk(ey|ish)",
|
|
1052
1081
|
r"UAE",
|
|
@@ -1071,6 +1100,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1071
1100
|
},
|
|
1072
1101
|
patterns=[
|
|
1073
1102
|
r"\w+@mc2mm.com",
|
|
1103
|
+
r"MC2",
|
|
1074
1104
|
r"model(ed|ing)",
|
|
1075
1105
|
r"(Nicole\s*)?Junkerman", # Also a venture fund manager now
|
|
1076
1106
|
],
|
|
@@ -1093,6 +1123,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1093
1123
|
patterns=[
|
|
1094
1124
|
r"(Matt(hew)? )?Hiltzi[gk]",
|
|
1095
1125
|
REPUTATION_MGMT,
|
|
1126
|
+
r"Reputation.com",
|
|
1096
1127
|
],
|
|
1097
1128
|
),
|
|
1098
1129
|
HighlightedNames(
|
|
@@ -1116,7 +1147,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
1116
1147
|
r"(?<!Merwin Dela )Cruz",
|
|
1117
1148
|
r"Devin\s*Nunes",
|
|
1118
1149
|
r"(Don\s*)?McGa[hn]n",
|
|
1119
|
-
r"Fox\s*News",
|
|
1120
1150
|
r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
|
|
1121
1151
|
r"(George\s*)?Nader",
|
|
1122
1152
|
r"GOP",
|
|
@@ -1153,7 +1183,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1153
1183
|
style='indian_red',
|
|
1154
1184
|
emailers={
|
|
1155
1185
|
ARIANE_DE_ROTHSCHILD: 'heiress',
|
|
1156
|
-
JOHNNY_EL_HACHEM: f'
|
|
1186
|
+
JOHNNY_EL_HACHEM: f'Edmond de Rothschild Private Equity',
|
|
1157
1187
|
},
|
|
1158
1188
|
patterns=['AdR'],
|
|
1159
1189
|
),
|
|
@@ -1201,9 +1231,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1201
1231
|
r"(Vitaly\s*)?Churkin",
|
|
1202
1232
|
],
|
|
1203
1233
|
),
|
|
1204
|
-
|
|
1205
1234
|
HighlightedNames(
|
|
1206
|
-
label='
|
|
1235
|
+
label='Southeast Asia',
|
|
1207
1236
|
style='light_salmon3 bold',
|
|
1208
1237
|
patterns=[
|
|
1209
1238
|
r"Bangkok",
|
|
@@ -1212,6 +1241,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
1212
1241
|
r"Laos",
|
|
1213
1242
|
r"Malaysian?",
|
|
1214
1243
|
r"Myan?mar",
|
|
1244
|
+
r"Philippines",
|
|
1245
|
+
r"South\s*Korean?",
|
|
1246
|
+
r"Tai(pei|wan)",
|
|
1215
1247
|
r"Thai(land)?",
|
|
1216
1248
|
r"Vietnam(ese)?",
|
|
1217
1249
|
],
|
|
@@ -1235,7 +1267,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
1235
1267
|
r"Eric\s*Schmidt",
|
|
1236
1268
|
r"Greylock(\s*Partners)?",
|
|
1237
1269
|
r"(?<!(ustin|Moshe)\s)Hoffmand?",
|
|
1270
|
+
r"(Jeff\s*)?Bezos",
|
|
1271
|
+
r"Larry Page",
|
|
1238
1272
|
r"LinkedIn",
|
|
1273
|
+
r"(Marc\s*)?Andreess?en",
|
|
1239
1274
|
r"(Mark\s*)?Zuckerberg",
|
|
1240
1275
|
r"Masa(yoshi)?(\sSon)?",
|
|
1241
1276
|
r"Najeev",
|
|
@@ -1252,7 +1287,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1252
1287
|
],
|
|
1253
1288
|
),
|
|
1254
1289
|
HighlightedNames(
|
|
1255
|
-
label='
|
|
1290
|
+
label='Trump',
|
|
1256
1291
|
style='red3 bold',
|
|
1257
1292
|
emailers={
|
|
1258
1293
|
'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
|
|
@@ -1262,8 +1297,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1262
1297
|
r"(Alan\s*)?Weiss?elberg",
|
|
1263
1298
|
r"\bDJ?T\b",
|
|
1264
1299
|
r"Donald J. Tramp",
|
|
1265
|
-
r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*Properties)?",
|
|
1266
|
-
r"Don(ald| *Jr)(?! Rubin)",
|
|
1300
|
+
r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
|
|
1301
|
+
r"Don(ald| *Jr)(?! (B|Rubin))",
|
|
1267
1302
|
r"Ivank?a",
|
|
1268
1303
|
r"Jared",
|
|
1269
1304
|
r"Kushner",
|
|
@@ -1272,8 +1307,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
1272
1307
|
r"(Marla\s*)?Maples",
|
|
1273
1308
|
r"(Matt(hew)? )?Calamari",
|
|
1274
1309
|
r"\bMatt C\b",
|
|
1310
|
+
r"Michael\s*Cohen",
|
|
1275
1311
|
r"Melania",
|
|
1276
1312
|
r"(Michael (J.? )?)?Boccio",
|
|
1313
|
+
r"Paul Rampell",
|
|
1277
1314
|
r"Rebekah\s*Mercer",
|
|
1278
1315
|
r"Roger\s+Stone",
|
|
1279
1316
|
r"rona",
|
|
@@ -1284,6 +1321,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1284
1321
|
label='victim',
|
|
1285
1322
|
style='orchid1',
|
|
1286
1323
|
patterns=[
|
|
1324
|
+
r"(David\s*)?Bo[il]es",
|
|
1325
|
+
r"(Gloria\s*)?Allred",
|
|
1287
1326
|
r"(Jane|Tiffany)\s*Doe",
|
|
1288
1327
|
r"Katie\s*Johnson",
|
|
1289
1328
|
r"(Virginia\s+((L\.?|Roberts)\s+)?)?Giuffre",
|
|
@@ -1302,6 +1341,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1302
1341
|
patterns=[
|
|
1303
1342
|
r"(Alan(\s*P.)?|MINTZ)\s*FRAADE",
|
|
1304
1343
|
r"(J\.?\s*)?(Stan(ley)?\s*)?Pottinger",
|
|
1344
|
+
r"(Mi(chael|ke)\s*)?Avenatti",
|
|
1305
1345
|
r"Paul\s*(G.\s*)?Cassell",
|
|
1306
1346
|
r"Rothstein\s*Rosenfeldt\s*Adler",
|
|
1307
1347
|
r"(Scott\s*)?Rothstein",
|
|
@@ -1326,6 +1366,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1326
1366
|
r"(John\s*)deJongh(\s*Jr\.?)",
|
|
1327
1367
|
r"(Kenneth E\. )?Mapp",
|
|
1328
1368
|
r"PBI",
|
|
1369
|
+
r"Puerto\s*Ric(an|o)",
|
|
1329
1370
|
r"S(ain)?t.?\s*Thomas",
|
|
1330
1371
|
r"USVI",
|
|
1331
1372
|
r"(?<!Epstein )VI",
|
|
@@ -1342,7 +1383,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1342
1383
|
patterns=[
|
|
1343
1384
|
r"BG",
|
|
1344
1385
|
r"b?g?C3",
|
|
1345
|
-
r"(Bill\s*((and|or)\s*Melinda\s*)?)?Gates",
|
|
1386
|
+
r"(Bill\s*((and|or)\s*Melinda\s*)?)?Gates(\s*Foundation)?",
|
|
1346
1387
|
r"Melinda(\s*Gates)?",
|
|
1347
1388
|
r"Microsoft",
|
|
1348
1389
|
r"MSFT",
|
|
@@ -1357,6 +1398,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1357
1398
|
},
|
|
1358
1399
|
patterns=[
|
|
1359
1400
|
r"(American\s*)?Dharma",
|
|
1401
|
+
r"Biosphere",
|
|
1360
1402
|
r"((Steve|Sean)\s*)?Bannon?",
|
|
1361
1403
|
],
|
|
1362
1404
|
),
|
|
@@ -1371,7 +1413,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1371
1413
|
HighlightedNames(emailers={JEFFREY_EPSTEIN: None}, patterns=[r"JEGE", r"LSJ", r"Mark (L. )?Epstein"], style='blue1'),
|
|
1372
1414
|
HighlightedNames(emailers={KATHRYN_RUEMMLER: 'former Obama legal counsel'}, style='magenta2'),
|
|
1373
1415
|
HighlightedNames(emailers={MELANIE_WALKER: 'doctor'}, style='pale_violet_red1'),
|
|
1374
|
-
HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='
|
|
1416
|
+
HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='paula', style='pink1'),
|
|
1375
1417
|
HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
|
|
1376
1418
|
HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink'),
|
|
1377
1419
|
HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
|
|
@@ -1397,7 +1439,7 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1397
1439
|
HighlightedText(
|
|
1398
1440
|
label='header_field',
|
|
1399
1441
|
style='plum4',
|
|
1400
|
-
patterns=[r'^(> )?(Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):'],
|
|
1442
|
+
patterns=[r'^(> )?(Date|From|Sent|To|C[cC]|Importance|Reply-To|Subject|Bee|B[cC]{2}|Attachments):'],
|
|
1401
1443
|
),
|
|
1402
1444
|
HighlightedText(
|
|
1403
1445
|
label='http_links',
|
|
@@ -1444,6 +1486,7 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1444
1486
|
]
|
|
1445
1487
|
|
|
1446
1488
|
ALL_HIGHLIGHTS = HIGHLIGHTED_NAMES + HIGHLIGHTED_TEXTS
|
|
1489
|
+
JUNK_EMAILERS = [name for name in [hn for hn in HIGHLIGHTED_NAMES if hn.label == JUNK][0].emailers.keys()]
|
|
1447
1490
|
|
|
1448
1491
|
|
|
1449
1492
|
class EpsteinHighlighter(RegexHighlighter):
|
|
@@ -1452,7 +1495,7 @@ class EpsteinHighlighter(RegexHighlighter):
|
|
|
1452
1495
|
highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
|
|
1453
1496
|
|
|
1454
1497
|
|
|
1455
|
-
def
|
|
1498
|
+
def get_category_txt_for_name(name: str | None) -> Text | None:
|
|
1456
1499
|
highlight_group = _get_highlight_group_for_name(name)
|
|
1457
1500
|
|
|
1458
1501
|
if highlight_group and isinstance(highlight_group, HighlightedNames):
|
|
@@ -1495,6 +1538,10 @@ def styled_category(category: str | None) -> Text:
|
|
|
1495
1538
|
return Text(category, get_style_for_category(category) or 'wheat4')
|
|
1496
1539
|
|
|
1497
1540
|
|
|
1541
|
+
def styled_name(name: str | None, default_style: str = DEFAULT_NAME_STYLE) -> Text:
|
|
1542
|
+
return Text(name or UNKNOWN, style=get_style_for_name(name, default_style=default_style))
|
|
1543
|
+
|
|
1544
|
+
|
|
1498
1545
|
def _get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
|
|
1499
1546
|
if name is None:
|
|
1500
1547
|
return None
|