epstein-files 1.2.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +55 -11
- epstein_files/documents/document.py +13 -2
- epstein_files/documents/email.py +329 -258
- epstein_files/documents/emails/email_header.py +17 -8
- epstein_files/documents/other_file.py +8 -6
- epstein_files/epstein_files.py +18 -4
- epstein_files/person.py +65 -20
- epstein_files/util/constant/names.py +18 -12
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +4 -2
- epstein_files/util/constant/urls.py +13 -2
- epstein_files/util/constants.py +486 -224
- epstein_files/util/data.py +1 -0
- epstein_files/util/doc_cfg.py +33 -27
- epstein_files/util/env.py +18 -8
- epstein_files/util/file_helper.py +2 -0
- epstein_files/util/highlighted_group.py +321 -132
- epstein_files/util/output.py +19 -24
- epstein_files/util/rich.py +9 -3
- epstein_files/util/word_count.py +2 -2
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
- epstein_files-1.4.1.dist-info/RECORD +34 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.1.dist-info/RECORD +0 -34
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
|
+
from collections import defaultdict
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
|
|
5
6
|
from rich.console import Console
|
|
@@ -11,8 +12,8 @@ from epstein_files.util.constant.strings import *
|
|
|
11
12
|
from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
|
|
12
13
|
from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
|
|
13
14
|
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
|
|
15
|
+
from epstein_files.util.data import sort_dict, without_falsey
|
|
14
16
|
from epstein_files.util.doc_cfg import *
|
|
15
|
-
from epstein_files.util.data import without_falsey
|
|
16
17
|
from epstein_files.util.env import args
|
|
17
18
|
from epstein_files.util.logging import logger
|
|
18
19
|
|
|
@@ -30,6 +31,8 @@ REGEX_STYLE_PREFIX = 'regex'
|
|
|
30
31
|
SIMPLE_NAME_REGEX = re.compile(r"^[-\w, ]+$", re.IGNORECASE)
|
|
31
32
|
TECH_BRO = 'tech bro'
|
|
32
33
|
|
|
34
|
+
VICTIM_COLOR = 'orchid1'
|
|
35
|
+
|
|
33
36
|
CATEGORY_STYLE_MAPPING = {
|
|
34
37
|
ARTICLE: JOURNALIST,
|
|
35
38
|
BOOK: JOURNALIST,
|
|
@@ -153,13 +156,13 @@ class HighlightedNames(HighlightedText):
|
|
|
153
156
|
name_patterns = [remove_question_marks(name).replace(' ', r"\s+")]
|
|
154
157
|
|
|
155
158
|
if ' ' in name:
|
|
156
|
-
for partial_name in [
|
|
159
|
+
for partial_name in [reversed_name(name), extract_first_name(name), extract_last_name(name)]: # Order matters
|
|
157
160
|
if partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT and SIMPLE_NAME_REGEX.match(partial_name):
|
|
158
161
|
name_patterns.append(partial_name.replace(' ', r"\s+"))
|
|
159
162
|
|
|
160
163
|
pattern = '|'.join(name_patterns)
|
|
161
164
|
|
|
162
|
-
if args.deep_debug:
|
|
165
|
+
if args.deep_debug and args.colors_only:
|
|
163
166
|
debug_console.print(Text('').append(f"{name:25s}", style=self.style).append(f" '{pattern}'", style='dim'))
|
|
164
167
|
|
|
165
168
|
return pattern
|
|
@@ -214,7 +217,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
214
217
|
ManualHighlight(
|
|
215
218
|
label='email_subject',
|
|
216
219
|
style='light_yellow3',
|
|
217
|
-
pattern=r"^(> )?(Classification|Flag|Subject): (?P<email_subject>.*)",
|
|
220
|
+
pattern=r"^(> )?(Classification|Flag|Subject|Sujet ?): (?P<email_subject>.*)",
|
|
218
221
|
),
|
|
219
222
|
HighlightedNames(
|
|
220
223
|
label=ACADEMIA,
|
|
@@ -239,14 +242,18 @@ HIGHLIGHTED_NAMES = [
|
|
|
239
242
|
'Valeria Chomsky': f"wife of {NOAM_CHOMSKY}",
|
|
240
243
|
},
|
|
241
244
|
patterns=[
|
|
245
|
+
r"Andy\s*Lippman", # Media Lab
|
|
246
|
+
r"Arizona\s*State\s*University",
|
|
242
247
|
r"Bard\s+((Early )?College|High School|Schools)",
|
|
243
248
|
r"Brotherton",
|
|
244
249
|
r"Carl\s*Sagan",
|
|
245
|
-
r"Columbia",
|
|
250
|
+
r"Columbia(\s*(Business\s*School|University))?",
|
|
246
251
|
r"Dan(iel|ny) Kahneman",
|
|
252
|
+
r"(Francis\s*)?Crick",
|
|
247
253
|
r"J(ames|im)\s*Watson",
|
|
248
254
|
r"(Lord\s*)?Martin\s*Rees",
|
|
249
255
|
r"Massachusetts\s*Institute\s*of\s*Technology",
|
|
256
|
+
r"Mayo\s*Clinic",
|
|
250
257
|
r"Media\s*Lab",
|
|
251
258
|
r"(Marvin\s*)?Minsky",
|
|
252
259
|
r"MIT(\s*Media\s*Lab)?",
|
|
@@ -256,10 +263,14 @@ HIGHLIGHTED_NAMES = [
|
|
|
256
263
|
r"Princeton(\s*University)?",
|
|
257
264
|
r"Regeneron",
|
|
258
265
|
r"(Richard\s*)?Dawkins",
|
|
266
|
+
r"Rockefeller\s*University",
|
|
267
|
+
r"(Sandy\s*)?Pentland", # Media Lab
|
|
259
268
|
r"Sanofi",
|
|
260
269
|
r"Stanford(\s*University)?(\s*Hospital)?",
|
|
261
|
-
r"(
|
|
270
|
+
r"(Ste(ph|v)en\s*)?Hawking",
|
|
262
271
|
r"(Steven?\s*)?Pinker",
|
|
272
|
+
r"Texas\s*A&M",
|
|
273
|
+
r"Tulane",
|
|
263
274
|
r"UCLA",
|
|
264
275
|
],
|
|
265
276
|
),
|
|
@@ -276,19 +287,23 @@ HIGHLIGHTED_NAMES = [
|
|
|
276
287
|
patterns=[
|
|
277
288
|
r"Buhari",
|
|
278
289
|
r"Econet(\s*Wireless)",
|
|
290
|
+
r"Ethiopian?",
|
|
279
291
|
r"Ghana(ian)?",
|
|
280
292
|
r"Glencore",
|
|
281
293
|
r"Goodluck Jonathan",
|
|
282
294
|
r"Johannesburg",
|
|
283
|
-
r"
|
|
295
|
+
r"Kenyan?",
|
|
284
296
|
r"Nigerian?",
|
|
285
297
|
r"Okey Enelamah",
|
|
298
|
+
r"(Paul\s*)?Kagame",
|
|
299
|
+
r"Rwandan?",
|
|
286
300
|
r"Senegal(ese)?",
|
|
287
301
|
r"Serengeti",
|
|
288
302
|
r"(South\s*)?African?",
|
|
289
303
|
r"(Strive\s*)?Masiyiwa",
|
|
290
|
-
r"
|
|
304
|
+
r"Tanzanian?",
|
|
291
305
|
r"Ugandan?",
|
|
306
|
+
r"(Yoweri\s*)?Museveni",
|
|
292
307
|
r"Zimbabwe(an)?",
|
|
293
308
|
],
|
|
294
309
|
),
|
|
@@ -300,6 +315,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
300
315
|
'Barry Josephson': 'American film producer, editor FamilySecurityMatters.org',
|
|
301
316
|
BILL_SIEGEL: 'documentary film producer and director',
|
|
302
317
|
DAVID_BLAINE: 'famous magician',
|
|
318
|
+
'David Brenner': 'American comedian and actor',
|
|
303
319
|
'Richard Merkin': 'painter, illustrator and arts educator',
|
|
304
320
|
STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
|
|
305
321
|
'Steven Gaydos': 'American screenwriter and journalist',
|
|
@@ -307,11 +323,15 @@ HIGHLIGHTED_NAMES = [
|
|
|
307
323
|
patterns=[
|
|
308
324
|
r"(Art )?Spiegelman",
|
|
309
325
|
r"Artspace",
|
|
326
|
+
r"Ayn\s*Rand",
|
|
310
327
|
r"Bobby slayton",
|
|
311
328
|
r"bono\s*mick",
|
|
312
329
|
r"Errol(\s*Morris)?",
|
|
313
330
|
r"Etienne Binant",
|
|
314
331
|
r"(Frank\s)?Gehry",
|
|
332
|
+
r"Harvey\s*Weinstein", r"wientstein", r"Weinstein\s*Co(s?|mpany)",
|
|
333
|
+
r"IFP",
|
|
334
|
+
r"Independent\s*Filmmaker\s*Project",
|
|
315
335
|
r"Jagger",
|
|
316
336
|
r"(Jeffrey\s*)?Katzenberg",
|
|
317
337
|
r"(Johnny\s*)?Depp",
|
|
@@ -320,6 +340,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
320
340
|
r"Lena\s*Dunham",
|
|
321
341
|
r"Madonna",
|
|
322
342
|
r"Mark\s*Burnett",
|
|
343
|
+
r"New York Film Festival",
|
|
344
|
+
r"Peter Getzels",
|
|
323
345
|
r"Phaidon",
|
|
324
346
|
r"Ramsey Elkholy",
|
|
325
347
|
r"Regan arts",
|
|
@@ -340,6 +362,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
340
362
|
r"BG",
|
|
341
363
|
r"b?g?C3",
|
|
342
364
|
r"(Bill\s*((and|or|&)\s*Melinda\s*)?)?Gates(\s*Foundation)?",
|
|
365
|
+
r"Kofi\s*Rashid",
|
|
343
366
|
r"Melinda(\s*Gates)?",
|
|
344
367
|
r"Microsoft",
|
|
345
368
|
r"MSFT",
|
|
@@ -358,23 +381,27 @@ HIGHLIGHTED_NAMES = [
|
|
|
358
381
|
r"Balaji",
|
|
359
382
|
r"bitcoin(\s*Foundation)?",
|
|
360
383
|
r"block ?chain(\s*capital)?",
|
|
384
|
+
r"Brian Forde",
|
|
361
385
|
r"Brock(\s*Pierce)?",
|
|
362
386
|
r"coins?",
|
|
387
|
+
r"Cory\s*Fields", # bitcoin dev
|
|
363
388
|
r"cr[iy]?pto(currenc(y|ies))?",
|
|
364
389
|
r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
|
|
365
390
|
r"e-currency",
|
|
366
|
-
r"(Gavin )?Andress?en",
|
|
391
|
+
r"(Gavin )?Andress?en", # bitcoin dev
|
|
367
392
|
r"(Howard\s+)?Lutnic?k",
|
|
393
|
+
r"(Jim\s*)Pallotta", # Media lab advisory board
|
|
368
394
|
r"Libra",
|
|
369
395
|
r"Madars",
|
|
370
396
|
r"Mi(chael|ke)\s*Novogratz",
|
|
371
397
|
r"(Patrick\s*)?Murck",
|
|
398
|
+
r"Ron Rivest",
|
|
372
399
|
r"(Ross\s*)?Ulbricht",
|
|
373
400
|
r"Silk\s*Road",
|
|
374
401
|
r"SpanCash",
|
|
375
402
|
r"Tether",
|
|
376
403
|
r"virtual\s*currenc(ies|y)",
|
|
377
|
-
r"Wladimir( van der Laan)?",
|
|
404
|
+
r"Wladimir( van der Laan)?", # bitcoin dev
|
|
378
405
|
r"(zero\s+knowledge\s+|zk)pro(of|tocols?)",
|
|
379
406
|
],
|
|
380
407
|
),
|
|
@@ -385,28 +412,29 @@ HIGHLIGHTED_NAMES = [
|
|
|
385
412
|
ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
|
|
386
413
|
BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
|
|
387
414
|
BARRY_J_COHEN: None,
|
|
388
|
-
'David Mitchell': 'Mitchell Holdings New York real estate developer',
|
|
389
|
-
FRED_HADDAD: "co-founder of Heck's in West Virginia",
|
|
415
|
+
'David Mitchell': 'Mitchell Holdings, New York real estate developer',
|
|
390
416
|
GERALD_BARTON: "Maryland property developer Landmark Land Company",
|
|
391
417
|
GORDON_GETTY: 'heir to oil tycoon J. Paul Getty',
|
|
392
|
-
NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
|
|
393
418
|
'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
|
|
394
419
|
ROBERT_LAWRENCE_KUHN: 'investment banker, China expert',
|
|
395
420
|
TERRY_KAFKA: 'CEO of Impact Outdoor (highway billboards)',
|
|
396
|
-
TOM_PRITZKER: '
|
|
421
|
+
TOM_PRITZKER: 'chairman of The Pritzker Organization and Hyatt Hotels',
|
|
397
422
|
},
|
|
398
423
|
patterns=[
|
|
424
|
+
r"Arthur Klein",
|
|
399
425
|
r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
|
|
400
426
|
r"Gruterite",
|
|
401
|
-
r"(John\s*)?Kluge",
|
|
427
|
+
r"((John|Patricia)\s*)?Kluge",
|
|
402
428
|
r"Marc Rich",
|
|
403
429
|
r"(Mi(chael|ke)\s*)?Ovitz",
|
|
404
430
|
r"(Steve\s+)?Wynn",
|
|
405
431
|
r"(Les(lie)?\s+)?Wexner",
|
|
432
|
+
r"Michael\s*Klein",
|
|
406
433
|
r"New Leaf Ventures",
|
|
407
434
|
r"Park Partners",
|
|
408
435
|
r"SALSS",
|
|
409
436
|
r"Swedish[-\s]*American\s*Life\s*Science\s*Summit",
|
|
437
|
+
r"Trilateral Commission",
|
|
410
438
|
r"Valhi",
|
|
411
439
|
r"(Yves\s*)?Bouvier",
|
|
412
440
|
],
|
|
@@ -418,6 +446,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
418
446
|
r"CBD",
|
|
419
447
|
r"cannabis",
|
|
420
448
|
r"marijuana",
|
|
449
|
+
r"psychedelic",
|
|
421
450
|
r"THC",
|
|
422
451
|
r"WEED(guide|maps)?[^s]?",
|
|
423
452
|
],
|
|
@@ -474,9 +503,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
474
503
|
r"(Barac?k )?Obama",
|
|
475
504
|
r"((Bill|Hillart?y)\s*)?Clinton",
|
|
476
505
|
r"((Chuck|Charles)\s*)?S(ch|hc)umer",
|
|
506
|
+
r"Debbie\s*Wasserman\s*Schultz",
|
|
477
507
|
r"Dem(ocrat(ic)?)?",
|
|
478
508
|
r"(Diana\s*)?DeGette",
|
|
479
509
|
r"DNC",
|
|
510
|
+
r"(Ed(ward)?\s*)?Mezvinsky",
|
|
480
511
|
r"Elena\s*Kagan",
|
|
481
512
|
r"(Eliott?\s*)?Spitzer(, Eliot)?",
|
|
482
513
|
r"Eric Holder",
|
|
@@ -491,6 +522,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
491
522
|
r"(Matteo\s*)?Salvini",
|
|
492
523
|
r"Maxine\s*Waters",
|
|
493
524
|
r"(Nancy )?Pelosi",
|
|
525
|
+
r"Open Society( Global Board)?",
|
|
494
526
|
r"Ron\s*Dellums",
|
|
495
527
|
r"Schumer",
|
|
496
528
|
r"(Tim(othy)?\s*)?Geithner",
|
|
@@ -506,16 +538,19 @@ HIGHLIGHTED_NAMES = [
|
|
|
506
538
|
EVA: "possibly Epstein's ex-girlfriend (?)",
|
|
507
539
|
'Eva Dubin': f"Epstein's ex-girlfriend now married to {GLENN_DUBIN}",
|
|
508
540
|
},
|
|
509
|
-
patterns=[r"((Celina|Eva( Anderss?on)?|Glenn) )?Dubin"],
|
|
541
|
+
patterns=[r"((Celina|Eva( Anderss?on)?|Glenn?) )?Dubin"],
|
|
510
542
|
),
|
|
511
543
|
HighlightedNames(
|
|
512
544
|
label='employee',
|
|
513
545
|
style='medium_purple4',
|
|
514
546
|
emailers={
|
|
515
547
|
'Alfredo Rodriguez': "Epstein's butler, stole the journal",
|
|
516
|
-
|
|
548
|
+
'Bernard Kruger': "Epstein's doctor",
|
|
549
|
+
EDUARDO_ROBLES: f'home builder at Creative Kingdom Dubai',
|
|
550
|
+
ERIC_ROTH: 'jet decorator at International Jet',
|
|
517
551
|
GWENDOLYN_BECK: 'Epstein fund manager in the 90s',
|
|
518
552
|
JANUSZ_BANASIAK: "Epstein's house manager",
|
|
553
|
+
"John Allessi": "Epstein's houseman",
|
|
519
554
|
JEAN_HUGUEN: 'interior design at Alberto Pinto Cabinet',
|
|
520
555
|
LAWRANCE_VISOSKI: "Epstein's pilot",
|
|
521
556
|
LESLEY_GROFF: f"Epstein's assistant",
|
|
@@ -523,7 +558,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
523
558
|
MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
|
|
524
559
|
NADIA_MARCINKO: "Epstein's pilot",
|
|
525
560
|
'Sean J. Lancaster': 'airplane reseller',
|
|
526
|
-
ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, Islamabad / Dubai',
|
|
527
561
|
},
|
|
528
562
|
patterns=[
|
|
529
563
|
r"Adriana\s*Ross",
|
|
@@ -539,7 +573,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
539
573
|
MARK_EPSTEIN: 'brother of Jeffrey',
|
|
540
574
|
},
|
|
541
575
|
patterns=[
|
|
542
|
-
r"JEGE",
|
|
576
|
+
r"JEGE(\s*Inc)?",
|
|
543
577
|
r"LSJ",
|
|
544
578
|
],
|
|
545
579
|
),
|
|
@@ -556,7 +590,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
556
590
|
DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
|
|
557
591
|
DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
|
|
558
592
|
'Erika Kellerhals': 'attorney in St. Thomas',
|
|
593
|
+
FRED_HADDAD: "co-founder of Heck's in West Virginia",
|
|
559
594
|
GERALD_LEFCOURT: f'friend of {ALAN_DERSHOWITZ}',
|
|
595
|
+
'Howard Rubenstein': f"Epstein's former spokesman",
|
|
560
596
|
JACK_GOLDBERGER: CRIMINAL_DEFENSE_2008,
|
|
561
597
|
JACKIE_PERCZEK: CRIMINAL_DEFENSE_2008,
|
|
562
598
|
JAY_LEFKOWITZ: f"Kirkland & Ellis partner, {CRIMINAL_DEFENSE_2008}",
|
|
@@ -599,7 +635,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
599
635
|
emailers={
|
|
600
636
|
ANDRZEJ_DUDA: 'former president of Poland',
|
|
601
637
|
'Caroline Lang': 'daughter of Jack Lang',
|
|
602
|
-
|
|
638
|
+
EDWARD_ROD_LARSEN: f"son of {TERJE_ROD_LARSEN}",
|
|
603
639
|
'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
|
|
604
640
|
'Jack Lang': 'former French Minister of National Education',
|
|
605
641
|
MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
|
|
@@ -608,8 +644,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
608
644
|
THORBJORN_JAGLAND: 'former prime minister of Norway, Nobel Peace Prize Committee',
|
|
609
645
|
},
|
|
610
646
|
patterns=[
|
|
647
|
+
r"AfD",
|
|
611
648
|
r"(Angela )?Merk(el|le)",
|
|
612
649
|
r"Austria",
|
|
650
|
+
r"Belgi(an|um)",
|
|
613
651
|
r"(Benjamin\s*)?Harnwell",
|
|
614
652
|
r"Berlin",
|
|
615
653
|
r"Borge",
|
|
@@ -619,6 +657,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
619
657
|
r"Brussels",
|
|
620
658
|
r"Cannes",
|
|
621
659
|
r"Cypr(iot|us)",
|
|
660
|
+
r"David\s*Cameron",
|
|
622
661
|
r"Davos",
|
|
623
662
|
r"ECB",
|
|
624
663
|
r"England",
|
|
@@ -633,6 +672,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
633
672
|
r"Ital(ian|y)",
|
|
634
673
|
r"Jacques",
|
|
635
674
|
r"Kiev",
|
|
675
|
+
r"Latvian?",
|
|
676
|
+
r"Lithuanian?",
|
|
636
677
|
r"Le\s*Pen",
|
|
637
678
|
r"London",
|
|
638
679
|
r"Macron",
|
|
@@ -642,11 +683,14 @@ HIGHLIGHTED_NAMES = [
|
|
|
642
683
|
r"NATO",
|
|
643
684
|
r"(Nicholas\s*)?Sarkozy",
|
|
644
685
|
r"Nigel(\s*Farage)?",
|
|
686
|
+
r"(Northern\s*)?Ireland",
|
|
645
687
|
r"Norw(ay|egian)",
|
|
646
688
|
r"Oslo",
|
|
647
689
|
r"Paris",
|
|
648
690
|
r"Polish",
|
|
649
691
|
r"pope",
|
|
692
|
+
r"Portugal",
|
|
693
|
+
r"Scotland",
|
|
650
694
|
r"(Sebastian )?Kurz",
|
|
651
695
|
r"Stockholm",
|
|
652
696
|
r"Strasbourg",
|
|
@@ -654,8 +698,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
654
698
|
r"Swed(en|ish)(?![-\s]+American Life Scienc)",
|
|
655
699
|
r"Swi(ss|tzerland)",
|
|
656
700
|
r"(Tony\s)?Blair",
|
|
701
|
+
r"United\s*Kingdom",
|
|
657
702
|
r"U\.K\.",
|
|
658
703
|
r"Ukrain(e|ian)",
|
|
704
|
+
r"Venice",
|
|
659
705
|
r"(Vi(c|k)tor\s+)?Orbah?n",
|
|
660
706
|
r"Vienna",
|
|
661
707
|
r"Zug",
|
|
@@ -684,8 +730,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
684
730
|
'Skip Rimer': 'Milken Institute (Michael Milken)',
|
|
685
731
|
'Steven Elkman': DEUTSCHE_BANK,
|
|
686
732
|
'Vahe Stepanian': 'Cetera Financial Group',
|
|
733
|
+
VINIT_SAHNI: f"analyst at {DEUTSCHE_BANK} and {GOLDMAN_SACHS}",
|
|
687
734
|
},
|
|
688
735
|
patterns=[
|
|
736
|
+
r"Ace\s*Greenberg",
|
|
737
|
+
r"AIG",
|
|
689
738
|
r"((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?",
|
|
690
739
|
r"Apollo",
|
|
691
740
|
r"Ari\s*Glass",
|
|
@@ -705,27 +754,33 @@ HIGHLIGHTED_NAMES = [
|
|
|
705
754
|
r"FRBNY",
|
|
706
755
|
r"Goldman(\s*Sachs)",
|
|
707
756
|
r"GRAT",
|
|
757
|
+
r"Gratitude (America|& Enhanced)", # Leon Black and/or Epstein charity?
|
|
758
|
+
r"Hank\s*Greenburg",
|
|
708
759
|
r"HSBC",
|
|
709
760
|
r"Invesco",
|
|
761
|
+
r"Jamie\s*D(imon)?",
|
|
710
762
|
r"(Janet\s*)?Yellen",
|
|
711
763
|
r"(Jerome\s*)?Powell(?! M\. Cabot)",
|
|
712
764
|
r"(Jimmy\s*)?Cayne",
|
|
765
|
+
r"Joon\s*Yun",
|
|
713
766
|
r"JPMC?",
|
|
714
767
|
r"j\.?p\.?\s*morgan(\.?com|\s*Chase)?",
|
|
715
768
|
r"Madoff",
|
|
716
769
|
r"Merrill(\s*Lynch)?",
|
|
717
770
|
r"(Michael\s*)?Cembalest",
|
|
718
|
-
r"(Mi(chael|ke)\s*)?Milken(\s*Conference)?",
|
|
771
|
+
r"(Mi(chael|ke)\s*)?Milken(\s*Conference|Institute)?",
|
|
719
772
|
r"Mizrahi\s*Bank",
|
|
720
773
|
r"MLPF&S",
|
|
721
774
|
r"Morgan Stanley",
|
|
722
775
|
r"(Peter L. )?Scher",
|
|
723
776
|
r"(Ray\s*)?Dalio",
|
|
724
777
|
r"(Richard\s*)?LeFrak",
|
|
725
|
-
r"
|
|
778
|
+
r"Rockefeller(?! University)(\s*Foundation)?",
|
|
779
|
+
r"(Ste(phen|ve)\s*)?Schwart?z?man",
|
|
726
780
|
r"Serageldin",
|
|
727
781
|
r"UBS",
|
|
728
782
|
r"us.gio@jpmorgan.com",
|
|
783
|
+
r"Wall\s*Street(?!\s*Jour)",
|
|
729
784
|
],
|
|
730
785
|
),
|
|
731
786
|
HighlightedNames(
|
|
@@ -746,6 +801,83 @@ HIGHLIGHTED_NAMES = [
|
|
|
746
801
|
r"Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
|
|
747
802
|
],
|
|
748
803
|
),
|
|
804
|
+
HighlightedNames(
|
|
805
|
+
label='government',
|
|
806
|
+
style='color(24) bold',
|
|
807
|
+
emailers={
|
|
808
|
+
ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
|
|
809
|
+
DANNY_FROST: 'Director of Communications at Manhattan D.A.',
|
|
810
|
+
'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
|
|
811
|
+
},
|
|
812
|
+
patterns=[
|
|
813
|
+
r"AG",
|
|
814
|
+
r"(Alicia\s*)?Valle",
|
|
815
|
+
r'Alice\s*Fisher|Fisher, Alice',
|
|
816
|
+
r"AML",
|
|
817
|
+
r"(Andrew\s*)?(McCabe|Natsios)",
|
|
818
|
+
r"Attorney General",
|
|
819
|
+
r"((Bob|Robert)\s*)?Mueller",
|
|
820
|
+
r"(Byung\s)?Pak",
|
|
821
|
+
r"Case 1:19-cv-03377(-LAP)?",
|
|
822
|
+
r"(CENT|NORTH|SOUTH)COM",
|
|
823
|
+
r"CFTC?",
|
|
824
|
+
r"CIA",
|
|
825
|
+
r"CIS",
|
|
826
|
+
r"CVRA",
|
|
827
|
+
r"DARPA",
|
|
828
|
+
r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
|
|
829
|
+
r"DHS",
|
|
830
|
+
r"DOJ",
|
|
831
|
+
r"FBI",
|
|
832
|
+
r"FCPA",
|
|
833
|
+
r"FDIC",
|
|
834
|
+
r"FDLE",
|
|
835
|
+
r"Federal\s*Bureau\s*of\s*Investigation",
|
|
836
|
+
r"FinCEN",
|
|
837
|
+
r"FINRA",
|
|
838
|
+
r"FOIA",
|
|
839
|
+
r"FTC",
|
|
840
|
+
r"(General\s*)?P(a|e)traeus",
|
|
841
|
+
r"Geoff\s*Ling",
|
|
842
|
+
r"Homeland\s*Security",
|
|
843
|
+
r"IRS",
|
|
844
|
+
r"(James\s*)?Comey",
|
|
845
|
+
r"(Jennifer\s*Shasky\s*)?Calvery",
|
|
846
|
+
r"((Judge|Mark)\s*)?(Carney|Filip)",
|
|
847
|
+
r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
|
|
848
|
+
r"(Justice|Treasury)\s*Dep(t|artment)",
|
|
849
|
+
r"(Kirk )?Blouin",
|
|
850
|
+
r"KYC",
|
|
851
|
+
r"(Lann?a\s*)?Belohlavek",
|
|
852
|
+
r"NIH",
|
|
853
|
+
r"NPA",
|
|
854
|
+
r"NS(A|C)",
|
|
855
|
+
r"OCC",
|
|
856
|
+
r"OFAC",
|
|
857
|
+
r"(Michael\s*)?Reiter",
|
|
858
|
+
r"OGE",
|
|
859
|
+
r"Office\s*of\s*Government\s*Ethics",
|
|
860
|
+
r"police",
|
|
861
|
+
r"(Preet\s*)?Bharara",
|
|
862
|
+
r"SCOTUS",
|
|
863
|
+
r"SD(FL|NY)",
|
|
864
|
+
r"SEC",
|
|
865
|
+
r"Secret\s*Service",
|
|
866
|
+
r"Securities\s*and\s*Exchange\s*Commission",
|
|
867
|
+
r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
|
|
868
|
+
r"State\s*Dep(artmen)?t",
|
|
869
|
+
r"Strzok",
|
|
870
|
+
r"Supreme\s*Court",
|
|
871
|
+
r"Treasury\s*(Dep(artmen)?t|Secretary)",
|
|
872
|
+
r"TSA",
|
|
873
|
+
r"U\.?S\.? attorney",
|
|
874
|
+
r"USAID",
|
|
875
|
+
r"US\s*(AF|Army|Air\s*Force)",
|
|
876
|
+
r"Walter\s*Reed(\s*Army\s*Institute\s*of\s*Research)?",
|
|
877
|
+
r"(William\s*J\.?\s*)?Zloch",
|
|
878
|
+
r"WRAIR",
|
|
879
|
+
],
|
|
880
|
+
),
|
|
749
881
|
HighlightedNames(
|
|
750
882
|
label=HARVARD,
|
|
751
883
|
style='light_goldenrod3',
|
|
@@ -773,8 +905,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
773
905
|
label='India',
|
|
774
906
|
style='bright_green',
|
|
775
907
|
emailers={
|
|
776
|
-
ANIL_AMBANI: 'chairman of Reliance Group',
|
|
777
|
-
VINIT_SAHNI: None,
|
|
908
|
+
ANIL_AMBANI: 'billionaire chairman of Reliance Group',
|
|
778
909
|
},
|
|
779
910
|
patterns=[
|
|
780
911
|
r"Abraaj",
|
|
@@ -797,16 +928,18 @@ HIGHLIGHTED_NAMES = [
|
|
|
797
928
|
emailers={
|
|
798
929
|
EHUD_BARAK: 'former prime minister of Israel, Epstein business partner',
|
|
799
930
|
'Mitchell Bard': 'director of the American-Israeli Cooperative Enterprise (AICE)',
|
|
800
|
-
|
|
931
|
+
NILI_PRIELL_BARAK: 'wife of Ehud Barak',
|
|
801
932
|
},
|
|
802
933
|
patterns=[
|
|
803
934
|
r"AIPAC",
|
|
804
935
|
r"Bibi",
|
|
805
936
|
r"(eh|(Ehud|Nili Priell)\s*)?barak",
|
|
937
|
+
r"EB",
|
|
806
938
|
r"Ehud\s*Barack",
|
|
807
939
|
r"Israeli?",
|
|
808
940
|
r"Jerusalem",
|
|
809
941
|
r"J\s*Street",
|
|
942
|
+
r"Menachem\s*Begin",
|
|
810
943
|
r"Mossad",
|
|
811
944
|
r"Netanyahu",
|
|
812
945
|
r"(Sheldon\s*)?Adelson",
|
|
@@ -836,7 +969,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
836
969
|
'Alain Forget': 'author of "How To Get Out Of This World ALIVE"',
|
|
837
970
|
'Alex Yablon': 'New York Magazine fact checker (?)',
|
|
838
971
|
EDWARD_JAY_EPSTEIN: 'no relation, wrote books about spies',
|
|
839
|
-
HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher",
|
|
972
|
+
HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher (company not a person)",
|
|
840
973
|
JAMES_HILL: 'ABC News',
|
|
841
974
|
JENNIFER_JACQUET: 'Future Science magazine',
|
|
842
975
|
JOHN_BROCKMAN: 'literary agent and author specializing in scientific literature',
|
|
@@ -849,17 +982,21 @@ HIGHLIGHTED_NAMES = [
|
|
|
849
982
|
},
|
|
850
983
|
patterns=[
|
|
851
984
|
r"ABC(\s*News)?",
|
|
985
|
+
r"Alexandra Wolfe|Wolfe, Alexandra",
|
|
986
|
+
r"AlterNet",
|
|
852
987
|
r"Arianna(\s*Huffington)?",
|
|
853
988
|
r"(Arthur\s*)?Kretchmer",
|
|
854
989
|
r'Associated\s*Press',
|
|
855
990
|
r"Axios",
|
|
856
991
|
r"BBC",
|
|
992
|
+
r"(Bob|Robert)\s*(Costa|Woodward)",
|
|
857
993
|
r"Breitbart",
|
|
858
|
-
r"BuzzFeed",
|
|
859
|
-
r"
|
|
994
|
+
r"BuzzFeed(\s*News)?",
|
|
995
|
+
r"C-?Span",
|
|
996
|
+
r"CBS(\s*(4|Corp|News))?",
|
|
860
997
|
r"Charlie\s*Rose",
|
|
861
998
|
r"China\s*Daily",
|
|
862
|
-
r"
|
|
999
|
+
r"(C|MS)?NBC(\s*News)?",
|
|
863
1000
|
r"CNN(politics?)?",
|
|
864
1001
|
r"Con[cs]hita", r"Sarnoff",
|
|
865
1002
|
r"Daily Business Review",
|
|
@@ -869,6 +1006,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
869
1006
|
r"Ed\s*Krassenstein",
|
|
870
1007
|
r"(Emily\s*)?Michot",
|
|
871
1008
|
r"Ezra\s*Klein",
|
|
1009
|
+
r"Fire\s*and\s*Fury",
|
|
872
1010
|
r"Forbes",
|
|
873
1011
|
r"Fortune\s*Magazine",
|
|
874
1012
|
r"Fox\s*News(\.com)?",
|
|
@@ -879,11 +1017,15 @@ HIGHLIGHTED_NAMES = [
|
|
|
879
1017
|
r"Globe\s*and\s*Mail",
|
|
880
1018
|
r"Good\s*Morning\s*America",
|
|
881
1019
|
r"Graydon(\s*Carter)?",
|
|
1020
|
+
r"Hollywood\s*Reporter",
|
|
882
1021
|
r"Huff(ington)?(\s*Po(st)?)?",
|
|
883
1022
|
r"Ingram, David",
|
|
884
|
-
r"
|
|
1023
|
+
r"James\s*Hill",
|
|
1024
|
+
r"(James\s*)?Patterson",
|
|
885
1025
|
r"Jesse Kornbluth",
|
|
1026
|
+
r"John\s*Connolly",
|
|
886
1027
|
r"Jonathan\s*Karl",
|
|
1028
|
+
r"Journal of Criminal Law and Criminology",
|
|
887
1029
|
r"Julie\s*(K.?\s*)?Brown", r'jbrown@miamiherald.com',
|
|
888
1030
|
r"(Katie\s*)?Couric",
|
|
889
1031
|
r"Keith\s*Larsen",
|
|
@@ -892,7 +1034,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
892
1034
|
r"(Les\s*)?Moonves",
|
|
893
1035
|
r"MarketWatch",
|
|
894
1036
|
r"Miami\s*Herald",
|
|
895
|
-
r"(Mi(chael|ke)\s*)?
|
|
1037
|
+
r"(Mi(chael|ke)\s*)?Bloomber[gq](\s*News)?",
|
|
896
1038
|
r"(Michele\s*)?Dargan",
|
|
897
1039
|
r"Morning News USA",
|
|
898
1040
|
r"(National\s*)?Enquirer",
|
|
@@ -902,19 +1044,21 @@ HIGHLIGHTED_NAMES = [
|
|
|
902
1044
|
r"PERVERSION\s*OF\s*JUSTICE",
|
|
903
1045
|
r"Politico",
|
|
904
1046
|
r"Pro\s*Publica",
|
|
905
|
-
r"Reuters",
|
|
906
1047
|
r"(Sean\s*)?Hannity",
|
|
1048
|
+
r"Sharon Churcher", # Daily Mail
|
|
907
1049
|
r"Sulzberger",
|
|
908
1050
|
r"SunSentinel",
|
|
909
1051
|
r"(The\s*)?Financial\s*Times",
|
|
910
1052
|
r"The\s*Guardian",
|
|
911
1053
|
r"TheHill",
|
|
912
1054
|
r"(The\s*)?Mail\s*On\s*Sunday",
|
|
913
|
-
r"(The\s*)?N(ew\s*)?Y(ork
|
|
1055
|
+
r"(The\s*)?N(ew\s*)?Y(ork)?\s*(Magazine|Observer|P(ost)?|T(imes)?)",
|
|
914
1056
|
r"(The\s*)?New\s*Yorker",
|
|
915
1057
|
r"(The\s*)?Wall\s*Street\s*Journal",
|
|
916
1058
|
r"(The\s*)?Wa(shington\s*)?Po(st)?",
|
|
1059
|
+
r"(Thomson\s*)?Reuters",
|
|
917
1060
|
r"(Uma\s*)?Sanghvi",
|
|
1061
|
+
r"USA\s*Today",
|
|
918
1062
|
r"Vanity\s*Fair",
|
|
919
1063
|
r"Viceland",
|
|
920
1064
|
r"Vick[iy]\s*Ward",
|
|
@@ -948,11 +1092,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
948
1092
|
r"Chile",
|
|
949
1093
|
r"Colombian?",
|
|
950
1094
|
r"Cuban?",
|
|
1095
|
+
r"el chapo",
|
|
951
1096
|
r"El\s*Salvador",
|
|
952
1097
|
r"((Enrique )?Pena )?Nieto",
|
|
953
1098
|
r"Lat(in)?\s*Am(erican?)?",
|
|
954
1099
|
r"Lula",
|
|
955
|
-
r"Mexic(an|o)",
|
|
1100
|
+
r"(?<!New )Mexic(an|o)",
|
|
956
1101
|
r"(Nicolas\s+)?Maduro",
|
|
957
1102
|
r"Panama( Papers)?",
|
|
958
1103
|
r"Peru(vian)?",
|
|
@@ -960,90 +1105,68 @@ HIGHLIGHTED_NAMES = [
|
|
|
960
1105
|
r"Zambrano",
|
|
961
1106
|
],
|
|
962
1107
|
),
|
|
963
|
-
HighlightedNames(
|
|
964
|
-
label='government',
|
|
965
|
-
style='color(24) bold',
|
|
966
|
-
emailers={
|
|
967
|
-
ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
|
|
968
|
-
DANNY_FROST: 'Director of Communications at Manhattan D.A.',
|
|
969
|
-
'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
|
|
970
|
-
},
|
|
971
|
-
patterns=[
|
|
972
|
-
r"AG",
|
|
973
|
-
r"(Alicia\s*)?Valle",
|
|
974
|
-
r"AML",
|
|
975
|
-
r"(Andrew\s*)?McCabe",
|
|
976
|
-
r"Attorney General",
|
|
977
|
-
r"((Bob|Robert)\s*)?Mueller",
|
|
978
|
-
r"(Byung\s)?Pak",
|
|
979
|
-
r"Case 1:19-cv-03377(-LAP)?",
|
|
980
|
-
r"CFTC?",
|
|
981
|
-
r"CIA",
|
|
982
|
-
r"CIS",
|
|
983
|
-
r"CVRA",
|
|
984
|
-
r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
|
|
985
|
-
r"DHS",
|
|
986
|
-
r"DOJ",
|
|
987
|
-
r"FBI",
|
|
988
|
-
r"FCPA",
|
|
989
|
-
r"FDIC",
|
|
990
|
-
r"Federal\s*Bureau\s*of\s*Investigation",
|
|
991
|
-
r"FinCEN",
|
|
992
|
-
r"FINRA",
|
|
993
|
-
r"FOIA",
|
|
994
|
-
r"FTC",
|
|
995
|
-
r"(General\s*)?P(a|e)traeus",
|
|
996
|
-
r"IRS",
|
|
997
|
-
r"(James\s*)?Comey",
|
|
998
|
-
r"(Jennifer\s*Shasky\s*)?Calvery",
|
|
999
|
-
r"((Judge|Mark)\s*)?(Carney|Filip)",
|
|
1000
|
-
r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
|
|
1001
|
-
r"(Justice|Treasury)\s*Dep(t|artment)",
|
|
1002
|
-
r"(Kirk )?Blouin",
|
|
1003
|
-
r"KYC",
|
|
1004
|
-
r"(Lann?a\s*)?Belohlavek",
|
|
1005
|
-
r"NIH",
|
|
1006
|
-
r"NS(A|C)",
|
|
1007
|
-
r"OCC",
|
|
1008
|
-
r"OFAC",
|
|
1009
|
-
r"(Michael\s*)?Reiter",
|
|
1010
|
-
r"OGE",
|
|
1011
|
-
r"Office\s*of\s*Government\s*Ethics",
|
|
1012
|
-
r"police",
|
|
1013
|
-
r"(Preet\s*)?Bharara",
|
|
1014
|
-
r"SCOTUS",
|
|
1015
|
-
r"SD(FL|NY)",
|
|
1016
|
-
r"SEC",
|
|
1017
|
-
r"Secret\s*Service",
|
|
1018
|
-
r"Securities\s*and\s*Exchange\s*Commission",
|
|
1019
|
-
r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
|
|
1020
|
-
r"State\s*Dep(artmen)?t",
|
|
1021
|
-
r"Strzok",
|
|
1022
|
-
r"Supreme\s*Court",
|
|
1023
|
-
r"Treasury\s*(Dep(artmen)?t|Secretary)",
|
|
1024
|
-
r"TSA",
|
|
1025
|
-
r"U\.?S\.? attorney",
|
|
1026
|
-
r"USAID",
|
|
1027
|
-
r"(William\s*J\.?\s*)?Zloch",
|
|
1028
|
-
],
|
|
1029
|
-
),
|
|
1030
1108
|
HighlightedNames(
|
|
1031
1109
|
label=LOBBYIST,
|
|
1032
1110
|
style='light_coral',
|
|
1033
1111
|
emailers={
|
|
1034
1112
|
BOB_CROWE: 'partner at Nelson Mullins',
|
|
1035
1113
|
'Joshua Cooper Ramo': 'co-CEO of Henry Kissinger Associates',
|
|
1036
|
-
KATHERINE_KEATING: '
|
|
1114
|
+
KATHERINE_KEATING: 'daughter of former Australian prime minister',
|
|
1037
1115
|
MOHAMED_WAHEED_HASSAN: 'former president of the Maldives',
|
|
1038
1116
|
OLIVIER_COLOM: 'France',
|
|
1039
|
-
'Paul Keating': 'former
|
|
1117
|
+
'Paul Keating': 'former prime minister of Australia',
|
|
1040
1118
|
PUREVSUREN_LUNDEG: 'Mongolian ambassador to the UN',
|
|
1041
1119
|
'Stanley Rosenberg': 'former President of the Massachusetts Senate',
|
|
1042
1120
|
},
|
|
1043
1121
|
patterns=[
|
|
1044
1122
|
r"CSIS",
|
|
1123
|
+
r"elisabeth\s*feliho",
|
|
1045
1124
|
r"(Kevin\s*)?Rudd",
|
|
1046
1125
|
r"Stanley Rosenberg",
|
|
1126
|
+
r"Vinoda\s*Basnayake",
|
|
1127
|
+
],
|
|
1128
|
+
),
|
|
1129
|
+
HighlightedNames(
|
|
1130
|
+
label='locations',
|
|
1131
|
+
style='cornsilk1',
|
|
1132
|
+
patterns=[
|
|
1133
|
+
r"Alabama",
|
|
1134
|
+
r"Arizona(?! State University)",
|
|
1135
|
+
r"Aspen",
|
|
1136
|
+
r"Berkeley",
|
|
1137
|
+
r"Boston",
|
|
1138
|
+
r"Brooklyn",
|
|
1139
|
+
r"California",
|
|
1140
|
+
r"Canada",
|
|
1141
|
+
r"Cape Cod",
|
|
1142
|
+
r"Charlottesville",
|
|
1143
|
+
r"Colorado",
|
|
1144
|
+
r"Connecticut",
|
|
1145
|
+
r"Florida",
|
|
1146
|
+
r"Los Angeles",
|
|
1147
|
+
r"Loudoun\s*County?",
|
|
1148
|
+
r"Martha's\s*Vineyard",
|
|
1149
|
+
r"Miami(?!\s?Herald)",
|
|
1150
|
+
r"Nantucket",
|
|
1151
|
+
r"New\s*(Jersey|Mexico)",
|
|
1152
|
+
r"(North|South)\s*Carolina",
|
|
1153
|
+
r"NY(C|\s*State)",
|
|
1154
|
+
r"Orange\s*County",
|
|
1155
|
+
r"Oregon",
|
|
1156
|
+
r"Palo Alto",
|
|
1157
|
+
r"Pennsylvania",
|
|
1158
|
+
r"Phoenix",
|
|
1159
|
+
r"Portland",
|
|
1160
|
+
r"San Francisco",
|
|
1161
|
+
r"Sant[ae]\s*Fe",
|
|
1162
|
+
r"Telluride",
|
|
1163
|
+
r"Teterboro",
|
|
1164
|
+
r"Texas(?! A&M)",
|
|
1165
|
+
r"Toronto",
|
|
1166
|
+
r"Tu(sc|cs)on",
|
|
1167
|
+
r"Vermont",
|
|
1168
|
+
r"Washington(\s*D\.?C)?(?!\s*Post)",
|
|
1169
|
+
r"Westchester",
|
|
1047
1170
|
],
|
|
1048
1171
|
),
|
|
1049
1172
|
HighlightedNames(
|
|
@@ -1062,6 +1185,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1062
1185
|
r"Afghanistan",
|
|
1063
1186
|
r"Al[-\s]?Qa[ei]da",
|
|
1064
1187
|
r"Ahmadinejad",
|
|
1188
|
+
r"(Rakhat )?Aliyev",
|
|
1065
1189
|
r"Arab",
|
|
1066
1190
|
r"Aramco",
|
|
1067
1191
|
r"Armenia",
|
|
@@ -1090,6 +1214,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1090
1214
|
r"Hamas",
|
|
1091
1215
|
r"Hezbollah",
|
|
1092
1216
|
r"HBJ",
|
|
1217
|
+
r"Hourani",
|
|
1093
1218
|
r"Houthi",
|
|
1094
1219
|
r"Imran\s+Khan",
|
|
1095
1220
|
r"Iran(ian)?([-\s]Contra)?",
|
|
@@ -1112,10 +1237,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
1112
1237
|
r"MB(N|S|Z)",
|
|
1113
1238
|
r"Mid(dle)?\s*East(ern)?",
|
|
1114
1239
|
r"Mohammed\s+bin\s+Salman",
|
|
1115
|
-
r"
|
|
1240
|
+
r"Morocc(an|o)",
|
|
1116
1241
|
r"Mubarak",
|
|
1117
|
-
r"Muslim",
|
|
1242
|
+
r"Muslim(\s*Brotherhood)?",
|
|
1118
1243
|
r"Nayaf",
|
|
1244
|
+
r"Nazarbayev",
|
|
1119
1245
|
r"Pakistani?",
|
|
1120
1246
|
r"Omar",
|
|
1121
1247
|
r"(Osama\s*)?Bin\s*Laden",
|
|
@@ -1135,9 +1261,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
1135
1261
|
r"Syrian?",
|
|
1136
1262
|
r"(Tarek\s*)?El\s*Sayed",
|
|
1137
1263
|
r"Tehran",
|
|
1264
|
+
r"Timur\s*Kulibayev",
|
|
1138
1265
|
r"Tripoli",
|
|
1139
1266
|
r"Tunisian?",
|
|
1140
|
-
r"Turk(ey|ish)",
|
|
1267
|
+
r"Turk(ey|ish)?(?!s & Caicos)",
|
|
1141
1268
|
r"UAE",
|
|
1142
1269
|
r"((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
|
|
1143
1270
|
],
|
|
@@ -1185,8 +1312,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
1185
1312
|
},
|
|
1186
1313
|
patterns=[
|
|
1187
1314
|
r"(Matt(hew)? )?Hiltzi[gk]",
|
|
1315
|
+
r"Philip\s*Barden",
|
|
1316
|
+
r"PR\s*Newswire",
|
|
1188
1317
|
REPUTATION_MGMT,
|
|
1189
1318
|
r"Reputation.com",
|
|
1319
|
+
r"(Robert L\. )?Dilenschneider",
|
|
1190
1320
|
],
|
|
1191
1321
|
),
|
|
1192
1322
|
HighlightedNames(
|
|
@@ -1201,21 +1331,28 @@ HIGHLIGHTED_NAMES = [
|
|
|
1201
1331
|
r"Alberto\sGonzale[sz]",
|
|
1202
1332
|
r"(Alex\s*)?Acosta",
|
|
1203
1333
|
r"(Ben\s*)?Sasse",
|
|
1334
|
+
r"Betsy Devos",
|
|
1204
1335
|
r"((Bill|William)\s*)?Barr",
|
|
1205
1336
|
r"Bill\s*Shine",
|
|
1337
|
+
r"Blackwater",
|
|
1206
1338
|
r"(Bob\s*)?Corker",
|
|
1207
1339
|
r"(Brett\s*)?Kavanaugh",
|
|
1208
1340
|
r"Broidy",
|
|
1209
1341
|
r"(Chris\s)?Christie",
|
|
1210
1342
|
r"(?<!Merwin Dela )Cruz",
|
|
1343
|
+
r"Darrell\s*Issa",
|
|
1211
1344
|
r"Devin\s*Nunes",
|
|
1212
1345
|
r"(Don\s*)?McGa[hn]n",
|
|
1346
|
+
r"Erik Prince",
|
|
1213
1347
|
r"Gary\s*Cohn",
|
|
1214
1348
|
r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
|
|
1215
1349
|
r"(George\s*)?Nader",
|
|
1216
1350
|
r"GOP",
|
|
1217
1351
|
r"Jeff(rey)?\s*Sessions",
|
|
1218
|
-
r"(John\s*(R.?\s*)?)Bolton",
|
|
1352
|
+
r"(John\s*(R.?\s*)?)?Bolton",
|
|
1353
|
+
r"Kasich",
|
|
1354
|
+
r"Keith\s*Schiller",
|
|
1355
|
+
r"Kelly(\s*Anne?)?\s*Conway|Kellyanne",
|
|
1219
1356
|
r"Kissinger",
|
|
1220
1357
|
r"Kobach",
|
|
1221
1358
|
r"Kolfage",
|
|
@@ -1229,7 +1366,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1229
1366
|
r"(Michael\s)?Hayden",
|
|
1230
1367
|
r"((General|Mike)\s*)?(Flynn|Pence)",
|
|
1231
1368
|
r"(Mitt\s*)?Romney",
|
|
1232
|
-
r"Mnuchin",
|
|
1369
|
+
r"(Steven?\s*)?Mnuchin",
|
|
1370
|
+
r"(Newt\s*)Gingrich",
|
|
1233
1371
|
r"Nikki",
|
|
1234
1372
|
r"Haley",
|
|
1235
1373
|
r"(Paul\s*)?(Manafort|Volcker)",
|
|
@@ -1238,9 +1376,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
1238
1376
|
r"Reagan",
|
|
1239
1377
|
r"Reince", r"Priebus",
|
|
1240
1378
|
r"Republican",
|
|
1241
|
-
r"(Rex\s*)?
|
|
1379
|
+
r"(Rex\s*)?Till?erson",
|
|
1242
1380
|
r"(?<!Cynthia )(Richard\s*)?Nixon",
|
|
1381
|
+
r"RNC",
|
|
1382
|
+
r"(Roy|Stephen)\s*Moore",
|
|
1243
1383
|
r"Tea\s*Party",
|
|
1384
|
+
r"Wilbur\s*Ross",
|
|
1244
1385
|
],
|
|
1245
1386
|
),
|
|
1246
1387
|
HighlightedNames(
|
|
@@ -1281,6 +1422,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1281
1422
|
r"(Natalia\s*)?Veselnitskaya",
|
|
1282
1423
|
r"(Oleg\s*)?Deripaska",
|
|
1283
1424
|
r"Oleksandr Vilkul",
|
|
1425
|
+
r"Onexim", # Prokhorov investment vehicle
|
|
1426
|
+
r"Prokhorov",
|
|
1284
1427
|
r"Rosneft",
|
|
1285
1428
|
r"RT",
|
|
1286
1429
|
r"St.?\s*?Petersburg",
|
|
@@ -1289,7 +1432,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1289
1432
|
r"Sberbank",
|
|
1290
1433
|
r"Soviet(\s*Union)?",
|
|
1291
1434
|
r"USSR",
|
|
1292
|
-
r"
|
|
1435
|
+
r"Vlad(imir)?(?! Yudash)",
|
|
1293
1436
|
r"(Vladimir\s*)?Putin",
|
|
1294
1437
|
r"Women\s*Empowerment",
|
|
1295
1438
|
r"Xitrans",
|
|
@@ -1300,6 +1443,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1300
1443
|
label='Southeast Asia',
|
|
1301
1444
|
style='light_salmon3 bold',
|
|
1302
1445
|
patterns=[
|
|
1446
|
+
r"Australian?(?! Ave)",
|
|
1303
1447
|
r"Bangkok",
|
|
1304
1448
|
r"Burm(a|ese)",
|
|
1305
1449
|
r"Cambodian?",
|
|
@@ -1307,6 +1451,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1307
1451
|
r"Malaysian?",
|
|
1308
1452
|
r"Maldives",
|
|
1309
1453
|
r"Myan?mar",
|
|
1454
|
+
r"New\s*Zealand",
|
|
1310
1455
|
r"Philippines",
|
|
1311
1456
|
r"South\s*Korean?",
|
|
1312
1457
|
r"Tai(pei|wan)",
|
|
@@ -1324,12 +1469,16 @@ HIGHLIGHTED_NAMES = [
|
|
|
1324
1469
|
REID_HOFFMAN: 'PayPal mafia member, founder of LinkedIn',
|
|
1325
1470
|
STEVEN_SINOFSKY: 'ex-Microsoft, loves bitcoin',
|
|
1326
1471
|
VINCENZO_IOZZO: 'CEO of the identity-security company SlashID',
|
|
1472
|
+
ZUBAIR_KHAN: 'Tranchulas cybersecurity, InsightsPod founder, Islamabad / Dubai',
|
|
1327
1473
|
},
|
|
1328
1474
|
patterns=[
|
|
1329
1475
|
r"AG?I",
|
|
1476
|
+
r"Artificial\s*(General\s*)?Intelligence",
|
|
1330
1477
|
r"Chamath", r"Palihapitiya",
|
|
1331
1478
|
r"Danny\s*Hillis",
|
|
1479
|
+
r"deep learning",
|
|
1332
1480
|
r"Drew\s*Houston",
|
|
1481
|
+
r"Eliezer\s*Yudkowsky",
|
|
1333
1482
|
r"Eric\s*Schmidt",
|
|
1334
1483
|
r"Greylock(\s*Partners)?",
|
|
1335
1484
|
r"(?<!(ustin|Moshe)\s)Hoffmand?",
|
|
@@ -1349,6 +1498,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1349
1498
|
r"Softbank",
|
|
1350
1499
|
r"SpaceX",
|
|
1351
1500
|
r"Tim\s*Ferriss?",
|
|
1501
|
+
r"Vision\s*Fund",
|
|
1352
1502
|
r"WikiLeak(ed|s)",
|
|
1353
1503
|
],
|
|
1354
1504
|
),
|
|
@@ -1357,10 +1507,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
1357
1507
|
style='red3 bold',
|
|
1358
1508
|
emailers={
|
|
1359
1509
|
'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
|
|
1510
|
+
NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
|
|
1360
1511
|
},
|
|
1361
1512
|
patterns=[
|
|
1362
1513
|
r"@?realDonaldTrump",
|
|
1363
1514
|
r"(Alan\s*)?Weiss?elberg",
|
|
1515
|
+
r"Alex\s*Jones",
|
|
1364
1516
|
r"\bDJ?T\b",
|
|
1365
1517
|
r"Donald J. Tramp",
|
|
1366
1518
|
r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
|
|
@@ -1372,7 +1524,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1372
1524
|
r"(Marla\s*)?Maples",
|
|
1373
1525
|
r"(Matt(hew)? )?Calamari",
|
|
1374
1526
|
r"\bMatt C\b",
|
|
1375
|
-
r"Michael\s*Cohen",
|
|
1527
|
+
r"Michael\s*(D\.?\s*)?Cohen",
|
|
1376
1528
|
r"Melania",
|
|
1377
1529
|
r"(Michael (J.? )?)?Boccio",
|
|
1378
1530
|
r"Paul Rampell",
|
|
@@ -1398,25 +1550,31 @@ HIGHLIGHTED_NAMES = [
|
|
|
1398
1550
|
r"Dominican\s*Republic",
|
|
1399
1551
|
r"(Great|Little)\s*St.?\s*James",
|
|
1400
1552
|
r"Haiti(an)?",
|
|
1553
|
+
r"Jamaican?",
|
|
1401
1554
|
r"(John\s*)deJongh(\s*Jr\.?)",
|
|
1402
1555
|
r"(Kenneth E\. )?Mapp",
|
|
1403
1556
|
r"PBI",
|
|
1404
1557
|
r"Puerto\s*Ric(an|o)",
|
|
1558
|
+
r"San\s*Juan",
|
|
1405
1559
|
r"S(ain)?t.?\s*Thomas",
|
|
1406
1560
|
r"USVI",
|
|
1407
|
-
r"(?<!
|
|
1408
|
-
r"(The\s*)?Virgin\s*
|
|
1409
|
-
r"(West\s*)?Palm\s*Beach(?!\s*(Daily|Post))",
|
|
1561
|
+
r"(?<!stein |vis-a-)VI(?!s-a-)",
|
|
1562
|
+
r"(The\s*)?Virgin\s*Is(al|la)nds(\s*Daily\s*News)?", # Hard to make this work right
|
|
1563
|
+
r"(West\s*)?Palm\s*Beach(\s*County)?(?!\s*(Daily|Post))",
|
|
1410
1564
|
],
|
|
1411
1565
|
),
|
|
1412
1566
|
HighlightedNames(
|
|
1413
1567
|
label='victim',
|
|
1414
|
-
style=
|
|
1568
|
+
style=VICTIM_COLOR,
|
|
1415
1569
|
patterns=[
|
|
1416
|
-
r"
|
|
1570
|
+
r"child\s*pornography",
|
|
1571
|
+
r"(David\s*)?Bo[il]es(,?\s*Schiller( & Flexner)?)?",
|
|
1417
1572
|
r"(Gloria\s*)?Allred",
|
|
1418
1573
|
r"(Jane|Tiffany)\s*Doe",
|
|
1419
1574
|
r"Katie\s*Johnson",
|
|
1575
|
+
r"pedophile",
|
|
1576
|
+
r"Stephanie\s*Clifford",
|
|
1577
|
+
r"Stormy\s*Daniels",
|
|
1420
1578
|
r"(Virginia\s+((L\.?|Roberts)\s+)?)?Giuffre",
|
|
1421
1579
|
r"Virginia\s+Roberts",
|
|
1422
1580
|
],
|
|
@@ -1455,7 +1613,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1455
1613
|
|
|
1456
1614
|
# Individuals
|
|
1457
1615
|
HighlightedNames(
|
|
1458
|
-
emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi
|
|
1616
|
+
emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi partner, prison for 18 years"},
|
|
1459
1617
|
style='dark_olive_green3',
|
|
1460
1618
|
category=FINANCE,
|
|
1461
1619
|
patterns=[r"(steven?\s*)?hoffenberg?w?"],
|
|
@@ -1471,10 +1629,15 @@ HIGHLIGHTED_NAMES = [
|
|
|
1471
1629
|
HighlightedNames(emailers={MELANIE_WALKER: f"doctor, friend of {BILL_GATES}"}, style='pale_violet_red1', category=FRIEND),
|
|
1472
1630
|
HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='paula', style='pink1', category=FRIEND),
|
|
1473
1631
|
HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1', category='Europe'),
|
|
1474
|
-
HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=
|
|
1475
|
-
HighlightedNames(emailers={SULTAN_BIN_SULAYEM: '
|
|
1632
|
+
HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=ARTS),
|
|
1633
|
+
HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'chairman of ports in Dubai, CEO of DP World'}, style='green1', category=MIDEAST),
|
|
1476
1634
|
|
|
1477
1635
|
# HighlightedText not HighlightedNames bc of word boundary issue
|
|
1636
|
+
HighlightedText(
|
|
1637
|
+
label='metoo',
|
|
1638
|
+
style=VICTIM_COLOR,
|
|
1639
|
+
patterns=[r"#metoo"]
|
|
1640
|
+
),
|
|
1478
1641
|
HighlightedText(
|
|
1479
1642
|
label='phone_number',
|
|
1480
1643
|
style='bright_green',
|
|
@@ -1495,7 +1658,7 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1495
1658
|
HighlightedText(
|
|
1496
1659
|
label='header_field',
|
|
1497
1660
|
style='plum4',
|
|
1498
|
-
patterns=[r'^
|
|
1661
|
+
patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
|
|
1499
1662
|
),
|
|
1500
1663
|
HighlightedText(
|
|
1501
1664
|
label='http_links',
|
|
@@ -1505,16 +1668,16 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1505
1668
|
HighlightedText(
|
|
1506
1669
|
label='quoted_reply_line',
|
|
1507
1670
|
style='dim',
|
|
1508
|
-
patterns=[REPLY_REGEX.pattern],
|
|
1671
|
+
patterns=[REPLY_REGEX.pattern, r"^(> )?wrote:$"],
|
|
1509
1672
|
),
|
|
1510
1673
|
HighlightedText(
|
|
1511
1674
|
label='redacted',
|
|
1512
1675
|
style='grey58',
|
|
1513
|
-
patterns=[fr"{REDACTED}
|
|
1676
|
+
patterns=[fr"{REDACTED}|<?Privileged - Redacted>?"],
|
|
1514
1677
|
),
|
|
1515
1678
|
HighlightedText(
|
|
1516
1679
|
label='sent_from',
|
|
1517
|
-
style='
|
|
1680
|
+
style='light_cyan3 italic dim',
|
|
1518
1681
|
patterns=[SENT_FROM_REGEX.pattern],
|
|
1519
1682
|
),
|
|
1520
1683
|
HighlightedText(
|
|
@@ -1549,6 +1712,41 @@ class EpsteinHighlighter(RegexHighlighter):
|
|
|
1549
1712
|
"""Finds and colors interesting keywords based on the above config."""
|
|
1550
1713
|
base_style = f"{REGEX_STYLE_PREFIX}."
|
|
1551
1714
|
highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
|
|
1715
|
+
highlight_counts = defaultdict(int)
|
|
1716
|
+
|
|
1717
|
+
def highlight(self, text: Text) -> None:
    """
    Apply every configured highlight regex to `text` in place.

    Overrides https://rich.readthedocs.io/en/latest/_modules/rich/highlighter.html#RegexHighlighter
    When `args.debug` is set, each match of a compiled pattern is also tallied in the
    class-level `highlight_counts` dict, keyed by the text of the match's first group.
    """
    apply_regex = text.highlight_regex

    for pattern in self.highlights:
        apply_regex(pattern, style_prefix=self.base_style)

        # Counting is debug-only and needs a compiled pattern to call finditer() on.
        if not (args.debug and isinstance(pattern, re.Pattern)):
            continue

        for found in pattern.finditer(text.plain):
            # A first group that did not participate is counted under the literal key 'None';
            # newlines are flattened so each key stays on one line.
            key = (found.group(1) or 'None').replace('\n', ' ')
            type(self).highlight_counts[key] += 1
|
|
1727
|
+
|
|
1728
|
+
def print_highlight_counts(self, console: Console) -> None:
    """
    Print counts of how many times strings were highlighted.

    Iterates the `(highlighted, count)` pairs produced by `sort_dict()` over a copy of
    `highlight_counts` and prints one line per entry to `console`. Entries whose key
    starts like a date, a year ("20"), a URL ("http") or a quoted reply ("On ") are skipped.
    """
    # The values are plain ints, so a shallow copy is a full snapshot; no deepcopy needed.
    highlight_counts = self.highlight_counts.copy()
    weak_date_regex = re.compile(r"^(\d\d?/|20|http|On ).*")

    for highlighted, count in sort_dict(highlight_counts):
        # NOTE(review): highlight() stores the string 'None' rather than None, so the
        # `is None` test looks unreachable for keys written there -- confirm intent.
        if highlighted is None or weak_date_regex.match(highlighted):
            continue

        try:
            console.print(f"{highlighted:25s} highlighted {count} times")
        except Exception as e:
            # Best effort: one unprintable key must not abort the rest of the report,
            # but keep the reason in the log instead of discarding it.
            logger.error(f"Failed to print highlight count {count} for {highlighted}: {e}")
|
|
1741
|
+
|
|
1742
|
+
|
|
1743
|
+
def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
    """Return the first entry of `HIGHLIGHTED_NAMES` whose regex matches `name`, or None if there is none."""
    if name is None:
        return None

    # Lazy generator: stops at the first matching group, in list order.
    matching_groups = (group for group in HIGHLIGHTED_NAMES if group.regex.search(name))
    return next(matching_groups, None)
|
|
1552
1750
|
|
|
1553
1751
|
|
|
1554
1752
|
def get_style_for_category(category: str) -> str | None:
|
|
@@ -1584,15 +1782,6 @@ def styled_name(name: str | None, default_style: str = DEFAULT_NAME_STYLE) -> Te
|
|
|
1584
1782
|
return Text(name or UNKNOWN, style=get_style_for_name(name, default_style=default_style))
|
|
1585
1783
|
|
|
1586
1784
|
|
|
1587
|
-
def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
|
|
1588
|
-
if name is None:
|
|
1589
|
-
return None
|
|
1590
|
-
|
|
1591
|
-
for highlight_group in HIGHLIGHTED_NAMES:
|
|
1592
|
-
if highlight_group.regex.search(name):
|
|
1593
|
-
return highlight_group
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
1785
|
def _print_highlighted_names_repr() -> None:
|
|
1597
1786
|
for hn in HIGHLIGHTED_NAMES:
|
|
1598
1787
|
if isinstance(hn, HighlightedNames):
|