epstein-files 1.2.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import re
3
+ from collections import defaultdict
3
4
  from dataclasses import dataclass, field
4
5
 
5
6
  from rich.console import Console
@@ -11,8 +12,8 @@ from epstein_files.util.constant.strings import *
11
12
  from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
12
13
  from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
13
14
  OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
15
+ from epstein_files.util.data import sort_dict, without_falsey
14
16
  from epstein_files.util.doc_cfg import *
15
- from epstein_files.util.data import without_falsey
16
17
  from epstein_files.util.env import args
17
18
  from epstein_files.util.logging import logger
18
19
 
@@ -30,6 +31,8 @@ REGEX_STYLE_PREFIX = 'regex'
30
31
  SIMPLE_NAME_REGEX = re.compile(r"^[-\w, ]+$", re.IGNORECASE)
31
32
  TECH_BRO = 'tech bro'
32
33
 
34
+ VICTIM_COLOR = 'orchid1'
35
+
33
36
  CATEGORY_STYLE_MAPPING = {
34
37
  ARTICLE: JOURNALIST,
35
38
  BOOK: JOURNALIST,
@@ -153,13 +156,13 @@ class HighlightedNames(HighlightedText):
153
156
  name_patterns = [remove_question_marks(name).replace(' ', r"\s+")]
154
157
 
155
158
  if ' ' in name:
156
- for partial_name in [extract_first_name(name), extract_last_name(name), reversed_name(name)]:
159
+ for partial_name in [reversed_name(name), extract_first_name(name), extract_last_name(name)]: # Order matters
157
160
  if partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT and SIMPLE_NAME_REGEX.match(partial_name):
158
161
  name_patterns.append(partial_name.replace(' ', r"\s+"))
159
162
 
160
163
  pattern = '|'.join(name_patterns)
161
164
 
162
- if args.deep_debug:
165
+ if args.deep_debug and args.colors_only:
163
166
  debug_console.print(Text('').append(f"{name:25s}", style=self.style).append(f" '{pattern}'", style='dim'))
164
167
 
165
168
  return pattern
@@ -214,7 +217,7 @@ HIGHLIGHTED_NAMES = [
214
217
  ManualHighlight(
215
218
  label='email_subject',
216
219
  style='light_yellow3',
217
- pattern=r"^(> )?(Classification|Flag|Subject): (?P<email_subject>.*)",
220
+ pattern=r"^(> )?(Classification|Flag|Subject|Sujet ?): (?P<email_subject>.*)",
218
221
  ),
219
222
  HighlightedNames(
220
223
  label=ACADEMIA,
@@ -239,14 +242,18 @@ HIGHLIGHTED_NAMES = [
239
242
  'Valeria Chomsky': f"wife of {NOAM_CHOMSKY}",
240
243
  },
241
244
  patterns=[
245
+ r"Andy\s*Lippman", # Media Lab
246
+ r"Arizona\s*State\s*University",
242
247
  r"Bard\s+((Early )?College|High School|Schools)",
243
248
  r"Brotherton",
244
249
  r"Carl\s*Sagan",
245
- r"Columbia",
250
+ r"Columbia(\s*(Business\s*School|University))?",
246
251
  r"Dan(iel|ny) Kahneman",
252
+ r"(Francis\s*)?Crick",
247
253
  r"J(ames|im)\s*Watson",
248
254
  r"(Lord\s*)?Martin\s*Rees",
249
255
  r"Massachusetts\s*Institute\s*of\s*Technology",
256
+ r"Mayo\s*Clinic",
250
257
  r"Media\s*Lab",
251
258
  r"(Marvin\s*)?Minsky",
252
259
  r"MIT(\s*Media\s*Lab)?",
@@ -256,10 +263,14 @@ HIGHLIGHTED_NAMES = [
256
263
  r"Princeton(\s*University)?",
257
264
  r"Regeneron",
258
265
  r"(Richard\s*)?Dawkins",
266
+ r"Rockefeller\s*University",
267
+ r"(Sandy\s*)?Pentland", # Media Lab
259
268
  r"Sanofi",
260
269
  r"Stanford(\s*University)?(\s*Hospital)?",
261
- r"(Stephen\s*)?Hawking",
270
+ r"(Ste(ph|v)en\s*)?Hawking",
262
271
  r"(Steven?\s*)?Pinker",
272
+ r"Texas\s*A&M",
273
+ r"Tulane",
263
274
  r"UCLA",
264
275
  ],
265
276
  ),
@@ -276,19 +287,23 @@ HIGHLIGHTED_NAMES = [
276
287
  patterns=[
277
288
  r"Buhari",
278
289
  r"Econet(\s*Wireless)",
290
+ r"Ethiopian?",
279
291
  r"Ghana(ian)?",
280
292
  r"Glencore",
281
293
  r"Goodluck Jonathan",
282
294
  r"Johannesburg",
283
- r"Kenya",
295
+ r"Kenyan?",
284
296
  r"Nigerian?",
285
297
  r"Okey Enelamah",
298
+ r"(Paul\s*)?Kagame",
299
+ r"Rwandan?",
286
300
  r"Senegal(ese)?",
287
301
  r"Serengeti",
288
302
  r"(South\s*)?African?",
289
303
  r"(Strive\s*)?Masiyiwa",
290
- r"Tanzania",
304
+ r"Tanzanian?",
291
305
  r"Ugandan?",
306
+ r"(Yoweri\s*)?Museveni",
292
307
  r"Zimbabwe(an)?",
293
308
  ],
294
309
  ),
@@ -300,6 +315,7 @@ HIGHLIGHTED_NAMES = [
300
315
  'Barry Josephson': 'American film producer, editor FamilySecurityMatters.org',
301
316
  BILL_SIEGEL: 'documentary film producer and director',
302
317
  DAVID_BLAINE: 'famous magician',
318
+ 'David Brenner': 'American comedian and actor',
303
319
  'Richard Merkin': 'painter, illustrator and arts educator',
304
320
  STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
305
321
  'Steven Gaydos': 'American screenwriter and journalist',
@@ -307,11 +323,15 @@ HIGHLIGHTED_NAMES = [
307
323
  patterns=[
308
324
  r"(Art )?Spiegelman",
309
325
  r"Artspace",
326
+ r"Ayn\s*Rand",
310
327
  r"Bobby slayton",
311
328
  r"bono\s*mick",
312
329
  r"Errol(\s*Morris)?",
313
330
  r"Etienne Binant",
314
331
  r"(Frank\s)?Gehry",
332
+ r"Harvey\s*Weinstein", r"wientstein", r"Weinstein\s*Co(s?|mpany)",
333
+ r"IFP",
334
+ r"Independent\s*Filmmaker\s*Project",
315
335
  r"Jagger",
316
336
  r"(Jeffrey\s*)?Katzenberg",
317
337
  r"(Johnny\s*)?Depp",
@@ -320,6 +340,8 @@ HIGHLIGHTED_NAMES = [
320
340
  r"Lena\s*Dunham",
321
341
  r"Madonna",
322
342
  r"Mark\s*Burnett",
343
+ r"New York Film Festival",
344
+ r"Peter Getzels",
323
345
  r"Phaidon",
324
346
  r"Ramsey Elkholy",
325
347
  r"Regan arts",
@@ -340,6 +362,7 @@ HIGHLIGHTED_NAMES = [
340
362
  r"BG",
341
363
  r"b?g?C3",
342
364
  r"(Bill\s*((and|or|&)\s*Melinda\s*)?)?Gates(\s*Foundation)?",
365
+ r"Kofi\s*Rashid",
343
366
  r"Melinda(\s*Gates)?",
344
367
  r"Microsoft",
345
368
  r"MSFT",
@@ -358,23 +381,27 @@ HIGHLIGHTED_NAMES = [
358
381
  r"Balaji",
359
382
  r"bitcoin(\s*Foundation)?",
360
383
  r"block ?chain(\s*capital)?",
384
+ r"Brian Forde",
361
385
  r"Brock(\s*Pierce)?",
362
386
  r"coins?",
387
+ r"Cory\s*Fields", # bitcoin dev
363
388
  r"cr[iy]?pto(currenc(y|ies))?",
364
389
  r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
365
390
  r"e-currency",
366
- r"(Gavin )?Andress?en",
391
+ r"(Gavin )?Andress?en", # bitcoin dev
367
392
  r"(Howard\s+)?Lutnic?k",
393
+ r"(Jim\s*)Pallotta", # Media lab advisory board
368
394
  r"Libra",
369
395
  r"Madars",
370
396
  r"Mi(chael|ke)\s*Novogratz",
371
397
  r"(Patrick\s*)?Murck",
398
+ r"Ron Rivest",
372
399
  r"(Ross\s*)?Ulbricht",
373
400
  r"Silk\s*Road",
374
401
  r"SpanCash",
375
402
  r"Tether",
376
403
  r"virtual\s*currenc(ies|y)",
377
- r"Wladimir( van der Laan)?",
404
+ r"Wladimir( van der Laan)?", # bitcoin dev
378
405
  r"(zero\s+knowledge\s+|zk)pro(of|tocols?)",
379
406
  ],
380
407
  ),
@@ -385,28 +412,29 @@ HIGHLIGHTED_NAMES = [
385
412
  ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
386
413
  BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
387
414
  BARRY_J_COHEN: None,
388
- 'David Mitchell': 'Mitchell Holdings New York real estate developer',
389
- FRED_HADDAD: "co-founder of Heck's in West Virginia",
415
+ 'David Mitchell': 'Mitchell Holdings, New York real estate developer',
390
416
  GERALD_BARTON: "Maryland property developer Landmark Land Company",
391
417
  GORDON_GETTY: 'heir to oil tycoon J. Paul Getty',
392
- NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
393
418
  'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
394
419
  ROBERT_LAWRENCE_KUHN: 'investment banker, China expert',
395
420
  TERRY_KAFKA: 'CEO of Impact Outdoor (highway billboards)',
396
- TOM_PRITZKER: 'brother of J.B. Pritzker',
421
+ TOM_PRITZKER: 'chairman of The Pritzker Organization and Hyatt Hotels',
397
422
  },
398
423
  patterns=[
424
+ r"Arthur Klein",
399
425
  r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
400
426
  r"Gruterite",
401
- r"(John\s*)?Kluge",
427
+ r"((John|Patricia)\s*)?Kluge",
402
428
  r"Marc Rich",
403
429
  r"(Mi(chael|ke)\s*)?Ovitz",
404
430
  r"(Steve\s+)?Wynn",
405
431
  r"(Les(lie)?\s+)?Wexner",
432
+ r"Michael\s*Klein",
406
433
  r"New Leaf Ventures",
407
434
  r"Park Partners",
408
435
  r"SALSS",
409
436
  r"Swedish[-\s]*American\s*Life\s*Science\s*Summit",
437
+ r"Trilateral Commission",
410
438
  r"Valhi",
411
439
  r"(Yves\s*)?Bouvier",
412
440
  ],
@@ -418,6 +446,7 @@ HIGHLIGHTED_NAMES = [
418
446
  r"CBD",
419
447
  r"cannabis",
420
448
  r"marijuana",
449
+ r"psychedelic",
421
450
  r"THC",
422
451
  r"WEED(guide|maps)?[^s]?",
423
452
  ],
@@ -474,9 +503,11 @@ HIGHLIGHTED_NAMES = [
474
503
  r"(Barac?k )?Obama",
475
504
  r"((Bill|Hillart?y)\s*)?Clinton",
476
505
  r"((Chuck|Charles)\s*)?S(ch|hc)umer",
506
+ r"Debbie\s*Wasserman\s*Schultz",
477
507
  r"Dem(ocrat(ic)?)?",
478
508
  r"(Diana\s*)?DeGette",
479
509
  r"DNC",
510
+ r"(Ed(ward)?\s*)?Mezvinsky",
480
511
  r"Elena\s*Kagan",
481
512
  r"(Eliott?\s*)?Spitzer(, Eliot)?",
482
513
  r"Eric Holder",
@@ -491,6 +522,7 @@ HIGHLIGHTED_NAMES = [
491
522
  r"(Matteo\s*)?Salvini",
492
523
  r"Maxine\s*Waters",
493
524
  r"(Nancy )?Pelosi",
525
+ r"Open Society( Global Board)?",
494
526
  r"Ron\s*Dellums",
495
527
  r"Schumer",
496
528
  r"(Tim(othy)?\s*)?Geithner",
@@ -506,16 +538,19 @@ HIGHLIGHTED_NAMES = [
506
538
  EVA: "possibly Epstein's ex-girlfriend (?)",
507
539
  'Eva Dubin': f"Epstein's ex-girlfriend now married to {GLENN_DUBIN}",
508
540
  },
509
- patterns=[r"((Celina|Eva( Anderss?on)?|Glenn) )?Dubin"],
541
+ patterns=[r"((Celina|Eva( Anderss?on)?|Glenn?) )?Dubin"],
510
542
  ),
511
543
  HighlightedNames(
512
544
  label='employee',
513
545
  style='medium_purple4',
514
546
  emailers={
515
547
  'Alfredo Rodriguez': "Epstein's butler, stole the journal",
516
- ERIC_ROTH: 'jet decorator',
548
+ 'Bernard Kruger': "Epstein's doctor",
549
+ EDUARDO_ROBLES: f'home builder at Creative Kingdom Dubai',
550
+ ERIC_ROTH: 'jet decorator at International Jet',
517
551
  GWENDOLYN_BECK: 'Epstein fund manager in the 90s',
518
552
  JANUSZ_BANASIAK: "Epstein's house manager",
553
+ "John Allessi": "Epstein's houseman",
519
554
  JEAN_HUGUEN: 'interior design at Alberto Pinto Cabinet',
520
555
  LAWRANCE_VISOSKI: "Epstein's pilot",
521
556
  LESLEY_GROFF: f"Epstein's assistant",
@@ -523,7 +558,6 @@ HIGHLIGHTED_NAMES = [
523
558
  MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
524
559
  NADIA_MARCINKO: "Epstein's pilot",
525
560
  'Sean J. Lancaster': 'airplane reseller',
526
- ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, Islamabad / Dubai',
527
561
  },
528
562
  patterns=[
529
563
  r"Adriana\s*Ross",
@@ -539,7 +573,7 @@ HIGHLIGHTED_NAMES = [
539
573
  MARK_EPSTEIN: 'brother of Jeffrey',
540
574
  },
541
575
  patterns=[
542
- r"JEGE",
576
+ r"JEGE(\s*Inc)?",
543
577
  r"LSJ",
544
578
  ],
545
579
  ),
@@ -556,7 +590,9 @@ HIGHLIGHTED_NAMES = [
556
590
  DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
557
591
  DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
558
592
  'Erika Kellerhals': 'attorney in St. Thomas',
593
+ FRED_HADDAD: "co-founder of Heck's in West Virginia",
559
594
  GERALD_LEFCOURT: f'friend of {ALAN_DERSHOWITZ}',
595
+ 'Howard Rubenstein': f"Epstein's former spokesman",
560
596
  JACK_GOLDBERGER: CRIMINAL_DEFENSE_2008,
561
597
  JACKIE_PERCZEK: CRIMINAL_DEFENSE_2008,
562
598
  JAY_LEFKOWITZ: f"Kirkland & Ellis partner, {CRIMINAL_DEFENSE_2008}",
@@ -599,7 +635,7 @@ HIGHLIGHTED_NAMES = [
599
635
  emailers={
600
636
  ANDRZEJ_DUDA: 'former president of Poland',
601
637
  'Caroline Lang': 'daughter of Jack Lang',
602
- "Edward Rod Larsen": f"son of {TERJE_ROD_LARSEN}",
638
+ EDWARD_ROD_LARSEN: f"son of {TERJE_ROD_LARSEN}",
603
639
  'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
604
640
  'Jack Lang': 'former French Minister of National Education',
605
641
  MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
@@ -608,8 +644,10 @@ HIGHLIGHTED_NAMES = [
608
644
  THORBJORN_JAGLAND: 'former prime minister of Norway, Nobel Peace Prize Committee',
609
645
  },
610
646
  patterns=[
647
+ r"AfD",
611
648
  r"(Angela )?Merk(el|le)",
612
649
  r"Austria",
650
+ r"Belgi(an|um)",
613
651
  r"(Benjamin\s*)?Harnwell",
614
652
  r"Berlin",
615
653
  r"Borge",
@@ -619,6 +657,7 @@ HIGHLIGHTED_NAMES = [
619
657
  r"Brussels",
620
658
  r"Cannes",
621
659
  r"Cypr(iot|us)",
660
+ r"David\s*Cameron",
622
661
  r"Davos",
623
662
  r"ECB",
624
663
  r"England",
@@ -633,6 +672,8 @@ HIGHLIGHTED_NAMES = [
633
672
  r"Ital(ian|y)",
634
673
  r"Jacques",
635
674
  r"Kiev",
675
+ r"Latvian?",
676
+ r"Lithuanian?",
636
677
  r"Le\s*Pen",
637
678
  r"London",
638
679
  r"Macron",
@@ -642,11 +683,14 @@ HIGHLIGHTED_NAMES = [
642
683
  r"NATO",
643
684
  r"(Nicholas\s*)?Sarkozy",
644
685
  r"Nigel(\s*Farage)?",
686
+ r"(Northern\s*)?Ireland",
645
687
  r"Norw(ay|egian)",
646
688
  r"Oslo",
647
689
  r"Paris",
648
690
  r"Polish",
649
691
  r"pope",
692
+ r"Portugal",
693
+ r"Scotland",
650
694
  r"(Sebastian )?Kurz",
651
695
  r"Stockholm",
652
696
  r"Strasbourg",
@@ -654,8 +698,10 @@ HIGHLIGHTED_NAMES = [
654
698
  r"Swed(en|ish)(?![-\s]+American Life Scienc)",
655
699
  r"Swi(ss|tzerland)",
656
700
  r"(Tony\s)?Blair",
701
+ r"United\s*Kingdom",
657
702
  r"U\.K\.",
658
703
  r"Ukrain(e|ian)",
704
+ r"Venice",
659
705
  r"(Vi(c|k)tor\s+)?Orbah?n",
660
706
  r"Vienna",
661
707
  r"Zug",
@@ -684,8 +730,11 @@ HIGHLIGHTED_NAMES = [
684
730
  'Skip Rimer': 'Milken Institute (Michael Milken)',
685
731
  'Steven Elkman': DEUTSCHE_BANK,
686
732
  'Vahe Stepanian': 'Cetera Financial Group',
733
+ VINIT_SAHNI: f"analyst at {DEUTSCHE_BANK} and {GOLDMAN_SACHS}",
687
734
  },
688
735
  patterns=[
736
+ r"Ace\s*Greenberg",
737
+ r"AIG",
689
738
  r"((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?",
690
739
  r"Apollo",
691
740
  r"Ari\s*Glass",
@@ -705,27 +754,33 @@ HIGHLIGHTED_NAMES = [
705
754
  r"FRBNY",
706
755
  r"Goldman(\s*Sachs)",
707
756
  r"GRAT",
757
+ r"Gratitude (America|& Enhanced)", # Leon Black and/or Epstein charity?
758
+ r"Hank\s*Greenburg",
708
759
  r"HSBC",
709
760
  r"Invesco",
761
+ r"Jamie\s*D(imon)?",
710
762
  r"(Janet\s*)?Yellen",
711
763
  r"(Jerome\s*)?Powell(?! M\. Cabot)",
712
764
  r"(Jimmy\s*)?Cayne",
765
+ r"Joon\s*Yun",
713
766
  r"JPMC?",
714
767
  r"j\.?p\.?\s*morgan(\.?com|\s*Chase)?",
715
768
  r"Madoff",
716
769
  r"Merrill(\s*Lynch)?",
717
770
  r"(Michael\s*)?Cembalest",
718
- r"(Mi(chael|ke)\s*)?Milken(\s*Conference)?",
771
+ r"(Mi(chael|ke)\s*)?Milken(\s*Conference|Institute)?",
719
772
  r"Mizrahi\s*Bank",
720
773
  r"MLPF&S",
721
774
  r"Morgan Stanley",
722
775
  r"(Peter L. )?Scher",
723
776
  r"(Ray\s*)?Dalio",
724
777
  r"(Richard\s*)?LeFrak",
725
- r"Schwartz?man",
778
+ r"Rockefeller(?! University)(\s*Foundation)?",
779
+ r"(Ste(phen|ve)\s*)?Schwart?z?man",
726
780
  r"Serageldin",
727
781
  r"UBS",
728
782
  r"us.gio@jpmorgan.com",
783
+ r"Wall\s*Street(?!\s*Jour)",
729
784
  ],
730
785
  ),
731
786
  HighlightedNames(
@@ -746,6 +801,83 @@ HIGHLIGHTED_NAMES = [
746
801
  r"Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
747
802
  ],
748
803
  ),
804
+ HighlightedNames(
805
+ label='government',
806
+ style='color(24) bold',
807
+ emailers={
808
+ ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
809
+ DANNY_FROST: 'Director of Communications at Manhattan D.A.',
810
+ 'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
811
+ },
812
+ patterns=[
813
+ r"AG",
814
+ r"(Alicia\s*)?Valle",
815
+ r'Alice\s*Fisher|Fisher, Alice',
816
+ r"AML",
817
+ r"(Andrew\s*)?(McCabe|Natsios)",
818
+ r"Attorney General",
819
+ r"((Bob|Robert)\s*)?Mueller",
820
+ r"(Byung\s)?Pak",
821
+ r"Case 1:19-cv-03377(-LAP)?",
822
+ r"(CENT|NORTH|SOUTH)COM",
823
+ r"CFTC?",
824
+ r"CIA",
825
+ r"CIS",
826
+ r"CVRA",
827
+ r"DARPA",
828
+ r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
829
+ r"DHS",
830
+ r"DOJ",
831
+ r"FBI",
832
+ r"FCPA",
833
+ r"FDIC",
834
+ r"FDLE",
835
+ r"Federal\s*Bureau\s*of\s*Investigation",
836
+ r"FinCEN",
837
+ r"FINRA",
838
+ r"FOIA",
839
+ r"FTC",
840
+ r"(General\s*)?P(a|e)traeus",
841
+ r"Geoff\s*Ling",
842
+ r"Homeland\s*Security",
843
+ r"IRS",
844
+ r"(James\s*)?Comey",
845
+ r"(Jennifer\s*Shasky\s*)?Calvery",
846
+ r"((Judge|Mark)\s*)?(Carney|Filip)",
847
+ r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
848
+ r"(Justice|Treasury)\s*Dep(t|artment)",
849
+ r"(Kirk )?Blouin",
850
+ r"KYC",
851
+ r"(Lann?a\s*)?Belohlavek",
852
+ r"NIH",
853
+ r"NPA",
854
+ r"NS(A|C)",
855
+ r"OCC",
856
+ r"OFAC",
857
+ r"(Michael\s*)?Reiter",
858
+ r"OGE",
859
+ r"Office\s*of\s*Government\s*Ethics",
860
+ r"police",
861
+ r"(Preet\s*)?Bharara",
862
+ r"SCOTUS",
863
+ r"SD(FL|NY)",
864
+ r"SEC",
865
+ r"Secret\s*Service",
866
+ r"Securities\s*and\s*Exchange\s*Commission",
867
+ r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
868
+ r"State\s*Dep(artmen)?t",
869
+ r"Strzok",
870
+ r"Supreme\s*Court",
871
+ r"Treasury\s*(Dep(artmen)?t|Secretary)",
872
+ r"TSA",
873
+ r"U\.?S\.? attorney",
874
+ r"USAID",
875
+ r"US\s*(AF|Army|Air\s*Force)",
876
+ r"Walter\s*Reed(\s*Army\s*Institute\s*of\s*Research)?",
877
+ r"(William\s*J\.?\s*)?Zloch",
878
+ r"WRAIR",
879
+ ],
880
+ ),
749
881
  HighlightedNames(
750
882
  label=HARVARD,
751
883
  style='light_goldenrod3',
@@ -773,8 +905,7 @@ HIGHLIGHTED_NAMES = [
773
905
  label='India',
774
906
  style='bright_green',
775
907
  emailers={
776
- ANIL_AMBANI: 'chairman of Reliance Group',
777
- VINIT_SAHNI: None,
908
+ ANIL_AMBANI: 'billionaire chairman of Reliance Group',
778
909
  },
779
910
  patterns=[
780
911
  r"Abraaj",
@@ -797,16 +928,18 @@ HIGHLIGHTED_NAMES = [
797
928
  emailers={
798
929
  EHUD_BARAK: 'former prime minister of Israel, Epstein business partner',
799
930
  'Mitchell Bard': 'director of the American-Israeli Cooperative Enterprise (AICE)',
800
- 'Nili Priell Barak': 'wife of Ehud Barak',
931
+ NILI_PRIELL_BARAK: 'wife of Ehud Barak',
801
932
  },
802
933
  patterns=[
803
934
  r"AIPAC",
804
935
  r"Bibi",
805
936
  r"(eh|(Ehud|Nili Priell)\s*)?barak",
937
+ r"EB",
806
938
  r"Ehud\s*Barack",
807
939
  r"Israeli?",
808
940
  r"Jerusalem",
809
941
  r"J\s*Street",
942
+ r"Menachem\s*Begin",
810
943
  r"Mossad",
811
944
  r"Netanyahu",
812
945
  r"(Sheldon\s*)?Adelson",
@@ -836,7 +969,7 @@ HIGHLIGHTED_NAMES = [
836
969
  'Alain Forget': 'author of "How To Get Out Of This World ALIVE"',
837
970
  'Alex Yablon': 'New York Magazine fact checker (?)',
838
971
  EDWARD_JAY_EPSTEIN: 'no relation, wrote books about spies',
839
- HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher",
972
+ HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher (company not a person)",
840
973
  JAMES_HILL: 'ABC News',
841
974
  JENNIFER_JACQUET: 'Future Science magazine',
842
975
  JOHN_BROCKMAN: 'literary agent and author specializing in scientific literature',
@@ -849,17 +982,21 @@ HIGHLIGHTED_NAMES = [
849
982
  },
850
983
  patterns=[
851
984
  r"ABC(\s*News)?",
985
+ r"Alexandra Wolfe|Wolfe, Alexandra",
986
+ r"AlterNet",
852
987
  r"Arianna(\s*Huffington)?",
853
988
  r"(Arthur\s*)?Kretchmer",
854
989
  r'Associated\s*Press',
855
990
  r"Axios",
856
991
  r"BBC",
992
+ r"(Bob|Robert)\s*(Costa|Woodward)",
857
993
  r"Breitbart",
858
- r"BuzzFeed",
859
- r"CBS(\s*(4|Corp|News))?"
994
+ r"BuzzFeed(\s*News)?",
995
+ r"C-?Span",
996
+ r"CBS(\s*(4|Corp|News))?",
860
997
  r"Charlie\s*Rose",
861
998
  r"China\s*Daily",
862
- r"CNBC",
999
+ r"(C|MS)?NBC(\s*News)?",
863
1000
  r"CNN(politics?)?",
864
1001
  r"Con[cs]hita", r"Sarnoff",
865
1002
  r"Daily Business Review",
@@ -869,6 +1006,7 @@ HIGHLIGHTED_NAMES = [
869
1006
  r"Ed\s*Krassenstein",
870
1007
  r"(Emily\s*)?Michot",
871
1008
  r"Ezra\s*Klein",
1009
+ r"Fire\s*and\s*Fury",
872
1010
  r"Forbes",
873
1011
  r"Fortune\s*Magazine",
874
1012
  r"Fox\s*News(\.com)?",
@@ -879,11 +1017,15 @@ HIGHLIGHTED_NAMES = [
879
1017
  r"Globe\s*and\s*Mail",
880
1018
  r"Good\s*Morning\s*America",
881
1019
  r"Graydon(\s*Carter)?",
1020
+ r"Hollywood\s*Reporter",
882
1021
  r"Huff(ington)?(\s*Po(st)?)?",
883
1022
  r"Ingram, David",
884
- r"(James\s*)?(Hill|Patterson)",
1023
+ r"James\s*Hill",
1024
+ r"(James\s*)?Patterson",
885
1025
  r"Jesse Kornbluth",
1026
+ r"John\s*Connolly",
886
1027
  r"Jonathan\s*Karl",
1028
+ r"Journal of Criminal Law and Criminology",
887
1029
  r"Julie\s*(K.?\s*)?Brown", r'jbrown@miamiherald.com',
888
1030
  r"(Katie\s*)?Couric",
889
1031
  r"Keith\s*Larsen",
@@ -892,7 +1034,7 @@ HIGHLIGHTED_NAMES = [
892
1034
  r"(Les\s*)?Moonves",
893
1035
  r"MarketWatch",
894
1036
  r"Miami\s*Herald",
895
- r"(Mi(chael|ke)\s*)?Bloomberg",
1037
+ r"(Mi(chael|ke)\s*)?Bloomber[gq](\s*News)?",
896
1038
  r"(Michele\s*)?Dargan",
897
1039
  r"Morning News USA",
898
1040
  r"(National\s*)?Enquirer",
@@ -902,19 +1044,21 @@ HIGHLIGHTED_NAMES = [
902
1044
  r"PERVERSION\s*OF\s*JUSTICE",
903
1045
  r"Politico",
904
1046
  r"Pro\s*Publica",
905
- r"Reuters",
906
1047
  r"(Sean\s*)?Hannity",
1048
+ r"Sharon Churcher", # Daily Mail
907
1049
  r"Sulzberger",
908
1050
  r"SunSentinel",
909
1051
  r"(The\s*)?Financial\s*Times",
910
1052
  r"The\s*Guardian",
911
1053
  r"TheHill",
912
1054
  r"(The\s*)?Mail\s*On\s*Sunday",
913
- r"(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)",
1055
+ r"(The\s*)?N(ew\s*)?Y(ork)?\s*(Magazine|Observer|P(ost)?|T(imes)?)",
914
1056
  r"(The\s*)?New\s*Yorker",
915
1057
  r"(The\s*)?Wall\s*Street\s*Journal",
916
1058
  r"(The\s*)?Wa(shington\s*)?Po(st)?",
1059
+ r"(Thomson\s*)?Reuters",
917
1060
  r"(Uma\s*)?Sanghvi",
1061
+ r"USA\s*Today",
918
1062
  r"Vanity\s*Fair",
919
1063
  r"Viceland",
920
1064
  r"Vick[iy]\s*Ward",
@@ -948,11 +1092,12 @@ HIGHLIGHTED_NAMES = [
948
1092
  r"Chile",
949
1093
  r"Colombian?",
950
1094
  r"Cuban?",
1095
+ r"el chapo",
951
1096
  r"El\s*Salvador",
952
1097
  r"((Enrique )?Pena )?Nieto",
953
1098
  r"Lat(in)?\s*Am(erican?)?",
954
1099
  r"Lula",
955
- r"Mexic(an|o)",
1100
+ r"(?<!New )Mexic(an|o)",
956
1101
  r"(Nicolas\s+)?Maduro",
957
1102
  r"Panama( Papers)?",
958
1103
  r"Peru(vian)?",
@@ -960,90 +1105,68 @@ HIGHLIGHTED_NAMES = [
960
1105
  r"Zambrano",
961
1106
  ],
962
1107
  ),
963
- HighlightedNames(
964
- label='government',
965
- style='color(24) bold',
966
- emailers={
967
- ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
968
- DANNY_FROST: 'Director of Communications at Manhattan D.A.',
969
- 'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
970
- },
971
- patterns=[
972
- r"AG",
973
- r"(Alicia\s*)?Valle",
974
- r"AML",
975
- r"(Andrew\s*)?McCabe",
976
- r"Attorney General",
977
- r"((Bob|Robert)\s*)?Mueller",
978
- r"(Byung\s)?Pak",
979
- r"Case 1:19-cv-03377(-LAP)?",
980
- r"CFTC?",
981
- r"CIA",
982
- r"CIS",
983
- r"CVRA",
984
- r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
985
- r"DHS",
986
- r"DOJ",
987
- r"FBI",
988
- r"FCPA",
989
- r"FDIC",
990
- r"Federal\s*Bureau\s*of\s*Investigation",
991
- r"FinCEN",
992
- r"FINRA",
993
- r"FOIA",
994
- r"FTC",
995
- r"(General\s*)?P(a|e)traeus",
996
- r"IRS",
997
- r"(James\s*)?Comey",
998
- r"(Jennifer\s*Shasky\s*)?Calvery",
999
- r"((Judge|Mark)\s*)?(Carney|Filip)",
1000
- r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
1001
- r"(Justice|Treasury)\s*Dep(t|artment)",
1002
- r"(Kirk )?Blouin",
1003
- r"KYC",
1004
- r"(Lann?a\s*)?Belohlavek",
1005
- r"NIH",
1006
- r"NS(A|C)",
1007
- r"OCC",
1008
- r"OFAC",
1009
- r"(Michael\s*)?Reiter",
1010
- r"OGE",
1011
- r"Office\s*of\s*Government\s*Ethics",
1012
- r"police",
1013
- r"(Preet\s*)?Bharara",
1014
- r"SCOTUS",
1015
- r"SD(FL|NY)",
1016
- r"SEC",
1017
- r"Secret\s*Service",
1018
- r"Securities\s*and\s*Exchange\s*Commission",
1019
- r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
1020
- r"State\s*Dep(artmen)?t",
1021
- r"Strzok",
1022
- r"Supreme\s*Court",
1023
- r"Treasury\s*(Dep(artmen)?t|Secretary)",
1024
- r"TSA",
1025
- r"U\.?S\.? attorney",
1026
- r"USAID",
1027
- r"(William\s*J\.?\s*)?Zloch",
1028
- ],
1029
- ),
1030
1108
  HighlightedNames(
1031
1109
  label=LOBBYIST,
1032
1110
  style='light_coral',
1033
1111
  emailers={
1034
1112
  BOB_CROWE: 'partner at Nelson Mullins',
1035
1113
  'Joshua Cooper Ramo': 'co-CEO of Henry Kissinger Associates',
1036
- KATHERINE_KEATING: 'Daughter of former Australian PM',
1114
+ KATHERINE_KEATING: 'daughter of former Australian prime minister',
1037
1115
  MOHAMED_WAHEED_HASSAN: 'former president of the Maldives',
1038
1116
  OLIVIER_COLOM: 'France',
1039
- 'Paul Keating': 'former PM of Australia',
1117
+ 'Paul Keating': 'former prime minister of Australia',
1040
1118
  PUREVSUREN_LUNDEG: 'Mongolian ambassador to the UN',
1041
1119
  'Stanley Rosenberg': 'former President of the Massachusetts Senate',
1042
1120
  },
1043
1121
  patterns=[
1044
1122
  r"CSIS",
1123
+ r"elisabeth\s*feliho",
1045
1124
  r"(Kevin\s*)?Rudd",
1046
1125
  r"Stanley Rosenberg",
1126
+ r"Vinoda\s*Basnayake",
1127
+ ],
1128
+ ),
1129
+ HighlightedNames(
1130
+ label='locations',
1131
+ style='cornsilk1',
1132
+ patterns=[
1133
+ r"Alabama",
1134
+ r"Arizona(?! State University)",
1135
+ r"Aspen",
1136
+ r"Berkeley",
1137
+ r"Boston",
1138
+ r"Brooklyn",
1139
+ r"California",
1140
+ r"Canada",
1141
+ r"Cape Cod",
1142
+ r"Charlottesville",
1143
+ r"Colorado",
1144
+ r"Connecticut",
1145
+ r"Florida",
1146
+ r"Los Angeles",
1147
+ r"Loudoun\s*County?",
1148
+ r"Martha's\s*Vineyard",
1149
+ r"Miami(?!\s?Herald)",
1150
+ r"Nantucket",
1151
+ r"New\s*(Jersey|Mexico)",
1152
+ r"(North|South)\s*Carolina",
1153
+ r"NY(C|\s*State)",
1154
+ r"Orange\s*County",
1155
+ r"Oregon",
1156
+ r"Palo Alto",
1157
+ r"Pennsylvania",
1158
+ r"Phoenix",
1159
+ r"Portland",
1160
+ r"San Francisco",
1161
+ r"Sant[ae]\s*Fe",
1162
+ r"Telluride",
1163
+ r"Teterboro",
1164
+ r"Texas(?! A&M)",
1165
+ r"Toronto",
1166
+ r"Tu(sc|cs)on",
1167
+ r"Vermont",
1168
+ r"Washington(\s*D\.?C)?(?!\s*Post)",
1169
+ r"Westchester",
1047
1170
  ],
1048
1171
  ),
1049
1172
  HighlightedNames(
@@ -1062,6 +1185,7 @@ HIGHLIGHTED_NAMES = [
1062
1185
  r"Afghanistan",
1063
1186
  r"Al[-\s]?Qa[ei]da",
1064
1187
  r"Ahmadinejad",
1188
+ r"(Rakhat )?Aliyev",
1065
1189
  r"Arab",
1066
1190
  r"Aramco",
1067
1191
  r"Armenia",
@@ -1090,6 +1214,7 @@ HIGHLIGHTED_NAMES = [
1090
1214
  r"Hamas",
1091
1215
  r"Hezbollah",
1092
1216
  r"HBJ",
1217
+ r"Hourani",
1093
1218
  r"Houthi",
1094
1219
  r"Imran\s+Khan",
1095
1220
  r"Iran(ian)?([-\s]Contra)?",
@@ -1112,10 +1237,11 @@ HIGHLIGHTED_NAMES = [
1112
1237
  r"MB(N|S|Z)",
1113
1238
  r"Mid(dle)?\s*East(ern)?",
1114
1239
  r"Mohammed\s+bin\s+Salman",
1115
- r"Morocco",
1240
+ r"Morocc(an|o)",
1116
1241
  r"Mubarak",
1117
- r"Muslim",
1242
+ r"Muslim(\s*Brotherhood)?",
1118
1243
  r"Nayaf",
1244
+ r"Nazarbayev",
1119
1245
  r"Pakistani?",
1120
1246
  r"Omar",
1121
1247
  r"(Osama\s*)?Bin\s*Laden",
@@ -1135,9 +1261,10 @@ HIGHLIGHTED_NAMES = [
1135
1261
  r"Syrian?",
1136
1262
  r"(Tarek\s*)?El\s*Sayed",
1137
1263
  r"Tehran",
1264
+ r"Timur\s*Kulibayev",
1138
1265
  r"Tripoli",
1139
1266
  r"Tunisian?",
1140
- r"Turk(ey|ish)",
1267
+ r"Turk(ey|ish)?(?!s & Caicos)",
1141
1268
  r"UAE",
1142
1269
  r"((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
1143
1270
  ],
@@ -1185,8 +1312,11 @@ HIGHLIGHTED_NAMES = [
1185
1312
  },
1186
1313
  patterns=[
1187
1314
  r"(Matt(hew)? )?Hiltzi[gk]",
1315
+ r"Philip\s*Barden",
1316
+ r"PR\s*Newswire",
1188
1317
  REPUTATION_MGMT,
1189
1318
  r"Reputation.com",
1319
+ r"(Robert L\. )?Dilenschneider",
1190
1320
  ],
1191
1321
  ),
1192
1322
  HighlightedNames(
@@ -1201,21 +1331,28 @@ HIGHLIGHTED_NAMES = [
1201
1331
  r"Alberto\sGonzale[sz]",
1202
1332
  r"(Alex\s*)?Acosta",
1203
1333
  r"(Ben\s*)?Sasse",
1334
+ r"Betsy Devos",
1204
1335
  r"((Bill|William)\s*)?Barr",
1205
1336
  r"Bill\s*Shine",
1337
+ r"Blackwater",
1206
1338
  r"(Bob\s*)?Corker",
1207
1339
  r"(Brett\s*)?Kavanaugh",
1208
1340
  r"Broidy",
1209
1341
  r"(Chris\s)?Christie",
1210
1342
  r"(?<!Merwin Dela )Cruz",
1343
+ r"Darrell\s*Issa",
1211
1344
  r"Devin\s*Nunes",
1212
1345
  r"(Don\s*)?McGa[hn]n",
1346
+ r"Erik Prince",
1213
1347
  r"Gary\s*Cohn",
1214
1348
  r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
1215
1349
  r"(George\s*)?Nader",
1216
1350
  r"GOP",
1217
1351
  r"Jeff(rey)?\s*Sessions",
1218
- r"(John\s*(R.?\s*)?)Bolton",
1352
+ r"(John\s*(R.?\s*)?)?Bolton",
1353
+ r"Kasich",
1354
+ r"Keith\s*Schiller",
1355
+ r"Kelly(\s*Anne?)?\s*Conway|Kellyanne",
1219
1356
  r"Kissinger",
1220
1357
  r"Kobach",
1221
1358
  r"Kolfage",
@@ -1229,7 +1366,8 @@ HIGHLIGHTED_NAMES = [
1229
1366
  r"(Michael\s)?Hayden",
1230
1367
  r"((General|Mike)\s*)?(Flynn|Pence)",
1231
1368
  r"(Mitt\s*)?Romney",
1232
- r"Mnuchin",
1369
+ r"(Steven?\s*)?Mnuchin",
1370
+ r"(Newt\s*)Gingrich",
1233
1371
  r"Nikki",
1234
1372
  r"Haley",
1235
1373
  r"(Paul\s*)?(Manafort|Volcker)",
@@ -1238,9 +1376,12 @@ HIGHLIGHTED_NAMES = [
1238
1376
  r"Reagan",
1239
1377
  r"Reince", r"Priebus",
1240
1378
  r"Republican",
1241
- r"(Rex\s*)?Tillerson",
1379
+ r"(Rex\s*)?Till?erson",
1242
1380
  r"(?<!Cynthia )(Richard\s*)?Nixon",
1381
+ r"RNC",
1382
+ r"(Roy|Stephen)\s*Moore",
1243
1383
  r"Tea\s*Party",
1384
+ r"Wilbur\s*Ross",
1244
1385
  ],
1245
1386
  ),
1246
1387
  HighlightedNames(
@@ -1281,6 +1422,8 @@ HIGHLIGHTED_NAMES = [
1281
1422
  r"(Natalia\s*)?Veselnitskaya",
1282
1423
  r"(Oleg\s*)?Deripaska",
1283
1424
  r"Oleksandr Vilkul",
1425
+ r"Onexim", # Prokhorov investment vehicle
1426
+ r"Prokhorov",
1284
1427
  r"Rosneft",
1285
1428
  r"RT",
1286
1429
  r"St.?\s*?Petersburg",
@@ -1289,7 +1432,7 @@ HIGHLIGHTED_NAMES = [
1289
1432
  r"Sberbank",
1290
1433
  r"Soviet(\s*Union)?",
1291
1434
  r"USSR",
1292
- r"Vladimir",
1435
+ r"Vlad(imir)?(?! Yudash)",
1293
1436
  r"(Vladimir\s*)?Putin",
1294
1437
  r"Women\s*Empowerment",
1295
1438
  r"Xitrans",
@@ -1300,6 +1443,7 @@ HIGHLIGHTED_NAMES = [
1300
1443
  label='Southeast Asia',
1301
1444
  style='light_salmon3 bold',
1302
1445
  patterns=[
1446
+ r"Australian?(?! Ave)",
1303
1447
  r"Bangkok",
1304
1448
  r"Burm(a|ese)",
1305
1449
  r"Cambodian?",
@@ -1307,6 +1451,7 @@ HIGHLIGHTED_NAMES = [
1307
1451
  r"Malaysian?",
1308
1452
  r"Maldives",
1309
1453
  r"Myan?mar",
1454
+ r"New\s*Zealand",
1310
1455
  r"Philippines",
1311
1456
  r"South\s*Korean?",
1312
1457
  r"Tai(pei|wan)",
@@ -1324,12 +1469,16 @@ HIGHLIGHTED_NAMES = [
1324
1469
  REID_HOFFMAN: 'PayPal mafia member, founder of LinkedIn',
1325
1470
  STEVEN_SINOFSKY: 'ex-Microsoft, loves bitcoin',
1326
1471
  VINCENZO_IOZZO: 'CEO of the identity-security company SlashID',
1472
+ ZUBAIR_KHAN: 'Tranchulas cybersecurity, InsightsPod founder, Islamabad / Dubai',
1327
1473
  },
1328
1474
  patterns=[
1329
1475
  r"AG?I",
1476
+ r"Artificial\s*(General\s*)?Intelligence",
1330
1477
  r"Chamath", r"Palihapitiya",
1331
1478
  r"Danny\s*Hillis",
1479
+ r"deep learning",
1332
1480
  r"Drew\s*Houston",
1481
+ r"Eliezer\s*Yudkowsky",
1333
1482
  r"Eric\s*Schmidt",
1334
1483
  r"Greylock(\s*Partners)?",
1335
1484
  r"(?<!(ustin|Moshe)\s)Hoffmand?",
@@ -1349,6 +1498,7 @@ HIGHLIGHTED_NAMES = [
1349
1498
  r"Softbank",
1350
1499
  r"SpaceX",
1351
1500
  r"Tim\s*Ferriss?",
1501
+ r"Vision\s*Fund",
1352
1502
  r"WikiLeak(ed|s)",
1353
1503
  ],
1354
1504
  ),
@@ -1357,10 +1507,12 @@ HIGHLIGHTED_NAMES = [
1357
1507
  style='red3 bold',
1358
1508
  emailers={
1359
1509
  'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
1510
+ NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
1360
1511
  },
1361
1512
  patterns=[
1362
1513
  r"@?realDonaldTrump",
1363
1514
  r"(Alan\s*)?Weiss?elberg",
1515
+ r"Alex\s*Jones",
1364
1516
  r"\bDJ?T\b",
1365
1517
  r"Donald J. Tramp",
1366
1518
  r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
@@ -1372,7 +1524,7 @@ HIGHLIGHTED_NAMES = [
1372
1524
  r"(Marla\s*)?Maples",
1373
1525
  r"(Matt(hew)? )?Calamari",
1374
1526
  r"\bMatt C\b",
1375
- r"Michael\s*Cohen",
1527
+ r"Michael\s*(D\.?\s*)?Cohen",
1376
1528
  r"Melania",
1377
1529
  r"(Michael (J.? )?)?Boccio",
1378
1530
  r"Paul Rampell",
@@ -1398,25 +1550,31 @@ HIGHLIGHTED_NAMES = [
1398
1550
  r"Dominican\s*Republic",
1399
1551
  r"(Great|Little)\s*St.?\s*James",
1400
1552
  r"Haiti(an)?",
1553
+ r"Jamaican?",
1401
1554
  r"(John\s*)deJongh(\s*Jr\.?)",
1402
1555
  r"(Kenneth E\. )?Mapp",
1403
1556
  r"PBI",
1404
1557
  r"Puerto\s*Ric(an|o)",
1558
+ r"San\s*Juan",
1405
1559
  r"S(ain)?t.?\s*Thomas",
1406
1560
  r"USVI",
1407
- r"(?<!Epstein )VI",
1408
- r"(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?", # Hard to make this work right
1409
- r"(West\s*)?Palm\s*Beach(?!\s*(Daily|Post))",
1561
+ r"(?<!stein |vis-a-)VI(?!s-a-)",
1562
+ r"(The\s*)?Virgin\s*Is(al|la)nds(\s*Daily\s*News)?", # Hard to make this work right
1563
+ r"(West\s*)?Palm\s*Beach(\s*County)?(?!\s*(Daily|Post))",
1410
1564
  ],
1411
1565
  ),
1412
1566
  HighlightedNames(
1413
1567
  label='victim',
1414
- style='orchid1',
1568
+ style=VICTIM_COLOR,
1415
1569
  patterns=[
1416
- r"(David\s*)?Bo[il]es",
1570
+ r"child\s*pornography",
1571
+ r"(David\s*)?Bo[il]es(,?\s*Schiller( & Flexner)?)?",
1417
1572
  r"(Gloria\s*)?Allred",
1418
1573
  r"(Jane|Tiffany)\s*Doe",
1419
1574
  r"Katie\s*Johnson",
1575
+ r"pedophile",
1576
+ r"Stephanie\s*Clifford",
1577
+ r"Stormy\s*Daniels",
1420
1578
  r"(Virginia\s+((L\.?|Roberts)\s+)?)?Giuffre",
1421
1579
  r"Virginia\s+Roberts",
1422
1580
  ],
@@ -1455,7 +1613,7 @@ HIGHLIGHTED_NAMES = [
1455
1613
 
1456
1614
  # Individuals
1457
1615
  HighlightedNames(
1458
- emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi scheme partner, prison 18 years"},
1616
+ emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi partner, prison for 18 years"},
1459
1617
  style='dark_olive_green3',
1460
1618
  category=FINANCE,
1461
1619
  patterns=[r"(steven?\s*)?hoffenberg?w?"],
@@ -1471,10 +1629,15 @@ HIGHLIGHTED_NAMES = [
1471
1629
  HighlightedNames(emailers={MELANIE_WALKER: f"doctor, friend of {BILL_GATES}"}, style='pale_violet_red1', category=FRIEND),
1472
1630
  HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='paula', style='pink1', category=FRIEND),
1473
1631
  HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1', category='Europe'),
1474
- HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=FRIEND),
1475
- HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1', category=MIDEAST),
1632
+ HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=ARTS),
1633
+ HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'chairman of ports in Dubai, CEO of DP World'}, style='green1', category=MIDEAST),
1476
1634
 
1477
1635
  # HighlightedText not HighlightedNames bc of word boundary issue
1636
+ HighlightedText(
1637
+ label='metoo',
1638
+ style=VICTIM_COLOR,
1639
+ patterns=[r"#metoo"]
1640
+ ),
1478
1641
  HighlightedText(
1479
1642
  label='phone_number',
1480
1643
  style='bright_green',
@@ -1495,7 +1658,7 @@ HIGHLIGHTED_TEXTS = [
1495
1658
  HighlightedText(
1496
1659
  label='header_field',
1497
1660
  style='plum4',
1498
- patterns=[r'^(> )?(Date|From|Sent|To|C[cC]|Importance|Reply-To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification):'],
1661
+ patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
1499
1662
  ),
1500
1663
  HighlightedText(
1501
1664
  label='http_links',
@@ -1505,16 +1668,16 @@ HIGHLIGHTED_TEXTS = [
1505
1668
  HighlightedText(
1506
1669
  label='quoted_reply_line',
1507
1670
  style='dim',
1508
- patterns=[REPLY_REGEX.pattern],
1671
+ patterns=[REPLY_REGEX.pattern, r"^(> )?wrote:$"],
1509
1672
  ),
1510
1673
  HighlightedText(
1511
1674
  label='redacted',
1512
1675
  style='grey58',
1513
- patterns=[fr"{REDACTED}|Privileged - Redacted"],
1676
+ patterns=[fr"{REDACTED}|<?Privileged - Redacted>?"],
1514
1677
  ),
1515
1678
  HighlightedText(
1516
1679
  label='sent_from',
1517
- style='gray42 italic',
1680
+ style='light_cyan3 italic dim',
1518
1681
  patterns=[SENT_FROM_REGEX.pattern],
1519
1682
  ),
1520
1683
  HighlightedText(
@@ -1549,6 +1712,41 @@ class EpsteinHighlighter(RegexHighlighter):
1549
1712
  """Finds and colors interesting keywords based on the above config."""
1550
1713
  base_style = f"{REGEX_STYLE_PREFIX}."
1551
1714
  highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
1715
+ highlight_counts = defaultdict(int)
1716
+
1717
+ def highlight(self, text: Text) -> None:
1718
+ """overrides https://rich.readthedocs.io/en/latest/_modules/rich/highlighter.html#RegexHighlighter"""
1719
+ highlight_regex = text.highlight_regex
1720
+
1721
+ for re_highlight in self.highlights:
1722
+ highlight_regex(re_highlight, style_prefix=self.base_style)
1723
+
1724
+ if args.debug and isinstance(re_highlight, re.Pattern):
1725
+ for match in re_highlight.finditer(text.plain):
1726
+ type(self).highlight_counts[(match.group(1) or 'None').replace('\n', ' ')] += 1
1727
+
1728
+ def print_highlight_counts(self, console: Console) -> None:
1729
+ """Print counts of how many times strings were highlighted."""
1730
+ highlight_counts = deepcopy(self.highlight_counts)
1731
+ weak_date_regex = re.compile(r"^(\d\d?/|20|http|On ).*")
1732
+
1733
+ for highlighted, count in sort_dict(highlight_counts):
1734
+ if highlighted is None or weak_date_regex.match(highlighted):
1735
+ continue
1736
+
1737
+ try:
1738
+ console.print(f"{highlighted:25s} highlighted {count} times")
1739
+ except Exception as e:
1740
+ logger.error(f"Failed to print highlight count {count} for {highlighted}")
1741
+
1742
+
1743
+ def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
1744
+ if name is None:
1745
+ return None
1746
+
1747
+ for highlight_group in HIGHLIGHTED_NAMES:
1748
+ if highlight_group.regex.search(name):
1749
+ return highlight_group
1552
1750
 
1553
1751
 
1554
1752
  def get_style_for_category(category: str) -> str | None:
@@ -1584,15 +1782,6 @@ def styled_name(name: str | None, default_style: str = DEFAULT_NAME_STYLE) -> Te
1584
1782
  return Text(name or UNKNOWN, style=get_style_for_name(name, default_style=default_style))
1585
1783
 
1586
1784
 
1587
- def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
1588
- if name is None:
1589
- return None
1590
-
1591
- for highlight_group in HIGHLIGHTED_NAMES:
1592
- if highlight_group.regex.search(name):
1593
- return highlight_group
1594
-
1595
-
1596
1785
  def _print_highlighted_names_repr() -> None:
1597
1786
  for hn in HIGHLIGHTED_NAMES:
1598
1787
  if isinstance(hn, HighlightedNames):