epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import re
3
+ from collections import defaultdict
3
4
  from dataclasses import dataclass, field
4
5
 
5
6
  from rich.console import Console
@@ -11,8 +12,8 @@ from epstein_files.util.constant.strings import *
11
12
  from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
12
13
  from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
13
14
  OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
15
+ from epstein_files.util.data import sort_dict, without_falsey
14
16
  from epstein_files.util.doc_cfg import *
15
- from epstein_files.util.data import without_falsey
16
17
  from epstein_files.util.env import args
17
18
  from epstein_files.util.logging import logger
18
19
 
@@ -153,7 +154,7 @@ class HighlightedNames(HighlightedText):
153
154
  name_patterns = [remove_question_marks(name).replace(' ', r"\s+")]
154
155
 
155
156
  if ' ' in name:
156
- for partial_name in [extract_first_name(name), extract_last_name(name), reversed_name(name)]:
157
+ for partial_name in [reversed_name(name), extract_first_name(name), extract_last_name(name)]: # Order matters
157
158
  if partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT and SIMPLE_NAME_REGEX.match(partial_name):
158
159
  name_patterns.append(partial_name.replace(' ', r"\s+"))
159
160
 
@@ -239,11 +240,14 @@ HIGHLIGHTED_NAMES = [
239
240
  'Valeria Chomsky': f"wife of {NOAM_CHOMSKY}",
240
241
  },
241
242
  patterns=[
243
+ r"Andy\s*Lippman", # Media Lab
244
+ r"Arizona\s*State\s*University",
242
245
  r"Bard\s+((Early )?College|High School|Schools)",
243
246
  r"Brotherton",
244
247
  r"Carl\s*Sagan",
245
248
  r"Columbia",
246
249
  r"Dan(iel|ny) Kahneman",
250
+ r"(Francis\s*)?Crick",
247
251
  r"J(ames|im)\s*Watson",
248
252
  r"(Lord\s*)?Martin\s*Rees",
249
253
  r"Massachusetts\s*Institute\s*of\s*Technology",
@@ -256,10 +260,13 @@ HIGHLIGHTED_NAMES = [
256
260
  r"Princeton(\s*University)?",
257
261
  r"Regeneron",
258
262
  r"(Richard\s*)?Dawkins",
263
+ r"(Sandy\s*)?Pentland", # Media Lab
259
264
  r"Sanofi",
260
265
  r"Stanford(\s*University)?(\s*Hospital)?",
261
266
  r"(Stephen\s*)?Hawking",
262
267
  r"(Steven?\s*)?Pinker",
268
+ r"Texas\s*A&M",
269
+ r"Tulane",
263
270
  r"UCLA",
264
271
  ],
265
272
  ),
@@ -276,19 +283,23 @@ HIGHLIGHTED_NAMES = [
276
283
  patterns=[
277
284
  r"Buhari",
278
285
  r"Econet(\s*Wireless)",
286
+ r"Ethiopian?",
279
287
  r"Ghana(ian)?",
280
288
  r"Glencore",
281
289
  r"Goodluck Jonathan",
282
290
  r"Johannesburg",
283
- r"Kenya",
291
+ r"Kenyan?",
284
292
  r"Nigerian?",
285
293
  r"Okey Enelamah",
294
+ r"(Paul\s*)?Kagame",
295
+ r"Rwandan?",
286
296
  r"Senegal(ese)?",
287
297
  r"Serengeti",
288
298
  r"(South\s*)?African?",
289
299
  r"(Strive\s*)?Masiyiwa",
290
- r"Tanzania",
300
+ r"Tanzanian?",
291
301
  r"Ugandan?",
302
+ r"(Yoweri\s*)?Museveni",
292
303
  r"Zimbabwe(an)?",
293
304
  ],
294
305
  ),
@@ -300,6 +311,7 @@ HIGHLIGHTED_NAMES = [
300
311
  'Barry Josephson': 'American film producer, editor FamilySecurityMatters.org',
301
312
  BILL_SIEGEL: 'documentary film producer and director',
302
313
  DAVID_BLAINE: 'famous magician',
314
+ 'David Brenner': 'American comedian and actor',
303
315
  'Richard Merkin': 'painter, illustrator and arts educator',
304
316
  STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
305
317
  'Steven Gaydos': 'American screenwriter and journalist',
@@ -312,6 +324,9 @@ HIGHLIGHTED_NAMES = [
312
324
  r"Errol(\s*Morris)?",
313
325
  r"Etienne Binant",
314
326
  r"(Frank\s)?Gehry",
327
+ r"Harvey\s*Weinstein", r"wientstein", r"Weinstein\s*Co(s?|mpany)",
328
+ r"IFP",
329
+ r"Independent\s*Filmmaker\s*Project",
315
330
  r"Jagger",
316
331
  r"(Jeffrey\s*)?Katzenberg",
317
332
  r"(Johnny\s*)?Depp",
@@ -320,6 +335,8 @@ HIGHLIGHTED_NAMES = [
320
335
  r"Lena\s*Dunham",
321
336
  r"Madonna",
322
337
  r"Mark\s*Burnett",
338
+ r"New York Film Festival",
339
+ r"Peter Getzels",
323
340
  r"Phaidon",
324
341
  r"Ramsey Elkholy",
325
342
  r"Regan arts",
@@ -340,6 +357,7 @@ HIGHLIGHTED_NAMES = [
340
357
  r"BG",
341
358
  r"b?g?C3",
342
359
  r"(Bill\s*((and|or|&)\s*Melinda\s*)?)?Gates(\s*Foundation)?",
360
+ r"Kofi\s*Rashid",
343
361
  r"Melinda(\s*Gates)?",
344
362
  r"Microsoft",
345
363
  r"MSFT",
@@ -358,23 +376,27 @@ HIGHLIGHTED_NAMES = [
358
376
  r"Balaji",
359
377
  r"bitcoin(\s*Foundation)?",
360
378
  r"block ?chain(\s*capital)?",
379
+ r"Brian Forde",
361
380
  r"Brock(\s*Pierce)?",
362
381
  r"coins?",
382
+ r"Cory\s*Fields", # bitcoin dev
363
383
  r"cr[iy]?pto(currenc(y|ies))?",
364
384
  r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
365
385
  r"e-currency",
366
- r"(Gavin )?Andress?en",
386
+ r"(Gavin )?Andress?en", # bitcoin dev
367
387
  r"(Howard\s+)?Lutnic?k",
388
+ r"(Jim\s*)Pallotta", # Media lab advisory board
368
389
  r"Libra",
369
390
  r"Madars",
370
391
  r"Mi(chael|ke)\s*Novogratz",
371
392
  r"(Patrick\s*)?Murck",
393
+ r"Ron Rivest",
372
394
  r"(Ross\s*)?Ulbricht",
373
395
  r"Silk\s*Road",
374
396
  r"SpanCash",
375
397
  r"Tether",
376
398
  r"virtual\s*currenc(ies|y)",
377
- r"Wladimir( van der Laan)?",
399
+ r"Wladimir( van der Laan)?", # bitcoin dev
378
400
  r"(zero\s+knowledge\s+|zk)pro(of|tocols?)",
379
401
  ],
380
402
  ),
@@ -385,11 +407,9 @@ HIGHLIGHTED_NAMES = [
385
407
  ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
386
408
  BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
387
409
  BARRY_J_COHEN: None,
388
- 'David Mitchell': 'Mitchell Holdings New York real estate developer',
389
- FRED_HADDAD: "co-founder of Heck's in West Virginia",
410
+ 'David Mitchell': 'Mitchell Holdings, New York real estate developer',
390
411
  GERALD_BARTON: "Maryland property developer Landmark Land Company",
391
412
  GORDON_GETTY: 'heir to oil tycoon J. Paul Getty',
392
- NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
393
413
  'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
394
414
  ROBERT_LAWRENCE_KUHN: 'investment banker, China expert',
395
415
  TERRY_KAFKA: 'CEO of Impact Outdoor (highway billboards)',
@@ -398,7 +418,7 @@ HIGHLIGHTED_NAMES = [
398
418
  patterns=[
399
419
  r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
400
420
  r"Gruterite",
401
- r"(John\s*)?Kluge",
421
+ r"((John|Patricia)\s*)?Kluge",
402
422
  r"Marc Rich",
403
423
  r"(Mi(chael|ke)\s*)?Ovitz",
404
424
  r"(Steve\s+)?Wynn",
@@ -407,6 +427,7 @@ HIGHLIGHTED_NAMES = [
407
427
  r"Park Partners",
408
428
  r"SALSS",
409
429
  r"Swedish[-\s]*American\s*Life\s*Science\s*Summit",
430
+ r"Trilateral Commission",
410
431
  r"Valhi",
411
432
  r"(Yves\s*)?Bouvier",
412
433
  ],
@@ -418,6 +439,7 @@ HIGHLIGHTED_NAMES = [
418
439
  r"CBD",
419
440
  r"cannabis",
420
441
  r"marijuana",
442
+ r"psychedelic",
421
443
  r"THC",
422
444
  r"WEED(guide|maps)?[^s]?",
423
445
  ],
@@ -474,6 +496,7 @@ HIGHLIGHTED_NAMES = [
474
496
  r"(Barac?k )?Obama",
475
497
  r"((Bill|Hillart?y)\s*)?Clinton",
476
498
  r"((Chuck|Charles)\s*)?S(ch|hc)umer",
499
+ r"Debbie\s*Wasserman\s*Schultz",
477
500
  r"Dem(ocrat(ic)?)?",
478
501
  r"(Diana\s*)?DeGette",
479
502
  r"DNC",
@@ -491,6 +514,7 @@ HIGHLIGHTED_NAMES = [
491
514
  r"(Matteo\s*)?Salvini",
492
515
  r"Maxine\s*Waters",
493
516
  r"(Nancy )?Pelosi",
517
+ r"Open Society( Global Board)?",
494
518
  r"Ron\s*Dellums",
495
519
  r"Schumer",
496
520
  r"(Tim(othy)?\s*)?Geithner",
@@ -506,16 +530,19 @@ HIGHLIGHTED_NAMES = [
506
530
  EVA: "possibly Epstein's ex-girlfriend (?)",
507
531
  'Eva Dubin': f"Epstein's ex-girlfriend now married to {GLENN_DUBIN}",
508
532
  },
509
- patterns=[r"((Celina|Eva( Anderss?on)?|Glenn) )?Dubin"],
533
+ patterns=[r"((Celina|Eva( Anderss?on)?|Glenn?) )?Dubin"],
510
534
  ),
511
535
  HighlightedNames(
512
536
  label='employee',
513
537
  style='medium_purple4',
514
538
  emailers={
515
539
  'Alfredo Rodriguez': "Epstein's butler, stole the journal",
516
- ERIC_ROTH: 'jet decorator',
540
+ 'Bernard Kruger': "Epstein's doctor",
541
+ EDUARDO_ROBLES: f'home builder at Creative Kingdom Dubai',
542
+ ERIC_ROTH: 'jet decorator at International Jet',
517
543
  GWENDOLYN_BECK: 'Epstein fund manager in the 90s',
518
544
  JANUSZ_BANASIAK: "Epstein's house manager",
545
+ "John Allessi": "Epstein's houseman",
519
546
  JEAN_HUGUEN: 'interior design at Alberto Pinto Cabinet',
520
547
  LAWRANCE_VISOSKI: "Epstein's pilot",
521
548
  LESLEY_GROFF: f"Epstein's assistant",
@@ -523,7 +550,7 @@ HIGHLIGHTED_NAMES = [
523
550
  MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
524
551
  NADIA_MARCINKO: "Epstein's pilot",
525
552
  'Sean J. Lancaster': 'airplane reseller',
526
- ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, Islamabad / Dubai',
553
+ ZUBAIR_KHAN: 'Tranchulas cybersecurity, InsightsPod founder, Islamabad / Dubai',
527
554
  },
528
555
  patterns=[
529
556
  r"Adriana\s*Ross",
@@ -539,7 +566,7 @@ HIGHLIGHTED_NAMES = [
539
566
  MARK_EPSTEIN: 'brother of Jeffrey',
540
567
  },
541
568
  patterns=[
542
- r"JEGE",
569
+ r"JEGE(\s*Inc)",
543
570
  r"LSJ",
544
571
  ],
545
572
  ),
@@ -556,7 +583,9 @@ HIGHLIGHTED_NAMES = [
556
583
  DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
557
584
  DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
558
585
  'Erika Kellerhals': 'attorney in St. Thomas',
586
+ FRED_HADDAD: "co-founder of Heck's in West Virginia",
559
587
  GERALD_LEFCOURT: f'friend of {ALAN_DERSHOWITZ}',
588
+ 'Howard Rubenstein': f"Epstein's former spokesman",
560
589
  JACK_GOLDBERGER: CRIMINAL_DEFENSE_2008,
561
590
  JACKIE_PERCZEK: CRIMINAL_DEFENSE_2008,
562
591
  JAY_LEFKOWITZ: f"Kirkland & Ellis partner, {CRIMINAL_DEFENSE_2008}",
@@ -599,7 +628,7 @@ HIGHLIGHTED_NAMES = [
599
628
  emailers={
600
629
  ANDRZEJ_DUDA: 'former president of Poland',
601
630
  'Caroline Lang': 'daughter of Jack Lang',
602
- "Edward Rod Larsen": f"son of {TERJE_ROD_LARSEN}",
631
+ EDWARD_ROD_LARSEN: f"son of {TERJE_ROD_LARSEN}",
603
632
  'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
604
633
  'Jack Lang': 'former French Minister of National Education',
605
634
  MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
@@ -608,6 +637,7 @@ HIGHLIGHTED_NAMES = [
608
637
  THORBJORN_JAGLAND: 'former prime minister of Norway, Nobel Peace Prize Committee',
609
638
  },
610
639
  patterns=[
640
+ r"AfD",
611
641
  r"(Angela )?Merk(el|le)",
612
642
  r"Austria",
613
643
  r"(Benjamin\s*)?Harnwell",
@@ -647,6 +677,7 @@ HIGHLIGHTED_NAMES = [
647
677
  r"Paris",
648
678
  r"Polish",
649
679
  r"pope",
680
+ r"Scotland",
650
681
  r"(Sebastian )?Kurz",
651
682
  r"Stockholm",
652
683
  r"Strasbourg",
@@ -656,6 +687,7 @@ HIGHLIGHTED_NAMES = [
656
687
  r"(Tony\s)?Blair",
657
688
  r"U\.K\.",
658
689
  r"Ukrain(e|ian)",
690
+ r"Venice",
659
691
  r"(Vi(c|k)tor\s+)?Orbah?n",
660
692
  r"Vienna",
661
693
  r"Zug",
@@ -684,8 +716,11 @@ HIGHLIGHTED_NAMES = [
684
716
  'Skip Rimer': 'Milken Institute (Michael Milken)',
685
717
  'Steven Elkman': DEUTSCHE_BANK,
686
718
  'Vahe Stepanian': 'Cetera Financial Group',
719
+ VINIT_SAHNI: f"analyst at {DEUTSCHE_BANK} and {GOLDMAN_SACHS}",
687
720
  },
688
721
  patterns=[
722
+ r"Ace\s*Greenberg",
723
+ r"AIG",
689
724
  r"((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?",
690
725
  r"Apollo",
691
726
  r"Ari\s*Glass",
@@ -705,8 +740,11 @@ HIGHLIGHTED_NAMES = [
705
740
  r"FRBNY",
706
741
  r"Goldman(\s*Sachs)",
707
742
  r"GRAT",
743
+ r"Gratitude (America|& Enhanced)", # Leon Black and/or Epstein charity?
744
+ r"Hank\s*Greenburg",
708
745
  r"HSBC",
709
746
  r"Invesco",
747
+ r"Jamie\s*D(imon)?",
710
748
  r"(Janet\s*)?Yellen",
711
749
  r"(Jerome\s*)?Powell(?! M\. Cabot)",
712
750
  r"(Jimmy\s*)?Cayne",
@@ -715,17 +753,18 @@ HIGHLIGHTED_NAMES = [
715
753
  r"Madoff",
716
754
  r"Merrill(\s*Lynch)?",
717
755
  r"(Michael\s*)?Cembalest",
718
- r"(Mi(chael|ke)\s*)?Milken(\s*Conference)?",
756
+ r"(Mi(chael|ke)\s*)?Milken(\s*Conference|Institute)?",
719
757
  r"Mizrahi\s*Bank",
720
758
  r"MLPF&S",
721
759
  r"Morgan Stanley",
722
760
  r"(Peter L. )?Scher",
723
761
  r"(Ray\s*)?Dalio",
724
762
  r"(Richard\s*)?LeFrak",
725
- r"Schwartz?man",
763
+ r"(Ste(phen|ve)\s*)?Schwart?z?man",
726
764
  r"Serageldin",
727
765
  r"UBS",
728
766
  r"us.gio@jpmorgan.com",
767
+ r"Wall\s*Street(?!\s*Jour)",
729
768
  ],
730
769
  ),
731
770
  HighlightedNames(
@@ -746,6 +785,82 @@ HIGHLIGHTED_NAMES = [
746
785
  r"Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
747
786
  ],
748
787
  ),
788
+ HighlightedNames(
789
+ label='government',
790
+ style='color(24) bold',
791
+ emailers={
792
+ ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
793
+ DANNY_FROST: 'Director of Communications at Manhattan D.A.',
794
+ 'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
795
+ },
796
+ patterns=[
797
+ r"AG",
798
+ r"(Alicia\s*)?Valle",
799
+ r'Alice\s*Fisher|Fisher, Alice',
800
+ r"AML",
801
+ r"(Andrew\s*)?(McCabe|Natsios)",
802
+ r"Attorney General",
803
+ r"((Bob|Robert)\s*)?Mueller",
804
+ r"(Byung\s)?Pak",
805
+ r"Case 1:19-cv-03377(-LAP)?",
806
+ r"(CENT|NORTH|SOUTH)COM",
807
+ r"CFTC?",
808
+ r"CIA",
809
+ r"CIS",
810
+ r"CVRA",
811
+ r"DARPA",
812
+ r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
813
+ r"DHS",
814
+ r"DOJ",
815
+ r"FBI",
816
+ r"FCPA",
817
+ r"FDIC",
818
+ r"FDLE",
819
+ r"Federal\s*Bureau\s*of\s*Investigation",
820
+ r"FinCEN",
821
+ r"FINRA",
822
+ r"FOIA",
823
+ r"FTC",
824
+ r"(General\s*)?P(a|e)traeus",
825
+ r"Geoff\s*Ling",
826
+ r"IRS",
827
+ r"(James\s*)?Comey",
828
+ r"(Jennifer\s*Shasky\s*)?Calvery",
829
+ r"((Judge|Mark)\s*)?(Carney|Filip)",
830
+ r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
831
+ r"(Justice|Treasury)\s*Dep(t|artment)",
832
+ r"(Kirk )?Blouin",
833
+ r"KYC",
834
+ r"(Lann?a\s*)?Belohlavek",
835
+ r"NIH",
836
+ r"NPA",
837
+ r"NS(A|C)",
838
+ r"OCC",
839
+ r"OFAC",
840
+ r"(Michael\s*)?Reiter",
841
+ r"OGE",
842
+ r"Office\s*of\s*Government\s*Ethics",
843
+ r"police",
844
+ r"(Preet\s*)?Bharara",
845
+ r"SCOTUS",
846
+ r"SD(FL|NY)",
847
+ r"SEC",
848
+ r"Secret\s*Service",
849
+ r"Securities\s*and\s*Exchange\s*Commission",
850
+ r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
851
+ r"State\s*Dep(artmen)?t",
852
+ r"Strzok",
853
+ r"Supreme\s*Court",
854
+ r"Treasury\s*(Dep(artmen)?t|Secretary)",
855
+ r"TSA",
856
+ r"U\.?S\.? attorney",
857
+ r"USAID",
858
+ r"US\s*(AF|Army|Air\s*Force)",
859
+ r"Walter\s*Reed(\s*Army\s*Institute\s*of\s*Research)?",
860
+ r"(William\s*J\.?\s*)?Zloch",
861
+ r"WRAIR",
862
+ ],
863
+ ),
749
864
  HighlightedNames(
750
865
  label=HARVARD,
751
866
  style='light_goldenrod3',
@@ -773,8 +888,7 @@ HIGHLIGHTED_NAMES = [
773
888
  label='India',
774
889
  style='bright_green',
775
890
  emailers={
776
- ANIL_AMBANI: 'chairman of Reliance Group',
777
- VINIT_SAHNI: None,
891
+ ANIL_AMBANI: 'billionaire chairman of Reliance Group',
778
892
  },
779
893
  patterns=[
780
894
  r"Abraaj",
@@ -797,16 +911,18 @@ HIGHLIGHTED_NAMES = [
797
911
  emailers={
798
912
  EHUD_BARAK: 'former prime minister of Israel, Epstein business partner',
799
913
  'Mitchell Bard': 'director of the American-Israeli Cooperative Enterprise (AICE)',
800
- 'Nili Priell Barak': 'wife of Ehud Barak',
914
+ NILI_PRIELL_BARAK: 'wife of Ehud Barak',
801
915
  },
802
916
  patterns=[
803
917
  r"AIPAC",
804
918
  r"Bibi",
805
919
  r"(eh|(Ehud|Nili Priell)\s*)?barak",
920
+ r"EB",
806
921
  r"Ehud\s*Barack",
807
922
  r"Israeli?",
808
923
  r"Jerusalem",
809
924
  r"J\s*Street",
925
+ r"Menachem\s*Begin",
810
926
  r"Mossad",
811
927
  r"Netanyahu",
812
928
  r"(Sheldon\s*)?Adelson",
@@ -849,14 +965,18 @@ HIGHLIGHTED_NAMES = [
849
965
  },
850
966
  patterns=[
851
967
  r"ABC(\s*News)?",
968
+ r"Alexandra Wolfe|Wolfe, Alexandra",
969
+ r"AlterNet",
852
970
  r"Arianna(\s*Huffington)?",
853
971
  r"(Arthur\s*)?Kretchmer",
854
972
  r'Associated\s*Press',
855
973
  r"Axios",
856
974
  r"BBC",
975
+ r"(Bob|Robert)\s*Costa",
857
976
  r"Breitbart",
858
- r"BuzzFeed",
859
- r"CBS(\s*(4|Corp|News))?"
977
+ r"BuzzFeed(\s*News)?",
978
+ r"C-?Span",
979
+ r"CBS(\s*(4|Corp|News))?",
860
980
  r"Charlie\s*Rose",
861
981
  r"China\s*Daily",
862
982
  r"CNBC",
@@ -869,6 +989,7 @@ HIGHLIGHTED_NAMES = [
869
989
  r"Ed\s*Krassenstein",
870
990
  r"(Emily\s*)?Michot",
871
991
  r"Ezra\s*Klein",
992
+ r"Fire\s*and\s*Fury",
872
993
  r"Forbes",
873
994
  r"Fortune\s*Magazine",
874
995
  r"Fox\s*News(\.com)?",
@@ -881,8 +1002,10 @@ HIGHLIGHTED_NAMES = [
881
1002
  r"Graydon(\s*Carter)?",
882
1003
  r"Huff(ington)?(\s*Po(st)?)?",
883
1004
  r"Ingram, David",
884
- r"(James\s*)?(Hill|Patterson)",
1005
+ r"James\s*Hill",
1006
+ r"(James\s*)?Patterson",
885
1007
  r"Jesse Kornbluth",
1008
+ r"John\s*Connolly",
886
1009
  r"Jonathan\s*Karl",
887
1010
  r"Julie\s*(K.?\s*)?Brown", r'jbrown@miamiherald.com',
888
1011
  r"(Katie\s*)?Couric",
@@ -892,7 +1015,7 @@ HIGHLIGHTED_NAMES = [
892
1015
  r"(Les\s*)?Moonves",
893
1016
  r"MarketWatch",
894
1017
  r"Miami\s*Herald",
895
- r"(Mi(chael|ke)\s*)?Bloomberg",
1018
+ r"(Mi(chael|ke)\s*)?Bloomber[gq](\s*News)?",
896
1019
  r"(Michele\s*)?Dargan",
897
1020
  r"Morning News USA",
898
1021
  r"(National\s*)?Enquirer",
@@ -904,13 +1027,14 @@ HIGHLIGHTED_NAMES = [
904
1027
  r"Pro\s*Publica",
905
1028
  r"Reuters",
906
1029
  r"(Sean\s*)?Hannity",
1030
+ r"Sharon Churcher", # Daily Mail
907
1031
  r"Sulzberger",
908
1032
  r"SunSentinel",
909
1033
  r"(The\s*)?Financial\s*Times",
910
1034
  r"The\s*Guardian",
911
1035
  r"TheHill",
912
1036
  r"(The\s*)?Mail\s*On\s*Sunday",
913
- r"(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)",
1037
+ r"(The\s*)?N(ew\s*)?Y(ork)?\s*(Magazine|Observer|P(ost)?|T(imes)?)",
914
1038
  r"(The\s*)?New\s*Yorker",
915
1039
  r"(The\s*)?Wall\s*Street\s*Journal",
916
1040
  r"(The\s*)?Wa(shington\s*)?Po(st)?",
@@ -952,7 +1076,7 @@ HIGHLIGHTED_NAMES = [
952
1076
  r"((Enrique )?Pena )?Nieto",
953
1077
  r"Lat(in)?\s*Am(erican?)?",
954
1078
  r"Lula",
955
- r"Mexic(an|o)",
1079
+ r"(?<!New )Mexic(an|o)",
956
1080
  r"(Nicolas\s+)?Maduro",
957
1081
  r"Panama( Papers)?",
958
1082
  r"Peru(vian)?",
@@ -960,90 +1084,61 @@ HIGHLIGHTED_NAMES = [
960
1084
  r"Zambrano",
961
1085
  ],
962
1086
  ),
963
- HighlightedNames(
964
- label='government',
965
- style='color(24) bold',
966
- emailers={
967
- ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
968
- DANNY_FROST: 'Director of Communications at Manhattan D.A.',
969
- 'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
970
- },
971
- patterns=[
972
- r"AG",
973
- r"(Alicia\s*)?Valle",
974
- r"AML",
975
- r"(Andrew\s*)?McCabe",
976
- r"Attorney General",
977
- r"((Bob|Robert)\s*)?Mueller",
978
- r"(Byung\s)?Pak",
979
- r"Case 1:19-cv-03377(-LAP)?",
980
- r"CFTC?",
981
- r"CIA",
982
- r"CIS",
983
- r"CVRA",
984
- r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
985
- r"DHS",
986
- r"DOJ",
987
- r"FBI",
988
- r"FCPA",
989
- r"FDIC",
990
- r"Federal\s*Bureau\s*of\s*Investigation",
991
- r"FinCEN",
992
- r"FINRA",
993
- r"FOIA",
994
- r"FTC",
995
- r"(General\s*)?P(a|e)traeus",
996
- r"IRS",
997
- r"(James\s*)?Comey",
998
- r"(Jennifer\s*Shasky\s*)?Calvery",
999
- r"((Judge|Mark)\s*)?(Carney|Filip)",
1000
- r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
1001
- r"(Justice|Treasury)\s*Dep(t|artment)",
1002
- r"(Kirk )?Blouin",
1003
- r"KYC",
1004
- r"(Lann?a\s*)?Belohlavek",
1005
- r"NIH",
1006
- r"NS(A|C)",
1007
- r"OCC",
1008
- r"OFAC",
1009
- r"(Michael\s*)?Reiter",
1010
- r"OGE",
1011
- r"Office\s*of\s*Government\s*Ethics",
1012
- r"police",
1013
- r"(Preet\s*)?Bharara",
1014
- r"SCOTUS",
1015
- r"SD(FL|NY)",
1016
- r"SEC",
1017
- r"Secret\s*Service",
1018
- r"Securities\s*and\s*Exchange\s*Commission",
1019
- r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
1020
- r"State\s*Dep(artmen)?t",
1021
- r"Strzok",
1022
- r"Supreme\s*Court",
1023
- r"Treasury\s*(Dep(artmen)?t|Secretary)",
1024
- r"TSA",
1025
- r"U\.?S\.? attorney",
1026
- r"USAID",
1027
- r"(William\s*J\.?\s*)?Zloch",
1028
- ],
1029
- ),
1030
1087
  HighlightedNames(
1031
1088
  label=LOBBYIST,
1032
1089
  style='light_coral',
1033
1090
  emailers={
1034
1091
  BOB_CROWE: 'partner at Nelson Mullins',
1035
1092
  'Joshua Cooper Ramo': 'co-CEO of Henry Kissinger Associates',
1036
- KATHERINE_KEATING: 'Daughter of former Australian PM',
1093
+ KATHERINE_KEATING: 'daughter of former Australian prime minister',
1037
1094
  MOHAMED_WAHEED_HASSAN: 'former president of the Maldives',
1038
1095
  OLIVIER_COLOM: 'France',
1039
- 'Paul Keating': 'former PM of Australia',
1096
+ 'Paul Keating': 'former prime minister of Australia',
1040
1097
  PUREVSUREN_LUNDEG: 'Mongolian ambassador to the UN',
1041
1098
  'Stanley Rosenberg': 'former President of the Massachusetts Senate',
1042
1099
  },
1043
1100
  patterns=[
1044
1101
  r"CSIS",
1102
+ r"elisabeth\s*feliho",
1045
1103
  r"(Kevin\s*)?Rudd",
1046
1104
  r"Stanley Rosenberg",
1105
+ r"Vinoda\s*Basnayake",
1106
+ ],
1107
+ ),
1108
+ HighlightedNames(
1109
+ label='locations',
1110
+ style='cornsilk1',
1111
+ patterns=[
1112
+ r"Alabama",
1113
+ r"Arizona(?! State University)",
1114
+ r"Aspen",
1115
+ r"Berkeley",
1116
+ r"Brooklyn",
1117
+ r"California",
1118
+ r"Canada",
1119
+ r"Cape Cod",
1120
+ r"Connecticut",
1121
+ r"Florida",
1122
+ r"Loudoun\s*County?",
1123
+ r"Martha's\s*Vineyard",
1124
+ r"Miami(?!\s?Herald)",
1125
+ r"Nantucket",
1126
+ r"New\s*(Jersey|Mexico)",
1127
+ r"(North|South)\s*Carolina",
1128
+ r"NY(C|\s*State)",
1129
+ r"Orange\s*County",
1130
+ r"Oregon",
1131
+ r"Phoenix",
1132
+ r"Portland",
1133
+ r"Santa\s*Fe",
1134
+ r"Telluride",
1135
+ r"Teterboro",
1136
+ r"Texas(?! A&M)",
1137
+ r"Toronto",
1138
+ r"Tu(sc|cs)on",
1139
+ r"Vermont",
1140
+ r"Washington(\s*D\.?C)?(?!\s*Post)",
1141
+ r"Westchester",
1047
1142
  ],
1048
1143
  ),
1049
1144
  HighlightedNames(
@@ -1114,7 +1209,7 @@ HIGHLIGHTED_NAMES = [
1114
1209
  r"Mohammed\s+bin\s+Salman",
1115
1210
  r"Morocco",
1116
1211
  r"Mubarak",
1117
- r"Muslim",
1212
+ r"Muslim(\s*Brotherhood)?",
1118
1213
  r"Nayaf",
1119
1214
  r"Pakistani?",
1120
1215
  r"Omar",
@@ -1138,6 +1233,7 @@ HIGHLIGHTED_NAMES = [
1138
1233
  r"Tripoli",
1139
1234
  r"Tunisian?",
1140
1235
  r"Turk(ey|ish)",
1236
+ r"Turks(?! & Caicos)",
1141
1237
  r"UAE",
1142
1238
  r"((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
1143
1239
  ],
@@ -1185,8 +1281,10 @@ HIGHLIGHTED_NAMES = [
1185
1281
  },
1186
1282
  patterns=[
1187
1283
  r"(Matt(hew)? )?Hiltzi[gk]",
1284
+ r"PR\s*Newswire",
1188
1285
  REPUTATION_MGMT,
1189
1286
  r"Reputation.com",
1287
+ r"(Robert L\. )?Dilenschneider",
1190
1288
  ],
1191
1289
  ),
1192
1290
  HighlightedNames(
@@ -1201,8 +1299,10 @@ HIGHLIGHTED_NAMES = [
1201
1299
  r"Alberto\sGonzale[sz]",
1202
1300
  r"(Alex\s*)?Acosta",
1203
1301
  r"(Ben\s*)?Sasse",
1302
+ r"Betsy Devos",
1204
1303
  r"((Bill|William)\s*)?Barr",
1205
1304
  r"Bill\s*Shine",
1305
+ r"Blackwater",
1206
1306
  r"(Bob\s*)?Corker",
1207
1307
  r"(Brett\s*)?Kavanaugh",
1208
1308
  r"Broidy",
@@ -1210,12 +1310,16 @@ HIGHLIGHTED_NAMES = [
1210
1310
  r"(?<!Merwin Dela )Cruz",
1211
1311
  r"Devin\s*Nunes",
1212
1312
  r"(Don\s*)?McGa[hn]n",
1313
+ r"Erik Prince",
1213
1314
  r"Gary\s*Cohn",
1214
1315
  r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
1215
1316
  r"(George\s*)?Nader",
1216
1317
  r"GOP",
1217
1318
  r"Jeff(rey)?\s*Sessions",
1218
- r"(John\s*(R.?\s*)?)Bolton",
1319
+ r"(John\s*(R.?\s*)?)?Bolton",
1320
+ r"Kasich",
1321
+ r"Keith\s*Schiller",
1322
+ r"Kelly(\s*Anne?)?\s*Conway|Kellyanne",
1219
1323
  r"Kissinger",
1220
1324
  r"Kobach",
1221
1325
  r"Kolfage",
@@ -1230,6 +1334,7 @@ HIGHLIGHTED_NAMES = [
1230
1334
  r"((General|Mike)\s*)?(Flynn|Pence)",
1231
1335
  r"(Mitt\s*)?Romney",
1232
1336
  r"Mnuchin",
1337
+ r"(Newt\s*)Gingrich",
1233
1338
  r"Nikki",
1234
1339
  r"Haley",
1235
1340
  r"(Paul\s*)?(Manafort|Volcker)",
@@ -1238,8 +1343,9 @@ HIGHLIGHTED_NAMES = [
1238
1343
  r"Reagan",
1239
1344
  r"Reince", r"Priebus",
1240
1345
  r"Republican",
1241
- r"(Rex\s*)?Tillerson",
1346
+ r"(Rex\s*)?Till?erson",
1242
1347
  r"(?<!Cynthia )(Richard\s*)?Nixon",
1348
+ r"RNC",
1243
1349
  r"Tea\s*Party",
1244
1350
  ],
1245
1351
  ),
@@ -1281,6 +1387,8 @@ HIGHLIGHTED_NAMES = [
1281
1387
  r"(Natalia\s*)?Veselnitskaya",
1282
1388
  r"(Oleg\s*)?Deripaska",
1283
1389
  r"Oleksandr Vilkul",
1390
+ r"Onexim", # Prokhorov investment vehicle
1391
+ r"Prokhorov",
1284
1392
  r"Rosneft",
1285
1393
  r"RT",
1286
1394
  r"St.?\s*?Petersburg",
@@ -1288,8 +1396,9 @@ HIGHLIGHTED_NAMES = [
1288
1396
  r"Russian?",
1289
1397
  r"Sberbank",
1290
1398
  r"Soviet(\s*Union)?",
1399
+ r"Timur\s*Kulibayev",
1291
1400
  r"USSR",
1292
- r"Vladimir",
1401
+ r"Vlad(imir)?(?! Yudash)",
1293
1402
  r"(Vladimir\s*)?Putin",
1294
1403
  r"Women\s*Empowerment",
1295
1404
  r"Xitrans",
@@ -1300,6 +1409,7 @@ HIGHLIGHTED_NAMES = [
1300
1409
  label='Southeast Asia',
1301
1410
  style='light_salmon3 bold',
1302
1411
  patterns=[
1412
+ r"Australian?(?! Ave)",
1303
1413
  r"Bangkok",
1304
1414
  r"Burm(a|ese)",
1305
1415
  r"Cambodian?",
@@ -1307,6 +1417,7 @@ HIGHLIGHTED_NAMES = [
1307
1417
  r"Malaysian?",
1308
1418
  r"Maldives",
1309
1419
  r"Myan?mar",
1420
+ r"New\s*Zealand",
1310
1421
  r"Philippines",
1311
1422
  r"South\s*Korean?",
1312
1423
  r"Tai(pei|wan)",
@@ -1327,8 +1438,10 @@ HIGHLIGHTED_NAMES = [
1327
1438
  },
1328
1439
  patterns=[
1329
1440
  r"AG?I",
1441
+ r"Artificial\s*(General\s*)?Intelligence",
1330
1442
  r"Chamath", r"Palihapitiya",
1331
1443
  r"Danny\s*Hillis",
1444
+ r"deep learning",
1332
1445
  r"Drew\s*Houston",
1333
1446
  r"Eric\s*Schmidt",
1334
1447
  r"Greylock(\s*Partners)?",
@@ -1357,10 +1470,12 @@ HIGHLIGHTED_NAMES = [
1357
1470
  style='red3 bold',
1358
1471
  emailers={
1359
1472
  'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
1473
+ NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
1360
1474
  },
1361
1475
  patterns=[
1362
1476
  r"@?realDonaldTrump",
1363
1477
  r"(Alan\s*)?Weiss?elberg",
1478
+ r"Alex\s*Jones",
1364
1479
  r"\bDJ?T\b",
1365
1480
  r"Donald J. Tramp",
1366
1481
  r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
@@ -1372,7 +1487,7 @@ HIGHLIGHTED_NAMES = [
1372
1487
  r"(Marla\s*)?Maples",
1373
1488
  r"(Matt(hew)? )?Calamari",
1374
1489
  r"\bMatt C\b",
1375
- r"Michael\s*Cohen",
1490
+ r"Michael\s*(D\.?\s*)?Cohen",
1376
1491
  r"Melania",
1377
1492
  r"(Michael (J.? )?)?Boccio",
1378
1493
  r"Paul Rampell",
@@ -1398,25 +1513,30 @@ HIGHLIGHTED_NAMES = [
1398
1513
  r"Dominican\s*Republic",
1399
1514
  r"(Great|Little)\s*St.?\s*James",
1400
1515
  r"Haiti(an)?",
1516
+ r"Jamaican?",
1401
1517
  r"(John\s*)deJongh(\s*Jr\.?)",
1402
1518
  r"(Kenneth E\. )?Mapp",
1403
1519
  r"PBI",
1404
1520
  r"Puerto\s*Ric(an|o)",
1405
1521
  r"S(ain)?t.?\s*Thomas",
1406
1522
  r"USVI",
1407
- r"(?<!Epstein )VI",
1408
- r"(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?", # Hard to make this work right
1409
- r"(West\s*)?Palm\s*Beach(?!\s*(Daily|Post))",
1523
+ r"(?<!stein |vis-a-)VI(?!s-a-)",
1524
+ r"(The\s*)?Virgin\s*Is(al|la)nds(\s*Daily\s*News)?", # Hard to make this work right
1525
+ r"(West\s*)?Palm\s*Beach(\s*County)?(?!\s*(Daily|Post))",
1410
1526
  ],
1411
1527
  ),
1412
1528
  HighlightedNames(
1413
1529
  label='victim',
1414
1530
  style='orchid1',
1415
1531
  patterns=[
1416
- r"(David\s*)?Bo[il]es",
1532
+ r"#metoo",
1533
+ r"(David\s*)?Bo[il]es(,?\s*Schiller( & Flexner)?)?",
1417
1534
  r"(Gloria\s*)?Allred",
1418
1535
  r"(Jane|Tiffany)\s*Doe",
1419
1536
  r"Katie\s*Johnson",
1537
+ r"pedophile",
1538
+ r"Stephanie\s*Clifford",
1539
+ r"Stormy\s*Daniels",
1420
1540
  r"(Virginia\s+((L\.?|Roberts)\s+)?)?Giuffre",
1421
1541
  r"Virginia\s+Roberts",
1422
1542
  ],
@@ -1455,7 +1575,7 @@ HIGHLIGHTED_NAMES = [
1455
1575
 
1456
1576
  # Individuals
1457
1577
  HighlightedNames(
1458
- emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi scheme partner, prison 18 years"},
1578
+ emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi partner, prison for 18 years"},
1459
1579
  style='dark_olive_green3',
1460
1580
  category=FINANCE,
1461
1581
  patterns=[r"(steven?\s*)?hoffenberg?w?"],
@@ -1471,8 +1591,8 @@ HIGHLIGHTED_NAMES = [
1471
1591
  HighlightedNames(emailers={MELANIE_WALKER: f"doctor, friend of {BILL_GATES}"}, style='pale_violet_red1', category=FRIEND),
1472
1592
  HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='paula', style='pink1', category=FRIEND),
1473
1593
  HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1', category='Europe'),
1474
- HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=FRIEND),
1475
- HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1', category=MIDEAST),
1594
+ HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink', category=ARTS),
1595
+ HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'chairman of ports in Dubai, CEO of DP World'}, style='green1', category=MIDEAST),
1476
1596
 
1477
1597
  # HighlightedText not HighlightedNames bc of word boundary issue
1478
1598
  HighlightedText(
@@ -1495,7 +1615,7 @@ HIGHLIGHTED_TEXTS = [
1495
1615
  HighlightedText(
1496
1616
  label='header_field',
1497
1617
  style='plum4',
1498
- patterns=[r'^(> )?(Date|From|Sent|To|C[cC]|Importance|Reply-To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification):'],
1618
+ patterns=[r'^>? ?(Date|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q) ?)):'],
1499
1619
  ),
1500
1620
  HighlightedText(
1501
1621
  label='http_links',
@@ -1549,6 +1669,41 @@ class EpsteinHighlighter(RegexHighlighter):
1549
1669
  """Finds and colors interesting keywords based on the above config."""
1550
1670
  base_style = f"{REGEX_STYLE_PREFIX}."
1551
1671
  highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
1672
+ highlight_counts = defaultdict(int)
1673
+
1674
+ def highlight(self, text: Text) -> None:
1675
+ """overrides https://rich.readthedocs.io/en/latest/_modules/rich/highlighter.html#RegexHighlighter"""
1676
+ highlight_regex = text.highlight_regex
1677
+
1678
+ for re_highlight in self.highlights:
1679
+ highlight_regex(re_highlight, style_prefix=self.base_style)
1680
+
1681
+ if args.debug and isinstance(re_highlight, re.Pattern):
1682
+ for match in re_highlight.finditer(text.plain):
1683
+ type(self).highlight_counts[(match.group(1) or 'None').replace('\n', ' ')] += 1
1684
+
1685
+ def print_highlight_counts(self, console: Console) -> None:
1686
+ """Print counts of how many times strings were highlighted."""
1687
+ highlight_counts = deepcopy(self.highlight_counts)
1688
+ weak_date_regex = re.compile(r"^(\d\d?/|20|http|On ).*")
1689
+
1690
+ for highlighted, count in sort_dict(highlight_counts):
1691
+ if highlighted is None or weak_date_regex.match(highlighted):
1692
+ continue
1693
+
1694
+ try:
1695
+ console.print(f"{highlighted:25s} highlighted {count} times")
1696
+ except Exception as e:
1697
+ logger.error(f"Failed to print highlight count {count} for {highlighted}")
1698
+
1699
+
1700
+ def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
1701
+ if name is None:
1702
+ return None
1703
+
1704
+ for highlight_group in HIGHLIGHTED_NAMES:
1705
+ if highlight_group.regex.search(name):
1706
+ return highlight_group
1552
1707
 
1553
1708
 
1554
1709
  def get_style_for_category(category: str) -> str | None:
@@ -1584,15 +1739,6 @@ def styled_name(name: str | None, default_style: str = DEFAULT_NAME_STYLE) -> Te
1584
1739
  return Text(name or UNKNOWN, style=get_style_for_name(name, default_style=default_style))
1585
1740
 
1586
1741
 
1587
- def get_highlight_group_for_name(name: str | None) -> HighlightedNames | None:
1588
- if name is None:
1589
- return None
1590
-
1591
- for highlight_group in HIGHLIGHTED_NAMES:
1592
- if highlight_group.regex.search(name):
1593
- return highlight_group
1594
-
1595
-
1596
1742
  def _print_highlighted_names_repr() -> None:
1597
1743
  for hn in HIGHLIGHTED_NAMES:
1598
1744
  if isinstance(hn, HighlightedNames):