epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import re
2
3
  from copy import deepcopy
3
4
  from dataclasses import Field, asdict, dataclass, field, fields
@@ -9,20 +10,21 @@ from dateutil.parser import parse
9
10
  from epstein_files.util.constant.names import *
10
11
  from epstein_files.util.constant.strings import *
11
12
  from epstein_files.util.data import remove_zero_time, without_falsey
13
+ from epstein_files.util.env import args
12
14
 
13
- DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
15
+ DuplicateType = Literal['bounced', 'earlier', 'quoted', 'redacted', 'same']
14
16
  Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
15
17
 
16
18
  # Misc
17
- CONSTANTIZE_NAMES = False # A flag set to True that causes repr() of these classes to return strings of usable code
18
19
  INDENT = ' '
19
20
  INDENT_NEWLINE = f'\n{INDENT}'
20
21
  INDENTED_JOIN = f',{INDENT_NEWLINE}'
21
- MAX_LINE_LENGTH = 150
22
+ MAX_LINE_LENGTH = 135
22
23
  REPUTATION_MGMT = f'{REPUTATION} management'
23
24
  SAME = 'same'
24
25
 
25
26
  DUPE_TYPE_STRS: dict[DuplicateType, str] = {
27
+ 'bounced': 'a bounced copy of',
26
28
  'earlier': 'an earlier draft of',
27
29
  'quoted': 'quoted in full in',
28
30
  'redacted': 'a redacted version of',
@@ -32,7 +34,10 @@ DUPE_TYPE_STRS: dict[DuplicateType, str] = {
32
34
  FIELD_SORT_KEY = {
33
35
  'id': 'a',
34
36
  'author': 'aa',
35
- 'attribution_reason': 'zz',
37
+ 'comment': 'zz',
38
+ 'duplicate_ids': 'dup',
39
+ 'duplicate_of_id': 'dupe',
40
+ 'recipients': 'aaa',
36
41
  }
37
42
 
38
43
  FINANCIAL_REPORTS_AUTHORS = [
@@ -49,7 +54,6 @@ FINANCIAL_REPORTS_AUTHORS = [
49
54
  # Fields like timestamp and author are better added from the Document object
50
55
  NON_METADATA_FIELDS = [
51
56
  'actual_text',
52
- 'date',
53
57
  'id',
54
58
  'is_synthetic',
55
59
  ]
@@ -64,18 +68,18 @@ class DocCfg:
64
68
  id (str): ID of file
65
69
  author (Name): Author of the document (if any)
66
70
  category (str | None): Type of file
67
- date (str | None): If passed will be immediated parsed into the 'timestamp' field
71
+ date (str | None): Parsed to a datetime by timestamp() if it exists
68
72
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
69
73
  duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
70
74
  duplicate_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
71
75
  is_interesting (bool | None): Override other considerations and always consider this file interesting (or not)
72
- timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
73
76
  is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
74
77
  """
75
78
  id: str
76
79
  attached_to_email_id: str | None = None
77
80
  author: Name = None
78
81
  category: str | None = None
82
+ comment: str = ''
79
83
  date: str | None = None
80
84
  description: str | None = None
81
85
  dupe_type: DuplicateType | None = None
@@ -84,12 +88,8 @@ class DocCfg:
84
88
  is_attribution_uncertain: bool = False
85
89
  is_interesting: bool | None = None
86
90
  is_synthetic: bool = False
87
- timestamp: datetime | None = None
88
91
 
89
92
  def __post_init__(self):
90
- if self.date:
91
- self.timestamp = parse(self.date)
92
-
93
93
  if self.duplicate_of_id or self.duplicate_ids:
94
94
  self.dupe_type = self.dupe_type or SAME
95
95
 
@@ -142,7 +142,16 @@ class DocCfg:
142
142
  yield dupe_cfg
143
143
 
144
144
  def metadata(self) -> Metadata:
145
- return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
145
+ metadata = {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
146
+
147
+ if self.is_interesting is False:
148
+ metadata['is_interesting'] = False
149
+
150
+ return metadata
151
+
152
+ def timestamp(self) -> datetime | None:
153
+ if self.date:
154
+ return parse(self.date)
146
155
 
147
156
  def _props_strs(self) -> list[str]:
148
157
  props = []
@@ -151,20 +160,16 @@ class DocCfg:
151
160
  for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
152
161
  value = getattr(self, _field.name)
153
162
 
154
- if value is None or value is False or (isinstance(value, list) and len(value) == 0):
163
+ if _field.name in ['actual_text', 'is_fwded_article', 'is_interesting']: # fields can be False or None or ''
164
+ if value is not None:
165
+ add_prop(_field, str(value))
166
+ elif not value or _field.name == 'dupe_type' and value == 'same':
155
167
  continue
156
168
  elif _field.name == AUTHOR:
157
- add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
158
- elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
159
- continue
160
- elif _field.name == 'recipients' and value:
161
- recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
162
- add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
163
- elif _field.name == 'timestamp' and self.date is not None:
164
- continue # Don't print both timestamp and date
165
- elif isinstance(value, datetime):
166
- value_str = remove_zero_time(value)
167
- add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
169
+ add_prop(_field, constantize_name(str(value)) if args.constantize else f"'{value}'")
170
+ elif _field.name == 'recipients':
171
+ recipients_str = str([constantize_name(r) if (args.constantize and r) else r for r in value])
172
+ add_prop(_field, recipients_str.replace("'", '') if args.constantize else recipients_str)
168
173
  elif isinstance(value, str):
169
174
  if "'" in value:
170
175
  value = '"' + value.replace('"', r'\"') + '"'
@@ -182,14 +187,14 @@ class DocCfg:
182
187
  type_str = f"{type(self).__name__}("
183
188
  single_line_repr = type_str + ', '.join(props) + f')'
184
189
 
185
- if len(single_line_repr) < MAX_LINE_LENGTH:
190
+ if len(single_line_repr) < MAX_LINE_LENGTH or (self.comment and getattr(self, 'is_fwded_article')):
186
191
  repr_str = single_line_repr
187
192
  else:
188
193
  repr_str = f"{type_str}{INDENT_NEWLINE}" + INDENTED_JOIN.join(props)
189
194
  repr_str += ',' if props else ''
190
195
  repr_str += '\n)'
191
196
 
192
- if CONSTANTIZE_NAMES:
197
+ if args.constantize:
193
198
  repr_str = INDENT + INDENT_NEWLINE.join(repr_str.split('\n'))
194
199
  return repr_str.replace(',,', ',').replace(',),', '),').replace(',),', '),')
195
200
  else:
@@ -224,9 +229,10 @@ class EmailCfg(CommunicationCfg):
224
229
  """
225
230
  actual_text: str | None = None
226
231
  fwded_text_after: str | None = None
227
- is_fwded_article: bool = False
232
+ is_fwded_article: bool | None = None
228
233
  recipients: list[Name] = field(default_factory=list)
229
234
  subject: str | None = None
235
+ truncate_to: int | None = None
230
236
 
231
237
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
232
238
  def __repr__(self) -> str:
epstein_files/util/env.py CHANGED
@@ -49,13 +49,16 @@ output.add_argument('--suppress-output', action='store_true', help='no output to
49
49
  output.add_argument('--uninteresting', action='store_true', help='only output uninteresting other files')
50
50
  output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use (in characters)')
51
51
 
52
- scripts = parser.add_argument_group('SCRIPTS', 'Options used by epstein_search, epstein_show, and epstein_diff.')
52
+ scripts = parser.add_argument_group('SCRIPTS', 'Options used by epstein_grep, epstein_show, and epstein_diff.')
53
53
  scripts.add_argument('positional_args', nargs='*', help='strings to searchs for, file IDs to show or diff, etc.')
54
+ scripts.add_argument('--email-body', action='store_true', help='epstein_grep but only for the body of the email')
55
+ scripts.add_argument('--min-line-length', type=int, help='epstein_grep minimum length of a matched line')
54
56
  scripts.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (used by epstein_show)')
55
57
  scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole files')
56
58
 
57
59
  debug = parser.add_argument_group('DEBUG')
58
60
  debug.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
61
+ debug.add_argument('--constantize', action='store_true', help='constantize names when printing repr() of objects')
59
62
  debug.add_argument('--debug', '-d', action='store_true', help='set debug level to INFO')
60
63
  debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug level to DEBUG')
61
64
  debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
@@ -78,7 +81,9 @@ args.width = args.width if is_html_script else None
78
81
  args.any_output_selected = any([is_output_arg(arg) and val for arg, val in vars(args).items()])
79
82
 
80
83
  if not (args.any_output_selected or args.email_timeline or args.emailers_info):
81
- logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
84
+ if is_html_script:
85
+ logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
86
+
82
87
  args.output_emails = args.output_other = args.output_texts = True
83
88
 
84
89
  if is_html_script:
@@ -97,13 +102,15 @@ if is_html_script:
97
102
  args.build = CHRONOLOGICAL_EMAILS_PATH
98
103
  else:
99
104
  args.build = TEXT_MSGS_HTML_PATH
100
- elif parser.prog.startswith('epstein_') and not args.positional_args:
105
+ elif parser.prog.startswith('epstein_') and not args.positional_args and not args.names:
101
106
  exit_with_error(f"{parser.prog} requires positional arguments but got none!")
102
107
 
103
108
  if args.names:
104
109
  logger.warning(f"Output restricted to {args.names}")
105
110
  args.output_other = False
106
111
 
112
+ if args.truncate and args.whole_file:
113
+ exit_with_error(f"--whole-file and --truncate are incompatible")
107
114
 
108
115
  # Log level args
109
116
  if args.deep_debug:
@@ -38,6 +38,8 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
38
38
 
39
39
  if isinstance(filename_or_id, int) or (isinstance(filename_or_id, str) and len(filename_or_id) <= 6):
40
40
  return id_str(filename_or_id)
41
+ elif isinstance(filename_or_id, str) and len(filename_or_id) == 8:
42
+ return f"{HOUSE_OVERSIGHT_PREFIX}{filename_or_id}"
41
43
 
42
44
  file_match = FILE_ID_REGEX.match(str(filename_or_id).upper())
43
45
 
@@ -31,6 +31,8 @@ REGEX_STYLE_PREFIX = 'regex'
31
31
  SIMPLE_NAME_REGEX = re.compile(r"^[-\w, ]+$", re.IGNORECASE)
32
32
  TECH_BRO = 'tech bro'
33
33
 
34
+ VICTIM_COLOR = 'orchid1'
35
+
34
36
  CATEGORY_STYLE_MAPPING = {
35
37
  ARTICLE: JOURNALIST,
36
38
  BOOK: JOURNALIST,
@@ -160,7 +162,7 @@ class HighlightedNames(HighlightedText):
160
162
 
161
163
  pattern = '|'.join(name_patterns)
162
164
 
163
- if args.deep_debug:
165
+ if args.deep_debug and args.colors_only:
164
166
  debug_console.print(Text('').append(f"{name:25s}", style=self.style).append(f" '{pattern}'", style='dim'))
165
167
 
166
168
  return pattern
@@ -215,7 +217,7 @@ HIGHLIGHTED_NAMES = [
215
217
  ManualHighlight(
216
218
  label='email_subject',
217
219
  style='light_yellow3',
218
- pattern=r"^(> )?(Classification|Flag|Subject): (?P<email_subject>.*)",
220
+ pattern=r"^(> )?(Classification|Flag|Subject|Sujet ?): (?P<email_subject>.*)",
219
221
  ),
220
222
  HighlightedNames(
221
223
  label=ACADEMIA,
@@ -245,12 +247,13 @@ HIGHLIGHTED_NAMES = [
245
247
  r"Bard\s+((Early )?College|High School|Schools)",
246
248
  r"Brotherton",
247
249
  r"Carl\s*Sagan",
248
- r"Columbia",
250
+ r"Columbia(\s*(Business\s*School|University))?",
249
251
  r"Dan(iel|ny) Kahneman",
250
252
  r"(Francis\s*)?Crick",
251
253
  r"J(ames|im)\s*Watson",
252
254
  r"(Lord\s*)?Martin\s*Rees",
253
255
  r"Massachusetts\s*Institute\s*of\s*Technology",
256
+ r"Mayo\s*Clinic",
254
257
  r"Media\s*Lab",
255
258
  r"(Marvin\s*)?Minsky",
256
259
  r"MIT(\s*Media\s*Lab)?",
@@ -260,10 +263,11 @@ HIGHLIGHTED_NAMES = [
260
263
  r"Princeton(\s*University)?",
261
264
  r"Regeneron",
262
265
  r"(Richard\s*)?Dawkins",
266
+ r"Rockefeller\s*University",
263
267
  r"(Sandy\s*)?Pentland", # Media Lab
264
268
  r"Sanofi",
265
269
  r"Stanford(\s*University)?(\s*Hospital)?",
266
- r"(Stephen\s*)?Hawking",
270
+ r"(Ste(ph|v)en\s*)?Hawking",
267
271
  r"(Steven?\s*)?Pinker",
268
272
  r"Texas\s*A&M",
269
273
  r"Tulane",
@@ -319,6 +323,7 @@ HIGHLIGHTED_NAMES = [
319
323
  patterns=[
320
324
  r"(Art )?Spiegelman",
321
325
  r"Artspace",
326
+ r"Ayn\s*Rand",
322
327
  r"Bobby slayton",
323
328
  r"bono\s*mick",
324
329
  r"Errol(\s*Morris)?",
@@ -413,9 +418,10 @@ HIGHLIGHTED_NAMES = [
413
418
  'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
414
419
  ROBERT_LAWRENCE_KUHN: 'investment banker, China expert',
415
420
  TERRY_KAFKA: 'CEO of Impact Outdoor (highway billboards)',
416
- TOM_PRITZKER: 'brother of J.B. Pritzker',
421
+ TOM_PRITZKER: 'chairman of The Pritzker Organization and Hyatt Hotels',
417
422
  },
418
423
  patterns=[
424
+ r"Arthur Klein",
419
425
  r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
420
426
  r"Gruterite",
421
427
  r"((John|Patricia)\s*)?Kluge",
@@ -423,6 +429,7 @@ HIGHLIGHTED_NAMES = [
423
429
  r"(Mi(chael|ke)\s*)?Ovitz",
424
430
  r"(Steve\s+)?Wynn",
425
431
  r"(Les(lie)?\s+)?Wexner",
432
+ r"Michael\s*Klein",
426
433
  r"New Leaf Ventures",
427
434
  r"Park Partners",
428
435
  r"SALSS",
@@ -500,6 +507,7 @@ HIGHLIGHTED_NAMES = [
500
507
  r"Dem(ocrat(ic)?)?",
501
508
  r"(Diana\s*)?DeGette",
502
509
  r"DNC",
510
+ r"(Ed(ward)?\s*)?Mezvinsky",
503
511
  r"Elena\s*Kagan",
504
512
  r"(Eliott?\s*)?Spitzer(, Eliot)?",
505
513
  r"Eric Holder",
@@ -550,7 +558,6 @@ HIGHLIGHTED_NAMES = [
550
558
  MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
551
559
  NADIA_MARCINKO: "Epstein's pilot",
552
560
  'Sean J. Lancaster': 'airplane reseller',
553
- ZUBAIR_KHAN: 'Tranchulas cybersecurity, InsightsPod founder, Islamabad / Dubai',
554
561
  },
555
562
  patterns=[
556
563
  r"Adriana\s*Ross",
@@ -566,7 +573,7 @@ HIGHLIGHTED_NAMES = [
566
573
  MARK_EPSTEIN: 'brother of Jeffrey',
567
574
  },
568
575
  patterns=[
569
- r"JEGE(\s*Inc)",
576
+ r"JEGE(\s*Inc)?",
570
577
  r"LSJ",
571
578
  ],
572
579
  ),
@@ -640,6 +647,7 @@ HIGHLIGHTED_NAMES = [
640
647
  r"AfD",
641
648
  r"(Angela )?Merk(el|le)",
642
649
  r"Austria",
650
+ r"Belgi(an|um)",
643
651
  r"(Benjamin\s*)?Harnwell",
644
652
  r"Berlin",
645
653
  r"Borge",
@@ -649,6 +657,7 @@ HIGHLIGHTED_NAMES = [
649
657
  r"Brussels",
650
658
  r"Cannes",
651
659
  r"Cypr(iot|us)",
660
+ r"David\s*Cameron",
652
661
  r"Davos",
653
662
  r"ECB",
654
663
  r"England",
@@ -663,6 +672,8 @@ HIGHLIGHTED_NAMES = [
663
672
  r"Ital(ian|y)",
664
673
  r"Jacques",
665
674
  r"Kiev",
675
+ r"Latvian?",
676
+ r"Lithuanian?",
666
677
  r"Le\s*Pen",
667
678
  r"London",
668
679
  r"Macron",
@@ -672,11 +683,13 @@ HIGHLIGHTED_NAMES = [
672
683
  r"NATO",
673
684
  r"(Nicholas\s*)?Sarkozy",
674
685
  r"Nigel(\s*Farage)?",
686
+ r"(Northern\s*)?Ireland",
675
687
  r"Norw(ay|egian)",
676
688
  r"Oslo",
677
689
  r"Paris",
678
690
  r"Polish",
679
691
  r"pope",
692
+ r"Portugal",
680
693
  r"Scotland",
681
694
  r"(Sebastian )?Kurz",
682
695
  r"Stockholm",
@@ -685,6 +698,7 @@ HIGHLIGHTED_NAMES = [
685
698
  r"Swed(en|ish)(?![-\s]+American Life Scienc)",
686
699
  r"Swi(ss|tzerland)",
687
700
  r"(Tony\s)?Blair",
701
+ r"United\s*Kingdom",
688
702
  r"U\.K\.",
689
703
  r"Ukrain(e|ian)",
690
704
  r"Venice",
@@ -748,6 +762,7 @@ HIGHLIGHTED_NAMES = [
748
762
  r"(Janet\s*)?Yellen",
749
763
  r"(Jerome\s*)?Powell(?! M\. Cabot)",
750
764
  r"(Jimmy\s*)?Cayne",
765
+ r"Joon\s*Yun",
751
766
  r"JPMC?",
752
767
  r"j\.?p\.?\s*morgan(\.?com|\s*Chase)?",
753
768
  r"Madoff",
@@ -760,6 +775,7 @@ HIGHLIGHTED_NAMES = [
760
775
  r"(Peter L. )?Scher",
761
776
  r"(Ray\s*)?Dalio",
762
777
  r"(Richard\s*)?LeFrak",
778
+ r"Rockefeller(?! University)(\s*Foundation)?",
763
779
  r"(Ste(phen|ve)\s*)?Schwart?z?man",
764
780
  r"Serageldin",
765
781
  r"UBS",
@@ -823,6 +839,7 @@ HIGHLIGHTED_NAMES = [
823
839
  r"FTC",
824
840
  r"(General\s*)?P(a|e)traeus",
825
841
  r"Geoff\s*Ling",
842
+ r"Homeland\s*Security",
826
843
  r"IRS",
827
844
  r"(James\s*)?Comey",
828
845
  r"(Jennifer\s*Shasky\s*)?Calvery",
@@ -952,7 +969,7 @@ HIGHLIGHTED_NAMES = [
952
969
  'Alain Forget': 'author of "How To Get Out Of This World ALIVE"',
953
970
  'Alex Yablon': 'New York Magazine fact checker (?)',
954
971
  EDWARD_JAY_EPSTEIN: 'no relation, wrote books about spies',
955
- HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher",
972
+ HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher (company not a person)",
956
973
  JAMES_HILL: 'ABC News',
957
974
  JENNIFER_JACQUET: 'Future Science magazine',
958
975
  JOHN_BROCKMAN: 'literary agent and author specializing in scientific literature',
@@ -972,14 +989,14 @@ HIGHLIGHTED_NAMES = [
972
989
  r'Associated\s*Press',
973
990
  r"Axios",
974
991
  r"BBC",
975
- r"(Bob|Robert)\s*Costa",
992
+ r"(Bob|Robert)\s*(Costa|Woodward)",
976
993
  r"Breitbart",
977
994
  r"BuzzFeed(\s*News)?",
978
995
  r"C-?Span",
979
996
  r"CBS(\s*(4|Corp|News))?",
980
997
  r"Charlie\s*Rose",
981
998
  r"China\s*Daily",
982
- r"CNBC",
999
+ r"(C|MS)?NBC(\s*News)?",
983
1000
  r"CNN(politics?)?",
984
1001
  r"Con[cs]hita", r"Sarnoff",
985
1002
  r"Daily Business Review",
@@ -1000,6 +1017,7 @@ HIGHLIGHTED_NAMES = [
1000
1017
  r"Globe\s*and\s*Mail",
1001
1018
  r"Good\s*Morning\s*America",
1002
1019
  r"Graydon(\s*Carter)?",
1020
+ r"Hollywood\s*Reporter",
1003
1021
  r"Huff(ington)?(\s*Po(st)?)?",
1004
1022
  r"Ingram, David",
1005
1023
  r"James\s*Hill",
@@ -1007,6 +1025,7 @@ HIGHLIGHTED_NAMES = [
1007
1025
  r"Jesse Kornbluth",
1008
1026
  r"John\s*Connolly",
1009
1027
  r"Jonathan\s*Karl",
1028
+ r"Journal of Criminal Law and Criminology",
1010
1029
  r"Julie\s*(K.?\s*)?Brown", r'jbrown@miamiherald.com',
1011
1030
  r"(Katie\s*)?Couric",
1012
1031
  r"Keith\s*Larsen",
@@ -1025,7 +1044,6 @@ HIGHLIGHTED_NAMES = [
1025
1044
  r"PERVERSION\s*OF\s*JUSTICE",
1026
1045
  r"Politico",
1027
1046
  r"Pro\s*Publica",
1028
- r"Reuters",
1029
1047
  r"(Sean\s*)?Hannity",
1030
1048
  r"Sharon Churcher", # Daily Mail
1031
1049
  r"Sulzberger",
@@ -1038,7 +1056,9 @@ HIGHLIGHTED_NAMES = [
1038
1056
  r"(The\s*)?New\s*Yorker",
1039
1057
  r"(The\s*)?Wall\s*Street\s*Journal",
1040
1058
  r"(The\s*)?Wa(shington\s*)?Po(st)?",
1059
+ r"(Thomson\s*)?Reuters",
1041
1060
  r"(Uma\s*)?Sanghvi",
1061
+ r"USA\s*Today",
1042
1062
  r"Vanity\s*Fair",
1043
1063
  r"Viceland",
1044
1064
  r"Vick[iy]\s*Ward",
@@ -1072,6 +1092,7 @@ HIGHLIGHTED_NAMES = [
1072
1092
  r"Chile",
1073
1093
  r"Colombian?",
1074
1094
  r"Cuban?",
1095
+ r"el chapo",
1075
1096
  r"El\s*Salvador",
1076
1097
  r"((Enrique )?Pena )?Nieto",
1077
1098
  r"Lat(in)?\s*Am(erican?)?",
@@ -1113,12 +1134,16 @@ HIGHLIGHTED_NAMES = [
1113
1134
  r"Arizona(?! State University)",
1114
1135
  r"Aspen",
1115
1136
  r"Berkeley",
1137
+ r"Boston",
1116
1138
  r"Brooklyn",
1117
1139
  r"California",
1118
1140
  r"Canada",
1119
1141
  r"Cape Cod",
1142
+ r"Charlottesville",
1143
+ r"Colorado",
1120
1144
  r"Connecticut",
1121
1145
  r"Florida",
1146
+ r"Los Angeles",
1122
1147
  r"Loudoun\s*County?",
1123
1148
  r"Martha's\s*Vineyard",
1124
1149
  r"Miami(?!\s?Herald)",
@@ -1128,9 +1153,12 @@ HIGHLIGHTED_NAMES = [
1128
1153
  r"NY(C|\s*State)",
1129
1154
  r"Orange\s*County",
1130
1155
  r"Oregon",
1156
+ r"Palo Alto",
1157
+ r"Pennsylvania",
1131
1158
  r"Phoenix",
1132
1159
  r"Portland",
1133
- r"Santa\s*Fe",
1160
+ r"San Francisco",
1161
+ r"Sant[ae]\s*Fe",
1134
1162
  r"Telluride",
1135
1163
  r"Teterboro",
1136
1164
  r"Texas(?! A&M)",
@@ -1157,6 +1185,7 @@ HIGHLIGHTED_NAMES = [
1157
1185
  r"Afghanistan",
1158
1186
  r"Al[-\s]?Qa[ei]da",
1159
1187
  r"Ahmadinejad",
1188
+ r"(Rakhat )?Aliyev",
1160
1189
  r"Arab",
1161
1190
  r"Aramco",
1162
1191
  r"Armenia",
@@ -1185,6 +1214,7 @@ HIGHLIGHTED_NAMES = [
1185
1214
  r"Hamas",
1186
1215
  r"Hezbollah",
1187
1216
  r"HBJ",
1217
+ r"Hourani",
1188
1218
  r"Houthi",
1189
1219
  r"Imran\s+Khan",
1190
1220
  r"Iran(ian)?([-\s]Contra)?",
@@ -1207,10 +1237,11 @@ HIGHLIGHTED_NAMES = [
1207
1237
  r"MB(N|S|Z)",
1208
1238
  r"Mid(dle)?\s*East(ern)?",
1209
1239
  r"Mohammed\s+bin\s+Salman",
1210
- r"Morocco",
1240
+ r"Morocc(an|o)",
1211
1241
  r"Mubarak",
1212
1242
  r"Muslim(\s*Brotherhood)?",
1213
1243
  r"Nayaf",
1244
+ r"Nazarbayev",
1214
1245
  r"Pakistani?",
1215
1246
  r"Omar",
1216
1247
  r"(Osama\s*)?Bin\s*Laden",
@@ -1230,10 +1261,10 @@ HIGHLIGHTED_NAMES = [
1230
1261
  r"Syrian?",
1231
1262
  r"(Tarek\s*)?El\s*Sayed",
1232
1263
  r"Tehran",
1264
+ r"Timur\s*Kulibayev",
1233
1265
  r"Tripoli",
1234
1266
  r"Tunisian?",
1235
- r"Turk(ey|ish)",
1236
- r"Turks(?! & Caicos)",
1267
+ r"Turk(ey|ish)?(?!s & Caicos)",
1237
1268
  r"UAE",
1238
1269
  r"((Iraq|Iran|Kuwait|Qatar|Yemen)i?)",
1239
1270
  ],
@@ -1281,6 +1312,7 @@ HIGHLIGHTED_NAMES = [
1281
1312
  },
1282
1313
  patterns=[
1283
1314
  r"(Matt(hew)? )?Hiltzi[gk]",
1315
+ r"Philip\s*Barden",
1284
1316
  r"PR\s*Newswire",
1285
1317
  REPUTATION_MGMT,
1286
1318
  r"Reputation.com",
@@ -1308,6 +1340,7 @@ HIGHLIGHTED_NAMES = [
1308
1340
  r"Broidy",
1309
1341
  r"(Chris\s)?Christie",
1310
1342
  r"(?<!Merwin Dela )Cruz",
1343
+ r"Darrell\s*Issa",
1311
1344
  r"Devin\s*Nunes",
1312
1345
  r"(Don\s*)?McGa[hn]n",
1313
1346
  r"Erik Prince",
@@ -1333,7 +1366,7 @@ HIGHLIGHTED_NAMES = [
1333
1366
  r"(Michael\s)?Hayden",
1334
1367
  r"((General|Mike)\s*)?(Flynn|Pence)",
1335
1368
  r"(Mitt\s*)?Romney",
1336
- r"Mnuchin",
1369
+ r"(Steven?\s*)?Mnuchin",
1337
1370
  r"(Newt\s*)Gingrich",
1338
1371
  r"Nikki",
1339
1372
  r"Haley",
@@ -1346,7 +1379,9 @@ HIGHLIGHTED_NAMES = [
1346
1379
  r"(Rex\s*)?Till?erson",
1347
1380
  r"(?<!Cynthia )(Richard\s*)?Nixon",
1348
1381
  r"RNC",
1382
+ r"(Roy|Stephen)\s*Moore",
1349
1383
  r"Tea\s*Party",
1384
+ r"Wilbur\s*Ross",
1350
1385
  ],
1351
1386
  ),
1352
1387
  HighlightedNames(
@@ -1396,7 +1431,6 @@ HIGHLIGHTED_NAMES = [
1396
1431
  r"Russian?",
1397
1432
  r"Sberbank",
1398
1433
  r"Soviet(\s*Union)?",
1399
- r"Timur\s*Kulibayev",
1400
1434
  r"USSR",
1401
1435
  r"Vlad(imir)?(?! Yudash)",
1402
1436
  r"(Vladimir\s*)?Putin",
@@ -1435,6 +1469,7 @@ HIGHLIGHTED_NAMES = [
1435
1469
  REID_HOFFMAN: 'PayPal mafia member, founder of LinkedIn',
1436
1470
  STEVEN_SINOFSKY: 'ex-Microsoft, loves bitcoin',
1437
1471
  VINCENZO_IOZZO: 'CEO of the identity-security company SlashID',
1472
+ ZUBAIR_KHAN: 'Tranchulas cybersecurity, InsightsPod founder, Islamabad / Dubai',
1438
1473
  },
1439
1474
  patterns=[
1440
1475
  r"AG?I",
@@ -1443,6 +1478,7 @@ HIGHLIGHTED_NAMES = [
1443
1478
  r"Danny\s*Hillis",
1444
1479
  r"deep learning",
1445
1480
  r"Drew\s*Houston",
1481
+ r"Eliezer\s*Yudkowsky",
1446
1482
  r"Eric\s*Schmidt",
1447
1483
  r"Greylock(\s*Partners)?",
1448
1484
  r"(?<!(ustin|Moshe)\s)Hoffmand?",
@@ -1462,6 +1498,7 @@ HIGHLIGHTED_NAMES = [
1462
1498
  r"Softbank",
1463
1499
  r"SpaceX",
1464
1500
  r"Tim\s*Ferriss?",
1501
+ r"Vision\s*Fund",
1465
1502
  r"WikiLeak(ed|s)",
1466
1503
  ],
1467
1504
  ),
@@ -1518,6 +1555,7 @@ HIGHLIGHTED_NAMES = [
1518
1555
  r"(Kenneth E\. )?Mapp",
1519
1556
  r"PBI",
1520
1557
  r"Puerto\s*Ric(an|o)",
1558
+ r"San\s*Juan",
1521
1559
  r"S(ain)?t.?\s*Thomas",
1522
1560
  r"USVI",
1523
1561
  r"(?<!stein |vis-a-)VI(?!s-a-)",
@@ -1527,9 +1565,9 @@ HIGHLIGHTED_NAMES = [
1527
1565
  ),
1528
1566
  HighlightedNames(
1529
1567
  label='victim',
1530
- style='orchid1',
1568
+ style=VICTIM_COLOR,
1531
1569
  patterns=[
1532
- r"#metoo",
1570
+ r"child\s*pornography",
1533
1571
  r"(David\s*)?Bo[il]es(,?\s*Schiller( & Flexner)?)?",
1534
1572
  r"(Gloria\s*)?Allred",
1535
1573
  r"(Jane|Tiffany)\s*Doe",
@@ -1595,6 +1633,11 @@ HIGHLIGHTED_NAMES = [
1595
1633
  HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'chairman of ports in Dubai, CEO of DP World'}, style='green1', category=MIDEAST),
1596
1634
 
1597
1635
  # HighlightedText not HighlightedNames bc of word boundary issue
1636
+ HighlightedText(
1637
+ label='metoo',
1638
+ style=VICTIM_COLOR,
1639
+ patterns=[r"#metoo"]
1640
+ ),
1598
1641
  HighlightedText(
1599
1642
  label='phone_number',
1600
1643
  style='bright_green',
@@ -1615,7 +1658,7 @@ HIGHLIGHTED_TEXTS = [
1615
1658
  HighlightedText(
1616
1659
  label='header_field',
1617
1660
  style='plum4',
1618
- patterns=[r'^>? ?(Date|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q) ?)):'],
1661
+ patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
1619
1662
  ),
1620
1663
  HighlightedText(
1621
1664
  label='http_links',
@@ -1625,16 +1668,16 @@ HIGHLIGHTED_TEXTS = [
1625
1668
  HighlightedText(
1626
1669
  label='quoted_reply_line',
1627
1670
  style='dim',
1628
- patterns=[REPLY_REGEX.pattern],
1671
+ patterns=[REPLY_REGEX.pattern, r"^(> )?wrote:$"],
1629
1672
  ),
1630
1673
  HighlightedText(
1631
1674
  label='redacted',
1632
1675
  style='grey58',
1633
- patterns=[fr"{REDACTED}|Privileged - Redacted"],
1676
+ patterns=[fr"{REDACTED}|<?Privileged - Redacted>?"],
1634
1677
  ),
1635
1678
  HighlightedText(
1636
1679
  label='sent_from',
1637
- style='gray42 italic',
1680
+ style='light_cyan3 italic dim',
1638
1681
  patterns=[SENT_FROM_REGEX.pattern],
1639
1682
  ),
1640
1683
  HighlightedText(