epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epstein_files/__init__.py CHANGED
@@ -21,7 +21,8 @@ from epstein_files.util.env import args
21
21
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
22
22
  from epstein_files.util.logging import exit_with_error, logger
23
23
  from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
24
- print_other_files_section, print_text_messages_section, print_email_timeline, print_json_metadata, write_urls)
24
+ print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info_png,
25
+ print_json_metadata, write_urls)
25
26
  from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
26
27
  print_title_page_tables, print_subtitle_panel, write_html)
27
28
  from epstein_files.util.timer import Timer
@@ -43,8 +44,11 @@ def generate_html() -> None:
43
44
  elif args.json_files:
44
45
  print_json_files(epstein_files)
45
46
  exit()
47
+ elif args.emailers_info_png:
48
+ print_emailers_info_png(epstein_files)
49
+ exit()
46
50
 
47
- print_title_page_header(epstein_files)
51
+ print_title_page_header()
48
52
 
49
53
  if args.email_timeline:
50
54
  print_color_key()
@@ -96,8 +100,7 @@ def epstein_search():
96
100
  for search_term in args.positional_args:
97
101
  temp_highlighter = build_highlighter(search_term)
98
102
  search_results = epstein_files.docs_matching(search_term, args.names)
99
- console.line(2)
100
- print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
103
+ print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
101
104
 
102
105
  for search_result in search_results:
103
106
  console.line()
@@ -115,11 +118,16 @@ def epstein_search():
115
118
  def epstein_show():
116
119
  """Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
117
120
  _assert_positional_args()
118
- ids = [extract_file_id(arg) for arg in args.positional_args]
119
- raw_docs = [Document(coerce_file_path(id)) for id in ids]
120
- docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
121
+ raw_docs: list[Document] = []
121
122
  console.line()
122
123
 
124
+ try:
125
+ ids = [extract_file_id(arg) for arg in args.positional_args]
126
+ raw_docs = [Document(coerce_file_path(id)) for id in ids]
127
+ docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
128
+ except Exception as e:
129
+ exit_with_error(str(e))
130
+
123
131
  for doc in docs:
124
132
  console.print('\n', doc, '\n')
125
133
 
@@ -9,7 +9,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, Document
9
9
  from epstein_files.util.constant.names import UNKNOWN
10
10
  from epstein_files.util.constants import FALLBACK_TIMESTAMP
11
11
  from epstein_files.util.doc_cfg import CommunicationCfg
12
- from epstein_files.util.highlighted_group import get_style_for_name
12
+ from epstein_files.util.highlighted_group import get_style_for_name, styled_name
13
13
  from epstein_files.util.rich import key_value_txt
14
14
 
15
15
  TIMESTAMP_SECONDS_REGEX = re.compile(r":\d{2}$")
@@ -25,10 +25,10 @@ class Communication(Document):
25
25
  return self.author or UNKNOWN
26
26
 
27
27
  def author_style(self) -> str:
28
- return get_style_for_name(self.author_or_unknown())
28
+ return get_style_for_name(self.author)
29
29
 
30
30
  def author_txt(self) -> Text:
31
- return Text(self.author_or_unknown(), style=self.author_style())
31
+ return styled_name(self.author)
32
32
 
33
33
  def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
34
34
  """Overrides super() method to apply self.author_style."""
@@ -63,7 +63,7 @@ class Document:
63
63
 
64
64
  Attributes:
65
65
  file_path (Path): Local path to file
66
- author (str | None): Who is responsible for the text in the file
66
+ author (Name): Who is responsible for the text in the file
67
67
  config (DocCfg): Information about this fil
68
68
  file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
69
69
  filename (str): File's basename
@@ -74,7 +74,7 @@ class Document:
74
74
  """
75
75
  file_path: Path
76
76
  # Optional fields
77
- author: str | None = None
77
+ author: Name = None
78
78
  config: EmailCfg | DocCfg | TextCfg | None = None
79
79
  file_id: str = field(init=False)
80
80
  filename: str = field(init=False)
@@ -88,6 +88,9 @@ class Document:
88
88
  strip_whitespace: ClassVar[bool] = True # Overridden in JsonFile
89
89
 
90
90
  def __post_init__(self):
91
+ if not self.file_path.exists():
92
+ raise FileNotFoundError(f"File '{self.file_path.name}' does not exist!")
93
+
91
94
  self.filename = self.file_path.name
92
95
  self.file_id = extract_file_id(self.filename)
93
96
  # config and url_slug could have been pre-set in Email
@@ -118,6 +121,10 @@ class Document:
118
121
  txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
119
122
  return txt.append(epstein_media_doc_link_txt(self.config.duplicate_of_id, style='royal_blue1'))
120
123
 
124
+ def duplicate_of_id(self) -> str | None:
125
+ if self.config and self.config.duplicate_of_id:
126
+ return self.config.duplicate_of_id
127
+
121
128
  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
122
129
  return self.external_link(epsteinify_doc_url, style, link_txt)
123
130
 
@@ -175,7 +182,7 @@ class Document:
175
182
  return None
176
183
 
177
184
  def is_duplicate(self) -> bool:
178
- return bool(self.config and self.config.duplicate_of_id)
185
+ return bool(self.duplicate_of_id())
179
186
 
180
187
  def is_local_extract_file(self) -> bool:
181
188
  """True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
@@ -20,11 +20,11 @@ from epstein_files.documents.emails.email_header import (BAD_EMAILER_REGEX, EMAI
20
20
  from epstein_files.util.constant.names import *
21
21
  from epstein_files.util.constant.strings import REDACTED
22
22
  from epstein_files.util.constants import *
23
- from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
23
+ from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes,
24
24
  flatten, listify, remove_timezone, uniquify)
25
25
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
26
26
  from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
27
- from epstein_files.util.highlighted_group import get_style_for_name
27
+ from epstein_files.util.highlighted_group import JUNK_EMAILERS, get_style_for_name
28
28
  from epstein_files.util.logging import logger
29
29
  from epstein_files.util.rich import *
30
30
 
@@ -55,6 +55,7 @@ REPLY_SPLITTERS = [f"{field}:" for field in FIELD_NAMES] + [
55
55
 
56
56
  OCR_REPAIRS: dict[str | re.Pattern, str] = {
57
57
  re.compile(r'grnail\.com'): 'gmail.com',
58
+ 'Newsmax. corn': 'Newsmax.com',
58
59
  re.compile(r"^(From|To)(: )?[_1.]{5,}", re.MULTILINE): rf"\1: {REDACTED}", # Redacted email addresses
59
60
  # These 3 must come in this order!
60
61
  re.compile(r'([/vkT]|Ai|li|(I|7)v)rote:'): 'wrote:',
@@ -71,6 +72,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
71
72
  # Signatures
72
73
  'BlackBerry by AT &T': 'BlackBerry by AT&T',
73
74
  'BlackBerry from T- Mobile': 'BlackBerry from T-Mobile',
75
+ 'Envoy& de mon iPhone': 'Envoyé de mon iPhone',
74
76
  "from my 'Phone": 'from my iPhone',
75
77
  'from Samsung Mob.le': 'from Samsung Mobile',
76
78
  'gJeremyRubin': '@JeremyRubin',
@@ -78,6 +80,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
78
80
  'twitter glhsummers': 'twitter @lhsummers',
79
81
  re.compile(r"twitter\.com[i/][lI]krauss[1lt]"): "twitter.com/lkrauss1",
80
82
  re.compile(r'from my BlackBerry[0°] wireless device'): 'from my BlackBerry® wireless device',
83
+ re.compile(r'^INW$', re.MULTILINE): REDACTED,
81
84
  # links
82
85
  'Imps ://': 'https://',
83
86
  re.compile(r'timestopics/people/t/landon jr thomas/inde\n?x\n?\.\n?h\n?tml'): 'timestopics/people/t/landon_jr_thomas/index.html',
@@ -133,19 +136,24 @@ MAILING_LISTS = [
133
136
  JP_MORGAN_USGIO,
134
137
  ]
135
138
 
136
- TRUNCATE_ALL_EMAILS_FROM = JUNK_EMAILERS + MAILING_LISTS + [
139
+ BBC_LISTS = JUNK_EMAILERS + MAILING_LISTS
140
+
141
+ TRUNCATE_ALL_EMAILS_FROM = BBC_LISTS + [
137
142
  'Alan S Halperin',
138
143
  'Mitchell Bard',
139
144
  'Skip Rimer',
145
+ 'Steven Victor MD',
140
146
  ]
141
147
 
142
148
  TRUNCATION_LENGTHS = {
143
149
  '023627': 16_800, # Micheal Wolff article with brock pierce
144
- '030245': 7_500, # Epstein rationalizes his behavior in an open letter to the world
145
- '030781': 1_700, # Bannon email about crypto coin issues
146
- '032906': 750, # David Blaine email
150
+ '030245': None, # Epstein rationalizes his behavior in an open letter to the world
151
+ '030781': None, # Bannon email about crypto coin issues
152
+ '032906': None, # David Blaine email
147
153
  '026036': 6000, # Gino Yu blockchain mention
148
- '023208': 350_000, # Long discussion about leon black's finances
154
+ '023208': None, # Long discussion about leon black's finances
155
+ '029609': None, # Joi Ito
156
+ '025233': None, # Reputation.com discussion
149
157
  }
150
158
 
151
159
  # These are long forwarded articles so we force a trim to 1,333 chars if these strings exist
@@ -242,66 +250,15 @@ TRUNCATE_TERMS = [
242
250
  'https://www.washingtonpost.com/politics/2018/09/04/transcript-phone-call',
243
251
  ]
244
252
 
245
- # Some Paul Krassner emails have a ton of CCed parties we don't care about
246
- KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id in ['025329', '024923', '033568']]))
247
-
248
- # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
249
- USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
250
- 'Alan Dlugash', # CCed with Richard Kahn
251
- 'Alan Rogers', # Random CC
252
- 'Andrew Friendly', # Presumably some relation of Kelly Friendly
253
- 'BS Stern', # A random fwd of email we have
254
- 'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
255
- 'Connie Zaguirre', # Random CC
256
- 'Dan Fleuette', # CC from sean bannon
257
- 'Danny Goldberg', # Random Paul Krassner emails
258
- GERALD_LEFCOURT, # Single CC
259
- GORDON_GETTY, # Random CC
260
- JEFF_FULLER, # Random Jean Luc Brunel CC
261
- 'Jojo Fontanilla', # Random CC
262
- 'Joseph Vinciguerra', # Random CC
263
- 'Larry Cohen', # Random Bill Gates CC
264
- 'Lyn Fontanilla', # Random CC
265
- 'Mark Albert', # Random CC
266
- 'Matthew Schafer', # Random CC
267
- MICHAEL_BUCHHOLTZ, # Terry Kafka CC
268
- 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
269
- 'Michael Simmons', # Random CC
270
- 'Nancy Portland', # Lawrence Krauss CC
271
- 'Oliver Goodenough', # Robert Trivers CC
272
- 'Peter Aldhous', # Lawrence Krauss CC
273
- 'Players2', # Hoffenberg CC
274
- 'Sam Harris', # Lawrence Krauss CC
275
- SAMUEL_LEFF, # Random CC
276
- 'Sean T Lehane', # Random CC
277
- 'Stephen Rubin', # Random CC
278
- 'Tim Kane', # Random CC
279
- 'Travis Pangburn', # Random CC
280
- 'Vahe Stepanian', # Random CC
281
- # Ross Gow BCC
282
- 'david.brown@thetimes.co.uk',
283
- 'io-anne.pugh@bbc.co.uk',
284
- 'martin.robinson@mailonline.co.uk',
285
- 'nick.alwav@bbc.co.uk'
286
- 'nick.sommerlad@mirror.co.uk',
287
- 'p.peachev@independent.co.uk',
288
- ]
289
-
290
- # Emails sent by epstein to himself that are just notes
291
- SELF_EMAILS_FILE_IDS = [
292
- '026677',
293
- '029752', # TODO: jokeland...
294
- '030238',
295
- # '033274', # TODO: Epstein's note to self doesn't get printed if we don't set the recipients to [None]
296
- ]
297
-
298
253
  METADATA_FIELDS = [
299
254
  'is_junk_mail',
255
+ 'is_mailing_list',
300
256
  'recipients',
301
257
  'sent_from_device',
302
258
  'subject',
303
259
  ]
304
260
 
261
+ # Note the line repair happens *after* 'Importance: High' is removed
305
262
  LINE_REPAIR_MERGES = {
306
263
  '017523': 4,
307
264
  '019407': [2, 4],
@@ -309,9 +266,14 @@ LINE_REPAIR_MERGES = {
309
266
  '022673': 9,
310
267
  '022684': 9,
311
268
  '022695': 4,
269
+ '029773': [2, 5],
312
270
  '023067': 3,
313
271
  '025790': 2,
272
+ '029841': 3,
273
+ '026345': 3,
314
274
  '026609': 4,
275
+ '033299': 3,
276
+ '026829': 3,
315
277
  '026924': [2, 4],
316
278
  '028931': [3, 6],
317
279
  '029154': [2, 5],
@@ -322,6 +284,7 @@ LINE_REPAIR_MERGES = {
322
284
  '029501': 2,
323
285
  '029835': [2, 4],
324
286
  '029889': 2,
287
+ '029545': [3, 5],
325
288
  '029976': 3,
326
289
  '030299': [7, 10],
327
290
  '030381': [2, 4],
@@ -337,6 +300,7 @@ LINE_REPAIR_MERGES = {
337
300
  '032405': 4,
338
301
  '033097': 2,
339
302
  '033144': [2, 4],
303
+ '033217': 3,
340
304
  '033228': [3, 5],
341
305
  '033357': [2, 4],
342
306
  '033486': [7, 9],
@@ -354,14 +318,14 @@ class Email(Communication):
354
318
  actual_text (str) - best effort at the text actually sent in this email, excluding quoted replies and forwards
355
319
  config (EmailCfg | None) - manual config for this email (if it exists)
356
320
  header (EmailHeader) - header data extracted from the text (from/to/sent/subject etc)
357
- recipients (list[str | None]) - who this email was sent to
321
+ recipients (list[Name]) - who this email was sent to
358
322
  sent_from_device (str | None) - "Sent from my iPhone" style signature (if it exists)
359
323
  signature_substitution_counts (dict[str, int]) - count of how many times a signature was replaced with <...snipped...> for each participant
360
324
  """
361
325
  actual_text: str = field(init=False)
362
326
  config: EmailCfg | None = None
363
327
  header: EmailHeader = field(init=False)
364
- recipients: list[str | None] = field(default_factory=list)
328
+ recipients: list[Name] = field(default_factory=list)
365
329
  sent_from_device: str | None = None
366
330
  signature_substitution_counts: dict[str, int] = field(default_factory=dict) # defaultdict breaks asdict :(
367
331
 
@@ -382,25 +346,21 @@ class Email(Communication):
382
346
 
383
347
  super().__post_init__()
384
348
 
385
- try:
386
- if self.config and self.config.recipients:
387
- self.recipients = self.config.recipients
388
- else:
389
- for recipient in self.header.recipients():
390
- self.recipients.extend(self._extract_emailer_names(recipient))
391
-
392
- if self.author in MAILING_LISTS and (len(self.recipients) == 0 or self.recipients == [self.author]):
393
- self.recipients = [JEFFREY_EPSTEIN] # Assume mailing list emails are to Epstein
394
- except Exception as e:
395
- console.print_exception()
396
- console.line(2)
397
- logger.fatal(f"Failed on {self.file_id}")
398
- console.line(2)
399
- raise e
400
-
401
- # Remove self CCs
402
- recipients = [r for r in self.recipients if r != self.author or self.file_id in SELF_EMAILS_FILE_IDS]
403
- self.recipients = list(set(recipients))
349
+ if self.config and self.config.recipients:
350
+ self.recipients = self.config.recipients
351
+ else:
352
+ for recipient in self.header.recipients():
353
+ self.recipients.extend(self._extract_emailer_names(recipient))
354
+
355
+ # Assume mailing list emails are to Epstein
356
+ if self.author in BBC_LISTS and (self.is_note_to_self() or not self.recipients):
357
+ self.recipients = [JEFFREY_EPSTEIN]
358
+
359
+ # Remove self CCs but preserve self emails
360
+ if not self.is_note_to_self():
361
+ self.recipients = [r for r in self.recipients if r != self.author]
362
+
363
+ self.recipients = sorted(list(set(self.recipients)), key=lambda r: r or UNKNOWN)
404
364
  self.text = self._prettify_text()
405
365
  self.actual_text = self._actual_text()
406
366
  self.sent_from_device = self._sent_from_device()
@@ -410,18 +370,30 @@ class Email(Communication):
410
370
 
411
371
  def info_txt(self) -> Text:
412
372
  email_type = 'fwded article' if self.is_fwded_article() else 'email'
413
- txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt()).append(' to ')
414
- return txt.append(self.recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
373
+ txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt())
374
+
375
+ if self.config and self.config.is_attribution_uncertain:
376
+ txt.append(f" {QUESTION_MARKS}", style=self.author_style())
377
+
378
+ txt.append(' to ').append(self.recipients_txt())
379
+ return txt.append(highlighter(f" probably sent at {self.timestamp}"))
415
380
 
416
381
  def is_fwded_article(self) -> bool:
417
382
  return bool(self.config and self.config.is_fwded_article)
418
383
 
419
384
  def is_junk_mail(self) -> bool:
420
- return self.author in JUNK_EMAILERS or self.author in MAILING_LISTS
385
+ return self.author in JUNK_EMAILERS
386
+
387
+ def is_mailing_list(self) -> bool:
388
+ return self.author in MAILING_LISTS or self.is_junk_mail()
389
+
390
+ def is_note_to_self(self) -> bool:
391
+ return self.recipients == [self.author]
421
392
 
422
393
  def metadata(self) -> Metadata:
423
394
  local_metadata = asdict(self)
424
395
  local_metadata['is_junk_mail'] = self.is_junk_mail()
396
+ local_metadata['is_mailing_list'] = self.is_junk_mail()
425
397
  local_metadata['subject'] = self.subject() or None
426
398
  metadata = super().metadata()
427
399
  metadata.update({k: v for k, v in local_metadata.items() if v and k in METADATA_FIELDS})
@@ -438,7 +410,10 @@ class Email(Communication):
438
410
  ], join=', ')
439
411
 
440
412
  def subject(self) -> str:
441
- return self.header.subject or ''
413
+ if self.config and self.config.subject:
414
+ return self.config.subject
415
+ else:
416
+ return self.header.subject or ''
442
417
 
443
418
  def summary(self) -> Text:
444
419
  """One line summary mostly for logging."""
@@ -489,11 +464,8 @@ class Email(Communication):
489
464
 
490
465
  def _border_style(self) -> str:
491
466
  """Color emails from epstein to others with the color for the first recipient."""
492
- if self.author == JEFFREY_EPSTEIN:
493
- if len(self.recipients) == 0 or self.recipients == [None]:
494
- style = self.author_style()
495
- else:
496
- style = get_style_for_name(self.recipients[0])
467
+ if self.author == JEFFREY_EPSTEIN and len(self.recipients) > 0:
468
+ style = get_style_for_name(self.recipients[0])
497
469
  else:
498
470
  style = self.author_style()
499
471
 
@@ -541,6 +513,8 @@ class Email(Communication):
541
513
  self.log_top_lines(msg='No email header match found!', level=log_level)
542
514
  self.header = EmailHeader(field_names=[])
543
515
 
516
+ logger.debug(f"{self.file_id} extracted header\n\n{self.header}\n")
517
+
544
518
  def _extract_timestamp(self) -> datetime:
545
519
  if self.config and self.config.timestamp:
546
520
  return self.config.timestamp
@@ -665,6 +639,9 @@ class Email(Communication):
665
639
  elif self.file_id in ['025329']:
666
640
  for _i in range(9):
667
641
  self._merge_lines(2)
642
+ elif self.file_id in ['025812']:
643
+ for _i in range(2):
644
+ self._merge_lines(3)
668
645
  elif self.file_id == '014860':
669
646
  self._merge_lines(3)
670
647
  self._merge_lines(4)
@@ -763,7 +740,7 @@ class Email(Communication):
763
740
  if args.whole_file:
764
741
  num_chars = len(self.text)
765
742
  elif self.file_id in TRUNCATION_LENGTHS:
766
- num_chars = TRUNCATION_LENGTHS[self.file_id]
743
+ num_chars = TRUNCATION_LENGTHS[self.file_id] or self.file_size()
767
744
  elif self.author in TRUNCATE_ALL_EMAILS_FROM or includes_truncate_term:
768
745
  num_chars = int(MAX_CHARS_TO_PRINT / 3)
769
746
  elif quote_cutoff and quote_cutoff < MAX_CHARS_TO_PRINT:
@@ -830,26 +807,47 @@ class Email(Communication):
830
807
  self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
831
808
 
832
809
  @staticmethod
833
- def build_emails_table(emails: list['Email'], _author: str | None, include_title: bool = False) -> Table:
834
- """Turn a set of Emails to/from a given _author into a Table."""
835
- author = _author or UNKNOWN
836
-
837
- table = Table(
838
- title=f"Emails to/from {author} starting {emails[0].timestamp.date()}" if include_title else None,
839
- border_style=get_style_for_name(author, allow_bold=False),
840
- header_style="bold"
810
+ def build_emails_table(emails: list['Email'], name: Name = '', title: str = '', show_length: bool = False) -> Table:
811
+ """Turn a set of Emails into a Table."""
812
+ if title and name:
813
+ raise ValueError(f"Can't provide both 'author' and 'title' args")
814
+ elif name == '' and title == '':
815
+ raise ValueError(f"Must provide either 'author' or 'title' arg")
816
+
817
+ author_style = get_style_for_name(name, allow_bold=False)
818
+ link_style = author_style if name else ARCHIVE_LINK_COLOR
819
+ min_width = len(name or UNKNOWN)
820
+ max_width = max(20, min_width)
821
+
822
+ columns = [
823
+ {'name': 'Sent At', 'justify': 'left', 'style': TIMESTAMP_DIM},
824
+ {'name': 'From', 'justify': 'left', 'min_width': min_width, 'max_width': max_width},
825
+ {'name': 'To', 'justify': 'left', 'min_width': min_width, 'max_width': max_width + 2},
826
+ {'name': 'Length', 'justify': 'right', 'style': 'wheat4'},
827
+ {'name': 'Subject', 'justify': 'left', 'min_width': 35, 'style': 'honeydew2'},
828
+ ]
829
+
830
+ table = build_table(
831
+ title or None,
832
+ cols=[col for col in columns if show_length or col['name'] not in ['Length']],
833
+ border_style=DEFAULT_TABLE_KWARGS['border_style'] if title else author_style,
834
+ header_style="bold",
835
+ highlight=True,
841
836
  )
842
837
 
843
- table.add_column('From', justify='left')
844
- table.add_column('Timestamp', justify='center')
845
- table.add_column('Subject', justify='left', style='honeydew2', min_width=70)
846
-
847
838
  for email in emails:
848
- table.add_row(
839
+ fields = [
840
+ email.epstein_media_link(link_txt=email.timestamp_without_seconds(), style=link_style),
849
841
  email.author_txt(),
850
- email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
851
- highlighter(email.subject())
852
- )
842
+ email.recipients_txt(max_full_names=1),
843
+ f"{email.length()}",
844
+ email.subject(),
845
+ ]
846
+
847
+ if not show_length:
848
+ del fields[3]
849
+
850
+ table.add_row(*fields)
853
851
 
854
852
  return table
855
853
 
@@ -8,13 +8,13 @@ from epstein_files.util.doc_cfg import EmailCfg
8
8
  from epstein_files.util.logging import logger
9
9
  from epstein_files.util.rich import UNKNOWN
10
10
 
11
- FIELD_NAMES = ['From', 'Date', 'Sent', 'Subject']
11
+ FIELD_NAMES = ['Date', 'From', 'Sent', 'Subject']
12
12
  NON_HEADER_FIELDS = ['field_names', 'num_header_rows', 'was_initially_empty']
13
13
  ON_BEHALF_OF = 'on behalf of'
14
14
  TO_FIELDS = ['bcc', 'cc', 'to']
15
15
  EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
16
16
 
17
- HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
17
+ HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments|Classification|Flag):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
18
18
  EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
19
19
  EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
20
20
  EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL) # Match up to the next email header section
@@ -41,6 +41,8 @@ class EmailHeader:
41
41
  subject: str | None = None
42
42
  bcc: list[str] | None = None
43
43
  cc: list[str] | None = None
44
+ classification: str | None = None
45
+ flag: str | None = None
44
46
  importance: str | None = None
45
47
  attachments: str | None = None
46
48
  to: list[str] | None = None
@@ -4,38 +4,35 @@ from datetime import datetime
4
4
 
5
5
  from rich.text import Text
6
6
 
7
- from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
7
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
8
8
  from epstein_files.util.constant.strings import TIMESTAMP_DIM
9
- from epstein_files.util.data import extract_last_name, iso_timestamp
9
+ from epstein_files.util.data import iso_timestamp
10
10
  from epstein_files.util.highlighted_group import get_style_for_name
11
11
  from epstein_files.util.logging import logger
12
12
  from epstein_files.util.rich import TEXT_LINK, highlighter
13
13
 
14
+ EPSTEIN_TEXTERS = ['e:', 'e:jeeitunes@gmail.com']
14
15
  MSG_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
15
16
  PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
17
+ UNCERTAIN_SUFFIX = ' (?)'
16
18
 
17
19
  DISPLAY_LAST_NAME_ONLY = [
18
20
  JEFFREY_EPSTEIN,
19
21
  STEVE_BANNON,
20
22
  ]
21
23
 
22
- TEXTER_MAPPING = {
23
- 'e:': JEFFREY_EPSTEIN,
24
- 'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
25
- }
26
-
27
24
 
28
25
  @dataclass(kw_only=True)
29
26
  class TextMessage:
30
27
  """Class representing a single iMessage text message."""
31
- author: str | None
28
+ author: Name
32
29
  author_str: str = ''
33
30
  is_id_confirmed: bool = False
34
31
  text: str
35
32
  timestamp_str: str
36
33
 
37
34
  def __post_init__(self):
38
- self.author = TEXTER_MAPPING.get(self.author or UNKNOWN, self.author)
35
+ self.author = JEFFREY_EPSTEIN if self.author in EPSTEIN_TEXTERS else self.author
39
36
 
40
37
  if not self.author:
41
38
  self.author_str = UNKNOWN
@@ -45,7 +42,7 @@ class TextMessage:
45
42
  self.author_str = self.author_str or self.author
46
43
 
47
44
  if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
48
- self.author_str += ' (?)'
45
+ self.author_str += UNCERTAIN_SUFFIX
49
46
 
50
47
  if self.is_link():
51
48
  self.text = self.text.replace('\n', '').replace(' ', '_')
@@ -59,12 +56,11 @@ class TextMessage:
59
56
  return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
60
57
 
61
58
  def timestamp_txt(self) -> Text:
62
- timestamp_str = self.timestamp_str
63
-
64
59
  try:
65
60
  timestamp_str = iso_timestamp(self.parse_timestamp())
66
61
  except Exception as e:
67
62
  logger.warning(f"Failed to parse timestamp for {self}")
63
+ timestamp_str = self.timestamp_str
68
64
 
69
65
  return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
70
66
 
@@ -10,11 +10,11 @@ from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.communication import Communication
12
12
  from epstein_files.documents.imessage.text_message import TextMessage
13
- from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
13
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, Name
14
14
  from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
15
15
  from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
16
16
  from epstein_files.util.doc_cfg import Metadata, TextCfg
17
- from epstein_files.util.highlighted_group import get_style_for_name
17
+ from epstein_files.util.highlighted_group import styled_name
18
18
  from epstein_files.util.logging import logger
19
19
  from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
20
20
 
@@ -35,7 +35,7 @@ class MessengerLog(Communication):
35
35
  super().__post_init__()
36
36
  self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
37
37
 
38
- def first_message_at(self, name: str | None) -> datetime:
38
+ def first_message_at(self, name: Name) -> datetime:
39
39
  return self.messages_by(name)[0].parse_timestamp()
40
40
 
41
41
  def info_txt(self) -> Text | None:
@@ -54,10 +54,10 @@ class MessengerLog(Communication):
54
54
 
55
55
  return txt.append(')')
56
56
 
57
- def last_message_at(self, name: str | None) -> datetime:
57
+ def last_message_at(self, name: Name) -> datetime:
58
58
  return self.messages_by(name)[-1].parse_timestamp()
59
59
 
60
- def messages_by(self, name: str | None) -> list[TextMessage]:
60
+ def messages_by(self, name: Name) -> list[TextMessage]:
61
61
  """Return all messages by 'name'."""
62
62
  return [m for m in self.messages if m.author == name]
63
63
 
@@ -129,9 +129,9 @@ class MessengerLog(Communication):
129
129
  yield message
130
130
 
131
131
  @classmethod
132
- def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
132
+ def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[Name, int]:
133
133
  """Count up how many texts were sent by each author."""
134
- sender_counts: dict[str | None, int] = defaultdict(int)
134
+ sender_counts: dict[Name, int] = defaultdict(int)
135
135
 
136
136
  for message_log in imessage_logs:
137
137
  for message in message_log.messages:
@@ -160,7 +160,7 @@ class MessengerLog(Communication):
160
160
  last_at = logs[-1].first_message_at(name)
161
161
 
162
162
  counts_table.add_row(
163
- Text(name or UNKNOWN, get_style_for_name(name)),
163
+ styled_name(name),
164
164
  str(len(logs)),
165
165
  f"{count:,}",
166
166
  iso_timestamp(first_at),