epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epstein_files/__init__.py CHANGED
@@ -19,10 +19,10 @@ from epstein_files.documents.email import Email
19
19
  from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
20
20
  from epstein_files.util.env import args
21
21
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
22
- from epstein_files.util.logging import logger
22
+ from epstein_files.util.logging import exit_with_error, logger
23
23
  from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
24
24
  print_other_files_section, print_text_messages_section, write_complete_emails_timeline, write_json_metadata, write_urls)
25
- from epstein_files.util.rich import build_highlighter, console, print_title_page_header, print_title_page_tables, print_panel, write_html
25
+ from epstein_files.util.rich import build_highlighter, console, print_color_key, print_title_page_header, print_title_page_tables, print_subtitle_panel, write_html
26
26
  from epstein_files.util.timer import Timer
27
27
  from epstein_files.util.word_count import write_word_counts_html
28
28
 
@@ -45,15 +45,18 @@ def generate_html() -> None:
45
45
 
46
46
  print_title_page_header(epstein_files)
47
47
 
48
- if not args.email_timeline:
48
+ if args.email_timeline:
49
+ print_color_key()
50
+ else:
49
51
  print_title_page_tables(epstein_files)
50
52
 
51
53
  if args.colors_only:
52
54
  exit()
53
55
 
54
56
  if args.output_texts:
55
- print_text_messages_section(epstein_files)
56
- timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
57
+ imessage_logs = [log for log in epstein_files.imessage_logs if not args.names or log.author in args.names]
58
+ print_text_messages_section(imessage_logs)
59
+ timer.print_at_checkpoint(f'Printed {len(imessage_logs)} text message log files')
57
60
 
58
61
  if args.output_emails:
59
62
  emails_that_were_printed = print_emails_section(epstein_files)
@@ -101,15 +104,12 @@ def epstein_search():
101
104
  temp_highlighter = build_highlighter(search_term)
102
105
  search_results = epstein_files.docs_matching(search_term, args.names)
103
106
  console.line(2)
104
- print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
107
+ print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
105
108
 
106
109
  for search_result in search_results:
107
110
  console.line()
108
111
 
109
112
  if args.whole_file:
110
- if isinstance(search_result.document, Email):
111
- search_result.document._truncation_allowed = False
112
-
113
113
  console.print(search_result.document)
114
114
  else:
115
115
  console.print(search_result.document.summary_panel())
@@ -128,9 +128,6 @@ def epstein_show():
128
128
  console.line()
129
129
 
130
130
  for doc in docs:
131
- if isinstance(doc, Email):
132
- doc._truncation_allowed = False
133
-
134
131
  console.print('\n', doc, '\n')
135
132
 
136
133
  if args.raw:
@@ -148,5 +145,4 @@ def epstein_word_count() -> None:
148
145
 
149
146
  def _assert_positional_args():
150
147
  if not args.positional_args:
151
- console.print(f"\n ERROR: No positional args!\n", style='red1')
152
- exit(1)
148
+ exit_with_error(f"No positional args provided!\n")
@@ -18,25 +18,24 @@ TIMESTAMP_SECONDS_REGEX = re.compile(r":\d{2}$")
18
18
  @dataclass
19
19
  class Communication(Document):
20
20
  """Superclass for Email and MessengerLog."""
21
- author_style: str = 'white'
22
- author_txt: Text = field(init=False)
23
21
  config: CommunicationCfg | None = None
24
22
  timestamp: datetime = FALLBACK_TIMESTAMP # TODO this default sucks (though it never happens)
25
23
 
26
- def __post_init__(self):
27
- super().__post_init__()
28
- self.author_style = get_style_for_name(self.author_or_unknown())
29
- self.author_txt = Text(self.author_or_unknown(), style=self.author_style)
30
-
31
24
  def author_or_unknown(self) -> str:
32
25
  return self.author or UNKNOWN
33
26
 
34
- def is_attribution_uncertain(self) -> bool:
35
- return bool(self.config and self.config.is_attribution_uncertain)
27
+ def author_style(self) -> str:
28
+ return get_style_for_name(self.author_or_unknown())
29
+
30
+ def author_txt(self) -> Text:
31
+ return Text(self.author_or_unknown(), style=self.author_style())
36
32
 
37
33
  def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
38
34
  """Overrides super() method to apply self.author_style."""
39
- return super().external_links_txt(self.author_style, include_alt_links=include_alt_links)
35
+ return super().external_links_txt(self.author_style(), include_alt_links=include_alt_links)
36
+
37
+ def is_attribution_uncertain(self) -> bool:
38
+ return bool(self.config and self.config.is_attribution_uncertain)
40
39
 
41
40
  def summary(self) -> Text:
42
41
  return self._summary().append(CLOSE_PROPERTIES_CHAR)
@@ -47,7 +46,4 @@ class Communication(Document):
47
46
  def _summary(self) -> Text:
48
47
  """One line summary mostly for logging."""
49
48
  txt = super().summary().append(', ')
50
- return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style)))
51
-
52
-
53
- CommunicationType = TypeVar('CommunicationType', bound=Document)
49
+ return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style())))
@@ -251,7 +251,7 @@ class Document:
251
251
  def summary(self) -> Text:
252
252
  """Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
253
253
  txt = Text('').append(self._class_name(), style=self._class_style())
254
- txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
254
+ txt.append(f" {self.file_path.stem}", style=FILENAME_STYLE)
255
255
 
256
256
  if self.timestamp:
257
257
  timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import logging
2
3
  import re
3
4
  from copy import deepcopy
@@ -20,7 +21,7 @@ from epstein_files.util.constant.names import *
20
21
  from epstein_files.util.constant.strings import REDACTED
21
22
  from epstein_files.util.constants import *
22
23
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
23
- flatten, remove_timezone, uniquify)
24
+ flatten, listify, remove_timezone, uniquify)
24
25
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
25
26
  from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
26
27
  from epstein_files.util.highlighted_group import get_style_for_name
@@ -42,7 +43,7 @@ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
42
43
  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
43
44
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
44
45
  URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
45
- APPEARS_IN = 'Appears in'
46
+ APPEARS_IN = 'appears in'
46
47
  MAX_CHARS_TO_PRINT = 4000
47
48
  MAX_NUM_HEADER_LINES = 14
48
49
  MAX_QUOTED_REPLIES = 2
@@ -152,6 +153,8 @@ TRUNCATION_LENGTHS = {
152
153
  '030245': 7_500, # Epstein rationalizes his behavior in an open letter to the world
153
154
  '030781': 1_700, # Bannon email about crypto coin issues
154
155
  '032906': 750, # David Blaine email
156
+ '026036': 6000, # Gino Yu blockchain mention
157
+ '023208': 350_000, # Long discussion about leon black's finances
155
158
  }
156
159
 
157
160
  # These are long forwarded articles so we force a trim to 1,333 chars if these strings exist
@@ -276,6 +279,7 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
276
279
  'Nancy Portland', # Lawrence Krauss CC
277
280
  'Oliver Goodenough', # Robert Trivers CC
278
281
  'Peter Aldhous', # Lawrence Krauss CC
282
+ 'Players2', # Hoffenberg CC
279
283
  'Sam Harris', # Lawrence Krauss CC
280
284
  SAMUEL_LEFF, # Random CC
281
285
  'Sean T Lehane', # Random CC
@@ -283,6 +287,13 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
283
287
  'Tim Kane', # Random CC
284
288
  'Travis Pangburn', # Random CC
285
289
  'Vahe Stepanian', # Random CC
290
+ # Ross Gow BCC
291
+ 'david.brown@thetimes.co.uk',
292
+ 'io-anne.pugh@bbc.co.uk',
293
+ 'martin.robinson@mailonline.co.uk',
294
+ 'nick.alwav@bbc.co.uk',
295
+ 'nick.sommerlad@mirror.co.uk',
296
+ 'p.peachev@independent.co.uk',
286
297
  ]
287
298
 
288
299
  # Emails sent by epstein to himself that are just notes
@@ -300,6 +311,50 @@ METADATA_FIELDS = [
300
311
  'subject',
301
312
  ]
302
313
 
314
+ LINE_REPAIR_MERGES = {
315
+ '017523': 4,
316
+ '019407': [2, 4],
317
+ '021729': 2,
318
+ '022673': 9,
319
+ '022684': 9,
320
+ '022695': 4,
321
+ '023067': 3,
322
+ '025790': 2,
323
+ '026609': 4,
324
+ '026924': [2, 4],
325
+ '028931': [3, 6],
326
+ '029154': [2, 5],
327
+ '029163': [2, 5],
328
+ '029282': 2,
329
+ '029402': 5,
330
+ '029498': 2,
331
+ '029501': 2,
332
+ '029835': [2, 4],
333
+ '029889': 2,
334
+ '029976': 3,
335
+ '030299': [7, 10],
336
+ '030381': [2, 4],
337
+ '030384': [2, 4],
338
+ '030626': 2,
339
+ '030999': [2, 4],
340
+ '031384': 2,
341
+ '031428': 2,
342
+ '031442': 0,
343
+ '031980': [2, 4],
344
+ '032063': [3, 5],
345
+ '032272': 3,
346
+ '032405': 4,
347
+ '033097': 2,
348
+ '033144': [2, 4],
349
+ '033228': [3, 5],
350
+ '033357': [2, 4],
351
+ '033486': [7, 9],
352
+ '033512': 2,
353
+ '033575': [2, 4],
354
+ '033576': 3,
355
+ '033583': 2,
356
+ }
357
+
303
358
 
304
359
  @dataclass
305
360
  class Email(Communication):
@@ -318,7 +373,6 @@ class Email(Communication):
318
373
  recipients: list[str | None] = field(default_factory=list)
319
374
  sent_from_device: str | None = None
320
375
  signature_substitution_counts: dict[str, int] = field(default_factory=dict) # defaultdict breaks asdict :(
321
- _truncation_allowed: bool = True # Hacky way to get __rich_console__() not to truncate in epstein_show script
322
376
 
323
377
  # For logging how many headers we prettified while printing, kind of janky
324
378
  rewritten_header_ids: ClassVar[set[str]] = set([])
@@ -342,7 +396,7 @@ class Email(Communication):
342
396
  self.recipients = self.config.recipients
343
397
  else:
344
398
  for recipient in self.header.recipients():
345
- self.recipients.extend(self._emailer_names(recipient))
399
+ self.recipients.extend(self._extract_emailer_names(recipient))
346
400
 
347
401
  if self.author in MAILING_LISTS and (len(self.recipients) == 0 or self.recipients == [self.author]):
348
402
  self.recipients = [JEFFREY_EPSTEIN] # Assume mailing list emails are to Epstein
@@ -365,7 +419,7 @@ class Email(Communication):
365
419
 
366
420
  def info_txt(self) -> Text:
367
421
  email_type = 'fwded article' if self.is_fwded_article() else 'email'
368
- txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
422
+ txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt()).append(' to ')
369
423
  return txt.append(self.recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
370
424
 
371
425
  def is_fwded_article(self) -> bool:
@@ -446,15 +500,23 @@ class Email(Communication):
446
500
  """Color emails from epstein to others with the color for the first recipient."""
447
501
  if self.author == JEFFREY_EPSTEIN:
448
502
  if len(self.recipients) == 0 or self.recipients == [None]:
449
- style = self.author_style
503
+ style = self.author_style()
450
504
  else:
451
505
  style = get_style_for_name(self.recipients[0])
452
506
  else:
453
- style = self.author_style
507
+ style = self.author_style()
454
508
 
455
509
  return style.replace('bold', '').strip()
456
510
 
457
- def _emailer_names(self, emailer_str: str) -> list[str]:
511
+ def _extract_author(self) -> None:
512
+ self._extract_header()
513
+ super()._extract_author()
514
+
515
+ if not self.author and self.header.author:
516
+ authors = self._extract_emailer_names(self.header.author)
517
+ self.author = authors[0] if (len(authors) > 0 and authors[0]) else None
518
+
519
+ def _extract_emailer_names(self, emailer_str: str) -> list[str]:
458
520
  """Return a list of people's names found in 'emailer_str' (email author or recipients field)."""
459
521
  emailer_str = EmailHeader.cleanup_str(emailer_str)
460
522
 
@@ -474,14 +536,6 @@ class Email(Communication):
474
536
  names_found = names_found or [emailer_str]
475
537
  return [_reverse_first_and_last_names(name) for name in names_found]
476
538
 
477
- def _extract_author(self) -> None:
478
- self._extract_header()
479
- super()._extract_author()
480
-
481
- if not self.author and self.header.author:
482
- authors = self._emailer_names(self.header.author)
483
- self.author = authors[0] if (len(authors) > 0 and authors[0]) else None
484
-
485
539
  def _extract_header(self) -> None:
486
540
  """Extract an EmailHeader object from the OCR text."""
487
541
  header_match = EMAIL_SIMPLE_HEADER_REGEX.search(self.text)
@@ -579,44 +633,47 @@ class Email(Communication):
579
633
  self._set_computed_fields(lines=[line for line in self.lines if not BAD_LINE_REGEX.match(line)])
580
634
  old_text = self.text
581
635
 
582
- if self.file_id in ['031442']:
583
- self._merge_lines(0) # Merge 1st and 2nd rows
584
- elif self.file_id in '021729 025790 029282 029501 029889 030626 031384 031428 033097 033512 033583 029498 033583'.split():
585
- self._merge_lines(2) # Merge 3rd and 4th rows
636
+ if self.file_id in LINE_REPAIR_MERGES:
637
+ merge = LINE_REPAIR_MERGES[self.file_id]
638
+ merge_args = merge if isinstance(merge, list) else [merge]
639
+ self._merge_lines(*merge_args)
586
640
 
587
- if self.file_id in ['030626']: # Merge 6th and 7th (now 5th and 6th) rows
588
- self._merge_lines(4)
589
- elif self.file_id == '029889':
590
- self._merge_lines(2, 5)
591
- elif self.file_id in ['029498', '031428']:
592
- self._merge_lines(2, 4)
593
- elif self.file_id in ['029976', '023067', '033576']:
594
- self._merge_lines(3) # Merge 4th and 5th rows
595
- elif self.file_id in '026609 029402 032405 022695'.split():
596
- self._merge_lines(4) # Merge 5th and 6th rows
597
- elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357', '026924']:
598
- self._merge_lines(2, 4)
599
- elif self.file_id in ['029154', '029163']:
641
+ # These already had 2nd line merged
642
+ if self.file_id in ['030626']: # Merge 6th and 7th (now 5th and 6th) rows
643
+ self._merge_lines(4)
644
+ elif self.file_id == '029889':
600
645
  self._merge_lines(2, 5)
601
- elif self.file_id in ['033228', '032063']:
602
- self._merge_lines(3, 5)
603
- elif self.file_id == '028931':
604
- self._merge_lines(3, 6)
605
- elif self.file_id == '013415':
646
+ elif self.file_id in ['029498', '031428']:
647
+ self._merge_lines(2, 4)
648
+
649
+ # Multiline
650
+ if self.file_id == '013415':
651
+ for _i in range(2):
652
+ self._merge_lines(4)
653
+ elif self.file_id == '013405':
654
+ for _i in range(2):
655
+ self._merge_lines(4)
656
+ elif self.file_id == '029458':
657
+ for _i in range(3):
658
+ self._merge_lines(4)
659
+ elif self.file_id in ['025233']:
606
660
  for _i in range(2):
607
661
  self._merge_lines(4)
662
+
663
+ self.lines[4] = f"Attachments: {self.lines[4]}"
664
+ self._set_computed_fields(lines=self.lines)
665
+ elif self.file_id in ['023001']:
666
+ for _i in range(3):
667
+ self._merge_lines(5)
668
+ elif self.file_id in ['019105']:
669
+ for _i in range(4):
670
+ self._merge_lines(5)
608
671
  elif self.file_id in ['033568']:
609
672
  for _i in range(5):
610
673
  self._merge_lines(5)
611
674
  elif self.file_id in ['025329']:
612
675
  for _i in range(9):
613
676
  self._merge_lines(2)
614
- elif self.file_id == '033486':
615
- self._merge_lines(7, 9)
616
- elif self.file_id == '030299':
617
- self._merge_lines(7, 10)
618
- elif self.file_id in ['022673', '022684']:
619
- self._merge_lines(9)
620
677
  elif self.file_id == '014860':
621
678
  self._merge_lines(3)
622
679
  self._merge_lines(4)
@@ -629,7 +686,15 @@ class Email(Communication):
629
686
 
630
687
  self._merge_lines(4)
631
688
  self._merge_lines(2, 4)
632
- elif self.file_id == '025041':
689
+ elif self.file_id in ['033252']:
690
+ for _i in range(2):
691
+ self._merge_lines(9)
692
+ elif self.file_id in ['032637']:
693
+ for _i in range(3):
694
+ self._merge_lines(9)
695
+
696
+ # Bad line removal
697
+ if self.file_id == '025041':
633
698
  self._remove_line(4)
634
699
  self._remove_line(4)
635
700
  elif self.file_id == '029692':
@@ -679,7 +744,7 @@ class Email(Communication):
679
744
  """Copy info from original config for file this document was extracted from."""
680
745
  if self.file_id in ALL_FILE_CONFIGS:
681
746
  self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
682
- self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
747
+ self.warn(f"Merging existing cfg for '{self.file_id}' with cfg for extracted document...")
683
748
  else:
684
749
  self.config = EmailCfg(id=self.file_id)
685
750
 
@@ -692,33 +757,55 @@ class Email(Communication):
692
757
  extracted_description += ' email'
693
758
 
694
759
  if self.config.description:
695
- self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
760
+ self.warn(f"Overwriting description '{self.config.description}' with extract's '{extracted_description}'")
696
761
 
697
762
  self.config.description = extracted_description
698
763
 
699
764
  self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
700
765
  self.log(f"Constructed synthetic config: {self.config}")
701
766
 
702
- def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
703
- logger.debug(f"Printing '{self.filename}'...")
704
- yield self.file_info_panel()
705
- should_rewrite_header = self.header.was_initially_empty and self.header.num_header_rows > 0
767
+ def _truncate_to_length(self) -> int:
768
+ """When printing truncate this email to this length."""
706
769
  quote_cutoff = self._idx_of_nth_quoted_reply(text=self.text) # Trim if there's many quoted replies
707
- num_chars = MAX_CHARS_TO_PRINT
708
- trim_footer_txt = None
709
- text = self.text
770
+ includes_truncate_term = next((term for term in TRUNCATE_TERMS if term in self.text), None)
710
771
 
711
- if self.file_id in TRUNCATION_LENGTHS:
772
+ if args.whole_file:
773
+ num_chars = len(self.text)
774
+ elif self.file_id in TRUNCATION_LENGTHS:
712
775
  num_chars = TRUNCATION_LENGTHS[self.file_id]
713
- elif self.author in TRUNCATE_ALL_EMAILS_FROM or any((term in self.text) for term in TRUNCATE_TERMS):
776
+ elif self.author in TRUNCATE_ALL_EMAILS_FROM or includes_truncate_term:
714
777
  num_chars = int(MAX_CHARS_TO_PRINT / 3)
715
778
  elif quote_cutoff and quote_cutoff < MAX_CHARS_TO_PRINT:
716
779
  num_chars = quote_cutoff
780
+ else:
781
+ num_chars = MAX_CHARS_TO_PRINT
782
+
783
+ if num_chars != MAX_CHARS_TO_PRINT and not self.is_duplicate():
784
+ log_args = {
785
+ 'num_chars': num_chars,
786
+ 'author_truncate': self.author in TRUNCATE_ALL_EMAILS_FROM,
787
+ 'is_fwded_article': self.is_fwded_article(),
788
+ 'is_quote_cutoff': quote_cutoff == num_chars,
789
+ 'includes_truncate_term': json.dumps(includes_truncate_term) if includes_truncate_term else None,
790
+ 'quote_cutoff': quote_cutoff,
791
+ }
792
+
793
+ if quote_cutoff != num_chars:
794
+ logger.debug(f'{self.summary()} truncating: ' + ', '.join([f"{k}={v}" for k, v in log_args.items() if v]) + '\n')
795
+
796
+ return num_chars
797
+
798
+ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
799
+ logger.debug(f"Printing '{self.filename}'...")
800
+ should_rewrite_header = self.header.was_initially_empty and self.header.num_header_rows > 0
801
+ num_chars = self._truncate_to_length()
802
+ trim_footer_txt = None
803
+ text = self.text
717
804
 
718
805
  # Truncate long emails but leave a note explaining what happened w/link to source document
719
- if len(text) > num_chars and self._truncation_allowed:
806
+ if len(text) > num_chars:
720
807
  text = text[0:num_chars]
721
- doc_link_markup = epstein_media_doc_link_markup(self.url_slug, self.author_style)
808
+ doc_link_markup = epstein_media_doc_link_markup(self.url_slug, self.author_style())
722
809
  trim_note = f"<...trimmed to {num_chars} characters of {self.length()}, read the rest at {doc_link_markup}...>"
723
810
  trim_footer_txt = Text.from_markup(wrap_in_markup_style(trim_note, 'dim'))
724
811
 
@@ -738,38 +825,37 @@ class Email(Communication):
738
825
  text = _add_line_breaks(text) # This was skipped when _prettify_text() w/a broken header so we do it now
739
826
  self.rewritten_header_ids.add(self.file_id)
740
827
 
741
- panel_txt = highlighter(text)
742
-
743
828
  email_txt_panel = Panel(
744
- panel_txt.append('\n\n').append(trim_footer_txt) if trim_footer_txt else panel_txt,
829
+ highlighter(text).append('\n\n').append(trim_footer_txt) if trim_footer_txt else highlighter(text),
745
830
  border_style=self._border_style(),
746
831
  expand=False,
747
832
  subtitle=REWRITTEN_HEADER_MSG if should_rewrite_header else None,
748
833
  )
749
834
 
835
+ yield self.file_info_panel()
750
836
  yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
751
837
 
752
838
  if should_rewrite_header:
753
839
  self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
754
840
 
755
841
  @staticmethod
756
- def build_table(emails: list['Email'], _author: str | None) -> Table:
757
- """Turn a set of Email objects into a Table."""
842
+ def build_emails_table(emails: list['Email'], _author: str | None, include_title: bool = False) -> Table:
843
+ """Turn a set of Emails to/from a given _author into a Table."""
758
844
  author = _author or UNKNOWN
759
845
 
760
846
  table = Table(
761
- title=f"Emails to/from {author} starting {emails[0].timestamp.date()}",
847
+ title=f"Emails to/from {author} starting {emails[0].timestamp.date()}" if include_title else None,
762
848
  border_style=get_style_for_name(author, allow_bold=False),
763
849
  header_style="bold"
764
850
  )
765
851
 
766
852
  table.add_column('From', justify='left')
767
853
  table.add_column('Timestamp', justify='center')
768
- table.add_column('Subject', justify='left', style='honeydew2', min_width=60)
854
+ table.add_column('Subject', justify='left', style='honeydew2', min_width=70)
769
855
 
770
856
  for email in emails:
771
857
  table.add_row(
772
- email.author_txt,
858
+ email.author_txt(),
773
859
  email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
774
860
  highlighter(email.subject())
775
861
  )
@@ -1,12 +1,12 @@
1
1
  import re
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field, fields
3
3
  from datetime import datetime
4
4
 
5
5
  from rich.text import Text
6
6
 
7
7
  from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
8
8
  from epstein_files.util.constant.strings import TIMESTAMP_DIM
9
- from epstein_files.util.data import extract_last_name
9
+ from epstein_files.util.data import extract_last_name, iso_timestamp
10
10
  from epstein_files.util.highlighted_group import get_style_for_name
11
11
  from epstein_files.util.logging import logger
12
12
  from epstein_files.util.rich import TEXT_LINK, highlighter
@@ -30,7 +30,7 @@ class TextMessage:
30
30
  """Class representing a single iMessage text message."""
31
31
  author: str | None
32
32
  author_str: str = ''
33
- id_confirmed: bool = False
33
+ is_id_confirmed: bool = False
34
34
  text: str
35
35
  timestamp_str: str
36
36
 
@@ -44,38 +44,55 @@ class TextMessage:
44
44
  else:
45
45
  self.author_str = self.author_str or self.author
46
46
 
47
- if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
47
+ if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
48
48
  self.author_str += ' (?)'
49
49
 
50
- def timestamp(self) -> datetime:
51
- return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
50
+ if self.is_link():
51
+ self.text = self.text.replace('\n', '').replace(' ', '_')
52
+ else:
53
+ self.text = self.text.replace('\n', ' ')
52
54
 
53
- def _message(self) -> Text:
54
- lines = self.text.split('\n')
55
+ def is_link(self) -> bool:
56
+ return self.text.startswith('http')
55
57
 
56
- # Fix multiline links
57
- if self.text.startswith('http'):
58
- text = self.text
58
+ def parse_timestamp(self) -> datetime:
59
+ return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
59
60
 
60
- if len(lines) > 1 and not lines[0].endswith('html'):
61
- if len(lines) > 2 and lines[1].endswith('-'):
62
- text = text.replace('\n', '', 2)
63
- else:
64
- text = text.replace('\n', '', 1)
61
+ def timestamp_txt(self) -> Text:
62
+ timestamp_str = self.timestamp_str
65
63
 
66
- lines = text.split('\n')
67
- link_text = lines.pop()
68
- msg_txt = Text('').append(Text.from_markup(f"[link={link_text}]{link_text}[/link]", style=TEXT_LINK))
64
+ try:
65
+ timestamp_str = iso_timestamp(self.parse_timestamp())
66
+ except Exception as e:
67
+ logger.warning(f"Failed to parse timestamp for {self}: {e}")
69
68
 
70
- if len(lines) > 0:
71
- msg_txt.append('\n' + ' '.join(lines))
72
- else:
73
- msg_txt = highlighter(' '.join(lines)) # remove newlines
69
+ return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
74
70
 
75
- return msg_txt
71
+ def _message(self) -> Text:
72
+ if self.is_link():
73
+ return Text.from_markup(f"[link={self.text}]{self.text}[/link]", style=TEXT_LINK)
74
+ else:
75
+ return highlighter(self.text)
76
76
 
77
77
  def __rich__(self) -> Text:
78
- timestamp_txt = Text(f"[{self.timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
78
+ timestamp_txt = self.timestamp_txt().append(' ')
79
79
  author_style = get_style_for_name(self.author_str if self.author_str.startswith('+') else self.author)
80
80
  author_txt = Text(self.author_str, style=author_style)
81
81
  return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(self._message())
82
+
83
+ def __repr__(self) -> str:
84
+ props = []
85
+ add_prop = lambda k, v: props.append(f"{k}={v}")
86
+
87
+ for _field in sorted(fields(self), key=lambda f: f.name):
88
+ key = _field.name
89
+ value = getattr(self, key)
90
+
91
+ if key == 'author_str' and self.author and self.author_str.startswith(value):
92
+ continue
93
+ elif isinstance(value, str):
94
+ add_prop(key, f'"{value}"')
95
+ else:
96
+ add_prop(key, value)
97
+
98
+ return f"{type(self).__name__}(" + ', '.join(props) + f')'