epstein-files 1.1.5__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +5 -1
- epstein_files/documents/document.py +7 -3
- epstein_files/documents/email.py +43 -65
- epstein_files/documents/emails/email_header.py +4 -2
- epstein_files/documents/imessage/text_message.py +3 -3
- epstein_files/documents/messenger_log.py +7 -7
- epstein_files/epstein_files.py +117 -115
- epstein_files/person.py +350 -0
- epstein_files/util/constant/names.py +35 -11
- epstein_files/util/constant/output_files.py +1 -0
- epstein_files/util/constant/strings.py +3 -2
- epstein_files/util/constant/urls.py +14 -2
- epstein_files/util/constants.py +72 -20
- epstein_files/util/data.py +0 -19
- epstein_files/util/doc_cfg.py +24 -14
- epstein_files/util/env.py +3 -1
- epstein_files/util/highlighted_group.py +154 -127
- epstein_files/util/output.py +84 -152
- epstein_files/util/rich.py +6 -21
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.1.5.dist-info → epstein_files-1.2.0.dist-info}/METADATA +2 -1
- epstein_files-1.2.0.dist-info/RECORD +34 -0
- epstein_files-1.1.5.dist-info/RECORD +0 -33
- {epstein_files-1.1.5.dist-info → epstein_files-1.2.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.5.dist-info → epstein_files-1.2.0.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.5.dist-info → epstein_files-1.2.0.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -21,7 +21,8 @@ from epstein_files.util.env import args
|
|
|
21
21
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
22
|
from epstein_files.util.logging import exit_with_error, logger
|
|
23
23
|
from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
|
|
24
|
-
print_other_files_section, print_text_messages_section, print_email_timeline,
|
|
24
|
+
print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info_png,
|
|
25
|
+
print_json_metadata, write_urls)
|
|
25
26
|
from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
|
|
26
27
|
print_title_page_tables, print_subtitle_panel, write_html)
|
|
27
28
|
from epstein_files.util.timer import Timer
|
|
@@ -43,6 +44,9 @@ def generate_html() -> None:
|
|
|
43
44
|
elif args.json_files:
|
|
44
45
|
print_json_files(epstein_files)
|
|
45
46
|
exit()
|
|
47
|
+
elif args.emailers_info_png:
|
|
48
|
+
print_emailers_info_png(epstein_files)
|
|
49
|
+
exit()
|
|
46
50
|
|
|
47
51
|
print_title_page_header()
|
|
48
52
|
|
|
@@ -63,7 +63,7 @@ class Document:
|
|
|
63
63
|
|
|
64
64
|
Attributes:
|
|
65
65
|
file_path (Path): Local path to file
|
|
66
|
-
author (
|
|
66
|
+
author (Name): Who is responsible for the text in the file
|
|
67
67
|
config (DocCfg): Information about this fil
|
|
68
68
|
file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
|
|
69
69
|
filename (str): File's basename
|
|
@@ -74,7 +74,7 @@ class Document:
|
|
|
74
74
|
"""
|
|
75
75
|
file_path: Path
|
|
76
76
|
# Optional fields
|
|
77
|
-
author:
|
|
77
|
+
author: Name = None
|
|
78
78
|
config: EmailCfg | DocCfg | TextCfg | None = None
|
|
79
79
|
file_id: str = field(init=False)
|
|
80
80
|
filename: str = field(init=False)
|
|
@@ -121,6 +121,10 @@ class Document:
|
|
|
121
121
|
txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
|
|
122
122
|
return txt.append(epstein_media_doc_link_txt(self.config.duplicate_of_id, style='royal_blue1'))
|
|
123
123
|
|
|
124
|
+
def duplicate_of_id(self) -> str | None:
|
|
125
|
+
if self.config and self.config.duplicate_of_id:
|
|
126
|
+
return self.config.duplicate_of_id
|
|
127
|
+
|
|
124
128
|
def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
125
129
|
return self.external_link(epsteinify_doc_url, style, link_txt)
|
|
126
130
|
|
|
@@ -178,7 +182,7 @@ class Document:
|
|
|
178
182
|
return None
|
|
179
183
|
|
|
180
184
|
def is_duplicate(self) -> bool:
|
|
181
|
-
return bool(self.
|
|
185
|
+
return bool(self.duplicate_of_id())
|
|
182
186
|
|
|
183
187
|
def is_local_extract_file(self) -> bool:
|
|
184
188
|
"""True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
|
epstein_files/documents/email.py
CHANGED
|
@@ -20,7 +20,7 @@ from epstein_files.documents.emails.email_header import (BAD_EMAILER_REGEX, EMAI
|
|
|
20
20
|
from epstein_files.util.constant.names import *
|
|
21
21
|
from epstein_files.util.constant.strings import REDACTED
|
|
22
22
|
from epstein_files.util.constants import *
|
|
23
|
-
from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes,
|
|
23
|
+
from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes,
|
|
24
24
|
flatten, listify, remove_timezone, uniquify)
|
|
25
25
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
26
26
|
from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
|
|
@@ -55,6 +55,7 @@ REPLY_SPLITTERS = [f"{field}:" for field in FIELD_NAMES] + [
|
|
|
55
55
|
|
|
56
56
|
OCR_REPAIRS: dict[str | re.Pattern, str] = {
|
|
57
57
|
re.compile(r'grnail\.com'): 'gmail.com',
|
|
58
|
+
'Newsmax. corn': 'Newsmax.com',
|
|
58
59
|
re.compile(r"^(From|To)(: )?[_1.]{5,}", re.MULTILINE): rf"\1: {REDACTED}", # Redacted email addresses
|
|
59
60
|
# These 3 must come in this order!
|
|
60
61
|
re.compile(r'([/vkT]|Ai|li|(I|7)v)rote:'): 'wrote:',
|
|
@@ -79,6 +80,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
|
|
|
79
80
|
'twitter glhsummers': 'twitter @lhsummers',
|
|
80
81
|
re.compile(r"twitter\.com[i/][lI]krauss[1lt]"): "twitter.com/lkrauss1",
|
|
81
82
|
re.compile(r'from my BlackBerry[0°] wireless device'): 'from my BlackBerry® wireless device',
|
|
83
|
+
re.compile(r'^INW$', re.MULTILINE): REDACTED,
|
|
82
84
|
# links
|
|
83
85
|
'Imps ://': 'https://',
|
|
84
86
|
re.compile(r'timestopics/people/t/landon jr thomas/inde\n?x\n?\.\n?h\n?tml'): 'timestopics/people/t/landon_jr_thomas/index.html',
|
|
@@ -127,14 +129,6 @@ EMAIL_SIGNATURE_REGEXES = {
|
|
|
127
129
|
UNKNOWN: re.compile(r"(This message is directed to and is for the use of the above-noted addressee only.*\nhereon\.)", re.DOTALL),
|
|
128
130
|
}
|
|
129
131
|
|
|
130
|
-
EMAIL_TABLE_COLS = [
|
|
131
|
-
{'name': 'Sent At', 'justify': 'left', 'style': TIMESTAMP_DIM},
|
|
132
|
-
{'name': 'From', 'justify': 'left', 'max_width': 20},
|
|
133
|
-
{'name': 'To', 'justify': 'left', 'max_width': 22},
|
|
134
|
-
{'name': 'Length', 'justify': 'right', 'style': 'wheat4'},
|
|
135
|
-
{'name': 'Subject', 'justify': 'left', 'min_width': 35, 'style': 'honeydew2'},
|
|
136
|
-
]
|
|
137
|
-
|
|
138
132
|
MAILING_LISTS = [
|
|
139
133
|
CAROLYN_RANGEL,
|
|
140
134
|
INTELLIGENCE_SQUARED,
|
|
@@ -142,10 +136,13 @@ MAILING_LISTS = [
|
|
|
142
136
|
JP_MORGAN_USGIO,
|
|
143
137
|
]
|
|
144
138
|
|
|
145
|
-
|
|
139
|
+
BBC_LISTS = JUNK_EMAILERS + MAILING_LISTS
|
|
140
|
+
|
|
141
|
+
TRUNCATE_ALL_EMAILS_FROM = BBC_LISTS + [
|
|
146
142
|
'Alan S Halperin',
|
|
147
143
|
'Mitchell Bard',
|
|
148
144
|
'Skip Rimer',
|
|
145
|
+
'Steven Victor MD',
|
|
149
146
|
]
|
|
150
147
|
|
|
151
148
|
TRUNCATION_LENGTHS = {
|
|
@@ -253,58 +250,15 @@ TRUNCATE_TERMS = [
|
|
|
253
250
|
'https://www.washingtonpost.com/politics/2018/09/04/transcript-phone-call',
|
|
254
251
|
]
|
|
255
252
|
|
|
256
|
-
# Some Paul Krassner emails have a ton of CCed parties we don't care about
|
|
257
|
-
KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id in ['025329', '024923', '033568']]))
|
|
258
|
-
|
|
259
|
-
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
260
|
-
USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
|
|
261
|
-
'Alan Dlugash', # CCed with Richard Kahn
|
|
262
|
-
'Alan Rogers', # Random CC
|
|
263
|
-
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
264
|
-
'BS Stern', # A random fwd of email we have
|
|
265
|
-
'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
|
|
266
|
-
'Connie Zaguirre', # Random CC
|
|
267
|
-
'Dan Fleuette', # CC from sean bannon
|
|
268
|
-
'Danny Goldberg', # Random Paul Krassner emails
|
|
269
|
-
GERALD_LEFCOURT, # Single CC
|
|
270
|
-
GORDON_GETTY, # Random CC
|
|
271
|
-
JEFF_FULLER, # Random Jean Luc Brunel CC
|
|
272
|
-
'Jojo Fontanilla', # Random CC
|
|
273
|
-
'Joseph Vinciguerra', # Random CC
|
|
274
|
-
'Larry Cohen', # Random Bill Gates CC
|
|
275
|
-
'Lyn Fontanilla', # Random CC
|
|
276
|
-
'Mark Albert', # Random CC
|
|
277
|
-
'Matthew Schafer', # Random CC
|
|
278
|
-
MICHAEL_BUCHHOLTZ, # Terry Kafka CC
|
|
279
|
-
'Nancy Dahl', # covered by Lawrence Krauss (her husband)
|
|
280
|
-
'Michael Simmons', # Random CC
|
|
281
|
-
'Nancy Portland', # Lawrence Krauss CC
|
|
282
|
-
'Oliver Goodenough', # Robert Trivers CC
|
|
283
|
-
'Peter Aldhous', # Lawrence Krauss CC
|
|
284
|
-
'Players2', # Hoffenberg CC
|
|
285
|
-
'Sam Harris', # Lawrence Krauss CC
|
|
286
|
-
SAMUEL_LEFF, # Random CC
|
|
287
|
-
'Sean T Lehane', # Random CC
|
|
288
|
-
'Stephen Rubin', # Random CC
|
|
289
|
-
'Tim Kane', # Random CC
|
|
290
|
-
'Travis Pangburn', # Random CC
|
|
291
|
-
'Vahe Stepanian', # Random CC
|
|
292
|
-
# Ross Gow BCC
|
|
293
|
-
'david.brown@thetimes.co.uk',
|
|
294
|
-
'io-anne.pugh@bbc.co.uk',
|
|
295
|
-
'martin.robinson@mailonline.co.uk',
|
|
296
|
-
'nick.alwav@bbc.co.uk'
|
|
297
|
-
'nick.sommerlad@mirror.co.uk',
|
|
298
|
-
'p.peachev@independent.co.uk',
|
|
299
|
-
]
|
|
300
|
-
|
|
301
253
|
METADATA_FIELDS = [
|
|
302
254
|
'is_junk_mail',
|
|
255
|
+
'is_mailing_list',
|
|
303
256
|
'recipients',
|
|
304
257
|
'sent_from_device',
|
|
305
258
|
'subject',
|
|
306
259
|
]
|
|
307
260
|
|
|
261
|
+
# Note the line repair happens *after* 'Importance: High' is removed
|
|
308
262
|
LINE_REPAIR_MERGES = {
|
|
309
263
|
'017523': 4,
|
|
310
264
|
'019407': [2, 4],
|
|
@@ -312,10 +266,14 @@ LINE_REPAIR_MERGES = {
|
|
|
312
266
|
'022673': 9,
|
|
313
267
|
'022684': 9,
|
|
314
268
|
'022695': 4,
|
|
269
|
+
'029773': [2, 5],
|
|
315
270
|
'023067': 3,
|
|
316
271
|
'025790': 2,
|
|
272
|
+
'029841': 3,
|
|
317
273
|
'026345': 3,
|
|
318
274
|
'026609': 4,
|
|
275
|
+
'033299': 3,
|
|
276
|
+
'026829': 3,
|
|
319
277
|
'026924': [2, 4],
|
|
320
278
|
'028931': [3, 6],
|
|
321
279
|
'029154': [2, 5],
|
|
@@ -326,6 +284,7 @@ LINE_REPAIR_MERGES = {
|
|
|
326
284
|
'029501': 2,
|
|
327
285
|
'029835': [2, 4],
|
|
328
286
|
'029889': 2,
|
|
287
|
+
'029545': [3, 5],
|
|
329
288
|
'029976': 3,
|
|
330
289
|
'030299': [7, 10],
|
|
331
290
|
'030381': [2, 4],
|
|
@@ -359,14 +318,14 @@ class Email(Communication):
|
|
|
359
318
|
actual_text (str) - best effort at the text actually sent in this email, excluding quoted replies and forwards
|
|
360
319
|
config (EmailCfg | None) - manual config for this email (if it exists)
|
|
361
320
|
header (EmailHeader) - header data extracted from the text (from/to/sent/subject etc)
|
|
362
|
-
recipients (list[
|
|
321
|
+
recipients (list[Name]) - who this email was sent to
|
|
363
322
|
sent_from_device (str | None) - "Sent from my iPhone" style signature (if it exists)
|
|
364
323
|
signature_substitution_counts (dict[str, int]) - count of how many times a signature was replaced with <...snipped...> for each participant
|
|
365
324
|
"""
|
|
366
325
|
actual_text: str = field(init=False)
|
|
367
326
|
config: EmailCfg | None = None
|
|
368
327
|
header: EmailHeader = field(init=False)
|
|
369
|
-
recipients: list[
|
|
328
|
+
recipients: list[Name] = field(default_factory=list)
|
|
370
329
|
sent_from_device: str | None = None
|
|
371
330
|
signature_substitution_counts: dict[str, int] = field(default_factory=dict) # defaultdict breaks asdict :(
|
|
372
331
|
|
|
@@ -394,7 +353,7 @@ class Email(Communication):
|
|
|
394
353
|
self.recipients.extend(self._extract_emailer_names(recipient))
|
|
395
354
|
|
|
396
355
|
# Assume mailing list emails are to Epstein
|
|
397
|
-
if self.author in
|
|
356
|
+
if self.author in BBC_LISTS and (self.is_note_to_self() or not self.recipients):
|
|
398
357
|
self.recipients = [JEFFREY_EPSTEIN]
|
|
399
358
|
|
|
400
359
|
# Remove self CCs but preserve self emails
|
|
@@ -423,7 +382,10 @@ class Email(Communication):
|
|
|
423
382
|
return bool(self.config and self.config.is_fwded_article)
|
|
424
383
|
|
|
425
384
|
def is_junk_mail(self) -> bool:
|
|
426
|
-
return self.author in JUNK_EMAILERS
|
|
385
|
+
return self.author in JUNK_EMAILERS
|
|
386
|
+
|
|
387
|
+
def is_mailing_list(self) -> bool:
|
|
388
|
+
return self.author in MAILING_LISTS or self.is_junk_mail()
|
|
427
389
|
|
|
428
390
|
def is_note_to_self(self) -> bool:
|
|
429
391
|
return self.recipients == [self.author]
|
|
@@ -431,6 +393,7 @@ class Email(Communication):
|
|
|
431
393
|
def metadata(self) -> Metadata:
|
|
432
394
|
local_metadata = asdict(self)
|
|
433
395
|
local_metadata['is_junk_mail'] = self.is_junk_mail()
|
|
396
|
+
local_metadata['is_mailing_list'] = self.is_junk_mail()
|
|
434
397
|
local_metadata['subject'] = self.subject() or None
|
|
435
398
|
metadata = super().metadata()
|
|
436
399
|
metadata.update({k: v for k, v in local_metadata.items() if v and k in METADATA_FIELDS})
|
|
@@ -550,6 +513,8 @@ class Email(Communication):
|
|
|
550
513
|
self.log_top_lines(msg='No email header match found!', level=log_level)
|
|
551
514
|
self.header = EmailHeader(field_names=[])
|
|
552
515
|
|
|
516
|
+
logger.debug(f"{self.file_id} extracted header\n\n{self.header}\n")
|
|
517
|
+
|
|
553
518
|
def _extract_timestamp(self) -> datetime:
|
|
554
519
|
if self.config and self.config.timestamp:
|
|
555
520
|
return self.config.timestamp
|
|
@@ -674,6 +639,9 @@ class Email(Communication):
|
|
|
674
639
|
elif self.file_id in ['025329']:
|
|
675
640
|
for _i in range(9):
|
|
676
641
|
self._merge_lines(2)
|
|
642
|
+
elif self.file_id in ['025812']:
|
|
643
|
+
for _i in range(2):
|
|
644
|
+
self._merge_lines(3)
|
|
677
645
|
elif self.file_id == '014860':
|
|
678
646
|
self._merge_lines(3)
|
|
679
647
|
self._merge_lines(4)
|
|
@@ -839,19 +807,29 @@ class Email(Communication):
|
|
|
839
807
|
self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
|
|
840
808
|
|
|
841
809
|
@staticmethod
|
|
842
|
-
def build_emails_table(emails: list['Email'],
|
|
810
|
+
def build_emails_table(emails: list['Email'], name: Name = '', title: str = '', show_length: bool = False) -> Table:
|
|
843
811
|
"""Turn a set of Emails into a Table."""
|
|
844
|
-
if title and
|
|
812
|
+
if title and name:
|
|
845
813
|
raise ValueError(f"Can't provide both 'author' and 'title' args")
|
|
846
|
-
elif
|
|
814
|
+
elif name == '' and title == '':
|
|
847
815
|
raise ValueError(f"Must provide either 'author' or 'title' arg")
|
|
848
816
|
|
|
849
|
-
author_style = get_style_for_name(
|
|
850
|
-
link_style = author_style if
|
|
817
|
+
author_style = get_style_for_name(name, allow_bold=False)
|
|
818
|
+
link_style = author_style if name else ARCHIVE_LINK_COLOR
|
|
819
|
+
min_width = len(name or UNKNOWN)
|
|
820
|
+
max_width = max(20, min_width)
|
|
821
|
+
|
|
822
|
+
columns = [
|
|
823
|
+
{'name': 'Sent At', 'justify': 'left', 'style': TIMESTAMP_DIM},
|
|
824
|
+
{'name': 'From', 'justify': 'left', 'min_width': min_width, 'max_width': max_width},
|
|
825
|
+
{'name': 'To', 'justify': 'left', 'min_width': min_width, 'max_width': max_width + 2},
|
|
826
|
+
{'name': 'Length', 'justify': 'right', 'style': 'wheat4'},
|
|
827
|
+
{'name': 'Subject', 'justify': 'left', 'min_width': 35, 'style': 'honeydew2'},
|
|
828
|
+
]
|
|
851
829
|
|
|
852
830
|
table = build_table(
|
|
853
831
|
title or None,
|
|
854
|
-
cols=[col for col in
|
|
832
|
+
cols=[col for col in columns if show_length or col['name'] not in ['Length']],
|
|
855
833
|
border_style=DEFAULT_TABLE_KWARGS['border_style'] if title else author_style,
|
|
856
834
|
header_style="bold",
|
|
857
835
|
highlight=True,
|
|
@@ -8,13 +8,13 @@ from epstein_files.util.doc_cfg import EmailCfg
|
|
|
8
8
|
from epstein_files.util.logging import logger
|
|
9
9
|
from epstein_files.util.rich import UNKNOWN
|
|
10
10
|
|
|
11
|
-
FIELD_NAMES = ['
|
|
11
|
+
FIELD_NAMES = ['Date', 'From', 'Sent', 'Subject']
|
|
12
12
|
NON_HEADER_FIELDS = ['field_names', 'num_header_rows', 'was_initially_empty']
|
|
13
13
|
ON_BEHALF_OF = 'on behalf of'
|
|
14
14
|
TO_FIELDS = ['bcc', 'cc', 'to']
|
|
15
15
|
EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
|
|
16
16
|
|
|
17
|
-
HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
|
|
17
|
+
HEADER_REGEX_STR = r'(((?:(?:Date|From|Sent|To|C[cC]|Importance|Subject|Bee|B[cC]{2}|Attachments|Classification|Flag):|on behalf of ?)(?! +(by |from my|via )).*\n){3,})'
|
|
18
18
|
EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
|
|
19
19
|
EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
|
|
20
20
|
EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL) # Match up to the next email header section
|
|
@@ -41,6 +41,8 @@ class EmailHeader:
|
|
|
41
41
|
subject: str | None = None
|
|
42
42
|
bcc: list[str] | None = None
|
|
43
43
|
cc: list[str] | None = None
|
|
44
|
+
classification: str | None = None
|
|
45
|
+
flag: str | None = None
|
|
44
46
|
importance: str | None = None
|
|
45
47
|
attachments: str | None = None
|
|
46
48
|
to: list[str] | None = None
|
|
@@ -4,9 +4,9 @@ from datetime import datetime
|
|
|
4
4
|
|
|
5
5
|
from rich.text import Text
|
|
6
6
|
|
|
7
|
-
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
|
|
7
|
+
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
|
|
8
8
|
from epstein_files.util.constant.strings import TIMESTAMP_DIM
|
|
9
|
-
from epstein_files.util.data import
|
|
9
|
+
from epstein_files.util.data import iso_timestamp
|
|
10
10
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
11
11
|
from epstein_files.util.logging import logger
|
|
12
12
|
from epstein_files.util.rich import TEXT_LINK, highlighter
|
|
@@ -25,7 +25,7 @@ DISPLAY_LAST_NAME_ONLY = [
|
|
|
25
25
|
@dataclass(kw_only=True)
|
|
26
26
|
class TextMessage:
|
|
27
27
|
"""Class representing a single iMessage text message."""
|
|
28
|
-
author:
|
|
28
|
+
author: Name
|
|
29
29
|
author_str: str = ''
|
|
30
30
|
is_id_confirmed: bool = False
|
|
31
31
|
text: str
|
|
@@ -10,11 +10,11 @@ from rich.text import Text
|
|
|
10
10
|
|
|
11
11
|
from epstein_files.documents.communication import Communication
|
|
12
12
|
from epstein_files.documents.imessage.text_message import TextMessage
|
|
13
|
-
from epstein_files.util.constant.names import JEFFREY_EPSTEIN,
|
|
13
|
+
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, Name
|
|
14
14
|
from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
|
|
15
15
|
from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
|
|
16
16
|
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
17
|
-
from epstein_files.util.highlighted_group import
|
|
17
|
+
from epstein_files.util.highlighted_group import styled_name
|
|
18
18
|
from epstein_files.util.logging import logger
|
|
19
19
|
from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
|
|
20
20
|
|
|
@@ -35,7 +35,7 @@ class MessengerLog(Communication):
|
|
|
35
35
|
super().__post_init__()
|
|
36
36
|
self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
|
|
37
37
|
|
|
38
|
-
def first_message_at(self, name:
|
|
38
|
+
def first_message_at(self, name: Name) -> datetime:
|
|
39
39
|
return self.messages_by(name)[0].parse_timestamp()
|
|
40
40
|
|
|
41
41
|
def info_txt(self) -> Text | None:
|
|
@@ -54,10 +54,10 @@ class MessengerLog(Communication):
|
|
|
54
54
|
|
|
55
55
|
return txt.append(')')
|
|
56
56
|
|
|
57
|
-
def last_message_at(self, name:
|
|
57
|
+
def last_message_at(self, name: Name) -> datetime:
|
|
58
58
|
return self.messages_by(name)[-1].parse_timestamp()
|
|
59
59
|
|
|
60
|
-
def messages_by(self, name:
|
|
60
|
+
def messages_by(self, name: Name) -> list[TextMessage]:
|
|
61
61
|
"""Return all messages by 'name'."""
|
|
62
62
|
return [m for m in self.messages if m.author == name]
|
|
63
63
|
|
|
@@ -129,9 +129,9 @@ class MessengerLog(Communication):
|
|
|
129
129
|
yield message
|
|
130
130
|
|
|
131
131
|
@classmethod
|
|
132
|
-
def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[
|
|
132
|
+
def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[Name, int]:
|
|
133
133
|
"""Count up how many texts were sent by each author."""
|
|
134
|
-
sender_counts: dict[
|
|
134
|
+
sender_counts: dict[Name, int] = defaultdict(int)
|
|
135
135
|
|
|
136
136
|
for message_log in imessage_logs:
|
|
137
137
|
for message in message_log.messages:
|