epstein-files 1.1.2__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +17 -20
- epstein_files/documents/communication.py +3 -3
- epstein_files/documents/document.py +3 -0
- epstein_files/documents/email.py +75 -64
- epstein_files/documents/imessage/text_message.py +5 -9
- epstein_files/documents/messenger_log.py +2 -2
- epstein_files/epstein_files.py +17 -15
- epstein_files/util/constant/names.py +39 -38
- epstein_files/util/constant/strings.py +1 -0
- epstein_files/util/constants.py +65 -9
- epstein_files/util/data.py +9 -1
- epstein_files/util/doc_cfg.py +8 -2
- epstein_files/util/env.py +11 -1
- epstein_files/util/file_helper.py +4 -1
- epstein_files/util/highlighted_group.py +99 -52
- epstein_files/util/output.py +112 -94
- epstein_files/util/rich.py +28 -35
- epstein_files/util/word_count.py +1 -2
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/METADATA +4 -1
- epstein_files-1.1.5.dist-info/RECORD +33 -0
- epstein_files-1.1.2.dist-info/RECORD +0 -33
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.2.dist-info → epstein_files-1.1.5.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -16,13 +16,14 @@ from rich.text import Text
|
|
|
16
16
|
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
17
17
|
from epstein_files.documents.document import INFO_PADDING, Document
|
|
18
18
|
from epstein_files.documents.email import Email
|
|
19
|
-
from epstein_files.util.constant.output_files import
|
|
19
|
+
from epstein_files.util.constant.output_files import make_clean
|
|
20
20
|
from epstein_files.util.env import args
|
|
21
21
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
22
|
from epstein_files.util.logging import exit_with_error, logger
|
|
23
23
|
from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
|
|
24
|
-
print_other_files_section, print_text_messages_section,
|
|
25
|
-
from epstein_files.util.rich import build_highlighter, console, print_color_key, print_title_page_header,
|
|
24
|
+
print_other_files_section, print_text_messages_section, print_email_timeline, print_json_metadata, write_urls)
|
|
25
|
+
from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
|
|
26
|
+
print_title_page_tables, print_subtitle_panel, write_html)
|
|
26
27
|
from epstein_files.util.timer import Timer
|
|
27
28
|
from epstein_files.util.word_count import write_word_counts_html
|
|
28
29
|
|
|
@@ -37,13 +38,13 @@ def generate_html() -> None:
|
|
|
37
38
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
38
39
|
|
|
39
40
|
if args.json_metadata:
|
|
40
|
-
|
|
41
|
+
print_json_metadata(epstein_files)
|
|
41
42
|
exit()
|
|
42
43
|
elif args.json_files:
|
|
43
44
|
print_json_files(epstein_files)
|
|
44
45
|
exit()
|
|
45
46
|
|
|
46
|
-
print_title_page_header(
|
|
47
|
+
print_title_page_header()
|
|
47
48
|
|
|
48
49
|
if args.email_timeline:
|
|
49
50
|
print_color_key()
|
|
@@ -62,7 +63,7 @@ def generate_html() -> None:
|
|
|
62
63
|
emails_that_were_printed = print_emails_section(epstein_files)
|
|
63
64
|
timer.print_at_checkpoint(f"Printed {len(emails_that_were_printed):,} emails")
|
|
64
65
|
elif args.email_timeline:
|
|
65
|
-
|
|
66
|
+
print_email_timeline(epstein_files)
|
|
66
67
|
timer.print_at_checkpoint(f"Printed chronological emails table")
|
|
67
68
|
|
|
68
69
|
if args.output_other:
|
|
@@ -74,15 +75,7 @@ def generate_html() -> None:
|
|
|
74
75
|
print_other_files_section(files, epstein_files)
|
|
75
76
|
timer.print_at_checkpoint(f"Printed {len(files)} other files (skipped {len(epstein_files.other_files) - len(files)})")
|
|
76
77
|
|
|
77
|
-
|
|
78
|
-
if args.all_emails:
|
|
79
|
-
output_path = ALL_EMAILS_PATH
|
|
80
|
-
elif args.email_timeline:
|
|
81
|
-
output_path = CHRONOLOGICAL_EMAILS_PATH
|
|
82
|
-
else:
|
|
83
|
-
output_path = TEXT_MSGS_HTML_PATH
|
|
84
|
-
|
|
85
|
-
write_html(output_path)
|
|
78
|
+
write_html(args.build)
|
|
86
79
|
logger.warning(f"Total time: {timer.seconds_since_start_str()}")
|
|
87
80
|
|
|
88
81
|
# JSON stats (mostly used for building pytest checks)
|
|
@@ -103,8 +96,7 @@ def epstein_search():
|
|
|
103
96
|
for search_term in args.positional_args:
|
|
104
97
|
temp_highlighter = build_highlighter(search_term)
|
|
105
98
|
search_results = epstein_files.docs_matching(search_term, args.names)
|
|
106
|
-
|
|
107
|
-
print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
|
|
99
|
+
print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
|
|
108
100
|
|
|
109
101
|
for search_result in search_results:
|
|
110
102
|
console.line()
|
|
@@ -122,11 +114,16 @@ def epstein_search():
|
|
|
122
114
|
def epstein_show():
|
|
123
115
|
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
124
116
|
_assert_positional_args()
|
|
125
|
-
|
|
126
|
-
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
127
|
-
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
117
|
+
raw_docs: list[Document] = []
|
|
128
118
|
console.line()
|
|
129
119
|
|
|
120
|
+
try:
|
|
121
|
+
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
122
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
123
|
+
docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
|
|
124
|
+
except Exception as e:
|
|
125
|
+
exit_with_error(str(e))
|
|
126
|
+
|
|
130
127
|
for doc in docs:
|
|
131
128
|
console.print('\n', doc, '\n')
|
|
132
129
|
|
|
@@ -9,7 +9,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, Document
|
|
|
9
9
|
from epstein_files.util.constant.names import UNKNOWN
|
|
10
10
|
from epstein_files.util.constants import FALLBACK_TIMESTAMP
|
|
11
11
|
from epstein_files.util.doc_cfg import CommunicationCfg
|
|
12
|
-
from epstein_files.util.highlighted_group import get_style_for_name
|
|
12
|
+
from epstein_files.util.highlighted_group import get_style_for_name, styled_name
|
|
13
13
|
from epstein_files.util.rich import key_value_txt
|
|
14
14
|
|
|
15
15
|
TIMESTAMP_SECONDS_REGEX = re.compile(r":\d{2}$")
|
|
@@ -25,10 +25,10 @@ class Communication(Document):
|
|
|
25
25
|
return self.author or UNKNOWN
|
|
26
26
|
|
|
27
27
|
def author_style(self) -> str:
|
|
28
|
-
return get_style_for_name(self.
|
|
28
|
+
return get_style_for_name(self.author)
|
|
29
29
|
|
|
30
30
|
def author_txt(self) -> Text:
|
|
31
|
-
return
|
|
31
|
+
return styled_name(self.author)
|
|
32
32
|
|
|
33
33
|
def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
|
|
34
34
|
"""Overrides super() method to apply self.author_style."""
|
|
@@ -88,6 +88,9 @@ class Document:
|
|
|
88
88
|
strip_whitespace: ClassVar[bool] = True # Overridden in JsonFile
|
|
89
89
|
|
|
90
90
|
def __post_init__(self):
|
|
91
|
+
if not self.file_path.exists():
|
|
92
|
+
raise FileNotFoundError(f"File '{self.file_path.name}' does not exist!")
|
|
93
|
+
|
|
91
94
|
self.filename = self.file_path.name
|
|
92
95
|
self.file_id = extract_file_id(self.filename)
|
|
93
96
|
# config and url_slug could have been pre-set in Email
|
epstein_files/documents/email.py
CHANGED
|
@@ -24,7 +24,7 @@ from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_si
|
|
|
24
24
|
flatten, listify, remove_timezone, uniquify)
|
|
25
25
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
26
26
|
from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
|
|
27
|
-
from epstein_files.util.highlighted_group import get_style_for_name
|
|
27
|
+
from epstein_files.util.highlighted_group import JUNK_EMAILERS, get_style_for_name
|
|
28
28
|
from epstein_files.util.logging import logger
|
|
29
29
|
from epstein_files.util.rich import *
|
|
30
30
|
|
|
@@ -71,6 +71,7 @@ OCR_REPAIRS: dict[str | re.Pattern, str] = {
|
|
|
71
71
|
# Signatures
|
|
72
72
|
'BlackBerry by AT &T': 'BlackBerry by AT&T',
|
|
73
73
|
'BlackBerry from T- Mobile': 'BlackBerry from T-Mobile',
|
|
74
|
+
'Envoy& de mon iPhone': 'Envoyé de mon iPhone',
|
|
74
75
|
"from my 'Phone": 'from my iPhone',
|
|
75
76
|
'from Samsung Mob.le': 'from Samsung Mobile',
|
|
76
77
|
'gJeremyRubin': '@JeremyRubin',
|
|
@@ -126,13 +127,12 @@ EMAIL_SIGNATURE_REGEXES = {
|
|
|
126
127
|
UNKNOWN: re.compile(r"(This message is directed to and is for the use of the above-noted addressee only.*\nhereon\.)", re.DOTALL),
|
|
127
128
|
}
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
'
|
|
132
|
-
|
|
133
|
-
'
|
|
134
|
-
'
|
|
135
|
-
'Jokeland',
|
|
130
|
+
EMAIL_TABLE_COLS = [
|
|
131
|
+
{'name': 'Sent At', 'justify': 'left', 'style': TIMESTAMP_DIM},
|
|
132
|
+
{'name': 'From', 'justify': 'left', 'max_width': 20},
|
|
133
|
+
{'name': 'To', 'justify': 'left', 'max_width': 22},
|
|
134
|
+
{'name': 'Length', 'justify': 'right', 'style': 'wheat4'},
|
|
135
|
+
{'name': 'Subject', 'justify': 'left', 'min_width': 35, 'style': 'honeydew2'},
|
|
136
136
|
]
|
|
137
137
|
|
|
138
138
|
MAILING_LISTS = [
|
|
@@ -150,11 +150,13 @@ TRUNCATE_ALL_EMAILS_FROM = JUNK_EMAILERS + MAILING_LISTS + [
|
|
|
150
150
|
|
|
151
151
|
TRUNCATION_LENGTHS = {
|
|
152
152
|
'023627': 16_800, # Michael Wolff article with Brock Pierce
|
|
153
|
-
'030245':
|
|
154
|
-
'030781':
|
|
155
|
-
'032906':
|
|
153
|
+
'030245': None, # Epstein rationalizes his behavior in an open letter to the world
|
|
154
|
+
'030781': None, # Bannon email about crypto coin issues
|
|
155
|
+
'032906': None, # David Blaine email
|
|
156
156
|
'026036': 6000, # Gino Yu blockchain mention
|
|
157
|
-
'023208':
|
|
157
|
+
'023208': None, # Long discussion about leon black's finances
|
|
158
|
+
'029609': None, # Joi Ito
|
|
159
|
+
'025233': None, # Reputation.com discussion
|
|
158
160
|
}
|
|
159
161
|
|
|
160
162
|
# These are long forwarded articles so we force a trim to 1,333 chars if these strings exist
|
|
@@ -296,14 +298,6 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
|
|
|
296
298
|
'p.peachev@independent.co.uk',
|
|
297
299
|
]
|
|
298
300
|
|
|
299
|
-
# Emails sent by epstein to himself that are just notes
|
|
300
|
-
SELF_EMAILS_FILE_IDS = [
|
|
301
|
-
'026677',
|
|
302
|
-
'029752', # TODO: jokeland...
|
|
303
|
-
'030238',
|
|
304
|
-
# '033274', # TODO: Epstein's note to self doesn't get printed if we don't set the recipients to [None]
|
|
305
|
-
]
|
|
306
|
-
|
|
307
301
|
METADATA_FIELDS = [
|
|
308
302
|
'is_junk_mail',
|
|
309
303
|
'recipients',
|
|
@@ -320,6 +314,7 @@ LINE_REPAIR_MERGES = {
|
|
|
320
314
|
'022695': 4,
|
|
321
315
|
'023067': 3,
|
|
322
316
|
'025790': 2,
|
|
317
|
+
'026345': 3,
|
|
323
318
|
'026609': 4,
|
|
324
319
|
'026924': [2, 4],
|
|
325
320
|
'028931': [3, 6],
|
|
@@ -346,6 +341,7 @@ LINE_REPAIR_MERGES = {
|
|
|
346
341
|
'032405': 4,
|
|
347
342
|
'033097': 2,
|
|
348
343
|
'033144': [2, 4],
|
|
344
|
+
'033217': 3,
|
|
349
345
|
'033228': [3, 5],
|
|
350
346
|
'033357': [2, 4],
|
|
351
347
|
'033486': [7, 9],
|
|
@@ -391,25 +387,21 @@ class Email(Communication):
|
|
|
391
387
|
|
|
392
388
|
super().__post_init__()
|
|
393
389
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
# Remove self CCs
|
|
411
|
-
recipients = [r for r in self.recipients if r != self.author or self.file_id in SELF_EMAILS_FILE_IDS]
|
|
412
|
-
self.recipients = list(set(recipients))
|
|
390
|
+
if self.config and self.config.recipients:
|
|
391
|
+
self.recipients = self.config.recipients
|
|
392
|
+
else:
|
|
393
|
+
for recipient in self.header.recipients():
|
|
394
|
+
self.recipients.extend(self._extract_emailer_names(recipient))
|
|
395
|
+
|
|
396
|
+
# Assume mailing list emails are to Epstein
|
|
397
|
+
if self.author in MAILING_LISTS and (self.is_note_to_self() or not self.recipients):
|
|
398
|
+
self.recipients = [JEFFREY_EPSTEIN]
|
|
399
|
+
|
|
400
|
+
# Remove self CCs but preserve self emails
|
|
401
|
+
if not self.is_note_to_self():
|
|
402
|
+
self.recipients = [r for r in self.recipients if r != self.author]
|
|
403
|
+
|
|
404
|
+
self.recipients = sorted(list(set(self.recipients)), key=lambda r: r or UNKNOWN)
|
|
413
405
|
self.text = self._prettify_text()
|
|
414
406
|
self.actual_text = self._actual_text()
|
|
415
407
|
self.sent_from_device = self._sent_from_device()
|
|
@@ -419,8 +411,13 @@ class Email(Communication):
|
|
|
419
411
|
|
|
420
412
|
def info_txt(self) -> Text:
|
|
421
413
|
email_type = 'fwded article' if self.is_fwded_article() else 'email'
|
|
422
|
-
txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt())
|
|
423
|
-
|
|
414
|
+
txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt())
|
|
415
|
+
|
|
416
|
+
if self.config and self.config.is_attribution_uncertain:
|
|
417
|
+
txt.append(f" {QUESTION_MARKS}", style=self.author_style())
|
|
418
|
+
|
|
419
|
+
txt.append(' to ').append(self.recipients_txt())
|
|
420
|
+
return txt.append(highlighter(f" probably sent at {self.timestamp}"))
|
|
424
421
|
|
|
425
422
|
def is_fwded_article(self) -> bool:
|
|
426
423
|
return bool(self.config and self.config.is_fwded_article)
|
|
@@ -428,6 +425,9 @@ class Email(Communication):
|
|
|
428
425
|
def is_junk_mail(self) -> bool:
|
|
429
426
|
return self.author in JUNK_EMAILERS or self.author in MAILING_LISTS
|
|
430
427
|
|
|
428
|
+
def is_note_to_self(self) -> bool:
|
|
429
|
+
return self.recipients == [self.author]
|
|
430
|
+
|
|
431
431
|
def metadata(self) -> Metadata:
|
|
432
432
|
local_metadata = asdict(self)
|
|
433
433
|
local_metadata['is_junk_mail'] = self.is_junk_mail()
|
|
@@ -447,7 +447,10 @@ class Email(Communication):
|
|
|
447
447
|
], join=', ')
|
|
448
448
|
|
|
449
449
|
def subject(self) -> str:
|
|
450
|
-
|
|
450
|
+
if self.config and self.config.subject:
|
|
451
|
+
return self.config.subject
|
|
452
|
+
else:
|
|
453
|
+
return self.header.subject or ''
|
|
451
454
|
|
|
452
455
|
def summary(self) -> Text:
|
|
453
456
|
"""One line summary mostly for logging."""
|
|
@@ -498,11 +501,8 @@ class Email(Communication):
|
|
|
498
501
|
|
|
499
502
|
def _border_style(self) -> str:
|
|
500
503
|
"""Color emails from epstein to others with the color for the first recipient."""
|
|
501
|
-
if self.author == JEFFREY_EPSTEIN:
|
|
502
|
-
|
|
503
|
-
style = self.author_style()
|
|
504
|
-
else:
|
|
505
|
-
style = get_style_for_name(self.recipients[0])
|
|
504
|
+
if self.author == JEFFREY_EPSTEIN and len(self.recipients) > 0:
|
|
505
|
+
style = get_style_for_name(self.recipients[0])
|
|
506
506
|
else:
|
|
507
507
|
style = self.author_style()
|
|
508
508
|
|
|
@@ -772,7 +772,7 @@ class Email(Communication):
|
|
|
772
772
|
if args.whole_file:
|
|
773
773
|
num_chars = len(self.text)
|
|
774
774
|
elif self.file_id in TRUNCATION_LENGTHS:
|
|
775
|
-
num_chars = TRUNCATION_LENGTHS[self.file_id]
|
|
775
|
+
num_chars = TRUNCATION_LENGTHS[self.file_id] or self.file_size()
|
|
776
776
|
elif self.author in TRUNCATE_ALL_EMAILS_FROM or includes_truncate_term:
|
|
777
777
|
num_chars = int(MAX_CHARS_TO_PRINT / 3)
|
|
778
778
|
elif quote_cutoff and quote_cutoff < MAX_CHARS_TO_PRINT:
|
|
@@ -839,26 +839,37 @@ class Email(Communication):
|
|
|
839
839
|
self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
|
|
840
840
|
|
|
841
841
|
@staticmethod
|
|
842
|
-
def build_emails_table(emails: list['Email'],
|
|
843
|
-
"""Turn a set of Emails
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
842
|
+
def build_emails_table(emails: list['Email'], author: str | None = '', title: str = '', show_length: bool = False) -> Table:
|
|
843
|
+
"""Turn a set of Emails into a Table."""
|
|
844
|
+
if title and author:
|
|
845
|
+
raise ValueError(f"Can't provide both 'author' and 'title' args")
|
|
846
|
+
elif author == '' and title == '':
|
|
847
|
+
raise ValueError(f"Must provide either 'author' or 'title' arg")
|
|
848
|
+
|
|
849
|
+
author_style = get_style_for_name(author, allow_bold=False)
|
|
850
|
+
link_style = author_style if author else ARCHIVE_LINK_COLOR
|
|
851
|
+
|
|
852
|
+
table = build_table(
|
|
853
|
+
title or None,
|
|
854
|
+
cols=[col for col in EMAIL_TABLE_COLS if show_length or col['name'] not in ['Length']],
|
|
855
|
+
border_style=DEFAULT_TABLE_KWARGS['border_style'] if title else author_style,
|
|
856
|
+
header_style="bold",
|
|
857
|
+
highlight=True,
|
|
850
858
|
)
|
|
851
859
|
|
|
852
|
-
table.add_column('From', justify='left')
|
|
853
|
-
table.add_column('Timestamp', justify='center')
|
|
854
|
-
table.add_column('Subject', justify='left', style='honeydew2', min_width=70)
|
|
855
|
-
|
|
856
860
|
for email in emails:
|
|
857
|
-
|
|
861
|
+
fields = [
|
|
862
|
+
email.epstein_media_link(link_txt=email.timestamp_without_seconds(), style=link_style),
|
|
858
863
|
email.author_txt(),
|
|
859
|
-
email.
|
|
860
|
-
|
|
861
|
-
|
|
864
|
+
email.recipients_txt(max_full_names=1),
|
|
865
|
+
f"{email.length()}",
|
|
866
|
+
email.subject(),
|
|
867
|
+
]
|
|
868
|
+
|
|
869
|
+
if not show_length:
|
|
870
|
+
del fields[3]
|
|
871
|
+
|
|
872
|
+
table.add_row(*fields)
|
|
862
873
|
|
|
863
874
|
return table
|
|
864
875
|
|
|
@@ -11,19 +11,16 @@ from epstein_files.util.highlighted_group import get_style_for_name
|
|
|
11
11
|
from epstein_files.util.logging import logger
|
|
12
12
|
from epstein_files.util.rich import TEXT_LINK, highlighter
|
|
13
13
|
|
|
14
|
+
EPSTEIN_TEXTERS = ['e:', 'e:jeeitunes@gmail.com']
|
|
14
15
|
MSG_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
|
|
15
16
|
PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
|
|
17
|
+
UNCERTAIN_SUFFIX = ' (?)'
|
|
16
18
|
|
|
17
19
|
DISPLAY_LAST_NAME_ONLY = [
|
|
18
20
|
JEFFREY_EPSTEIN,
|
|
19
21
|
STEVE_BANNON,
|
|
20
22
|
]
|
|
21
23
|
|
|
22
|
-
TEXTER_MAPPING = {
|
|
23
|
-
'e:': JEFFREY_EPSTEIN,
|
|
24
|
-
'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
|
|
25
|
-
}
|
|
26
|
-
|
|
27
24
|
|
|
28
25
|
@dataclass(kw_only=True)
|
|
29
26
|
class TextMessage:
|
|
@@ -35,7 +32,7 @@ class TextMessage:
|
|
|
35
32
|
timestamp_str: str
|
|
36
33
|
|
|
37
34
|
def __post_init__(self):
|
|
38
|
-
self.author =
|
|
35
|
+
self.author = JEFFREY_EPSTEIN if self.author in EPSTEIN_TEXTERS else self.author
|
|
39
36
|
|
|
40
37
|
if not self.author:
|
|
41
38
|
self.author_str = UNKNOWN
|
|
@@ -45,7 +42,7 @@ class TextMessage:
|
|
|
45
42
|
self.author_str = self.author_str or self.author
|
|
46
43
|
|
|
47
44
|
if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
|
|
48
|
-
self.author_str +=
|
|
45
|
+
self.author_str += UNCERTAIN_SUFFIX
|
|
49
46
|
|
|
50
47
|
if self.is_link():
|
|
51
48
|
self.text = self.text.replace('\n', '').replace(' ', '_')
|
|
@@ -59,12 +56,11 @@ class TextMessage:
|
|
|
59
56
|
return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
|
|
60
57
|
|
|
61
58
|
def timestamp_txt(self) -> Text:
|
|
62
|
-
timestamp_str = self.timestamp_str
|
|
63
|
-
|
|
64
59
|
try:
|
|
65
60
|
timestamp_str = iso_timestamp(self.parse_timestamp())
|
|
66
61
|
except Exception as e:
|
|
67
62
|
logger.warning(f"Failed to parse timestamp for {self}")
|
|
63
|
+
timestamp_str = self.timestamp_str
|
|
68
64
|
|
|
69
65
|
return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
|
|
70
66
|
|
|
@@ -14,7 +14,7 @@ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
|
|
|
14
14
|
from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
|
|
15
15
|
from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
|
|
16
16
|
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
17
|
-
from epstein_files.util.highlighted_group import get_style_for_name
|
|
17
|
+
from epstein_files.util.highlighted_group import get_style_for_name, styled_name
|
|
18
18
|
from epstein_files.util.logging import logger
|
|
19
19
|
from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
|
|
20
20
|
|
|
@@ -160,7 +160,7 @@ class MessengerLog(Communication):
|
|
|
160
160
|
last_at = logs[-1].first_message_at(name)
|
|
161
161
|
|
|
162
162
|
counts_table.add_row(
|
|
163
|
-
|
|
163
|
+
styled_name(name),
|
|
164
164
|
str(len(logs)),
|
|
165
165
|
f"{count:,}",
|
|
166
166
|
iso_timestamp(first_at),
|
epstein_files/epstein_files.py
CHANGED
|
@@ -30,7 +30,6 @@ from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, con
|
|
|
30
30
|
from epstein_files.util.search_result import SearchResult
|
|
31
31
|
from epstein_files.util.timer import Timer
|
|
32
32
|
|
|
33
|
-
EXCLUDED_EMAILERS = USELESS_EMAILERS + [JEFFREY_EPSTEIN]
|
|
34
33
|
DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
|
|
35
34
|
DEVICE_SIGNATURE = 'Device Signature'
|
|
36
35
|
DEVICE_SIGNATURE_PADDING = (1, 0)
|
|
@@ -116,9 +115,9 @@ class EpsteinFiles:
|
|
|
116
115
|
return self.imessage_logs + self.emails + self.other_files
|
|
117
116
|
|
|
118
117
|
def all_emailers(self, include_useless: bool = False) -> list[str | None]:
|
|
119
|
-
"""Returns all emailers
|
|
118
|
+
"""Returns all emailers except USELESS_EMAILERS (unless include_useless is True), sorted from least frequent to most."""
|
|
120
119
|
names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
|
|
121
|
-
names = names if include_useless else [e for e in names if e not in
|
|
120
|
+
names = names if include_useless else [e for e in names if e not in USELESS_EMAILERS]
|
|
122
121
|
return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
|
|
123
122
|
|
|
124
123
|
def docs_matching(
|
|
@@ -167,7 +166,10 @@ class EpsteinFiles:
|
|
|
167
166
|
|
|
168
167
|
def emails_for(self, author: str | None) -> list[Email]:
|
|
169
168
|
"""Returns emails to or from a given 'author' sorted chronologically."""
|
|
170
|
-
|
|
169
|
+
if author == JEFFREY_EPSTEIN:
|
|
170
|
+
emails = [e for e in self.emails_by(JEFFREY_EPSTEIN) if e.is_note_to_self()]
|
|
171
|
+
else:
|
|
172
|
+
emails = self.emails_by(author) + self.emails_to(author)
|
|
171
173
|
|
|
172
174
|
if len(emails) == 0:
|
|
173
175
|
raise RuntimeError(f"No emails found for '{author}'")
|
|
@@ -182,7 +184,7 @@ class EpsteinFiles:
|
|
|
182
184
|
|
|
183
185
|
return Document.sort_by_timestamp(emails)
|
|
184
186
|
|
|
185
|
-
def
|
|
187
|
+
def for_ids(self, file_ids: str | list[str]) -> list[Document]:
|
|
186
188
|
file_ids = listify(file_ids)
|
|
187
189
|
docs = [doc for doc in self.all_documents() if doc.file_id in file_ids]
|
|
188
190
|
|
|
@@ -218,8 +220,8 @@ class EpsteinFiles:
|
|
|
218
220
|
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
219
221
|
|
|
220
222
|
def print_files_summary(self) -> None:
|
|
221
|
-
table = build_table('
|
|
222
|
-
add_cols_to_table(table, ['File Type', '
|
|
223
|
+
table = build_table('File Overview')
|
|
224
|
+
add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
|
|
223
225
|
table.columns[1].justify = 'right'
|
|
224
226
|
|
|
225
227
|
def add_row(label: str, docs: list):
|
|
@@ -247,13 +249,14 @@ class EpsteinFiles:
|
|
|
247
249
|
unique_emails = [email for email in emails if not email.is_duplicate()]
|
|
248
250
|
start_date = emails[0].timestamp.date()
|
|
249
251
|
author = _author or UNKNOWN
|
|
252
|
+
title = f"Found {len(unique_emails)} emails"
|
|
250
253
|
|
|
251
|
-
|
|
252
|
-
f"
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
)
|
|
254
|
+
if author == JEFFREY_EPSTEIN:
|
|
255
|
+
title += f" sent by {JEFFREY_EPSTEIN} to himself"
|
|
256
|
+
else:
|
|
257
|
+
title += f" to/from {author} starting {start_date} covering {num_days:,} days"
|
|
256
258
|
|
|
259
|
+
print_author_panel(title, get_info_for_name(author), get_style_for_name(author))
|
|
257
260
|
self.print_emails_table_for(_author)
|
|
258
261
|
last_printed_email_was_duplicate = False
|
|
259
262
|
|
|
@@ -272,11 +275,10 @@ class EpsteinFiles:
|
|
|
272
275
|
|
|
273
276
|
def print_emails_table_for(self, author: str | None) -> None:
|
|
274
277
|
emails = [email for email in self.emails_for(author) if not email.is_duplicate()] # Remove dupes
|
|
275
|
-
print_centered(Email.build_emails_table(emails, author))
|
|
276
|
-
console.line()
|
|
278
|
+
print_centered(Padding(Email.build_emails_table(emails, author), (0, 5, 1, 5)))
|
|
277
279
|
|
|
278
280
|
def print_email_device_info(self) -> None:
|
|
279
|
-
print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE
|
|
281
|
+
print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE)
|
|
280
282
|
console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
|
|
281
283
|
console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
|
|
282
284
|
|
|
@@ -178,6 +178,7 @@ HENRY_HOLT = 'Henry Holt' # Actually a company?
|
|
|
178
178
|
IVANKA = 'Ivanka'
|
|
179
179
|
JAMES_PATTERSON = 'James Patterson'
|
|
180
180
|
JARED_KUSHNER = 'Jared Kushner'
|
|
181
|
+
JEFFREY_WERNICK = 'Jeffrey Wernick'
|
|
181
182
|
JULIE_K_BROWN = 'Julie K. Brown'
|
|
182
183
|
KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
|
|
183
184
|
MICHAEL_J_BOCCIO = 'Michael J. Boccio'
|
|
@@ -207,53 +208,53 @@ TRUMP_ORG = 'Trump Organization'
|
|
|
207
208
|
UBS = 'UBS'
|
|
208
209
|
|
|
209
210
|
# First and last names that should NOT be made part of a highlighting regex for emailers
|
|
210
|
-
NAMES_TO_NOT_HIGHLIGHT
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
211
|
+
NAMES_TO_NOT_HIGHLIGHT = """
|
|
212
|
+
al alan alfredo allen alex alexander amanda andres andrew
|
|
213
|
+
bard barrett barry bill black boris brad bruce
|
|
214
|
+
carolyn chris christina
|
|
215
|
+
dan daniel danny darren dave david donald
|
|
216
|
+
ed edward edwards enterprise enterprises entourage epstein eric erika etienne
|
|
217
|
+
faith fred friendly frost fuller
|
|
218
|
+
gerald george gold
|
|
219
|
+
harry hay heather henry hill hoffman
|
|
220
|
+
ian
|
|
221
|
+
jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
|
|
222
|
+
kahn karl kate katherine kelly ken kevin
|
|
223
|
+
larry laurie lawrence leon lesley linda link lisa
|
|
224
|
+
mann marc marie mark martin melanie michael mike miller mitchell miles morris moskowitz
|
|
225
|
+
nancy neal new nicole
|
|
226
|
+
owen
|
|
227
|
+
paul paula pen peter philip prince
|
|
228
|
+
randall reid richard robert rodriguez roger rosenberg ross roth roy rubin
|
|
229
|
+
scott sean skip stanley stern stephen steve steven stone susan
|
|
230
|
+
the thomas tim tom tony tyler
|
|
231
|
+
victor
|
|
232
|
+
wade waters
|
|
233
|
+
y
|
|
234
|
+
""".strip().split()
|
|
233
235
|
|
|
234
236
|
# Names to color white in the word counts
|
|
235
237
|
OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
236
238
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
237
239
|
baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
238
240
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
239
|
-
|
|
240
|
-
edmond elizabeth emily
|
|
241
|
-
ferguson flachsbart francis franco frank
|
|
242
|
-
gardner gary geoff geoffrey
|
|
243
|
-
hancock harold harrison
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
241
|
+
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
242
|
+
edmond elizabeth emily entwistle erik evelyn
|
|
243
|
+
ferguson flachsbart francis franco frank
|
|
244
|
+
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
245
|
+
hancock harold harrison helen hirsch hofstadter horowitz hussein
|
|
246
|
+
isaac isaacson
|
|
247
|
+
jamie jane janet jason jeffrey jen jim joe johnson jones josh julie justin
|
|
248
|
+
kathy kim kruger kyle
|
|
249
|
+
lawrence leo leonard lenny leslie lieberman louis lynch lynn
|
|
248
250
|
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
249
|
-
nancy
|
|
250
|
-
|
|
251
|
-
paulson peter philippe
|
|
251
|
+
nancy nussbaum
|
|
252
|
+
paulson philippe
|
|
252
253
|
rafael ray richard richardson rob robert robin ron rubin rudolph ryan
|
|
253
|
-
sara sarah
|
|
254
|
-
ted theresa thompson tiffany timothy
|
|
254
|
+
sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
255
|
+
ted theresa thompson tiffany timothy
|
|
255
256
|
valeria
|
|
256
|
-
walter warren
|
|
257
|
+
walter warren weinstein weiss william
|
|
257
258
|
zach zack
|
|
258
259
|
""".strip().split()
|
|
259
260
|
|