epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +31 -18
- epstein_files/documents/communication.py +9 -5
- epstein_files/documents/document.py +225 -136
- epstein_files/documents/doj_file.py +242 -0
- epstein_files/documents/doj_files/full_text.py +166 -0
- epstein_files/documents/email.py +138 -163
- epstein_files/documents/emails/email_header.py +21 -11
- epstein_files/documents/emails/emailers.py +223 -0
- epstein_files/documents/imessage/text_message.py +2 -3
- epstein_files/documents/json_file.py +18 -14
- epstein_files/documents/messenger_log.py +23 -39
- epstein_files/documents/other_file.py +48 -44
- epstein_files/epstein_files.py +54 -33
- epstein_files/person.py +142 -110
- epstein_files/util/constant/names.py +29 -6
- epstein_files/util/constant/output_files.py +2 -0
- epstein_files/util/constant/strings.py +12 -6
- epstein_files/util/constant/urls.py +17 -0
- epstein_files/util/constants.py +101 -174
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +20 -15
- epstein_files/util/env.py +24 -16
- epstein_files/util/file_helper.py +28 -6
- epstein_files/util/helpers/debugging_helper.py +13 -0
- epstein_files/util/helpers/env_helpers.py +21 -0
- epstein_files/util/highlighted_group.py +57 -16
- epstein_files/util/layout/left_bar_panel.py +26 -0
- epstein_files/util/logging.py +28 -13
- epstein_files/util/output.py +33 -10
- epstein_files/util/rich.py +28 -2
- epstein_files/util/word_count.py +7 -7
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
- epstein_files-1.5.0.dist-info/RECORD +40 -0
- epstein_files-1.4.1.dist-info/RECORD +0 -34
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
epstein_files/epstein_files.py
CHANGED
|
@@ -12,7 +12,8 @@ from typing import Sequence, Type, cast
|
|
|
12
12
|
from rich.table import Table
|
|
13
13
|
|
|
14
14
|
from epstein_files.documents.document import Document
|
|
15
|
-
from epstein_files.documents.
|
|
15
|
+
from epstein_files.documents.doj_file import DojFile
|
|
16
|
+
from epstein_files.documents.email import Email
|
|
16
17
|
from epstein_files.documents.json_file import JsonFile
|
|
17
18
|
from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
|
|
18
19
|
from epstein_files.documents.other_file import OtherFile
|
|
@@ -21,7 +22,7 @@ from epstein_files.util.constant.strings import *
|
|
|
21
22
|
from epstein_files.util.constants import *
|
|
22
23
|
from epstein_files.util.data import flatten, json_safe, listify, uniquify
|
|
23
24
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
24
|
-
from epstein_files.util.env import DOCS_DIR, args, logger
|
|
25
|
+
from epstein_files.util.env import DOCS_DIR, DOJ_PDFS_20260130_DIR, args, logger
|
|
25
26
|
from epstein_files.util.file_helper import file_size_str
|
|
26
27
|
from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
|
|
27
28
|
from epstein_files.util.search_result import SearchResult
|
|
@@ -49,14 +50,28 @@ class EpsteinFiles:
|
|
|
49
50
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
50
51
|
json_files: list[JsonFile] = field(default_factory=list)
|
|
51
52
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
53
|
+
doj_files: list[DojFile] = field(default_factory=list)
|
|
52
54
|
timer: Timer = field(default_factory=lambda: Timer())
|
|
53
55
|
uninteresting_ccs: list[Name] = field(default_factory=list)
|
|
54
56
|
|
|
57
|
+
@property
|
|
58
|
+
def all_documents(self) -> Sequence[Document]:
|
|
59
|
+
return self.imessage_logs + self.emails + self.other_files + self.doj_files
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def all_doj_files(self) -> Sequence[DojFile | Email]:
|
|
63
|
+
"""All files with the filename EFTAXXXXXX."""
|
|
64
|
+
return [doc for doc in self.all_documents if doc.is_doj_file]
|
|
65
|
+
|
|
55
66
|
def __post_init__(self):
|
|
56
67
|
"""Iterate through files and build appropriate objects."""
|
|
57
68
|
self.all_files = sorted([f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')])
|
|
58
|
-
|
|
59
|
-
|
|
69
|
+
|
|
70
|
+
if DOJ_PDFS_20260130_DIR:
|
|
71
|
+
self.all_files += sorted([f for f in DOJ_PDFS_20260130_DIR.glob('**/*.txt')])
|
|
72
|
+
|
|
73
|
+
docs = []
|
|
74
|
+
file_type_count = defaultdict(int) # Hack used by --skip-other-files option to get a few files parsed before skipping the rest
|
|
60
75
|
|
|
61
76
|
# Read through and classify all the files
|
|
62
77
|
for file_arg in self.all_files:
|
|
@@ -64,24 +79,25 @@ class EpsteinFiles:
|
|
|
64
79
|
document = Document(file_arg)
|
|
65
80
|
cls = document_cls(document)
|
|
66
81
|
|
|
67
|
-
if document.length
|
|
82
|
+
if document.length == 0:
|
|
68
83
|
logger.warning(f"Skipping empty file: {document}]")
|
|
69
84
|
continue
|
|
70
85
|
elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
|
|
71
86
|
document.log(f"Skipping OtherFile...")
|
|
72
87
|
continue
|
|
73
88
|
|
|
74
|
-
|
|
75
|
-
logger.info(str(
|
|
89
|
+
docs.append(cls(file_arg, lines=document.lines, text=document.text).printable_document())
|
|
90
|
+
logger.info(str(docs[-1]))
|
|
76
91
|
file_type_count[cls.__name__] += 1
|
|
77
92
|
|
|
78
93
|
if doc_timer.seconds_since_start() > SLOW_FILE_SECONDS:
|
|
79
|
-
doc_timer.print_at_checkpoint(f"Slow file: {
|
|
94
|
+
doc_timer.print_at_checkpoint(f"Slow file: {docs[-1]} processed")
|
|
80
95
|
|
|
81
|
-
self.
|
|
82
|
-
self.
|
|
83
|
-
self.
|
|
84
|
-
self.json_files = [
|
|
96
|
+
self.doj_files = Document.sort_by_timestamp([d for d in docs if isinstance(d, DojFile)])
|
|
97
|
+
self.emails = Document.sort_by_timestamp([d for d in docs if isinstance(d, Email)])
|
|
98
|
+
self.imessage_logs = Document.sort_by_timestamp([d for d in docs if isinstance(d, MessengerLog)])
|
|
99
|
+
self.json_files = Document.sort_by_timestamp([d for d in docs if isinstance(d, JsonFile)])
|
|
100
|
+
self.other_files = Document.sort_by_timestamp([d for d in docs if isinstance(d, OtherFile) and not isinstance(d, DojFile)])
|
|
85
101
|
self._set_uninteresting_ccs()
|
|
86
102
|
self._copy_duplicate_email_properties()
|
|
87
103
|
self._find_email_attachments_and_set_is_first_for_user()
|
|
@@ -111,14 +127,11 @@ class EpsteinFiles:
|
|
|
111
127
|
timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
|
|
112
128
|
return epstein_files
|
|
113
129
|
|
|
114
|
-
def all_documents(self) -> Sequence[Document]:
|
|
115
|
-
return self.imessage_logs + self.emails + self.other_files
|
|
116
|
-
|
|
117
130
|
def docs_matching(self, pattern: re.Pattern | str, names: list[Name] | None = None) -> list[SearchResult]:
|
|
118
131
|
"""Find documents whose text matches a pattern (file_type and names args limit the documents searched)."""
|
|
119
132
|
results: list[SearchResult] = []
|
|
120
133
|
|
|
121
|
-
for doc in self.all_documents
|
|
134
|
+
for doc in self.all_documents:
|
|
122
135
|
if names and doc.author not in names:
|
|
123
136
|
continue
|
|
124
137
|
|
|
@@ -140,15 +153,15 @@ class EpsteinFiles:
|
|
|
140
153
|
|
|
141
154
|
def email_author_counts(self) -> dict[Name, int]:
|
|
142
155
|
return {
|
|
143
|
-
person.name: len(person.unique_emails_by
|
|
144
|
-
for person in self.emailers() if len(person.unique_emails_by
|
|
156
|
+
person.name: len(person.unique_emails_by)
|
|
157
|
+
for person in self.emailers() if len(person.unique_emails_by) > 0
|
|
145
158
|
}
|
|
146
159
|
|
|
147
160
|
def email_authors_to_device_signatures(self) -> dict[str, set[str]]:
|
|
148
161
|
signatures = defaultdict(set)
|
|
149
162
|
|
|
150
163
|
for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
|
|
151
|
-
signatures[email.author_or_unknown
|
|
164
|
+
signatures[email.author_or_unknown].add(email.sent_from_device)
|
|
152
165
|
|
|
153
166
|
return signatures
|
|
154
167
|
|
|
@@ -156,14 +169,14 @@ class EpsteinFiles:
|
|
|
156
169
|
signatures = defaultdict(set)
|
|
157
170
|
|
|
158
171
|
for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
|
|
159
|
-
signatures[email.sent_from_device].add(email.author_or_unknown
|
|
172
|
+
signatures[email.sent_from_device].add(email.author_or_unknown)
|
|
160
173
|
|
|
161
174
|
return signatures
|
|
162
175
|
|
|
163
176
|
def email_recipient_counts(self) -> dict[Name, int]:
|
|
164
177
|
return {
|
|
165
|
-
person.name: len(person.unique_emails_to
|
|
166
|
-
for person in self.emailers() if len(person.unique_emails_to
|
|
178
|
+
person.name: len(person.unique_emails_to)
|
|
179
|
+
for person in self.emailers() if len(person.unique_emails_to) > 0
|
|
167
180
|
}
|
|
168
181
|
|
|
169
182
|
def email_signature_substitution_counts(self) -> dict[str, int]:
|
|
@@ -212,7 +225,7 @@ class EpsteinFiles:
|
|
|
212
225
|
|
|
213
226
|
def for_ids(self, file_ids: str | list[str]) -> list[Document]:
|
|
214
227
|
file_ids = listify(file_ids)
|
|
215
|
-
docs = [doc for doc in self.all_documents
|
|
228
|
+
docs = [doc for doc in (list(self.all_documents) + self.doj_files) if doc.file_id in file_ids]
|
|
216
229
|
|
|
217
230
|
if len(docs) != len(file_ids):
|
|
218
231
|
logger.warning(f"{len(file_ids)} file IDs provided but only {len(docs)} Epstein files found!")
|
|
@@ -281,11 +294,17 @@ class EpsteinFiles:
|
|
|
281
294
|
return self._uninteresting_emailers
|
|
282
295
|
|
|
283
296
|
def _find_email_attachments_and_set_is_first_for_user(self) -> None:
|
|
284
|
-
for
|
|
285
|
-
if
|
|
286
|
-
email = self.email_for_id(
|
|
287
|
-
|
|
288
|
-
|
|
297
|
+
for other_file in self.other_files:
|
|
298
|
+
if other_file.config and other_file.config.attached_to_email_id:
|
|
299
|
+
email = self.email_for_id(other_file.config.attached_to_email_id)
|
|
300
|
+
email.attached_docs.append(other_file)
|
|
301
|
+
|
|
302
|
+
if other_file.timestamp \
|
|
303
|
+
and other_file.timestamp != email.timestamp \
|
|
304
|
+
and not other_file.config_timestamp:
|
|
305
|
+
other_file.warn(f"Overwriting '{other_file.timestamp}' with {email}'s timestamp {email.timestamp}")
|
|
306
|
+
|
|
307
|
+
other_file.timestamp = email.timestamp
|
|
289
308
|
|
|
290
309
|
for emailer in self.emailers():
|
|
291
310
|
first_email = emailer.emails[0]
|
|
@@ -294,10 +313,10 @@ class EpsteinFiles:
|
|
|
294
313
|
def _copy_duplicate_email_properties(self) -> None:
|
|
295
314
|
"""Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
|
|
296
315
|
for email in self.emails:
|
|
297
|
-
if not email.is_duplicate
|
|
316
|
+
if not email.is_duplicate:
|
|
298
317
|
continue
|
|
299
318
|
|
|
300
|
-
original = self.email_for_id(email.duplicate_of_id
|
|
319
|
+
original = self.email_for_id(email.duplicate_of_id)
|
|
301
320
|
|
|
302
321
|
for field_name in DUPLICATE_PROPS_TO_COPY:
|
|
303
322
|
original_prop = getattr(original, field_name)
|
|
@@ -336,11 +355,13 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
|
336
355
|
def document_cls(doc: Document) -> Type[Document]:
|
|
337
356
|
search_area = doc.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
338
357
|
|
|
339
|
-
if doc.length
|
|
358
|
+
if doc.length == 0:
|
|
340
359
|
return Document
|
|
360
|
+
elif doc.is_doj_file:
|
|
361
|
+
return DojFile
|
|
341
362
|
if doc.text[0] == '{':
|
|
342
363
|
return JsonFile
|
|
343
|
-
elif
|
|
364
|
+
elif Document.is_email(doc): # TODO: right now we setup the DojFile which makes an Email obj only later at print time
|
|
344
365
|
return Email
|
|
345
366
|
elif MSG_REGEX.search(search_area):
|
|
346
367
|
return MessengerLog
|
|
@@ -349,4 +370,4 @@ def document_cls(doc: Document) -> Type[Document]:
|
|
|
349
370
|
|
|
350
371
|
|
|
351
372
|
def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
|
|
352
|
-
return [json_safe(d.metadata
|
|
373
|
+
return [json_safe(d.metadata) for d in Document.sort_by_id(docs)]
|
epstein_files/person.py
CHANGED
|
@@ -19,7 +19,8 @@ from epstein_files.util.data import days_between, flatten, uniquify, without_fal
|
|
|
19
19
|
from epstein_files.util.env import args
|
|
20
20
|
from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
|
|
21
21
|
get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
|
|
22
|
-
from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table,
|
|
22
|
+
from epstein_files.util.rich import (GREY_NUMBERS, SKIPPED_FILE_MSG_PADDING, TABLE_TITLE_STYLE, build_table,
|
|
23
|
+
console, join_texts, print_centered)
|
|
23
24
|
|
|
24
25
|
ALT_INFO_STYLE = 'medium_purple4'
|
|
25
26
|
CC = 'cc:'
|
|
@@ -48,8 +49,9 @@ class Person:
|
|
|
48
49
|
self.emails = Document.sort_by_timestamp(self.emails)
|
|
49
50
|
self.imessage_logs = Document.sort_by_timestamp(self.imessage_logs)
|
|
50
51
|
|
|
52
|
+
@property
|
|
51
53
|
def category(self) -> str | None:
|
|
52
|
-
highlight_group = self.highlight_group
|
|
54
|
+
highlight_group = self.highlight_group
|
|
53
55
|
|
|
54
56
|
if highlight_group and isinstance(highlight_group, HighlightedNames):
|
|
55
57
|
category = highlight_group.category or highlight_group.label
|
|
@@ -57,60 +59,63 @@ class Person:
|
|
|
57
59
|
if category != self.name and category != 'paula': # TODO: this sucks
|
|
58
60
|
return category
|
|
59
61
|
|
|
62
|
+
@property
|
|
60
63
|
def category_txt(self) -> Text | None:
|
|
61
64
|
if self.name is None:
|
|
62
65
|
return None
|
|
63
|
-
elif self.category
|
|
64
|
-
return styled_category(self.category
|
|
65
|
-
elif self.is_a_mystery
|
|
66
|
+
elif self.category:
|
|
67
|
+
return styled_category(self.category)
|
|
68
|
+
elif self.is_a_mystery or self.is_uninteresting:
|
|
66
69
|
return QUESTION_MARKS_TXT
|
|
67
70
|
|
|
71
|
+
@property
|
|
68
72
|
def email_conversation_length_in_days(self) -> int:
|
|
69
73
|
return days_between(self.emails[0].timestamp, self.emails[-1].timestamp)
|
|
70
74
|
|
|
75
|
+
@property
|
|
71
76
|
def earliest_email_at(self) -> datetime:
|
|
72
77
|
return self.emails[0].timestamp
|
|
73
78
|
|
|
79
|
+
@property
|
|
74
80
|
def earliest_email_date(self) -> date:
|
|
75
|
-
return self.earliest_email_at
|
|
81
|
+
return self.earliest_email_at.date()
|
|
76
82
|
|
|
83
|
+
@property
|
|
77
84
|
def last_email_at(self) -> datetime:
|
|
78
85
|
return self.emails[-1].timestamp
|
|
79
86
|
|
|
87
|
+
@property
|
|
80
88
|
def last_email_date(self) -> date:
|
|
81
|
-
return self.last_email_at
|
|
89
|
+
return self.last_email_at.date()
|
|
82
90
|
|
|
91
|
+
@property
|
|
83
92
|
def emails_by(self) -> list[Email]:
|
|
84
93
|
return [e for e in self.emails if self.name == e.author]
|
|
85
94
|
|
|
95
|
+
@property
|
|
86
96
|
def emails_to(self) -> list[Email]:
|
|
87
97
|
return [
|
|
88
98
|
e for e in self.emails
|
|
89
99
|
if self.name in e.recipients or (self.name is None and len(e.recipients) == 0)
|
|
90
100
|
]
|
|
91
101
|
|
|
92
|
-
|
|
93
|
-
return PERSON_LINK_BUILDERS[site](self.name_str())
|
|
94
|
-
|
|
95
|
-
def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
|
|
96
|
-
if self.name is None:
|
|
97
|
-
return Text('')
|
|
98
|
-
|
|
99
|
-
return link_text_obj(self.external_link(site), link_str or site, style=self.style())
|
|
100
|
-
|
|
102
|
+
@property
|
|
101
103
|
def external_links_line(self) -> Text:
|
|
102
104
|
links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
|
|
103
105
|
return Text('', justify='center', style='dim').append(join_texts(links, join=' / ')) #, encloser='()'))#, encloser='‹›'))
|
|
104
106
|
|
|
107
|
+
@property
|
|
105
108
|
def has_any_epstein_emails(self) -> bool:
|
|
106
109
|
contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
|
|
107
110
|
return JEFFREY_EPSTEIN in contacts
|
|
108
111
|
|
|
112
|
+
@property
|
|
109
113
|
def highlight_group(self) -> HighlightedNames | None:
|
|
110
114
|
return get_highlight_group_for_name(self.name)
|
|
111
115
|
|
|
116
|
+
@property
|
|
112
117
|
def info_panel(self) -> Padding:
|
|
113
|
-
"""
|
|
118
|
+
"""Return a `Panel` with the name of an emailer and a few tidbits of information about them."""
|
|
114
119
|
style = 'white' if (not self.style() or self.style() == DEFAULT) else self.style()
|
|
115
120
|
panel_style = f"black on {style} bold"
|
|
116
121
|
|
|
@@ -118,22 +123,23 @@ class Person:
|
|
|
118
123
|
email_count = len(self._printable_emails())
|
|
119
124
|
title_suffix = f"sent by {JEFFREY_EPSTEIN} to himself"
|
|
120
125
|
else:
|
|
121
|
-
email_count = len(self.unique_emails
|
|
122
|
-
num_days = self.email_conversation_length_in_days
|
|
123
|
-
title_suffix = f"{TO_FROM} {self.name_str
|
|
126
|
+
email_count = len(self.unique_emails)
|
|
127
|
+
num_days = self.email_conversation_length_in_days
|
|
128
|
+
title_suffix = f"{TO_FROM} {self.name_str} starting {self.earliest_email_date} covering {num_days:,} days"
|
|
124
129
|
|
|
125
130
|
title = f"Found {email_count} emails {title_suffix}"
|
|
126
|
-
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category
|
|
131
|
+
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category) + 8)
|
|
127
132
|
panel = Panel(Text(title, justify='center'), width=width, style=panel_style)
|
|
128
133
|
elements: list[RenderableType] = [panel]
|
|
129
134
|
|
|
130
|
-
if self.info_with_category
|
|
131
|
-
elements.append(Text(f"({self.info_with_category
|
|
135
|
+
if self.info_with_category:
|
|
136
|
+
elements.append(Text(f"({self.info_with_category})", justify='center', style=f"{style} italic"))
|
|
132
137
|
|
|
133
138
|
return Padding(Group(*elements), (2, 0, 1, 0))
|
|
134
139
|
|
|
140
|
+
@property
|
|
135
141
|
def info_str(self) -> str | None:
|
|
136
|
-
highlight_group = self.highlight_group
|
|
142
|
+
highlight_group = self.highlight_group
|
|
137
143
|
|
|
138
144
|
if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
|
|
139
145
|
info = highlight_group.info_for(self.name)
|
|
@@ -141,111 +147,161 @@ class Person:
|
|
|
141
147
|
if info:
|
|
142
148
|
return info
|
|
143
149
|
|
|
144
|
-
if self.is_uninteresting and len(self.emails_by
|
|
145
|
-
if self.has_any_epstein_emails
|
|
150
|
+
if self.is_uninteresting and len(self.emails_by) == 0:
|
|
151
|
+
if self.has_any_epstein_emails:
|
|
146
152
|
return UNINTERESTING_CC_INFO
|
|
147
153
|
else:
|
|
148
154
|
return UNINTERESTING_CC_INFO_NO_CONTACT
|
|
149
155
|
|
|
150
|
-
|
|
151
|
-
return ', '.join(without_falsey([self.category(), self.info_str()]))
|
|
152
|
-
|
|
156
|
+
@property
|
|
153
157
|
def info_txt(self) -> Text | None:
|
|
154
158
|
if self.name == JEFFREY_EPSTEIN:
|
|
155
159
|
return Text('(emails sent by Epstein to himself are here)', style=ALT_INFO_STYLE)
|
|
156
160
|
elif self.name is None:
|
|
157
161
|
return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
|
|
158
|
-
elif self.category
|
|
162
|
+
elif self.category == JUNK:
|
|
159
163
|
return Text(f"({JUNK} mail)", style='bright_black dim')
|
|
160
|
-
elif self.is_uninteresting and (self.info_str
|
|
161
|
-
if self.sole_cc
|
|
162
|
-
return Text(f"(cc: from {self.sole_cc
|
|
163
|
-
elif self.info_str
|
|
164
|
-
return Text(f"({self.info_str
|
|
164
|
+
elif self.is_uninteresting and (self.info_str or '').startswith(UNINTERESTING_CC_INFO):
|
|
165
|
+
if self.sole_cc:
|
|
166
|
+
return Text(f"(cc: from {self.sole_cc} only)", style='wheat4 dim')
|
|
167
|
+
elif self.info_str == UNINTERESTING_CC_INFO:
|
|
168
|
+
return Text(f"({self.info_str})", style='wheat4 dim')
|
|
165
169
|
else:
|
|
166
|
-
return Text(f"({self.info_str
|
|
167
|
-
elif self.is_a_mystery
|
|
170
|
+
return Text(f"({self.info_str})", style='plum4 dim')
|
|
171
|
+
elif self.is_a_mystery:
|
|
168
172
|
return Text(QUESTION_MARKS, style='honeydew2 bold')
|
|
169
|
-
elif self.info_str
|
|
173
|
+
elif self.info_str is None:
|
|
170
174
|
if self.name in MAILING_LISTS:
|
|
171
175
|
return Text('(mailing list)', style=f"pale_turquoise4 dim")
|
|
172
|
-
elif self.category
|
|
176
|
+
elif self.category:
|
|
173
177
|
return Text(QUESTION_MARKS, style=self.style())
|
|
174
178
|
else:
|
|
175
179
|
return None
|
|
176
180
|
else:
|
|
177
|
-
return Text(self.info_str
|
|
181
|
+
return Text(self.info_str, style=self.style(allow_bold=False))
|
|
182
|
+
|
|
183
|
+
@property
|
|
184
|
+
def info_with_category(self) -> str:
|
|
185
|
+
return ', '.join(without_falsey([self.category, self.info_str]))
|
|
178
186
|
|
|
187
|
+
@property
|
|
179
188
|
def internal_link(self) -> Text:
|
|
180
189
|
"""Kind of like an anchor link to the section of the page containing these emails."""
|
|
181
|
-
return link_text_obj(internal_link_to_emails(self.name_str
|
|
190
|
+
return link_text_obj(internal_link_to_emails(self.name_str), self.name_str, style=self.style())
|
|
182
191
|
|
|
192
|
+
@property
|
|
183
193
|
def is_a_mystery(self) -> bool:
|
|
184
194
|
"""Return True if this is someone we theroetically could know more about."""
|
|
185
|
-
return self.is_unstyled
|
|
186
|
-
|
|
187
|
-
def sole_cc(self) -> str | None:
|
|
188
|
-
"""Return name if this person sent 0 emails and received CC from only one that name."""
|
|
189
|
-
email_authors = uniquify([e.author for e in self.emails_to()])
|
|
190
|
-
|
|
191
|
-
if len(self.unique_emails()) == 1 and len(email_authors) > 0:
|
|
192
|
-
logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
|
|
193
|
-
else:
|
|
194
|
-
logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
|
|
195
|
-
|
|
196
|
-
if len(self.unique_emails_by()) > 0:
|
|
197
|
-
return None
|
|
198
|
-
|
|
199
|
-
if len(email_authors) == 1:
|
|
200
|
-
return email_authors[0]
|
|
195
|
+
return self.is_unstyled and not (self.is_email_address or self.info_str or self.is_uninteresting)
|
|
201
196
|
|
|
197
|
+
@property
|
|
202
198
|
def is_email_address(self) -> bool:
|
|
203
199
|
return '@' in (self.name or '')
|
|
204
200
|
|
|
201
|
+
@property
|
|
205
202
|
def is_linkable(self) -> bool:
|
|
206
203
|
"""Return True if it's likely that EpsteinWeb has a page for this name."""
|
|
207
204
|
if self.name is None or ' ' not in self.name:
|
|
208
205
|
return False
|
|
209
|
-
elif self.is_email_address
|
|
206
|
+
elif self.is_email_address or '/' in self.name or QUESTION_MARKS in self.name:
|
|
210
207
|
return False
|
|
211
208
|
elif self.name in INVALID_FOR_EPSTEIN_WEB:
|
|
212
209
|
return False
|
|
213
210
|
|
|
214
211
|
return True
|
|
215
212
|
|
|
216
|
-
|
|
217
|
-
"""True if we want to truncate all emails to/from this user."""
|
|
218
|
-
return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
|
|
219
|
-
|
|
213
|
+
@property
|
|
220
214
|
def is_unstyled(self) -> bool:
|
|
221
215
|
"""True if there's no highlight group for this name."""
|
|
222
216
|
return self.style() == DEFAULT_NAME_STYLE
|
|
223
217
|
|
|
224
|
-
|
|
225
|
-
return self.name or UNKNOWN
|
|
226
|
-
|
|
218
|
+
@property
|
|
227
219
|
def name_link(self) -> Text:
|
|
228
220
|
"""Will only link if it's worth linking, otherwise just a Text object."""
|
|
229
|
-
if not self.is_linkable
|
|
230
|
-
return self.name_txt
|
|
221
|
+
if not self.is_linkable:
|
|
222
|
+
return self.name_txt
|
|
231
223
|
else:
|
|
232
|
-
return Text.from_markup(link_markup(self.external_link(), self.name_str
|
|
224
|
+
return Text.from_markup(link_markup(self.external_link(), self.name_str, self.style()))
|
|
225
|
+
|
|
226
|
+
@property
|
|
227
|
+
def name_str(self) -> str:
|
|
228
|
+
return self.name or UNKNOWN
|
|
233
229
|
|
|
230
|
+
@property
|
|
234
231
|
def name_txt(self) -> Text:
|
|
235
232
|
return styled_name(self.name)
|
|
236
233
|
|
|
234
|
+
@property # TODO: unused?
|
|
235
|
+
def should_always_truncate(self) -> bool:
|
|
236
|
+
"""True if we want to truncate all emails to/from this user."""
|
|
237
|
+
return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
|
|
238
|
+
|
|
239
|
+
@property
|
|
240
|
+
def sole_cc(self) -> str | None:
|
|
241
|
+
"""Return name if this person sent 0 emails and received CC from only one that name."""
|
|
242
|
+
email_authors = uniquify([e.author for e in self.emails_to])
|
|
243
|
+
|
|
244
|
+
if len(self.unique_emails) == 1 and len(email_authors) > 0:
|
|
245
|
+
logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
|
|
246
|
+
else:
|
|
247
|
+
logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
|
|
248
|
+
|
|
249
|
+
if len(self.unique_emails_by) > 0:
|
|
250
|
+
return None
|
|
251
|
+
|
|
252
|
+
if len(email_authors) == 1:
|
|
253
|
+
return email_authors[0]
|
|
254
|
+
|
|
255
|
+
@property
|
|
256
|
+
def sort_key(self) -> list[int | str]:
|
|
257
|
+
"""Key used to sort `Person` objects by the number of emails sent/received."""
|
|
258
|
+
counts = [
|
|
259
|
+
len(self.unique_emails),
|
|
260
|
+
-1 * int((self.info_str or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
|
|
261
|
+
-1 * int((self.info_str or '') == UNINTERESTING_CC_INFO),
|
|
262
|
+
int(self.has_any_epstein_emails),
|
|
263
|
+
]
|
|
264
|
+
|
|
265
|
+
counts = [-1 * count for count in counts]
|
|
266
|
+
|
|
267
|
+
if args.sort_alphabetical:
|
|
268
|
+
return [self.name_str] + counts
|
|
269
|
+
else:
|
|
270
|
+
return counts + [self.name_str]
|
|
271
|
+
|
|
272
|
+
@property
|
|
273
|
+
def unique_emails(self) -> Sequence[Email]:
|
|
274
|
+
return Document.without_dupes(self.emails)
|
|
275
|
+
|
|
276
|
+
@property
|
|
277
|
+
def unique_emails_by(self) -> list[Email]:
|
|
278
|
+
return Document.without_dupes(self.emails_by)
|
|
279
|
+
|
|
280
|
+
@property
|
|
281
|
+
def unique_emails_to(self) -> list[Email]:
|
|
282
|
+
return Document.without_dupes(self.emails_to)
|
|
283
|
+
|
|
284
|
+
def external_link(self, site: ExternalSite = EPSTEINIFY) -> str:
|
|
285
|
+
return PERSON_LINK_BUILDERS[site](self.name_str)
|
|
286
|
+
|
|
287
|
+
def external_link_txt(self, site: ExternalSite = EPSTEINIFY, link_str: str | None = None) -> Text:
|
|
288
|
+
if self.name is None:
|
|
289
|
+
return Text('')
|
|
290
|
+
|
|
291
|
+
return link_text_obj(self.external_link(site), link_str or site, style=self.style())
|
|
292
|
+
|
|
237
293
|
def print_emails(self) -> list[Email]:
|
|
238
294
|
"""Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
|
|
239
|
-
print_centered(self.info_panel
|
|
295
|
+
print_centered(self.info_panel)
|
|
240
296
|
self.print_emails_table()
|
|
241
297
|
last_printed_email_was_duplicate = False
|
|
242
298
|
|
|
243
|
-
if self.category
|
|
299
|
+
if self.category == JUNK:
|
|
244
300
|
logger.warning(f"Not printing junk emailer '{self.name}'")
|
|
245
301
|
else:
|
|
246
302
|
for email in self._printable_emails():
|
|
247
|
-
if email.is_duplicate
|
|
248
|
-
console.print(Padding(email.duplicate_file_txt
|
|
303
|
+
if email.is_duplicate:
|
|
304
|
+
console.print(Padding(email.duplicate_file_txt.append('...'), SKIPPED_FILE_MSG_PADDING))
|
|
249
305
|
last_printed_email_was_duplicate = True
|
|
250
306
|
else:
|
|
251
307
|
if last_printed_email_was_duplicate:
|
|
@@ -260,42 +316,18 @@ class Person:
|
|
|
260
316
|
table = Email.build_emails_table(self._unique_printable_emails(), self.name)
|
|
261
317
|
print_centered(Padding(table, (0, 5, 0, 5)))
|
|
262
318
|
|
|
263
|
-
if self.is_linkable
|
|
264
|
-
print_centered(self.external_links_line
|
|
319
|
+
if self.is_linkable:
|
|
320
|
+
print_centered(self.external_links_line)
|
|
265
321
|
|
|
266
322
|
console.line()
|
|
267
323
|
|
|
268
|
-
def sort_key(self) -> list[int | str]:
|
|
269
|
-
counts = [
|
|
270
|
-
len(self.unique_emails()),
|
|
271
|
-
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
|
|
272
|
-
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
|
|
273
|
-
int(self.has_any_epstein_emails()),
|
|
274
|
-
]
|
|
275
|
-
|
|
276
|
-
counts = [-1 * count for count in counts]
|
|
277
|
-
|
|
278
|
-
if args.sort_alphabetical:
|
|
279
|
-
return [self.name_str()] + counts
|
|
280
|
-
else:
|
|
281
|
-
return counts + [self.name_str()]
|
|
282
|
-
|
|
283
324
|
def style(self, allow_bold: bool = True) -> str:
|
|
284
325
|
return get_style_for_name(self.name, allow_bold=allow_bold)
|
|
285
326
|
|
|
286
|
-
def unique_emails(self) -> Sequence[Email]:
|
|
287
|
-
return Document.without_dupes(self.emails)
|
|
288
|
-
|
|
289
|
-
def unique_emails_by(self) -> list[Email]:
|
|
290
|
-
return Document.without_dupes(self.emails_by())
|
|
291
|
-
|
|
292
|
-
def unique_emails_to(self) -> list[Email]:
|
|
293
|
-
return Document.without_dupes(self.emails_to())
|
|
294
|
-
|
|
295
327
|
def _printable_emails(self):
|
|
296
328
|
"""For Epstein we only want to print emails he sent to himself."""
|
|
297
329
|
if self.name == JEFFREY_EPSTEIN:
|
|
298
|
-
return [e for e in self.emails if e.is_note_to_self
|
|
330
|
+
return [e for e in self.emails if e.is_note_to_self]
|
|
299
331
|
else:
|
|
300
332
|
return self.emails
|
|
301
333
|
|
|
@@ -303,7 +335,7 @@ class Person:
|
|
|
303
335
|
return Document.without_dupes(self._printable_emails())
|
|
304
336
|
|
|
305
337
|
def __str__(self):
|
|
306
|
-
return f"{self.name_str
|
|
338
|
+
return f"{self.name_str}"
|
|
307
339
|
|
|
308
340
|
@staticmethod
|
|
309
341
|
def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
|
|
@@ -312,7 +344,7 @@ class Person:
|
|
|
312
344
|
highlighted_names = [p.name for p in highlighted]
|
|
313
345
|
is_selection = len(people) != len(highlighted) or args.emailers_info
|
|
314
346
|
all_emails = Person.emails_from_people(people)
|
|
315
|
-
email_authors = [p for p in people if p.emails_by
|
|
347
|
+
email_authors = [p for p in people if p.emails_by and p.name]
|
|
316
348
|
attributed_emails = [email for email in all_emails if email.author]
|
|
317
349
|
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
|
|
318
350
|
f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
|
|
@@ -337,7 +369,7 @@ class Person:
|
|
|
337
369
|
grey_idx = 0
|
|
338
370
|
|
|
339
371
|
for person in people:
|
|
340
|
-
earliest_email_date = person.earliest_email_date
|
|
372
|
+
earliest_email_date = person.earliest_email_date
|
|
341
373
|
is_on_page = False if show_epstein_total else person.name in highlighted_names
|
|
342
374
|
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
343
375
|
|
|
@@ -352,13 +384,13 @@ class Person:
|
|
|
352
384
|
|
|
353
385
|
table.add_row(
|
|
354
386
|
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
|
|
355
|
-
person.internal_link
|
|
356
|
-
person.category_txt
|
|
357
|
-
f"{len(person.unique_emails
|
|
358
|
-
str(len(person.unique_emails_by
|
|
359
|
-
str(len(person.unique_emails_to
|
|
360
|
-
f"{person.email_conversation_length_in_days
|
|
361
|
-
person.info_txt
|
|
387
|
+
person.internal_link if is_on_page and not person.is_uninteresting else person.name_txt,
|
|
388
|
+
person.category_txt,
|
|
389
|
+
f"{len(person.unique_emails if show_epstein_total else person._unique_printable_emails())}",
|
|
390
|
+
str(len(person.unique_emails_by)) if len(person.unique_emails_by) > 0 else '',
|
|
391
|
+
str(len(person.unique_emails_to)) if len(person.unique_emails_to) > 0 else '',
|
|
392
|
+
f"{person.email_conversation_length_in_days}",
|
|
393
|
+
person.info_txt or '',
|
|
362
394
|
style='' if show_epstein_total or is_on_page else 'dim',
|
|
363
395
|
)
|
|
364
396
|
|
|
@@ -366,4 +398,4 @@ class Person:
|
|
|
366
398
|
|
|
367
399
|
@staticmethod
|
|
368
400
|
def emails_from_people(people: list['Person']) -> Sequence[Email]:
|
|
369
|
-
return Document.uniquify(flatten([list(p.unique_emails
|
|
401
|
+
return Document.uniquify(flatten([list(p.unique_emails) for p in people]))
|