epstein-files 1.2.1-py3-none-any.whl → 1.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +55 -11
- epstein_files/documents/document.py +13 -2
- epstein_files/documents/email.py +329 -258
- epstein_files/documents/emails/email_header.py +17 -8
- epstein_files/documents/other_file.py +8 -6
- epstein_files/epstein_files.py +18 -4
- epstein_files/person.py +65 -20
- epstein_files/util/constant/names.py +18 -12
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +4 -2
- epstein_files/util/constant/urls.py +13 -2
- epstein_files/util/constants.py +486 -224
- epstein_files/util/data.py +1 -0
- epstein_files/util/doc_cfg.py +33 -27
- epstein_files/util/env.py +18 -8
- epstein_files/util/file_helper.py +2 -0
- epstein_files/util/highlighted_group.py +321 -132
- epstein_files/util/output.py +19 -24
- epstein_files/util/rich.py +9 -3
- epstein_files/util/word_count.py +2 -2
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
- epstein_files-1.4.1.dist-info/RECORD +34 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.1.dist-info/RECORD +0 -34
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0
@@ -2,7 +2,7 @@ import json
 import re
 from dataclasses import asdict, dataclass, field
 
-from epstein_files.util.constant.strings import AUTHOR, REDACTED
+from epstein_files.util.constant.strings import AUTHOR, REDACTED, indented
 from epstein_files.util.constants import ALL_CONFIGS
 from epstein_files.util.doc_cfg import EmailCfg
 from epstein_files.util.logging import logger
@@ -13,7 +13,10 @@ ON_BEHALF_OF = 'on behalf of'
 TO_FIELDS = ['bcc', 'cc', 'to']
 EMAILER_FIELDS = [AUTHOR] + TO_FIELDS
 
-
+FIELD_PATTERNS = ['Date', 'From', 'Sent', 'To', r"C[cC]", r"B[cC][cC]", 'Importance', 'Subject', 'Attachments', 'Classification', 'Flag', 'Reply-To']
+FIELDS_PATTERN = '|'.join(FIELD_PATTERNS)
+FIELDS_COLON_PATTERN = fr"^({FIELDS_PATTERN}):"
+HEADER_REGEX_STR = fr"(((?:(?:{FIELDS_PATTERN}|Bee):|on behalf of ?)(?! +(by |from my|via )).*\n){{3,}})"
 EMAIL_SIMPLE_HEADER_REGEX = re.compile(rf'^{HEADER_REGEX_STR}')
 EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX = re.compile(HEADER_REGEX_STR)
 EMAIL_PRE_FORWARD_REGEX = re.compile(r"(.{3,2000}?)" + HEADER_REGEX_STR, re.DOTALL)  # Match up to the next email header section
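The header-detection constants added above can be exercised on their own. Below is a minimal sketch (rebuilding the pattern locally rather than importing it from the package) showing how a block of three or more consecutive `Field:` lines would be picked up by the anchored form of the regex:

    import re

    # Rebuilt locally from the constants in the hunk above (not imported from epstein_files).
    FIELD_PATTERNS = ['Date', 'From', 'Sent', 'To', r"C[cC]", r"B[cC][cC]", 'Importance',
                      'Subject', 'Attachments', 'Classification', 'Flag', 'Reply-To']
    FIELDS_PATTERN = '|'.join(FIELD_PATTERNS)
    HEADER_REGEX_STR = fr"(((?:(?:{FIELDS_PATTERN}|Bee):|on behalf of ?)(?! +(by |from my|via )).*\n){{3,}})"

    sample = "From: REDACTED\nSent: Tuesday, August 11, 2015\nTo: Jeffrey E.\nSubject: Re: lunch\nbody text follows...\n"
    match = re.compile(rf"^{HEADER_REGEX_STR}").match(sample)
    print(match.group(1) if match else "no header found")  # prints the four header lines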
@@ -53,6 +56,7 @@ class EmailHeader:
     importance: str | None = None
     attachments: str | None = None
     to: list[str] | None = None
+    reply_to: str | None = None
 
     def __post_init__(self):
         self.num_header_rows = len(self.field_names)
@@ -95,13 +99,10 @@ class EmailHeader:
                 logger.info(f"{log_prefix}, trying next line...")
                 num_headers += 1
                 value = email_lines[i + num_headers]
-            elif BAD_EMAILER_REGEX.match(value):
+            elif BAD_EMAILER_REGEX.match(value) or value.startswith('http'):
                 logger.info(f"{log_prefix}, decrementing num_headers and skipping...")
                 num_headers -= 1
                 continue
-            elif value.startswith('http'):
-                logger.info(f"{log_prefix}, using empty string instead...")
-                value = ''
 
             value = [v.strip() for v in value.split(';') if len(v.strip()) > 0]
 
@@ -110,7 +111,12 @@ class EmailHeader:
         self.num_header_rows = len(self.field_names) + num_headers
         self.header_chars = '\n'.join(email_lines[0:self.num_header_rows])
         log_msg = f"Corrected empty header using {self.num_header_rows} lines to:\n"
-
+
+        logger.warning(
+            f"{log_msg}{self}\n\n[top lines]:\n\n%s\n\n[body_lines]:\n\n%s\n\n",
+            indented('\n'.join(email_lines[0:(num_headers + 1) * 2]), prefix='> '),
+            indented('\n'.join(email_lines[self.num_header_rows:self.num_header_rows + 5]), prefix='> '),
+        )
 
     def rewrite_header(self) -> str:
         header_fields = {}
@@ -151,7 +157,7 @@ class EmailHeader:
             #logger.debug(f"extracting header line: '{line}'")
             key, value = [element.strip() for element in line.split(':', 1)]
             value = value.rstrip('_')
-            key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower())
+            key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower().replace('-', '_'))
             key = 'bcc' if key == 'bee' else key
 
             if kw_args.get(key):
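The key normalization above now maps hyphenated field names like `Reply-To` to snake_case attribute names. A small standalone sketch of that single line, with `AUTHOR` assumed to be the string `'author'` (the diff does not show its value):

    AUTHOR = 'author'  # assumed value; the real constant lives in epstein_files.util.constant.strings

    def normalize_key(key: str) -> str:
        key = AUTHOR if key == 'From' else ('sent_at' if key in ['Date', 'Sent'] else key.lower().replace('-', '_'))
        return 'bcc' if key == 'bee' else key

    assert normalize_key('Reply-To') == 'reply_to'  # new: hyphen becomes underscore
    assert normalize_key('Bee') == 'bcc'            # OCR'd "Bee" still treated as Bcc
    assert normalize_key('Sent') == 'sent_at'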
@@ -161,6 +167,9 @@ class EmailHeader:
 
             field_names.append(key)
 
+            if key == 'reply_to':
+                logger.warning(f"Found value for Reply-To field: '{value}'")
+
             if key in TO_FIELDS:
                 recipients = [element.strip() for element in value.split(';')]
                 recipients = [r for r in recipients if len(r) > 0]
@@ -122,8 +122,8 @@ class OtherFile(Document):
 
         return Text(escape(self.preview_text()))
 
-    def is_interesting(self):
-        """False for lame prefixes, duplicates, and other boring files."""
+    def is_interesting(self) -> bool:
+        """Overloaded. False for lame prefixes, duplicates, and other boring files."""
         info_sentences = self.info()
 
         if self.is_duplicate():
@@ -164,8 +164,8 @@ class OtherFile(Document):
 
     def _extract_timestamp(self) -> datetime | None:
         """Return configured timestamp or value extracted by scanning text with datefinder."""
-        if self.config and self.config.timestamp:
-            return self.config.timestamp
+        if self.config and self.config.timestamp():
+            return self.config.timestamp()
         elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
             return None
 
@@ -210,9 +210,10 @@ class OtherFile(Document):
         self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
 
     @classmethod
-    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+    def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '', title: str = '') -> Table:
         """Build a table of OtherFile documents."""
-
+        title = title or f'{title_pfx}Other Files Details in Chronological Order'
+        table = build_table(title, show_lines=True, title_justify='left' if title else 'center')
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='right', style='dim')
@@ -244,6 +245,7 @@ class OtherFile(Document):
 
     @classmethod
     def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
+        """Table showing file count by category."""
         categories = uniquify([f.category() for f in files])
         categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
         table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
epstein_files/epstein_files.py
CHANGED
@@ -84,6 +84,7 @@ class EpsteinFiles:
         self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
         self._set_uninteresting_ccs()
         self._copy_duplicate_email_properties()
+        self._find_email_attachments_and_set_is_first_for_user()
 
     @classmethod
     def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
@@ -123,6 +124,9 @@ class EpsteinFiles:
 
             lines = doc.matching_lines(pattern)
 
+            if args.min_line_length:
+                lines = [line for line in lines if len(line.line) > args.min_line_length]
+
             if len(lines) > 0:
                 results.append(SearchResult(doc, lines))
 
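The new `min_line_length` option drops short matches before a `SearchResult` is built. A rough sketch of that filtering step, using a hypothetical stand-in for the matched-line objects (the diff only shows that they expose a `.line` attribute):

    from dataclasses import dataclass

    @dataclass
    class MatchedLine:          # hypothetical stand-in for the real matched-line type
        line: str

    lines = [MatchedLine('ok'), MatchedLine('a much longer matching line')]
    min_line_length = 10        # would come from args.min_line_length
    lines = [line for line in lines if len(line.line) > min_line_length]
    print([l.line for l in lines])  # ['a much longer matching line']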
@@ -251,7 +255,7 @@ class EpsteinFiles:
                 name=name,
                 emails=self.emails_for(name),
                 imessage_logs=self.imessage_logs_for(name),
-
+                is_uninteresting=name in self.uninteresting_emailers(),
                 other_files=[f for f in self.other_files if name and name == f.author]
             )
             for name in names
@@ -276,6 +280,17 @@ class EpsteinFiles:
 
         return self._uninteresting_emailers
 
+    def _find_email_attachments_and_set_is_first_for_user(self) -> None:
+        for file in self.other_files:
+            if file.config and file.config.attached_to_email_id:
+                email = self.email_for_id(file.config.attached_to_email_id)
+                file.warn(f"Attaching to {email}")
+                email.attached_docs.append(file)
+
+        for emailer in self.emailers():
+            first_email = emailer.emails[0]
+            first_email._is_first_for_user = True
+
     def _copy_duplicate_email_properties(self) -> None:
         """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
         for email in self.emails:
@@ -297,7 +312,7 @@ class EpsteinFiles:
 
     def _set_uninteresting_ccs(self) -> None:
         for id in EMAILS_WITH_UNINTERESTING_BCCS:
-            self.uninteresting_ccs +=
+            self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
 
         for id in EMAILS_WITH_UNINTERESTING_CCS:
             self.uninteresting_ccs += self.email_for_id(id).recipients
@@ -334,5 +349,4 @@ def document_cls(doc: Document) -> Type[Document]:
 
 
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
-
-    return [json_safe(d.metadata()) for d in docs_sorted_by_id]
+    return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]
epstein_files/person.py
CHANGED
@@ -9,13 +9,13 @@ from rich.table import Table
 from rich.text import Text
 
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
+from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.documents.other_file import OtherFile
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import *
-from epstein_files.util.data import days_between, flatten, without_falsey
+from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
     get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
@@ -42,7 +42,7 @@ class Person:
     emails: list[Email] = field(default_factory=list)
     imessage_logs: list[MessengerLog] = field(default_factory=list)
     other_files: list[OtherFile] = field(default_factory=list)
-
+    is_uninteresting: bool = False
 
     def __post_init__(self):
         self.emails = Document.sort_by_timestamp(self.emails)
@@ -62,7 +62,7 @@ class Person:
             return None
         elif self.category():
             return styled_category(self.category())
-        elif self.is_a_mystery() or self.
+        elif self.is_a_mystery() or self.is_uninteresting:
             return QUESTION_MARKS_TXT
 
     def email_conversation_length_in_days(self) -> int:
@@ -120,7 +120,7 @@ class Person:
         else:
             email_count = len(self.unique_emails())
             num_days = self.email_conversation_length_in_days()
-            title_suffix = f"
+            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
 
         title = f"Found {email_count} emails {title_suffix}"
         width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -136,8 +136,12 @@ class Person:
         highlight_group = self.highlight_group()
 
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
-
-
+            info = highlight_group.info_for(self.name)
+
+            if info:
+                return info
+
+        if self.is_uninteresting and len(self.emails_by()) == 0:
             if self.has_any_epstein_emails():
                 return UNINTERESTING_CC_INFO
             else:
@@ -152,9 +156,11 @@ class Person:
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
-            return Text(f"({JUNK} mail)", style='
-        elif self.
-            if self.
+            return Text(f"({JUNK} mail)", style='bright_black dim')
+        elif self.is_uninteresting and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
+            if self.sole_cc():
+                return Text(f"(cc: from {self.sole_cc()} only)", style='wheat4 dim')
+            elif self.info_str() == UNINTERESTING_CC_INFO:
                 return Text(f"({self.info_str()})", style='wheat4 dim')
             else:
                 return Text(f"({self.info_str()})", style='plum4 dim')
@@ -168,11 +174,30 @@ class Person:
             else:
                 return None
         else:
-            return Text(self.info_str())
+            return Text(self.info_str(), style=self.style(allow_bold=False))
+
+    def internal_link(self) -> Text:
+        """Kind of like an anchor link to the section of the page containing these emails."""
+        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
 
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theroetically could know more about."""
-        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.
+        return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting)
+
+    def sole_cc(self) -> str | None:
+        """Return name if this person sent 0 emails and received CC from only one that name."""
+        email_authors = uniquify([e.author for e in self.emails_to()])
+
+        if len(self.unique_emails()) == 1 and len(email_authors) > 0:
+            logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
+        else:
+            logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
+
+        if len(self.unique_emails_by()) > 0:
+            return None
+
+        if len(email_authors) == 1:
+            return email_authors[0]
 
     def is_email_address(self) -> bool:
         return '@' in (self.name or '')
@@ -188,6 +213,10 @@ class Person:
 
         return True
 
+    def should_always_truncate(self) -> bool:
+        """True if we want to truncate all emails to/from this user."""
+        return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
+
     def is_unstyled(self) -> bool:
         """True if there's no highlight group for this name."""
         return self.style() == DEFAULT_NAME_STYLE
@@ -237,7 +266,13 @@ class Person:
         console.line()
 
     def sort_key(self) -> list[int | str]:
-        counts = [
+        counts = [
+            len(self.unique_emails()),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails()),
+        ]
+
         counts = [-1 * count for count in counts]
 
         if args.sort_alphabetical:
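The expanded `sort_key()` still relies on the negate-the-counts trick: every numeric component is multiplied by -1 so larger counts sort first, while the trailing name string keeps ties in alphabetical order. A toy illustration of that idea:

    # Toy data; the real counts come from unique_emails(), info_str(), etc.
    people = [('Larry', 3), ('Ghislaine', 12), ('Alan', 12)]
    sort_key = lambda person: [-person[1], person[0]]   # negated count, then name

    print(sorted(people, key=sort_key))
    # [('Alan', 12), ('Ghislaine', 12), ('Larry', 3)]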
@@ -245,8 +280,8 @@ class Person:
         else:
             return counts + [self.name_str()]
 
-    def style(self) -> str:
-        return get_style_for_name(self.name)
+    def style(self, allow_bold: bool = True) -> str:
+        return get_style_for_name(self.name, allow_bold=allow_bold)
 
     def unique_emails(self) -> Sequence[Email]:
         return Document.without_dupes(self.emails)
@@ -276,6 +311,11 @@ class Person:
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
         is_selection = len(people) != len(highlighted) or args.emailers_info
+        all_emails = Person.emails_from_people(people)
+        email_authors = [p for p in people if p.emails_by() and p.name]
+        attributed_emails = [email for email in all_emails if email.author]
+        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
+                 f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
 
         if is_selection:
             title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
@@ -283,7 +323,7 @@ class Person:
         else:
             title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
 
-        table = build_table(title)
+        table = build_table(title, caption=footer)
         table.add_column('First')
         table.add_column('Name', max_width=24, no_wrap=True)
         table.add_column('Category', justify='left', style='dim italic')
@@ -298,6 +338,7 @@ class Person:
 
         for person in people:
             earliest_email_date = person.earliest_email_date()
+            is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
 
             # Color year rollovers more brightly
@@ -311,14 +352,18 @@ class Person:
 
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.
+                person.internal_link() if is_on_page and not person.is_uninteresting else person.name_txt(),
                 person.category_txt(),
                 f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
-
-
+                str(len(person.unique_emails_by())) if len(person.unique_emails_by()) > 0 else '',
+                str(len(person.unique_emails_to())) if len(person.unique_emails_to()) > 0 else '',
                 f"{person.email_conversation_length_in_days()}",
                 person.info_txt() or '',
-                style='' if
+                style='' if show_epstein_total or is_on_page else 'dim',
             )
 
         return table
+
+    @staticmethod
+    def emails_from_people(people: list['Person']) -> Sequence[Email]:
+        return Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
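The new `emails_from_people()` helper de-duplicates the union of every person's emails. `flatten()` and `Document.uniquify()` are not shown in this diff; the sketch below assumes they behave like the generic order-preserving versions:

    from itertools import chain

    def flatten(lists):
        return list(chain.from_iterable(lists))

    def uniquify(items):
        return list(dict.fromkeys(items))   # keeps first occurrence, preserves order

    per_person_emails = [['email_1', 'email_2'], ['email_2', 'email_3']]
    print(uniquify(flatten(per_person_emails)))   # ['email_1', 'email_2', 'email_3']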
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
 DONALD_TRUMP = 'Donald Trump'
 EDUARDO_ROBLES = 'Eduardo Robles'
 EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
+EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
 EHUD_BARAK = 'Ehud Barak'
 ERIC_ROTH = 'Eric Roth'
 FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
 NADIA_MARCINKO = 'Nadia Marcinko'
 NEAL_KASSELL = 'Neal Kassell'
 NICHOLAS_RIBIS = 'Nicholas Ribis'
+NILI_PRIELL_BARAK = 'Nili Priell Barak'
 NOAM_CHOMSKY = 'Noam Chomsky'
 NORMAN_D_RAU = 'Norman D. Rau'
 OLIVIER_COLOM = 'Olivier Colom'
@@ -215,24 +217,24 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
 al alain alan alfredo allen alex alexander amanda andres andrew anthony
-bard barrett barry bennet bill black bob boris brad bruce
-caroline carolyn chris christina cohen
-dan daniel danny darren dave david donald
+bard barrett barry bennet bernard bill black bob boris brad brenner bruce
+cameron caroline carolyn chris christina cohen
+dan daniel danny darren dave david debbie donald
 ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
-faith forget fred friendly frost fuller
-gerald george gold gordon
-haddad harry hay heather henry hill hoffman
+faith fisher forget fred friendly frost fuller
+gates gerald george gold gordon
+haddad hanson harry hay heather henry hill hoffman howard
 ian ivan
 jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
-kahn karl kate katherine kelly ken kevin krassner
-larry laurie lawrence leon lesley linda link lisa
+kafka kahn karl kate katherine kelly ken kevin krassner
+larry larsen laurie lawrence leon lesley linda link lisa
 mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
 nancy neal new nicole norman
 owen
 paul paula pen peter philip prince
-randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
-scott sean skip stanley stern stephen steve steven stone susan
-the thomas tim tom tony tyler
+randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
+scott sean skip smith stanley stern stephen steve steven stone susan
+terry the thomas tim tom tony tyler
 victor
 wade waters
 y
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
 baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
 chapman charles charlie christopher clint cohen colin collins conway
-davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
 edmond elizabeth emily entwistle erik evelyn
 ferguson flachsbart francis franco frank
 gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -267,6 +269,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
 def constantize_name(name: str) -> str:
     if name == 'Andrzej Duda or entourage':
         return 'ANDRZEJ_DUDA'
+    elif name == MIROSLAV_LAJCAK:
+        return 'MIROSLAV_LAJCAK'
+    elif name == 'Paula Heil Fisher (???)':
+        return 'PAULA'
 
     variable_name = remove_question_marks(name)
     variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
+from epstein_files.util.logging import logger
 
 # Files output by the code
 HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
 
 # Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh
+# NOTE: don't rename these variables without changing deploy.sh
+GH_REPO_NAME = 'epstein_text_messages'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
 CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
 def make_clean() -> None:
     """Delete all build artifacts."""
     for build_file in BUILD_ARTIFACTS:
-
-
-
+        for file in [build_file, Path(f"{build_file}.txt")]:
+            if file.exists():
+                logger.warning(f"Removing build file '{file}'...")
+                file.unlink()
@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
 
 # Regexes
-
+ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
 FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
 
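With the new `ID_REGEX`, document IDs (six digits plus an optional `_N` suffix) can be matched on their own as well as inside file stems. A standalone sketch; `HOUSE_OVERSIGHT_PREFIX` is not shown in this diff, so the value below is only an assumption for illustration:

    import re

    HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'   # assumed value, not confirmed by the diff
    ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
    FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
    FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")

    print(bool(ID_REGEX.fullmatch('012345_2')))                                # True
    print(bool(FILE_NAME_REGEX.fullmatch('HOUSE_OVERSIGHT_012345_2.txt')))     # True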
@@ -79,6 +80,7 @@ OTHER_FILE_CLASS = 'OtherFile'
 remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
 
 
-def indented(s: str, spaces: int = 4) -> str:
+def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
     indent = ' ' * spaces
+    indent += prefix
     return indent + f"\n{indent}".join(s.split('\n'))
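The `prefix` argument added to `indented()` is what the email-header logging earlier in this diff uses for its `'> '` quote style. Usage sketch (the function body is copied verbatim from the hunk above):

    def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
        indent = ' ' * spaces
        indent += prefix
        return indent + f"\n{indent}".join(s.split('\n'))

    print(indented("From: X\nTo: Y", spaces=2, prefix='> '))
    #   > From: X
    #   > To: Y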
@@ -6,6 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.output_files import *
+from epstein_files.util.constant.strings import remove_question_marks
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_stem
 
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
 ROLLCALL = 'RollCall'
 TWITTER = 'search X'
 
-GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/
+GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
 EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+TO_FROM = 'to/from'
 
 extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
 
-
 PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
     EPSTEIN_MEDIA: epstein_media_person_url,
     EPSTEIN_WEB: epstein_web_person_url,
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
     return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
 
 
+def internal_link_to_emails(name: str) -> str:
+    """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
+    search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
+    return f"{this_site_url()}#:~:text={search_term}"
+
+
 def link_markup(
     url: str,
     link_text: str | None = None,
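The new `internal_link_to_emails()` builds a text-fragment anchor (`#:~:text=...`) to a person's section of the rendered page. A self-contained sketch with a hard-coded base URL taken from the docstring; it skips the question-mark stripping, and `safe=''` is used here so the `/` in `to/from` is percent-encoded as in that docstring example (the package's own call may rely on a different quoting default):

    import urllib.parse

    TO_FROM = 'to/from'
    BASE_URL = 'https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html'

    def internal_link_to_emails(name: str) -> str:
        search_term = urllib.parse.quote(f"{TO_FROM} {name}", safe='')
        return f"{BASE_URL}#:~:text={search_term}"

    print(internal_link_to_emails('Jack Goldberger'))
    # ...all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger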
@@ -121,6 +128,10 @@ def other_site_url() -> str:
     return SITE_URLS[other_site_type()]
 
 
+def this_site_url() -> str:
+    return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
+
+
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
 THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
 THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)