epstein-files 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +42 -30
- epstein_files/documents/communication.py +0 -3
- epstein_files/documents/document.py +66 -19
- epstein_files/documents/email.py +203 -208
- epstein_files/documents/emails/email_header.py +10 -2
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/other_file.py +16 -34
- epstein_files/epstein_files.py +24 -35
- epstein_files/person.py +67 -73
- epstein_files/util/constant/names.py +21 -12
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +2 -2
- epstein_files/util/constant/urls.py +14 -2
- epstein_files/util/constants.py +38 -12
- epstein_files/util/data.py +2 -1
- epstein_files/util/doc_cfg.py +3 -3
- epstein_files/util/env.py +10 -7
- epstein_files/util/highlighted_group.py +366 -202
- epstein_files/util/logging.py +1 -1
- epstein_files/util/output.py +54 -21
- epstein_files/util/rich.py +21 -16
- epstein_files/util/timer.py +14 -0
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/METADATA +5 -2
- epstein_files-1.2.5.dist-info/RECORD +34 -0
- epstein_files-1.2.0.dist-info/RECORD +0 -34
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0
|
@@ -4,7 +4,7 @@ from datetime import datetime
|
|
|
4
4
|
|
|
5
5
|
from rich.text import Text
|
|
6
6
|
|
|
7
|
-
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
|
|
7
|
+
from epstein_files.util.constant.names import ANTHONY_SCARAMUCCI, JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN, Name, extract_last_name
|
|
8
8
|
from epstein_files.util.constant.strings import TIMESTAMP_DIM
|
|
9
9
|
from epstein_files.util.data import iso_timestamp
|
|
10
10
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
@@ -17,6 +17,7 @@ PHONE_NUMBER_REGEX = re.compile(r'^[\d+]+.*')
|
|
|
17
17
|
UNCERTAIN_SUFFIX = ' (?)'
|
|
18
18
|
|
|
19
19
|
DISPLAY_LAST_NAME_ONLY = [
|
|
20
|
+
ANTHONY_SCARAMUCCI,
|
|
20
21
|
JEFFREY_EPSTEIN,
|
|
21
22
|
STEVE_BANNON,
|
|
22
23
|
]
|
|
@@ -59,7 +60,7 @@ class TextMessage:
|
|
|
59
60
|
try:
|
|
60
61
|
timestamp_str = iso_timestamp(self.parse_timestamp())
|
|
61
62
|
except Exception as e:
|
|
62
|
-
logger.
|
|
63
|
+
logger.info(f"Failed to parse timestamp for {self}")
|
|
63
64
|
timestamp_str = self.timestamp_str
|
|
64
65
|
|
|
65
66
|
return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
|
|
@@ -22,7 +22,7 @@ from epstein_files.util.data import days_between, escape_single_quotes, remove_t
|
|
|
22
22
|
from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
|
|
23
23
|
from epstein_files.util.env import args
|
|
24
24
|
from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
|
|
25
|
-
from epstein_files.util.rich import build_table, highlighter
|
|
25
|
+
from epstein_files.util.rich import add_cols_to_table, build_table, highlighter
|
|
26
26
|
from epstein_files.util.logging import logger
|
|
27
27
|
|
|
28
28
|
FIRST_FEW_LINES = 'First Few Lines'
|
|
@@ -209,39 +209,8 @@ class OtherFile(Document):
|
|
|
209
209
|
if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
|
|
210
210
|
self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
|
|
211
211
|
|
|
212
|
-
@
|
|
213
|
-
def
|
|
214
|
-
counts = defaultdict(int)
|
|
215
|
-
category_bytes = defaultdict(int)
|
|
216
|
-
|
|
217
|
-
for file in files:
|
|
218
|
-
if file.category() is None:
|
|
219
|
-
logger.warning(f"file {file.file_id} has no category")
|
|
220
|
-
|
|
221
|
-
counts[file.category()] += 1
|
|
222
|
-
category_bytes[file.category()] += file.file_size()
|
|
223
|
-
|
|
224
|
-
table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
|
|
225
|
-
table.columns[-1].justify = 'right'
|
|
226
|
-
table.columns[0].min_width = 14
|
|
227
|
-
table.columns[-1].style = 'dim'
|
|
228
|
-
|
|
229
|
-
for (category, count) in sort_dict(counts):
|
|
230
|
-
category_files = [f for f in files if f.category() == category]
|
|
231
|
-
known_author_count = Document.known_author_count(category_files)
|
|
232
|
-
|
|
233
|
-
table.add_row(
|
|
234
|
-
styled_category(category),
|
|
235
|
-
str(count),
|
|
236
|
-
str(known_author_count),
|
|
237
|
-
str(count - known_author_count),
|
|
238
|
-
file_size_to_str(category_bytes[category]),
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
return table
|
|
242
|
-
|
|
243
|
-
@staticmethod
|
|
244
|
-
def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
|
|
212
|
+
@classmethod
|
|
213
|
+
def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
|
|
245
214
|
"""Build a table of OtherFile documents."""
|
|
246
215
|
table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
|
|
247
216
|
table.add_column('File', justify='center', width=FILENAME_LENGTH)
|
|
@@ -272,3 +241,16 @@ class OtherFile(Document):
|
|
|
272
241
|
)
|
|
273
242
|
|
|
274
243
|
return table
|
|
244
|
+
|
|
245
|
+
@classmethod
|
|
246
|
+
def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
|
|
247
|
+
categories = uniquify([f.category() for f in files])
|
|
248
|
+
categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
|
|
249
|
+
table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
|
|
250
|
+
|
|
251
|
+
for category in categories:
|
|
252
|
+
category_files = [f for f in files if f.category() == category]
|
|
253
|
+
table.add_row(styled_category(category), *cls.files_info_row(category_files))
|
|
254
|
+
|
|
255
|
+
table.columns = table.columns[:-2] + [table.columns[-1]] # Removee unknown author col
|
|
256
|
+
return table
|
epstein_files/epstein_files.py
CHANGED
|
@@ -9,6 +9,8 @@ from datetime import datetime
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import Sequence, Type, cast
|
|
11
11
|
|
|
12
|
+
from rich.table import Table
|
|
13
|
+
|
|
12
14
|
from epstein_files.documents.document import Document
|
|
13
15
|
from epstein_files.documents.email import DETECT_EMAIL_REGEX, Email
|
|
14
16
|
from epstein_files.documents.json_file import JsonFile
|
|
@@ -22,7 +24,6 @@ from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
|
22
24
|
from epstein_files.util.env import DOCS_DIR, args, logger
|
|
23
25
|
from epstein_files.util.file_helper import file_size_str
|
|
24
26
|
from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
|
|
25
|
-
from epstein_files.util.rich import NA_TXT, add_cols_to_table, build_table, console, print_centered
|
|
26
27
|
from epstein_files.util.search_result import SearchResult
|
|
27
28
|
from epstein_files.util.timer import Timer
|
|
28
29
|
|
|
@@ -31,9 +32,13 @@ PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
|
31
32
|
SLOW_FILE_SECONDS = 1.0
|
|
32
33
|
|
|
33
34
|
EMAILS_WITH_UNINTERESTING_CCS = [
|
|
34
|
-
'025329',
|
|
35
|
-
'024923',
|
|
36
|
-
'033568',
|
|
35
|
+
'025329', # Krassner
|
|
36
|
+
'024923', # Krassner
|
|
37
|
+
'033568', # Krassner
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
EMAILS_WITH_UNINTERESTING_BCCS = [
|
|
41
|
+
'014797_1', # Ross Gow
|
|
37
42
|
]
|
|
38
43
|
|
|
39
44
|
|
|
@@ -45,7 +50,7 @@ class EpsteinFiles:
|
|
|
45
50
|
json_files: list[JsonFile] = field(default_factory=list)
|
|
46
51
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
47
52
|
timer: Timer = field(default_factory=lambda: Timer())
|
|
48
|
-
uninteresting_ccs: list[Name] = field(
|
|
53
|
+
uninteresting_ccs: list[Name] = field(default_factory=list)
|
|
49
54
|
|
|
50
55
|
def __post_init__(self):
|
|
51
56
|
"""Iterate through files and build appropriate objects."""
|
|
@@ -88,13 +93,12 @@ class EpsteinFiles:
|
|
|
88
93
|
if PICKLED_PATH.exists() and not args.overwrite_pickle and not args.skip_other_files:
|
|
89
94
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
90
95
|
epstein_files = pickle.load(file)
|
|
91
|
-
epstein_files.timer = timer
|
|
92
96
|
timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
|
|
93
|
-
|
|
97
|
+
timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
|
|
94
98
|
return epstein_files
|
|
95
99
|
|
|
96
100
|
logger.warning(f"Building new cache file, this will take a few minutes...")
|
|
97
|
-
epstein_files = EpsteinFiles(
|
|
101
|
+
epstein_files = EpsteinFiles()
|
|
98
102
|
|
|
99
103
|
if args.skip_other_files:
|
|
100
104
|
logger.warning(f"Not writing pickled data because --skip-other-files")
|
|
@@ -235,7 +239,7 @@ class EpsteinFiles:
|
|
|
235
239
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
236
240
|
|
|
237
241
|
def non_duplicate_emails(self) -> list[Email]:
|
|
238
|
-
return
|
|
242
|
+
return Document.without_dupes(self.emails)
|
|
239
243
|
|
|
240
244
|
def non_json_other_files(self) -> list[OtherFile]:
|
|
241
245
|
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
@@ -253,34 +257,20 @@ class EpsteinFiles:
|
|
|
253
257
|
for name in names
|
|
254
258
|
]
|
|
255
259
|
|
|
256
|
-
def
|
|
257
|
-
table =
|
|
258
|
-
|
|
259
|
-
table.
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
table.add_row(
|
|
265
|
-
label,
|
|
266
|
-
f"{len(docs):,}",
|
|
267
|
-
f"{known:,}" if known is not None else NA_TXT,
|
|
268
|
-
f"{len(docs) - known:,}" if known is not None else NA_TXT,
|
|
269
|
-
f"{len([d for d in docs if d.is_duplicate()])}",
|
|
270
|
-
)
|
|
271
|
-
|
|
272
|
-
add_row('Emails', self.emails)
|
|
273
|
-
add_row('iMessage Logs', self.imessage_logs)
|
|
274
|
-
add_row('JSON Data', self.json_files)
|
|
275
|
-
add_row('Other', self.non_json_other_files())
|
|
276
|
-
print_centered(table)
|
|
277
|
-
console.line()
|
|
260
|
+
def overview_table(self) -> Table:
|
|
261
|
+
table = Document.file_info_table('Files Overview', 'File Type')
|
|
262
|
+
table.add_row('Emails', *Document.files_info_row(self.emails))
|
|
263
|
+
table.add_row('iMessage Logs', *Document.files_info_row(self.imessage_logs))
|
|
264
|
+
table.add_row('JSON Data', *Document.files_info_row(self.json_files, True))
|
|
265
|
+
table.add_row('Other', *Document.files_info_row(self.non_json_other_files()))
|
|
266
|
+
return table
|
|
278
267
|
|
|
279
268
|
def unknown_recipient_ids(self) -> list[str]:
|
|
280
269
|
"""IDs of emails whose recipient is not known."""
|
|
281
270
|
return sorted([e.file_id for e in self.emails if None in e.recipients or not e.recipients])
|
|
282
271
|
|
|
283
272
|
def uninteresting_emailers(self) -> list[Name]:
|
|
273
|
+
"""Emailers whom we don't want to print a separate section for because they're just CCed."""
|
|
284
274
|
if '_uninteresting_emailers' not in vars(self):
|
|
285
275
|
self._uninteresting_emailers = sorted(uniquify(UNINTERESTING_EMAILERS + self.uninteresting_ccs))
|
|
286
276
|
|
|
@@ -306,8 +296,8 @@ class EpsteinFiles:
|
|
|
306
296
|
self.emails = Document.sort_by_timestamp(self.emails)
|
|
307
297
|
|
|
308
298
|
def _set_uninteresting_ccs(self) -> None:
|
|
309
|
-
|
|
310
|
-
|
|
299
|
+
for id in EMAILS_WITH_UNINTERESTING_BCCS:
|
|
300
|
+
self.uninteresting_ccs += [bcc.lower() for bcc in cast(list[str], self.email_for_id(id).header.bcc)]
|
|
311
301
|
|
|
312
302
|
for id in EMAILS_WITH_UNINTERESTING_CCS:
|
|
313
303
|
self.uninteresting_ccs += self.email_for_id(id).recipients
|
|
@@ -344,5 +334,4 @@ def document_cls(doc: Document) -> Type[Document]:
|
|
|
344
334
|
|
|
345
335
|
|
|
346
336
|
def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
|
|
347
|
-
|
|
348
|
-
return [json_safe(d.metadata()) for d in docs_sorted_by_id]
|
|
337
|
+
return [json_safe(d.metadata()) for d in Document.sort_by_id(docs)]
|
epstein_files/person.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from datetime import datetime, date
|
|
3
|
+
from typing import Sequence
|
|
3
4
|
|
|
4
5
|
from rich.console import Group, RenderableType
|
|
5
6
|
from rich.padding import Padding
|
|
@@ -18,13 +19,14 @@ from epstein_files.util.data import days_between, flatten, without_falsey
|
|
|
18
19
|
from epstein_files.util.env import args
|
|
19
20
|
from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
|
|
20
21
|
get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
|
|
21
|
-
from epstein_files.util.rich import GREY_NUMBERS,
|
|
22
|
+
from epstein_files.util.rich import GREY_NUMBERS, TABLE_TITLE_STYLE, build_table, console, join_texts, print_centered
|
|
22
23
|
|
|
23
24
|
ALT_INFO_STYLE = 'medium_purple4'
|
|
24
25
|
CC = 'cc:'
|
|
25
26
|
MIN_AUTHOR_PANEL_WIDTH = 80
|
|
26
27
|
EMAILER_INFO_TITLE = 'Email Conversations Will Appear'
|
|
27
|
-
UNINTERESTING_CC_INFO = "
|
|
28
|
+
UNINTERESTING_CC_INFO = "cc: or bcc: recipient only"
|
|
29
|
+
UNINTERESTING_CC_INFO_NO_CONTACT = f"{UNINTERESTING_CC_INFO}, no direct contact with Epstein"
|
|
28
30
|
|
|
29
31
|
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + MAILING_LISTS + [
|
|
30
32
|
'ACT for America',
|
|
@@ -100,6 +102,10 @@ class Person:
|
|
|
100
102
|
links = [self.external_link_txt(site) for site in PERSON_LINK_BUILDERS]
|
|
101
103
|
return Text('', justify='center', style='dim').append(join_texts(links, join=' / ')) #, encloser='()'))#, encloser='‹›'))
|
|
102
104
|
|
|
105
|
+
def has_any_epstein_emails(self) -> bool:
|
|
106
|
+
contacts = [e.author for e in self.emails] + flatten([e.recipients for e in self.emails])
|
|
107
|
+
return JEFFREY_EPSTEIN in contacts
|
|
108
|
+
|
|
103
109
|
def highlight_group(self) -> HighlightedNames | None:
|
|
104
110
|
return get_highlight_group_for_name(self.name)
|
|
105
111
|
|
|
@@ -114,7 +120,7 @@ class Person:
|
|
|
114
120
|
else:
|
|
115
121
|
email_count = len(self.unique_emails())
|
|
116
122
|
num_days = self.email_conversation_length_in_days()
|
|
117
|
-
title_suffix = f"
|
|
123
|
+
title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
|
|
118
124
|
|
|
119
125
|
title = f"Found {email_count} emails {title_suffix}"
|
|
120
126
|
width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
|
|
@@ -130,9 +136,16 @@ class Person:
|
|
|
130
136
|
highlight_group = self.highlight_group()
|
|
131
137
|
|
|
132
138
|
if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
139
|
+
info = highlight_group.info_for(self.name)
|
|
140
|
+
|
|
141
|
+
if info:
|
|
142
|
+
return info
|
|
143
|
+
|
|
144
|
+
if self.is_uninteresting_cc:
|
|
145
|
+
if self.has_any_epstein_emails():
|
|
146
|
+
return UNINTERESTING_CC_INFO
|
|
147
|
+
else:
|
|
148
|
+
return UNINTERESTING_CC_INFO_NO_CONTACT
|
|
136
149
|
|
|
137
150
|
def info_with_category(self) -> str:
|
|
138
151
|
return ', '.join(without_falsey([self.category(), self.info_str()]))
|
|
@@ -143,18 +156,27 @@ class Person:
|
|
|
143
156
|
elif self.name is None:
|
|
144
157
|
return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
|
|
145
158
|
elif self.category() == JUNK:
|
|
146
|
-
return Text(f"({JUNK} mail)", style='
|
|
159
|
+
return Text(f"({JUNK} mail)", style='bright_black dim')
|
|
160
|
+
elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
|
|
161
|
+
if self.info_str() == UNINTERESTING_CC_INFO:
|
|
162
|
+
return Text(f"({self.info_str()})", style='wheat4 dim')
|
|
163
|
+
else:
|
|
164
|
+
return Text(f"({self.info_str()})", style='plum4 dim')
|
|
147
165
|
elif self.is_a_mystery():
|
|
148
|
-
return Text(QUESTION_MARKS, style='
|
|
149
|
-
elif self.is_uninteresting_cc and self.info_str() == UNINTERESTING_CC_INFO:
|
|
150
|
-
return Text(f"({self.info_str()})", style='wheat4 dim')
|
|
166
|
+
return Text(QUESTION_MARKS, style='honeydew2 bold')
|
|
151
167
|
elif self.info_str() is None:
|
|
152
168
|
if self.name in MAILING_LISTS:
|
|
153
|
-
return Text('(mailing list)', style=f"
|
|
169
|
+
return Text('(mailing list)', style=f"pale_turquoise4 dim")
|
|
170
|
+
elif self.category():
|
|
171
|
+
return Text(QUESTION_MARKS, style=self.style())
|
|
154
172
|
else:
|
|
155
173
|
return None
|
|
156
174
|
else:
|
|
157
|
-
return Text(self.info_str())
|
|
175
|
+
return Text(self.info_str(), style=self.style())
|
|
176
|
+
|
|
177
|
+
def internal_link(self) -> Text:
|
|
178
|
+
"""Kind of like an anchor link to the section of the page containing these emails."""
|
|
179
|
+
return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
|
|
158
180
|
|
|
159
181
|
def is_a_mystery(self) -> bool:
|
|
160
182
|
"""Return True if this is someone we theroetically could know more about."""
|
|
@@ -214,8 +236,8 @@ class Person:
|
|
|
214
236
|
return self._printable_emails()
|
|
215
237
|
|
|
216
238
|
def print_emails_table(self) -> None:
|
|
217
|
-
|
|
218
|
-
print_centered(Padding(
|
|
239
|
+
table = Email.build_emails_table(self._unique_printable_emails(), self.name)
|
|
240
|
+
print_centered(Padding(table, (0, 5, 0, 5)))
|
|
219
241
|
|
|
220
242
|
if self.is_linkable():
|
|
221
243
|
print_centered(self.external_links_line())
|
|
@@ -223,7 +245,13 @@ class Person:
|
|
|
223
245
|
console.line()
|
|
224
246
|
|
|
225
247
|
def sort_key(self) -> list[int | str]:
|
|
226
|
-
counts = [
|
|
248
|
+
counts = [
|
|
249
|
+
len(self.unique_emails()),
|
|
250
|
+
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
|
|
251
|
+
-1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
|
|
252
|
+
int(self.has_any_epstein_emails()),
|
|
253
|
+
]
|
|
254
|
+
|
|
227
255
|
counts = [-1 * count for count in counts]
|
|
228
256
|
|
|
229
257
|
if args.sort_alphabetical:
|
|
@@ -234,14 +262,14 @@ class Person:
|
|
|
234
262
|
def style(self) -> str:
|
|
235
263
|
return get_style_for_name(self.name)
|
|
236
264
|
|
|
237
|
-
def unique_emails(self) ->
|
|
238
|
-
return
|
|
265
|
+
def unique_emails(self) -> Sequence[Email]:
|
|
266
|
+
return Document.without_dupes(self.emails)
|
|
239
267
|
|
|
240
268
|
def unique_emails_by(self) -> list[Email]:
|
|
241
|
-
return
|
|
269
|
+
return Document.without_dupes(self.emails_by())
|
|
242
270
|
|
|
243
271
|
def unique_emails_to(self) -> list[Email]:
|
|
244
|
-
return
|
|
272
|
+
return Document.without_dupes(self.emails_to())
|
|
245
273
|
|
|
246
274
|
def _printable_emails(self):
|
|
247
275
|
"""For Epstein we only want to print emails he sent to himself."""
|
|
@@ -250,24 +278,32 @@ class Person:
|
|
|
250
278
|
else:
|
|
251
279
|
return self.emails
|
|
252
280
|
|
|
281
|
+
def _unique_printable_emails(self):
|
|
282
|
+
return Document.without_dupes(self._printable_emails())
|
|
283
|
+
|
|
253
284
|
def __str__(self):
|
|
254
285
|
return f"{self.name_str()}"
|
|
255
286
|
|
|
256
287
|
@staticmethod
|
|
257
|
-
def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None) -> Table:
|
|
288
|
+
def emailer_info_table(people: list['Person'], highlighted: list['Person'] | None = None, show_epstein_total: bool = False) -> Table:
|
|
258
289
|
"""Table of info about emailers."""
|
|
259
290
|
highlighted = highlighted or people
|
|
260
291
|
highlighted_names = [p.name for p in highlighted]
|
|
261
|
-
is_selection = len(people) != len(highlighted) or args.
|
|
292
|
+
is_selection = len(people) != len(highlighted) or args.emailers_info
|
|
293
|
+
all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
|
|
294
|
+
email_authors = [p for p in people if p.emails_by() and p.name]
|
|
295
|
+
attributed_emails = [email for email in all_emails if email.author]
|
|
296
|
+
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
|
|
297
|
+
f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
|
|
262
298
|
|
|
263
299
|
if is_selection:
|
|
264
|
-
title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (
|
|
265
|
-
title.append(THE_OTHER_PAGE_TXT).append("
|
|
300
|
+
title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
|
|
301
|
+
title.append(THE_OTHER_PAGE_TXT).append(" has the rest)")
|
|
266
302
|
else:
|
|
267
303
|
title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
|
|
268
304
|
|
|
269
|
-
table = build_table(title)
|
|
270
|
-
table.add_column('
|
|
305
|
+
table = build_table(title, caption=footer)
|
|
306
|
+
table.add_column('First')
|
|
271
307
|
table.add_column('Name', max_width=24, no_wrap=True)
|
|
272
308
|
table.add_column('Category', justify='left', style='dim italic')
|
|
273
309
|
table.add_column('Num', justify='right', style='white')
|
|
@@ -281,6 +317,7 @@ class Person:
|
|
|
281
317
|
|
|
282
318
|
for person in people:
|
|
283
319
|
earliest_email_date = person.earliest_email_date()
|
|
320
|
+
is_on_page = False if show_epstein_total else person.name in highlighted_names
|
|
284
321
|
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
285
322
|
|
|
286
323
|
# Color year rollovers more brightly
|
|
@@ -294,57 +331,14 @@ class Person:
|
|
|
294
331
|
|
|
295
332
|
table.add_row(
|
|
296
333
|
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
|
|
297
|
-
person.
|
|
334
|
+
person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
|
|
298
335
|
person.category_txt(),
|
|
299
|
-
f"{len(person.
|
|
300
|
-
f"{len(person.unique_emails_by())}",
|
|
301
|
-
f"{len(person.unique_emails_to())}",
|
|
336
|
+
f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
|
|
337
|
+
Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
|
|
338
|
+
Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
|
|
302
339
|
f"{person.email_conversation_length_in_days()}",
|
|
303
340
|
person.info_txt() or '',
|
|
304
|
-
style='' if
|
|
341
|
+
style='' if show_epstein_total or is_on_page else 'dim',
|
|
305
342
|
)
|
|
306
343
|
|
|
307
344
|
return table
|
|
308
|
-
|
|
309
|
-
@staticmethod
|
|
310
|
-
def emailer_stats_table(people: list['Person']) -> Table:
|
|
311
|
-
email_authors = [p for p in people if p.emails_by() and p.name]
|
|
312
|
-
all_emails = Document.uniquify(flatten([p.unique_emails() for p in people]))
|
|
313
|
-
attributed_emails = [email for email in all_emails if email.author]
|
|
314
|
-
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}"
|
|
315
|
-
footer = f"{footer} out of {len(attributed_emails):,} emails)"
|
|
316
|
-
|
|
317
|
-
counts_table = build_table(
|
|
318
|
-
f"All {len(email_authors)} People Who Sent or Received an Email in the Files",
|
|
319
|
-
caption=footer,
|
|
320
|
-
cols=[
|
|
321
|
-
'Name',
|
|
322
|
-
{'name': 'Count', 'justify': 'right', 'style': 'bold bright_white'},
|
|
323
|
-
{'name': 'Sent', 'justify': 'right', 'style': 'gray74'},
|
|
324
|
-
{'name': 'Recv', 'justify': 'right', 'style': 'gray74'},
|
|
325
|
-
{'name': 'First', 'style': TIMESTAMP_STYLE},
|
|
326
|
-
{'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
|
|
327
|
-
{'name': 'Days', 'justify': 'right', 'style': 'dim'},
|
|
328
|
-
JMAIL,
|
|
329
|
-
EPSTEIN_MEDIA,
|
|
330
|
-
EPSTEIN_WEB,
|
|
331
|
-
'Twitter',
|
|
332
|
-
]
|
|
333
|
-
)
|
|
334
|
-
|
|
335
|
-
for person in sorted(people, key=lambda person: person.sort_key()):
|
|
336
|
-
counts_table.add_row(
|
|
337
|
-
person.name_link(),
|
|
338
|
-
f"{len(person.unique_emails()):,}",
|
|
339
|
-
f"{len(person.unique_emails_by()):,}",
|
|
340
|
-
f"{len(person.unique_emails_to()):,}",
|
|
341
|
-
str(person.earliest_email_date()),
|
|
342
|
-
str(person.last_email_date()),
|
|
343
|
-
f"{person.email_conversation_length_in_days()}",
|
|
344
|
-
person.external_link_txt(JMAIL),
|
|
345
|
-
person.external_link_txt(EPSTEIN_MEDIA) if person.is_linkable() else '',
|
|
346
|
-
person.external_link_txt(EPSTEIN_WEB) if person.is_linkable() else '',
|
|
347
|
-
person.external_link_txt(TWITTER),
|
|
348
|
-
)
|
|
349
|
-
|
|
350
|
-
return counts_table
|
|
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
|
|
|
61
61
|
DONALD_TRUMP = 'Donald Trump'
|
|
62
62
|
EDUARDO_ROBLES = 'Eduardo Robles'
|
|
63
63
|
EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
|
|
64
|
+
EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
|
|
64
65
|
EHUD_BARAK = 'Ehud Barak'
|
|
65
66
|
ERIC_ROTH = 'Eric Roth'
|
|
66
67
|
FAITH_KATES = 'Faith Kates'
|
|
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
|
|
|
129
130
|
NADIA_MARCINKO = 'Nadia Marcinko'
|
|
130
131
|
NEAL_KASSELL = 'Neal Kassell'
|
|
131
132
|
NICHOLAS_RIBIS = 'Nicholas Ribis'
|
|
133
|
+
NILI_PRIELL_BARAK = 'Nili Priell Barak'
|
|
132
134
|
NOAM_CHOMSKY = 'Noam Chomsky'
|
|
133
135
|
NORMAN_D_RAU = 'Norman D. Rau'
|
|
134
136
|
OLIVIER_COLOM = 'Olivier Colom'
|
|
@@ -214,23 +216,23 @@ UBS = 'UBS'
|
|
|
214
216
|
|
|
215
217
|
# First and last names that should be made part of a highlighting regex for emailers
|
|
216
218
|
NAMES_TO_NOT_HIGHLIGHT = """
|
|
217
|
-
al alain alan alfredo allen alex alexander amanda andres andrew
|
|
218
|
-
bard barrett barry bill black bob boris brad bruce
|
|
219
|
-
carolyn chris christina
|
|
220
|
-
dan daniel danny darren dave david donald
|
|
221
|
-
ed edward edwards enterprise enterprises entourage epstein eric erika etienne
|
|
222
|
-
faith forget fred friendly frost fuller
|
|
223
|
-
gerald george gold gordon
|
|
224
|
-
haddad harry hay heather henry hill hoffman
|
|
219
|
+
al alain alan alfredo allen alex alexander amanda andres andrew anthony
|
|
220
|
+
bard barrett barry bennet bernard bill black bob boris brad brenner bruce
|
|
221
|
+
caroline carolyn chris christina cohen
|
|
222
|
+
dan daniel danny darren dave david debbie donald
|
|
223
|
+
ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
|
|
224
|
+
faith fisher forget fred friendly frost fuller
|
|
225
|
+
gates gerald george gold gordon
|
|
226
|
+
haddad harry hay heather henry hill hoffman howard
|
|
225
227
|
ian ivan
|
|
226
228
|
jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
|
|
227
229
|
kahn karl kate katherine kelly ken kevin krassner
|
|
228
230
|
larry laurie lawrence leon lesley linda link lisa
|
|
229
|
-
mann marc marie mark martin melanie michael mike miller mitchell miles morris moskowitz
|
|
230
|
-
nancy neal new nicole
|
|
231
|
+
mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
|
|
232
|
+
nancy neal new nicole norman
|
|
231
233
|
owen
|
|
232
234
|
paul paula pen peter philip prince
|
|
233
|
-
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
|
|
235
|
+
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
|
|
234
236
|
scott sean skip stanley stern stephen steve steven stone susan
|
|
235
237
|
the thomas tim tom tony tyler
|
|
236
238
|
victor
|
|
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
|
243
245
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
244
246
|
baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
245
247
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
246
|
-
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
248
|
+
davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
247
249
|
edmond elizabeth emily entwistle erik evelyn
|
|
248
250
|
ferguson flachsbart francis franco frank
|
|
249
251
|
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
@@ -296,3 +298,10 @@ def extract_last_name(name: str) -> str:
|
|
|
296
298
|
return ' '.join(first_last_names[-2:])
|
|
297
299
|
else:
|
|
298
300
|
return first_last_names[-1]
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def reversed_name(name: str) -> str:
|
|
304
|
+
if ' ' not in name:
|
|
305
|
+
return name
|
|
306
|
+
|
|
307
|
+
return f"{extract_last_name(name)}, {extract_first_name(name)}"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
3
|
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
4
|
+
from epstein_files.util.logging import logger
|
|
4
5
|
|
|
5
6
|
# Files output by the code
|
|
6
7
|
HTML_DIR = Path('docs')
|
|
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
|
|
|
16
17
|
EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
|
|
17
18
|
|
|
18
19
|
# Deployment URLS
|
|
19
|
-
# NOTE: don't rename these variables without changing deploy.sh
|
|
20
|
+
# NOTE: don't rename these variables without changing deploy.sh
|
|
21
|
+
GH_REPO_NAME = 'epstein_text_messages'
|
|
20
22
|
GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
|
|
21
|
-
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/
|
|
23
|
+
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
|
|
22
24
|
ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
|
|
23
25
|
CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
|
|
24
26
|
JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
|
|
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
|
|
|
44
46
|
def make_clean() -> None:
|
|
45
47
|
"""Delete all build artifacts."""
|
|
46
48
|
for build_file in BUILD_ARTIFACTS:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
49
|
+
for file in [build_file, Path(f"{build_file}.txt")]:
|
|
50
|
+
if file.exists():
|
|
51
|
+
logger.warning(f"Removing build file '{file}'...")
|
|
52
|
+
file.unlink()
|
|
@@ -9,7 +9,6 @@ ARTICLE = 'article'
|
|
|
9
9
|
BOOK = 'book'
|
|
10
10
|
BUSINESS = 'business'
|
|
11
11
|
CONFERENCE = 'conference'
|
|
12
|
-
ENTERTAINER = 'entertainer'
|
|
13
12
|
FINANCE = 'finance'
|
|
14
13
|
FRIEND = 'friend'
|
|
15
14
|
FLIGHT_LOG = 'flight log'
|
|
@@ -65,7 +64,8 @@ REDACTED = '<REDACTED>'
|
|
|
65
64
|
QUESTION_MARKS = '(???)'
|
|
66
65
|
|
|
67
66
|
# Regexes
|
|
68
|
-
|
|
67
|
+
ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
|
|
68
|
+
FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
|
|
69
69
|
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
70
70
|
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
71
71
|
|
|
@@ -6,6 +6,7 @@ from inflection import parameterize
|
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
8
|
from epstein_files.util.constant.output_files import *
|
|
9
|
+
from epstein_files.util.constant.strings import remove_question_marks
|
|
9
10
|
from epstein_files.util.env import args
|
|
10
11
|
from epstein_files.util.file_helper import coerce_file_stem
|
|
11
12
|
|
|
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
|
|
|
22
23
|
ROLLCALL = 'RollCall'
|
|
23
24
|
TWITTER = 'search X'
|
|
24
25
|
|
|
25
|
-
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/
|
|
26
|
+
GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
|
|
26
27
|
GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
|
|
27
28
|
ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
|
|
28
29
|
EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
|
|
30
|
+
TO_FROM = 'to/from'
|
|
29
31
|
|
|
30
32
|
extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
|
|
31
33
|
|
|
@@ -33,6 +35,7 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
|
|
|
33
35
|
# External URLs
|
|
34
36
|
COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
|
|
35
37
|
COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
|
|
38
|
+
EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
|
|
36
39
|
OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
|
|
37
40
|
RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
|
|
38
41
|
SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
|
|
@@ -71,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
|
|
|
71
74
|
search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
|
|
72
75
|
search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
|
|
73
76
|
|
|
74
|
-
|
|
75
77
|
PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
|
|
76
78
|
EPSTEIN_MEDIA: epstein_media_person_url,
|
|
77
79
|
EPSTEIN_WEB: epstein_web_person_url,
|
|
@@ -97,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
|
|
|
97
99
|
return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
|
|
98
100
|
|
|
99
101
|
|
|
102
|
+
def internal_link_to_emails(name: str) -> str:
|
|
103
|
+
"""e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
|
|
104
|
+
search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
|
|
105
|
+
return f"{this_site_url()}#:~:text={search_term}"
|
|
106
|
+
|
|
107
|
+
|
|
100
108
|
def link_markup(
|
|
101
109
|
url: str,
|
|
102
110
|
link_text: str | None = None,
|
|
@@ -120,6 +128,10 @@ def other_site_url() -> str:
|
|
|
120
128
|
return SITE_URLS[other_site_type()]
|
|
121
129
|
|
|
122
130
|
|
|
131
|
+
def this_site_url() -> str:
|
|
132
|
+
return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
|
|
133
|
+
|
|
134
|
+
|
|
123
135
|
CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
|
|
124
136
|
THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
|
|
125
137
|
THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)
|