epstein-files 1.1.0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +16 -27
- epstein_files/documents/communication.py +10 -14
- epstein_files/documents/document.py +1 -1
- epstein_files/documents/email.py +152 -75
- epstein_files/documents/imessage/text_message.py +42 -25
- epstein_files/documents/messenger_log.py +31 -12
- epstein_files/documents/other_file.py +13 -12
- epstein_files/epstein_files.py +20 -81
- epstein_files/util/constant/common_words.py +3 -3
- epstein_files/util/constant/html.py +4 -5
- epstein_files/util/constant/names.py +18 -6
- epstein_files/util/constant/strings.py +6 -2
- epstein_files/util/constant/urls.py +1 -1
- epstein_files/util/constants.py +19 -23
- epstein_files/util/env.py +55 -36
- epstein_files/util/file_helper.py +1 -2
- epstein_files/util/highlighted_group.py +1019 -189
- epstein_files/util/logging.py +8 -1
- epstein_files/util/output.py +183 -89
- epstein_files/util/rich.py +35 -69
- epstein_files/util/timer.py +1 -1
- epstein_files/util/word_count.py +3 -4
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/METADATA +4 -1
- epstein_files-1.1.3.dist-info/RECORD +33 -0
- epstein_files-1.1.0.dist-info/RECORD +0 -33
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/entry_points.txt +0 -0
epstein_files/util/logging.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from os import environ
|
|
3
|
+
from sys import exit
|
|
3
4
|
|
|
4
5
|
import datefinder
|
|
5
6
|
import rich_argparse_plus
|
|
@@ -39,7 +40,7 @@ class LogHighlighter(ReprHighlighter):
|
|
|
39
40
|
|
|
40
41
|
log_console = Console(color_system='256', theme=Theme(LOG_THEME))
|
|
41
42
|
log_handler = RichHandler(console=log_console, highlighter=LogHighlighter())
|
|
42
|
-
logging.basicConfig(level="NOTSET", format="%(message)s", datefmt="
|
|
43
|
+
logging.basicConfig(level="NOTSET", format="%(message)s", datefmt=" ", handlers=[log_handler])
|
|
43
44
|
logger = logging.getLogger("rich")
|
|
44
45
|
|
|
45
46
|
|
|
@@ -58,3 +59,9 @@ if env_log_level_str:
|
|
|
58
59
|
|
|
59
60
|
logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
|
|
60
61
|
logger.setLevel(env_log_level)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def exit_with_error(msg: str) -> None:
|
|
65
|
+
print('')
|
|
66
|
+
logger.error(msg + '\n')
|
|
67
|
+
exit(1)
|
epstein_files/util/output.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
from rich.padding import Padding
|
|
4
4
|
|
|
5
5
|
from epstein_files.documents.document import Document
|
|
6
|
-
from epstein_files.documents.email import Email
|
|
6
|
+
from epstein_files.documents.email import KRASSNER_RECIPIENTS, Email
|
|
7
7
|
from epstein_files.documents.messenger_log import MessengerLog
|
|
8
8
|
from epstein_files.documents.other_file import FIRST_FEW_LINES, OtherFile
|
|
9
9
|
from epstein_files.epstein_files import EpsteinFiles, count_by_month
|
|
@@ -11,10 +11,11 @@ from epstein_files.util.constant import output_files
|
|
|
11
11
|
from epstein_files.util.constant.html import *
|
|
12
12
|
from epstein_files.util.constant.names import *
|
|
13
13
|
from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
|
|
14
|
-
from epstein_files.util.constant.strings import TIMESTAMP_DIM
|
|
15
|
-
from epstein_files.util.data import dict_sets_to_lists
|
|
14
|
+
from epstein_files.util.constant.strings import TIMESTAMP_DIM, TIMESTAMP_STYLE
|
|
15
|
+
from epstein_files.util.data import dict_sets_to_lists, sort_dict
|
|
16
16
|
from epstein_files.util.env import args
|
|
17
17
|
from epstein_files.util.file_helper import log_file_write
|
|
18
|
+
from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT
|
|
18
19
|
from epstein_files.util.logging import logger
|
|
19
20
|
from epstein_files.util.rich import *
|
|
20
21
|
|
|
@@ -35,53 +36,71 @@ DEFAULT_EMAILERS = [
|
|
|
35
36
|
EHUD_BARAK,
|
|
36
37
|
MARTIN_NOWAK,
|
|
37
38
|
STEVE_BANNON,
|
|
39
|
+
TYLER_SHEARS,
|
|
38
40
|
JIDE_ZEITLIN,
|
|
41
|
+
CHRISTINA_GALBRAITH,
|
|
39
42
|
DAVID_STERN,
|
|
40
43
|
MOHAMED_WAHEED_HASSAN,
|
|
41
44
|
JENNIFER_JACQUET,
|
|
42
|
-
TYLER_SHEARS,
|
|
43
|
-
CHRISTINA_GALBRAITH,
|
|
44
45
|
ZUBAIR_KHAN,
|
|
45
46
|
None,
|
|
46
47
|
]
|
|
47
48
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
ARIANE_DE_ROTHSCHILD,
|
|
49
|
+
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
|
|
50
|
+
'ACT for America',
|
|
51
|
+
'BS Stern',
|
|
52
|
+
INTELLIGENCE_SQUARED,
|
|
53
|
+
UNKNOWN,
|
|
54
54
|
]
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
|
|
57
|
+
def print_email_timeline(epstein_files: EpsteinFiles) -> None:
|
|
58
|
+
"""Print a table of all emails in chronological order."""
|
|
59
|
+
emails = [email for email in epstein_files.non_duplicate_emails() if not email.is_junk_mail()]
|
|
60
|
+
table = build_table(f'All {len(emails):,} Non-Junk Emails in Chronological Order', highlight=True)
|
|
61
|
+
table.add_column('ID', style=TIMESTAMP_DIM)
|
|
62
|
+
table.add_column('Sent At', style='dim')
|
|
63
|
+
table.add_column('Author', max_width=20)
|
|
64
|
+
table.add_column('Recipients', max_width=22)
|
|
65
|
+
table.add_column('Length', justify='right', style='wheat4')
|
|
66
|
+
table.add_column('Subject')
|
|
67
|
+
|
|
68
|
+
for email in Document.sort_by_timestamp(emails):
|
|
69
|
+
if email.is_junk_mail():
|
|
70
|
+
continue
|
|
71
|
+
|
|
72
|
+
table.add_row(
|
|
73
|
+
email.epstein_media_link(link_txt=email.source_file_id()),
|
|
74
|
+
email.timestamp_without_seconds(),
|
|
75
|
+
email.author_txt(),
|
|
76
|
+
email.recipients_txt(max_full_names=1),
|
|
77
|
+
f"{email.length()}",
|
|
78
|
+
email.subject(),
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
console.line(2)
|
|
82
|
+
console.print(table)
|
|
58
83
|
|
|
59
84
|
|
|
60
85
|
def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
|
|
61
86
|
"""Returns emails that were printed (may contain dupes if printed for both author and recipient)."""
|
|
62
87
|
print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
|
|
63
|
-
print_other_page_link(epstein_files)
|
|
64
88
|
emailers_to_print: list[str | None]
|
|
65
|
-
emailer_tables: list[str | None] = []
|
|
66
89
|
already_printed_emails: list[Email] = []
|
|
67
90
|
num_emails_printed_since_last_color_key = 0
|
|
68
91
|
|
|
69
92
|
if args.names:
|
|
70
93
|
emailers_to_print = args.names
|
|
71
94
|
else:
|
|
72
|
-
print_centered(Padding(epstein_files.table_of_emailers(), (2, 0)))
|
|
73
|
-
|
|
74
95
|
if args.all_emails:
|
|
75
96
|
emailers_to_print = sorted(epstein_files.all_emailers(), key=lambda e: epstein_files.earliest_email_at(e))
|
|
76
|
-
console.print('Email conversations are sorted chronologically based on time of the first email.')
|
|
77
|
-
print_numbered_list_of_emailers(emailers_to_print, epstein_files)
|
|
78
97
|
else:
|
|
79
98
|
emailers_to_print = DEFAULT_EMAILERS
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
99
|
+
|
|
100
|
+
print_other_page_link(epstein_files)
|
|
101
|
+
console.line(2)
|
|
102
|
+
console.print(_table_of_selected_emailers(emailers_to_print, epstein_files))
|
|
103
|
+
console.print(Padding(_all_emailers_table(epstein_files), (2, 0)))
|
|
85
104
|
|
|
86
105
|
for author in emailers_to_print:
|
|
87
106
|
author_emails = epstein_files.print_emails_for(author)
|
|
@@ -93,12 +112,6 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
|
|
|
93
112
|
print_color_key()
|
|
94
113
|
num_emails_printed_since_last_color_key = 0
|
|
95
114
|
|
|
96
|
-
if emailer_tables:
|
|
97
|
-
print_author_panel(f"Email Tables for {len(emailer_tables)} Other People", 'white')
|
|
98
|
-
|
|
99
|
-
for name in DEFAULT_EMAILER_TABLES:
|
|
100
|
-
epstein_files.print_emails_table_for(name)
|
|
101
|
-
|
|
102
115
|
if not args.names:
|
|
103
116
|
epstein_files.print_email_device_info()
|
|
104
117
|
|
|
@@ -112,8 +125,9 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
|
|
|
112
125
|
|
|
113
126
|
|
|
114
127
|
def print_json_files(epstein_files: EpsteinFiles):
|
|
128
|
+
"""Print all the JsonFile objects"""
|
|
115
129
|
if args.build:
|
|
116
|
-
json_data = {
|
|
130
|
+
json_data = {jf.url_slug: jf.json_data() for jf in epstein_files.json_files}
|
|
117
131
|
|
|
118
132
|
with open(JSON_FILES_JSON_PATH, 'w') as f:
|
|
119
133
|
f.write(json.dumps(json_data, sort_keys=True))
|
|
@@ -125,6 +139,17 @@ def print_json_files(epstein_files: EpsteinFiles):
|
|
|
125
139
|
console.print_json(json_file.json_str(), indent=4, sort_keys=False)
|
|
126
140
|
|
|
127
141
|
|
|
142
|
+
def print_json_metadata(epstein_files: EpsteinFiles) -> None:
|
|
143
|
+
json_str = epstein_files.json_metadata()
|
|
144
|
+
|
|
145
|
+
if args.build:
|
|
146
|
+
with open(JSON_METADATA_PATH, 'w') as f:
|
|
147
|
+
f.write(json_str)
|
|
148
|
+
log_file_write(JSON_METADATA_PATH)
|
|
149
|
+
else:
|
|
150
|
+
console.print_json(json_str, indent=4, sort_keys=True)
|
|
151
|
+
|
|
152
|
+
|
|
128
153
|
def print_json_stats(epstein_files: EpsteinFiles) -> None:
|
|
129
154
|
console.line(5)
|
|
130
155
|
console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')
|
|
@@ -140,72 +165,32 @@ def print_json_stats(epstein_files: EpsteinFiles) -> None:
|
|
|
140
165
|
|
|
141
166
|
def print_other_files_section(files: list[OtherFile], epstein_files: EpsteinFiles) -> None:
|
|
142
167
|
"""Returns the OtherFile objects that were interesting enough to print."""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
console.line(1)
|
|
150
|
-
else:
|
|
151
|
-
print_other_page_link(epstein_files)
|
|
152
|
-
console.line(2)
|
|
153
|
-
|
|
154
|
-
for table in [category_table, other_files_preview_table]:
|
|
155
|
-
table.title = f"{header_pfx}{table.title}"
|
|
156
|
-
|
|
157
|
-
print_centered(category_table)
|
|
158
|
-
console.line(2)
|
|
168
|
+
title_pfx = '' if args.all_other_files else 'Selected '
|
|
169
|
+
category_table = OtherFile.count_by_category_table(files, title_pfx=title_pfx)
|
|
170
|
+
other_files_preview_table = OtherFile.files_preview_table(files, title_pfx=title_pfx)
|
|
171
|
+
print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {title_pfx}Files That Are Neither Emails Nor Text Messages")
|
|
172
|
+
print_other_page_link(epstein_files)
|
|
173
|
+
print_centered(Padding(category_table, (2, 0)))
|
|
159
174
|
console.print(other_files_preview_table)
|
|
160
175
|
|
|
161
176
|
|
|
162
|
-
def print_text_messages_section(
|
|
177
|
+
def print_text_messages_section(imessage_logs: list[MessengerLog]) -> None:
|
|
163
178
|
"""Print summary table and stats for text messages."""
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
for log_file in epstein_files.imessage_logs:
|
|
168
|
-
console.print(Padding(log_file))
|
|
169
|
-
console.line(2)
|
|
170
|
-
|
|
171
|
-
print_centered(MessengerLog.summary_table(epstein_files.imessage_logs))
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def write_complete_emails_timeline(epstein_files: EpsteinFiles) -> None:
|
|
175
|
-
table = build_table('All Non-Junk Emails In Chronological Order', highlight=True)
|
|
176
|
-
table.add_column('ID', style='dim')
|
|
177
|
-
table.add_column('Sent At', style=TIMESTAMP_DIM)
|
|
178
|
-
table.add_column('Author', max_width=22)
|
|
179
|
-
table.add_column('Recipients', max_width=30)
|
|
180
|
-
table.add_column('Length', justify='right', style='wheat4')
|
|
181
|
-
table.add_column('Subject')
|
|
182
|
-
|
|
183
|
-
for email in Document.sort_by_timestamp(epstein_files.non_duplicate_emails()):
|
|
184
|
-
if email.is_junk_mail():
|
|
185
|
-
continue
|
|
186
|
-
|
|
187
|
-
table.add_row(
|
|
188
|
-
email.source_file_id(),
|
|
189
|
-
email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
|
|
190
|
-
email.author_txt,
|
|
191
|
-
email.recipients_txt(max_full_names=1),
|
|
192
|
-
f"{email.length()}",
|
|
193
|
-
email.subject(),
|
|
194
|
-
)
|
|
179
|
+
if not imessage_logs:
|
|
180
|
+
logger.warning(f"No MessengerLog objects to output...")
|
|
181
|
+
return
|
|
195
182
|
|
|
183
|
+
print_section_header('All of His Text Messages')
|
|
184
|
+
print_centered("(conversations are sorted chronologically based on timestamp of first message in the log file)", style='dim')
|
|
196
185
|
console.line(2)
|
|
197
|
-
console.print(table)
|
|
198
|
-
|
|
199
186
|
|
|
200
|
-
|
|
201
|
-
|
|
187
|
+
if not args.names:
|
|
188
|
+
print_centered(MessengerLog.summary_table(imessage_logs))
|
|
189
|
+
console.line(2)
|
|
202
190
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
log_file_write(JSON_METADATA_PATH)
|
|
207
|
-
else:
|
|
208
|
-
console.print_json(json_str, indent=4, sort_keys=True)
|
|
191
|
+
for log_file in imessage_logs:
|
|
192
|
+
console.print(Padding(log_file))
|
|
193
|
+
console.line(2)
|
|
209
194
|
|
|
210
195
|
|
|
211
196
|
def write_urls() -> None:
|
|
@@ -230,6 +215,115 @@ def write_urls() -> None:
|
|
|
230
215
|
logger.warning(f"Wrote {len(url_vars)} URL variables to '{URLS_ENV}'\n")
|
|
231
216
|
|
|
232
217
|
|
|
218
|
+
def _all_emailers_table(epstein_files: EpsteinFiles) -> Table:
|
|
219
|
+
attributed_emails = [e for e in epstein_files.non_duplicate_emails() if e.author]
|
|
220
|
+
footer = f"(identified {len(epstein_files.email_author_counts)} authors of {len(attributed_emails):,}"
|
|
221
|
+
footer = f"{footer} out of {len(epstein_files.non_duplicate_emails()):,} emails)"
|
|
222
|
+
counts_table = build_table("All of the Email Counterparties Who Appear in the Files", caption=footer)
|
|
223
|
+
|
|
224
|
+
add_cols_to_table(counts_table, [
|
|
225
|
+
'Name',
|
|
226
|
+
{'name': 'Count', 'justify': 'right', 'style': 'bold bright_white'},
|
|
227
|
+
{'name': 'Sent', 'justify': 'right', 'style': 'gray74'},
|
|
228
|
+
{'name': 'Recv', 'justify': 'right', 'style': 'gray74'},
|
|
229
|
+
{'name': 'First', 'style': TIMESTAMP_STYLE},
|
|
230
|
+
{'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
|
|
231
|
+
{'name': 'Days', 'justify': 'right', 'style': 'dim'},
|
|
232
|
+
JMAIL,
|
|
233
|
+
EPSTEIN_MEDIA,
|
|
234
|
+
EPSTEIN_WEB,
|
|
235
|
+
'Twitter',
|
|
236
|
+
])
|
|
237
|
+
|
|
238
|
+
emailer_counts = {
|
|
239
|
+
emailer: epstein_files.email_author_counts[emailer] + epstein_files.email_recipient_counts[emailer]
|
|
240
|
+
for emailer in epstein_files.all_emailers(True)
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
for name, count in sort_dict(emailer_counts):
|
|
244
|
+
style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
|
|
245
|
+
emails = epstein_files.emails_for(name)
|
|
246
|
+
|
|
247
|
+
counts_table.add_row(
|
|
248
|
+
Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
|
|
249
|
+
f"{count:,}",
|
|
250
|
+
str(epstein_files.email_author_counts[name]),
|
|
251
|
+
str(epstein_files.email_recipient_counts[name]),
|
|
252
|
+
emails[0].date_str(),
|
|
253
|
+
emails[-1].date_str(),
|
|
254
|
+
f"{epstein_files.email_conversation_length_in_days(name)}",
|
|
255
|
+
link_text_obj(search_jmail_url(name), JMAIL) if name else '',
|
|
256
|
+
link_text_obj(epstein_media_person_url(name), EPSTEIN_MEDIA) if _is_ok_for_epstein_web(name) else '',
|
|
257
|
+
link_text_obj(epstein_web_person_url(name), EPSTEIN_WEB) if _is_ok_for_epstein_web(name) else '',
|
|
258
|
+
link_text_obj(search_twitter_url(name), 'search X') if name else '',
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
return counts_table
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _is_ok_for_epstein_web(name: str | None) -> bool:
|
|
265
|
+
"""Return True if it's likely that EpsteinWeb has a page for this name."""
|
|
266
|
+
if name is None or ' ' not in name:
|
|
267
|
+
return False
|
|
268
|
+
elif '@' in name or '/' in name or '??' in name:
|
|
269
|
+
return False
|
|
270
|
+
elif name in INVALID_FOR_EPSTEIN_WEB:
|
|
271
|
+
return False
|
|
272
|
+
|
|
273
|
+
return True
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _table_of_selected_emailers(_list: list[str | None], epstein_files: EpsteinFiles) -> Table:
|
|
277
|
+
"""Add the first emailed_at timestamp for each emailer if 'epstein_files' provided."""
|
|
278
|
+
header_pfx = '' if args.all_emails else 'Selected '
|
|
279
|
+
table = build_table(f'{header_pfx}Email Conversations Grouped by Counterparty Will Appear in this Order')
|
|
280
|
+
table.add_column('Start Date')
|
|
281
|
+
table.add_column('Name', max_width=25, no_wrap=True)
|
|
282
|
+
table.add_column('Category', justify='center', style='dim italic')
|
|
283
|
+
table.add_column('Num', justify='right', style='wheat4')
|
|
284
|
+
table.add_column('Info', style='white italic')
|
|
285
|
+
current_year = 1990
|
|
286
|
+
current_year_month = current_year * 12
|
|
287
|
+
grey_idx = 0
|
|
288
|
+
|
|
289
|
+
for i, name in enumerate(_list):
|
|
290
|
+
earliest_email_date = (epstein_files.earliest_email_at(name) or FALLBACK_TIMESTAMP).date()
|
|
291
|
+
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
292
|
+
|
|
293
|
+
# Color year rollovers more brightly
|
|
294
|
+
if current_year != earliest_email_date.year:
|
|
295
|
+
grey_idx = 0
|
|
296
|
+
elif current_year_month != year_months:
|
|
297
|
+
grey_idx = ((current_year_month - 1) % 12) + 1
|
|
298
|
+
|
|
299
|
+
current_year_month = year_months
|
|
300
|
+
current_year = earliest_email_date.year
|
|
301
|
+
category = get_category_txt_for_name(name)
|
|
302
|
+
info = get_info_for_name(name)
|
|
303
|
+
style = get_style_for_name(name, default_style='none')
|
|
304
|
+
|
|
305
|
+
if category and category.plain == 'paula': # TODO: hacky
|
|
306
|
+
category = None
|
|
307
|
+
elif category and info:
|
|
308
|
+
info = info.removeprefix(f"{category.plain}, ").removeprefix(category.plain)
|
|
309
|
+
elif not name:
|
|
310
|
+
info = Text('(emails whose author or recipient could not be determined)', style='medium_purple4')
|
|
311
|
+
elif name in JUNK_EMAILERS:
|
|
312
|
+
category = Text('junk', style='gray30')
|
|
313
|
+
elif style == 'none' and '@' not in name and not (category or info):
|
|
314
|
+
info = QUESTION_MARKS_TXT
|
|
315
|
+
|
|
316
|
+
table.add_row(
|
|
317
|
+
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[grey_idx]}"),
|
|
318
|
+
Text(name or UNKNOWN, style=get_style_for_name(name or UNKNOWN, default_style='dim')),
|
|
319
|
+
category,
|
|
320
|
+
f"{len(epstein_files.emails_for(name)):,}",
|
|
321
|
+
info or '',
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
return table
|
|
325
|
+
|
|
326
|
+
|
|
233
327
|
def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
|
|
234
328
|
"""Log warnings if some emails were never printed."""
|
|
235
329
|
email_ids_that_were_printed = set([email.file_id for email in already_printed_emails])
|
epstein_files/util/rich.py
CHANGED
|
@@ -20,25 +20,27 @@ from epstein_files.util.constants import FALLBACK_TIMESTAMP, HEADER_ABBREVIATION
|
|
|
20
20
|
from epstein_files.util.data import json_safe
|
|
21
21
|
from epstein_files.util.env import args
|
|
22
22
|
from epstein_files.util.file_helper import log_file_write
|
|
23
|
-
from epstein_files.util.highlighted_group import ALL_HIGHLIGHTS, HIGHLIGHTED_NAMES, EpsteinHighlighter
|
|
23
|
+
from epstein_files.util.highlighted_group import (ALL_HIGHLIGHTS, HIGHLIGHTED_NAMES, EpsteinHighlighter,
|
|
24
|
+
get_category_txt_for_name, get_info_for_name, get_style_for_name)
|
|
24
25
|
from epstein_files.util.logging import logger
|
|
25
26
|
|
|
26
27
|
TITLE_WIDTH = 50
|
|
28
|
+
MIN_AUTHOR_PANEL_WIDTH = 80
|
|
27
29
|
NUM_COLOR_KEY_COLS = 4
|
|
28
30
|
NA_TXT = Text(NA, style='dim')
|
|
29
|
-
QUESTION_MARK_TXT = Text(QUESTION_MARKS, style='dim')
|
|
30
31
|
GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
|
|
31
32
|
|
|
32
33
|
DEFAULT_NAME_STYLE = 'gray46'
|
|
33
34
|
INFO_STYLE = 'white dim italic'
|
|
34
|
-
KEY_STYLE='honeydew2 bold'
|
|
35
|
-
LAST_TIMESTAMP_STYLE='wheat4'
|
|
35
|
+
KEY_STYLE = 'honeydew2 bold'
|
|
36
|
+
LAST_TIMESTAMP_STYLE = 'wheat4'
|
|
37
|
+
OTHER_PAGE_MSG_STYLE = 'gray78 dim'
|
|
36
38
|
SECTION_HEADER_STYLE = 'bold white on blue3'
|
|
37
39
|
SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
|
|
38
40
|
SUBSTACK_POST_LINK_STYLE = 'bright_cyan'
|
|
39
41
|
SYMBOL_STYLE = 'grey70'
|
|
40
42
|
TABLE_BORDER_STYLE = 'grey46'
|
|
41
|
-
TABLE_TITLE_STYLE = f"
|
|
43
|
+
TABLE_TITLE_STYLE = f"gray54 italic"
|
|
42
44
|
TITLE_STYLE = 'black on bright_white bold'
|
|
43
45
|
|
|
44
46
|
AUX_SITE_LINK_STYLE = 'dark_orange3'
|
|
@@ -46,6 +48,7 @@ OTHER_SITE_LINK_STYLE = 'dark_goldenrod'
|
|
|
46
48
|
|
|
47
49
|
DEFAULT_TABLE_KWARGS = {
|
|
48
50
|
'border_style': TABLE_BORDER_STYLE,
|
|
51
|
+
'caption_style': 'navajo_white3 dim italic',
|
|
49
52
|
'header_style': "bold",
|
|
50
53
|
'title_style': TABLE_TITLE_STYLE,
|
|
51
54
|
}
|
|
@@ -82,15 +85,21 @@ highlighter = CONSOLE_ARGS['highlighter']
|
|
|
82
85
|
def add_cols_to_table(table: Table, col_names: list[str | dict]) -> None:
|
|
83
86
|
"""Left most col will be left justified, rest are center justified."""
|
|
84
87
|
for i, col in enumerate(col_names):
|
|
88
|
+
justify='left' if i == 0 else 'center'
|
|
89
|
+
|
|
85
90
|
if isinstance(col, dict):
|
|
86
91
|
col_name = col['name']
|
|
87
92
|
kwargs = col
|
|
88
93
|
del kwargs['name']
|
|
94
|
+
|
|
95
|
+
if 'justify' in col:
|
|
96
|
+
justify = col['justify']
|
|
97
|
+
del col['justify']
|
|
89
98
|
else:
|
|
90
99
|
col_name = col
|
|
91
100
|
kwargs = {}
|
|
92
101
|
|
|
93
|
-
table.add_column(col_name, justify=
|
|
102
|
+
table.add_column(col_name, justify=justify, **kwargs)
|
|
94
103
|
|
|
95
104
|
|
|
96
105
|
def build_highlighter(pattern: str) -> EpsteinHighlighter:
|
|
@@ -144,10 +153,9 @@ def parenthesize(msg: str | Text, style: str = '') -> Text:
|
|
|
144
153
|
|
|
145
154
|
def print_author_panel(msg: str, color: str | None, footer: str | None = None) -> None:
|
|
146
155
|
"""Print a panel with the name of an emailer and a few tidbits of information about them."""
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
panel = Panel(txt, width=80, style=f"black on {color} bold")
|
|
156
|
+
color = 'white' if (not color or color == DEFAULT) else color
|
|
157
|
+
width = max(MIN_AUTHOR_PANEL_WIDTH, len(msg) + 4)
|
|
158
|
+
panel = Panel(Text(msg, justify='center'), width=width, style=f"black on {color} bold")
|
|
151
159
|
console.print('\n', Align.center(panel))
|
|
152
160
|
|
|
153
161
|
if footer:
|
|
@@ -181,16 +189,11 @@ def print_color_key() -> None:
|
|
|
181
189
|
|
|
182
190
|
|
|
183
191
|
def print_title_page_header(epstein_files: 'EpsteinFiles') -> None:
|
|
184
|
-
not_optimized_msg = f"This page isn't optimized for mobile"
|
|
185
|
-
|
|
186
|
-
if not args.all_emails:
|
|
187
|
-
not_optimized_msg += f" but if you get past the header it should be readable"
|
|
188
|
-
|
|
189
|
-
console.print(f"{not_optimized_msg}.\n", style='dim')
|
|
190
192
|
print_page_title(width=TITLE_WIDTH)
|
|
191
193
|
site_type = EMAIL if (args.all_emails or args.email_timeline) else TEXT_MESSAGE
|
|
192
|
-
title = f"This is the " + ('chronological ' if args.email_timeline else '') + f"Epstein {site_type.title()}s
|
|
193
|
-
print_starred_header(title, num_spaces=
|
|
194
|
+
title = f"This is the " + ('chronological ' if args.email_timeline else '') + f"Epstein {site_type.title()}s Page"
|
|
195
|
+
print_starred_header(title, num_spaces=9 if args.all_emails else 6, num_stars=14)
|
|
196
|
+
print_centered(f"These documents come from the Nov. 2025 House Oversight Committee release.\n", style='gray74')
|
|
194
197
|
other_site_msg = "another page with" + (' all of' if other_site_type() == EMAIL else '')
|
|
195
198
|
other_site_msg += f" Epstein's {other_site_type()}s also generated by this code"
|
|
196
199
|
|
|
@@ -218,8 +221,6 @@ def print_title_page_tables(epstein_files: 'EpsteinFiles') -> None:
|
|
|
218
221
|
|
|
219
222
|
|
|
220
223
|
def print_json(label: str, obj: object, skip_falsey: bool = False) -> None:
|
|
221
|
-
print(obj)
|
|
222
|
-
|
|
223
224
|
if isinstance(obj, dict):
|
|
224
225
|
if skip_falsey:
|
|
225
226
|
obj = {k: v for k, v in obj.items() if v}
|
|
@@ -232,68 +233,33 @@ def print_json(label: str, obj: object, skip_falsey: bool = False) -> None:
|
|
|
232
233
|
console.line()
|
|
233
234
|
|
|
234
235
|
|
|
235
|
-
def print_numbered_list_of_emailers(_list: list[str | None], epstein_files = None) -> None:
|
|
236
|
-
"""Add the first emailed_at timestamp for each emailer if 'epstein_files' provided."""
|
|
237
|
-
current_year = 1990
|
|
238
|
-
current_year_month = current_year * 12
|
|
239
|
-
grey_idx = 0
|
|
240
|
-
console.line()
|
|
241
|
-
|
|
242
|
-
for i, name in enumerate(_list):
|
|
243
|
-
indent = ' ' if i < 9 else (' ' if i < 99 else ' ')
|
|
244
|
-
txt = Text((indent) + F" {i + 1}. ", style=DEFAULT_NAME_STYLE)
|
|
245
|
-
|
|
246
|
-
if epstein_files:
|
|
247
|
-
earliest_email_date = (epstein_files.earliest_email_at(name) or FALLBACK_TIMESTAMP).date()
|
|
248
|
-
year_months = (earliest_email_date.year * 12) + earliest_email_date.month
|
|
249
|
-
|
|
250
|
-
# Color year rollovers more brightly
|
|
251
|
-
if current_year != earliest_email_date.year:
|
|
252
|
-
grey_idx = 0
|
|
253
|
-
elif current_year_month != year_months:
|
|
254
|
-
grey_idx = ((current_year_month - 1) % 12) + 1
|
|
255
|
-
|
|
256
|
-
current_year_month = year_months
|
|
257
|
-
current_year = earliest_email_date.year
|
|
258
|
-
txt.append(escape(f"[{earliest_email_date}] "), style=f"grey{GREY_NUMBERS[grey_idx]}")
|
|
259
|
-
|
|
260
|
-
txt.append(highlighter(name or UNKNOWN))
|
|
261
|
-
|
|
262
|
-
if epstein_files:
|
|
263
|
-
num_days_in_converation = epstein_files.email_conversation_length_in_days(name)
|
|
264
|
-
msg = f" ({len(epstein_files.emails_for(name))} emails over {num_days_in_converation:,} days)"
|
|
265
|
-
txt.append(msg, style=f'dim italic')
|
|
266
|
-
|
|
267
|
-
console.print(txt)
|
|
268
|
-
|
|
269
|
-
console.line()
|
|
270
|
-
|
|
271
|
-
|
|
272
236
|
def print_other_page_link(epstein_files: 'EpsteinFiles') -> None:
|
|
273
237
|
markup_msg = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
|
|
274
238
|
|
|
275
239
|
if other_site_type() == EMAIL:
|
|
276
|
-
txt = Text.from_markup(markup_msg).append(f' is uncurated and has all {len(epstein_files.
|
|
277
|
-
txt.append(f"
|
|
240
|
+
txt = Text.from_markup(markup_msg).append(f' is uncurated and has all {len(epstein_files.emails):,} emails')
|
|
241
|
+
txt.append(f" and {len(epstein_files.other_files)} unclassifiable files")
|
|
278
242
|
else:
|
|
279
|
-
txt = Text.from_markup(markup_msg).append(f' displays
|
|
243
|
+
txt = Text.from_markup(markup_msg).append(f' displays a limited collection of emails and')
|
|
280
244
|
txt.append(" unclassifiable files of particular interest")
|
|
281
245
|
|
|
282
|
-
print_centered(parenthesize(txt), style=
|
|
246
|
+
print_centered(parenthesize(txt), style=OTHER_PAGE_MSG_STYLE)
|
|
283
247
|
chrono_emails_markup = link_text_obj(CHRONOLOGICAL_EMAILS_URL, 'a page', style='light_slate_grey bold')
|
|
284
248
|
chrono_emails_txt = Text(f"there's also ").append(chrono_emails_markup)
|
|
285
249
|
chrono_emails_txt.append(' with a table of all the emails in chronological order')
|
|
286
|
-
print_centered(parenthesize(chrono_emails_txt), style=
|
|
250
|
+
print_centered(parenthesize(chrono_emails_txt), style=OTHER_PAGE_MSG_STYLE)
|
|
287
251
|
|
|
288
252
|
|
|
289
253
|
def print_page_title(expand: bool = True, width: int | None = None) -> None:
|
|
254
|
+
warning = f"This page was generated by {link_markup('https://pypi.org/project/rich/', 'rich')}."
|
|
255
|
+
print_centered(f"{warning} It is not optimized for mobile.", style='dim')
|
|
290
256
|
title_panel = Panel(Text(PAGE_TITLE, justify='center'), expand=expand, style=TITLE_STYLE, width=width)
|
|
291
|
-
|
|
257
|
+
print_centered(vertically_pad(title_panel))
|
|
292
258
|
_print_social_media_links()
|
|
293
259
|
console.line(2)
|
|
294
260
|
|
|
295
261
|
|
|
296
|
-
def
|
|
262
|
+
def print_subtitle_panel(msg: str, style: str = 'black on white', padding: tuple | None = None, centered: bool = False) -> None:
|
|
297
263
|
_padding: list[int] = list(padding or [0, 0, 0, 0])
|
|
298
264
|
_padding[2] += 1 # Bottom pad
|
|
299
265
|
actual_padding: tuple[int, int, int, int] = tuple(_padding)
|
|
@@ -308,7 +274,7 @@ def print_panel(msg: str, style: str = 'black on white', padding: tuple | None =
|
|
|
308
274
|
def print_section_header(msg: str, style: str = SECTION_HEADER_STYLE, is_centered: bool = False) -> None:
|
|
309
275
|
panel = Panel(Text(msg, justify='center'), expand=True, padding=(1, 1), style=style)
|
|
310
276
|
panel = Align.center(panel) if is_centered else panel
|
|
311
|
-
console.print(Padding(panel, (3,
|
|
277
|
+
console.print(Padding(panel, (3, 0, 1, 0)))
|
|
312
278
|
|
|
313
279
|
|
|
314
280
|
def print_starred_header(msg: str, num_stars: int = 7, num_spaces: int = 2, style: str = TITLE_STYLE) -> None:
|
|
@@ -340,8 +306,8 @@ def wrap_in_markup_style(msg: str, style: str | None = None) -> str:
|
|
|
340
306
|
return msg
|
|
341
307
|
|
|
342
308
|
|
|
343
|
-
def write_html(output_path: Path) -> None:
|
|
344
|
-
if not
|
|
309
|
+
def write_html(output_path: Path | None) -> None:
|
|
310
|
+
if not output_path:
|
|
345
311
|
logger.warning(f"Not writing HTML because args.build={args.build}.")
|
|
346
312
|
return
|
|
347
313
|
|
|
@@ -393,5 +359,5 @@ def _print_social_media_links() -> None:
|
|
|
393
359
|
print_centered(join_texts(social_links, join=' / '))#, encloser='()'))#, encloser='‹›'))
|
|
394
360
|
|
|
395
361
|
|
|
396
|
-
|
|
397
|
-
|
|
362
|
+
if args.colors_only:
|
|
363
|
+
print_json('THEME_STYLES', THEME_STYLES)
|
epstein_files/util/timer.py
CHANGED
|
@@ -11,7 +11,7 @@ class Timer:
|
|
|
11
11
|
decimals: int = 2
|
|
12
12
|
|
|
13
13
|
def print_at_checkpoint(self, msg: str) -> None:
|
|
14
|
-
logger.warning(f"{msg} in {self.seconds_since_checkpoint_str()}")
|
|
14
|
+
logger.warning(f"{msg} in {self.seconds_since_checkpoint_str()}...")
|
|
15
15
|
self.checkpoint_at = time.perf_counter()
|
|
16
16
|
|
|
17
17
|
def seconds_since_checkpoint_str(self) -> str:
|
epstein_files/util/word_count.py
CHANGED
|
@@ -17,7 +17,7 @@ from epstein_files.util.data import ALL_NAMES, flatten, sort_dict
|
|
|
17
17
|
from epstein_files.util.env import args
|
|
18
18
|
from epstein_files.util.logging import logger
|
|
19
19
|
from epstein_files.util.rich import (console, highlighter, print_centered, print_color_key, print_page_title,
|
|
20
|
-
|
|
20
|
+
print_subtitle_panel, print_starred_header, write_html)
|
|
21
21
|
from epstein_files.util.search_result import MatchedLine, SearchResult
|
|
22
22
|
from epstein_files.util.timer import Timer
|
|
23
23
|
|
|
@@ -196,7 +196,6 @@ def write_word_counts_html() -> None:
|
|
|
196
196
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
197
197
|
email_subjects: set[str] = set()
|
|
198
198
|
word_count = WordCount()
|
|
199
|
-
|
|
200
199
|
# Remove dupes, junk mail, and fwded articles from emails
|
|
201
200
|
emails = [e for e in epstein_files.non_duplicate_emails() if not (e.is_junk_mail() or e.is_fwded_article())]
|
|
202
201
|
|
|
@@ -225,7 +224,7 @@ def write_word_counts_html() -> None:
|
|
|
225
224
|
for i, msg in enumerate(imessage_log.messages):
|
|
226
225
|
if args.names and msg.author not in args.names:
|
|
227
226
|
continue
|
|
228
|
-
elif HTML_REGEX.search(
|
|
227
|
+
elif HTML_REGEX.search(msg.text):
|
|
229
228
|
continue
|
|
230
229
|
|
|
231
230
|
for word in msg.text.split():
|
|
@@ -239,7 +238,7 @@ def write_word_counts_html() -> None:
|
|
|
239
238
|
console.line()
|
|
240
239
|
console.print(word_count)
|
|
241
240
|
console.line(2)
|
|
242
|
-
|
|
241
|
+
print_subtitle_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
|
|
243
242
|
console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
|
|
244
243
|
write_html(WORD_COUNT_HTML_PATH)
|
|
245
244
|
timer.print_at_checkpoint(f"Finished counting words")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.3
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
5
|
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -81,6 +81,9 @@ epstein_diff 030999 020442
|
|
|
81
81
|
```
|
|
82
82
|
|
|
83
83
|
The first time you run anything it will take a few minutes to fix all the janky OCR text, attribute the redacted emails, etc. After that things will be quick.
|
|
84
|
+
|
|
85
|
+
The commands used to build the various sites that are deployed on Github Pages can be found in [`deploy.sh`](./deploy.sh).
|
|
86
|
+
|
|
84
87
|
Run `epstein_generate --help` for command line option assistance.
|
|
85
88
|
|
|
86
89
|
**Optional:** There are a handful of emails that I extracted from the legal filings they were contained in. If you want to include these files in your local analysis you'll need to copy those files from the repo into your local document directory. Something like:
|