epstein-files 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +63 -131
- epstein_files/documents/document.py +5 -1
- epstein_files/epstein_files.py +37 -35
- epstein_files/util/constant/names.py +2 -1
- epstein_files/util/constant/output_files.py +29 -0
- epstein_files/util/constant/urls.py +10 -7
- epstein_files/util/constants.py +140 -120
- epstein_files/util/data.py +12 -33
- epstein_files/util/doc_cfg.py +2 -10
- epstein_files/util/env.py +3 -2
- epstein_files/util/file_helper.py +0 -22
- epstein_files/util/highlighted_group.py +2 -2
- epstein_files/util/logging.py +6 -0
- epstein_files/util/output.py +180 -0
- epstein_files/util/rich.py +16 -9
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.2.dist-info}/METADATA +19 -4
- epstein_files-1.0.2.dist-info/RECORD +33 -0
- epstein_files-1.0.2.dist-info/entry_points.txt +7 -0
- epstein_files-1.0.1.dist-info/RECORD +0 -30
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.2.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.2.dist-info}/WHEEL +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -10,67 +10,24 @@ from sys import exit
|
|
|
10
10
|
|
|
11
11
|
from dotenv import load_dotenv
|
|
12
12
|
load_dotenv()
|
|
13
|
+
|
|
14
|
+
from rich.markup import escape
|
|
13
15
|
from rich.padding import Padding
|
|
16
|
+
from rich.panel import Panel
|
|
14
17
|
|
|
18
|
+
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
19
|
+
from epstein_files.documents.document import INFO_PADDING, Document
|
|
15
20
|
from epstein_files.documents.email import Email
|
|
16
|
-
from epstein_files.documents.messenger_log import MessengerLog
|
|
17
|
-
from epstein_files.epstein_files import EpsteinFiles, count_by_month
|
|
18
21
|
from epstein_files.util.constant.html import *
|
|
19
22
|
from epstein_files.util.constant.names import *
|
|
20
|
-
from epstein_files.util.constant.
|
|
21
|
-
from epstein_files.util.data import dict_sets_to_lists
|
|
23
|
+
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
|
|
22
24
|
from epstein_files.util.env import args, specified_names
|
|
23
|
-
from epstein_files.util.file_helper import
|
|
25
|
+
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
24
26
|
from epstein_files.util.logging import logger
|
|
25
|
-
from epstein_files.util.
|
|
27
|
+
from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
|
|
28
|
+
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
26
29
|
from epstein_files.util.timer import Timer
|
|
27
30
|
|
|
28
|
-
PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
|
|
29
|
-
|
|
30
|
-
# Order matters. Default names to print emails for.
|
|
31
|
-
DEFAULT_EMAILERS = [
|
|
32
|
-
JEREMY_RUBIN,
|
|
33
|
-
AL_SECKEL,
|
|
34
|
-
JOI_ITO,
|
|
35
|
-
JABOR_Y,
|
|
36
|
-
STEVEN_SINOFSKY,
|
|
37
|
-
DANIEL_SIAD,
|
|
38
|
-
JEAN_LUC_BRUNEL,
|
|
39
|
-
STEVEN_HOFFENBERG,
|
|
40
|
-
EHUD_BARAK,
|
|
41
|
-
MARTIN_NOWAK,
|
|
42
|
-
MASHA_DROKOVA,
|
|
43
|
-
RENATA_BOLOTOVA,
|
|
44
|
-
STEVE_BANNON,
|
|
45
|
-
OLIVIER_COLOM,
|
|
46
|
-
BORIS_NIKOLIC,
|
|
47
|
-
PRINCE_ANDREW,
|
|
48
|
-
JIDE_ZEITLIN,
|
|
49
|
-
DAVID_STERN,
|
|
50
|
-
MOHAMED_WAHEED_HASSAN,
|
|
51
|
-
JENNIFER_JACQUET,
|
|
52
|
-
None,
|
|
53
|
-
]
|
|
54
|
-
|
|
55
|
-
# Order matters. Default names to print tables w/email subject, timestamp, etc for.
|
|
56
|
-
# TODO: get rid of this
|
|
57
|
-
DEFAULT_EMAILER_TABLES: list[str | None] = [
|
|
58
|
-
GHISLAINE_MAXWELL,
|
|
59
|
-
LEON_BLACK,
|
|
60
|
-
LANDON_THOMAS,
|
|
61
|
-
KATHRYN_RUEMMLER,
|
|
62
|
-
DARREN_INDYKE,
|
|
63
|
-
RICHARD_KAHN,
|
|
64
|
-
TYLER_SHEARS,
|
|
65
|
-
SULTAN_BIN_SULAYEM,
|
|
66
|
-
DEEPAK_CHOPRA,
|
|
67
|
-
ARIANE_DE_ROTHSCHILD,
|
|
68
|
-
TOM_PRITZKER,
|
|
69
|
-
]
|
|
70
|
-
|
|
71
|
-
if len(set(DEFAULT_EMAILERS).intersection(set(DEFAULT_EMAILER_TABLES))) > 0:
|
|
72
|
-
raise RuntimeError(f"Some names appear in both DEFAULT_EMAILERS and DEFAULT_EMAILER_TABLES")
|
|
73
|
-
|
|
74
31
|
|
|
75
32
|
def generate_html() -> None:
|
|
76
33
|
if args.make_clean:
|
|
@@ -81,15 +38,7 @@ def generate_html() -> None:
|
|
|
81
38
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
82
39
|
|
|
83
40
|
if args.json_metadata:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if args.build:
|
|
87
|
-
with open(JSON_METADATA_PATH, 'w') as f:
|
|
88
|
-
f.write(json_str)
|
|
89
|
-
timer.print_at_checkpoint(f"Wrote {file_size_str(JSON_METADATA_PATH)} to '{JSON_METADATA_PATH}'")
|
|
90
|
-
else:
|
|
91
|
-
console.print_json(json_str, indent=4, sort_keys=True)
|
|
92
|
-
|
|
41
|
+
print_json_metadata(epstein_files)
|
|
93
42
|
exit()
|
|
94
43
|
|
|
95
44
|
print_header(epstein_files)
|
|
@@ -98,11 +47,11 @@ def generate_html() -> None:
|
|
|
98
47
|
exit()
|
|
99
48
|
|
|
100
49
|
if args.output_texts:
|
|
101
|
-
|
|
50
|
+
print_text_messages(epstein_files)
|
|
102
51
|
timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
|
|
103
52
|
|
|
104
53
|
if args.output_emails:
|
|
105
|
-
emails_printed =
|
|
54
|
+
emails_printed = print_emails(epstein_files)
|
|
106
55
|
timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
|
|
107
56
|
|
|
108
57
|
if args.output_other_files:
|
|
@@ -110,93 +59,76 @@ def generate_html() -> None:
|
|
|
110
59
|
timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
|
|
111
60
|
|
|
112
61
|
# Save output
|
|
113
|
-
write_html(
|
|
62
|
+
write_html(ALL_EMAILS_PATH if args.all_emails else TEXT_MSGS_HTML_PATH)
|
|
114
63
|
logger.warning(f"Total time: {timer.seconds_since_start_str()}")
|
|
115
64
|
|
|
116
65
|
# JSON stats (mostly used for building pytest checks)
|
|
117
66
|
if args.json_stats:
|
|
118
|
-
|
|
119
|
-
_print_json_stats(epstein_files)
|
|
120
|
-
|
|
67
|
+
print_json_stats(epstein_files)
|
|
121
68
|
|
|
122
|
-
def _print_emails(epstein_files: EpsteinFiles) -> int:
|
|
123
|
-
"""Returns number of emails printed."""
|
|
124
|
-
print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
|
|
125
|
-
print_other_site_link(is_header=False)
|
|
126
69
|
|
|
127
|
-
|
|
128
|
-
|
|
70
|
+
def epstein_diff():
|
|
71
|
+
"""Diff the cleaned up text of two files."""
|
|
72
|
+
Document.diff_files(args.positional_args)
|
|
129
73
|
|
|
130
|
-
emailers_to_print: list[str | None]
|
|
131
|
-
emailer_tables: list[str | None] = []
|
|
132
|
-
already_printed_emails: list[Email] = []
|
|
133
|
-
num_emails_printed_since_last_color_key = 0
|
|
134
74
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
else:
|
|
140
|
-
emailers_to_print = specified_names if specified_names else DEFAULT_EMAILERS
|
|
141
|
-
console.print('Email conversations grouped by counterparty can be found in the order listed below.')
|
|
142
|
-
print_numbered_list_of_emailers(emailers_to_print)
|
|
143
|
-
console.print("\nAfter that there's tables linking to (but not displaying) all known emails for each of these people:")
|
|
144
|
-
|
|
145
|
-
if len(specified_names) > 0:
|
|
146
|
-
print_numbered_list_of_emailers(DEFAULT_EMAILER_TABLES)
|
|
75
|
+
def epstein_search():
|
|
76
|
+
"""Search the cleaned up text of the files."""
|
|
77
|
+
_assert_positional_args()
|
|
78
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
147
79
|
|
|
148
|
-
for
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
80
|
+
for search_term in args.positional_args:
|
|
81
|
+
temp_highlighter = build_highlighter(search_term)
|
|
82
|
+
search_results = epstein_files.docs_matching(search_term, specified_names)
|
|
83
|
+
console.line(2)
|
|
84
|
+
print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
|
|
152
85
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
print_color_key()
|
|
156
|
-
num_emails_printed_since_last_color_key = 0
|
|
86
|
+
for search_result in search_results:
|
|
87
|
+
console.line()
|
|
157
88
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
89
|
+
if args.whole_file:
|
|
90
|
+
console.print(search_result.document)
|
|
91
|
+
else:
|
|
92
|
+
console.print(search_result.document.description_panel())
|
|
161
93
|
|
|
162
|
-
|
|
163
|
-
|
|
94
|
+
for matching_line in search_result.lines:
|
|
95
|
+
line_txt = matching_line.__rich__()
|
|
96
|
+
console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
|
|
164
97
|
|
|
165
|
-
epstein_files.print_email_device_info()
|
|
166
98
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
99
|
+
def epstein_show():
|
|
100
|
+
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
101
|
+
_assert_positional_args()
|
|
102
|
+
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
103
|
+
console.line()
|
|
171
104
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
105
|
+
if args.pickled:
|
|
106
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
107
|
+
docs = epstein_files.get_documents_by_id(ids)
|
|
108
|
+
else:
|
|
109
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
110
|
+
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
175
111
|
|
|
176
|
-
|
|
177
|
-
|
|
112
|
+
for doc in docs:
|
|
113
|
+
console.line()
|
|
114
|
+
console.print(doc)
|
|
178
115
|
|
|
116
|
+
if args.raw:
|
|
117
|
+
console.line()
|
|
118
|
+
console.print(Panel(f"*** {doc.url_slug} RAW ***", expand=False, style=doc._border_style()))
|
|
119
|
+
console.print(escape(doc.raw_text()))
|
|
179
120
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
log_files = epstein_files.imessage_logs_for(authors)
|
|
121
|
+
if isinstance(doc, Email):
|
|
122
|
+
console.line()
|
|
123
|
+
console.print(Panel(f"*** {doc.url_slug} actual_text ***", expand=False, style=doc._border_style()))
|
|
124
|
+
console.print(escape(doc._actual_text()))
|
|
185
125
|
|
|
186
|
-
for log_file in log_files:
|
|
187
|
-
console.print(Padding(log_file))
|
|
188
|
-
console.line(2)
|
|
189
126
|
|
|
190
|
-
|
|
127
|
+
def epstein_dump_urls() -> None:
|
|
128
|
+
write_urls()
|
|
191
129
|
|
|
192
130
|
|
|
193
|
-
def
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
print_json(f"{EMAIL_CLASS} Recipient Counts", epstein_files.email_recipient_counts, skip_falsey=True)
|
|
198
|
-
print_json("Email signature_substitution_countss", epstein_files.email_signature_substitution_counts(), skip_falsey=True)
|
|
199
|
-
print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures))
|
|
200
|
-
print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors))
|
|
201
|
-
print_json("email_unknown_recipient_file_ids", epstein_files.email_unknown_recipient_file_ids())
|
|
202
|
-
print_json("count_by_month", count_by_month(epstein_files.all_documents()))
|
|
131
|
+
def _assert_positional_args():
|
|
132
|
+
if not args.positional_args:
|
|
133
|
+
console.print(f"\n ERROR: No positional args!\n", style='red1')
|
|
134
|
+
exit(1)
|
|
@@ -255,7 +255,11 @@ class Document:
|
|
|
255
255
|
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
256
256
|
|
|
257
257
|
txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
|
|
258
|
-
txt.append(", ").append(key_value_txt('lines',
|
|
258
|
+
txt.append(", ").append(key_value_txt('lines', self.num_lines))
|
|
259
|
+
|
|
260
|
+
if self.config and self.config.dupe_of_id:
|
|
261
|
+
txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.dupe_of_id, style='magenta')))
|
|
262
|
+
|
|
259
263
|
return txt
|
|
260
264
|
|
|
261
265
|
def top_lines(self, n: int = 10) -> str:
|
epstein_files/epstein_files.py
CHANGED
|
@@ -19,6 +19,7 @@ from epstein_files.documents.emails.email_header import AUTHOR
|
|
|
19
19
|
from epstein_files.documents.json_file import JsonFile
|
|
20
20
|
from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
|
|
21
21
|
from epstein_files.documents.other_file import OtherFile
|
|
22
|
+
from epstein_files.util.constant.output_files import PICKLED_PATH
|
|
22
23
|
from epstein_files.util.constant.strings import *
|
|
23
24
|
from epstein_files.util.constant.urls import (EPSTEIN_WEB, JMAIL, epsteinify_name_url, epstein_web_person_url,
|
|
24
25
|
search_jmail_url, search_twitter_url)
|
|
@@ -26,7 +27,7 @@ from epstein_files.util.constants import *
|
|
|
26
27
|
from epstein_files.util.data import dict_sets_to_lists, json_safe, sort_dict
|
|
27
28
|
from epstein_files.util.doc_cfg import EmailCfg
|
|
28
29
|
from epstein_files.util.env import args, logger
|
|
29
|
-
from epstein_files.util.file_helper import DOCS_DIR,
|
|
30
|
+
from epstein_files.util.file_helper import DOCS_DIR, file_size_str
|
|
30
31
|
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
31
32
|
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table, console, highlighter,
|
|
32
33
|
link_text_obj, link_markup, print_author_header, print_centered, print_other_site_link, print_panel,
|
|
@@ -37,7 +38,7 @@ from epstein_files.util.timer import Timer
|
|
|
37
38
|
DEVICE_SIGNATURE = 'Device Signature'
|
|
38
39
|
DEVICE_SIGNATURE_PADDING = (1, 0)
|
|
39
40
|
NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
|
|
40
|
-
SLOW_FILE_SECONDS = 0
|
|
41
|
+
SLOW_FILE_SECONDS = 1.0
|
|
41
42
|
|
|
42
43
|
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
|
|
43
44
|
'ACT for America',
|
|
@@ -54,6 +55,7 @@ class EpsteinFiles:
|
|
|
54
55
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
55
56
|
json_files: list[JsonFile] = field(default_factory=list)
|
|
56
57
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
58
|
+
timer: Timer = field(default_factory=lambda: Timer())
|
|
57
59
|
|
|
58
60
|
# Analytics / calculations
|
|
59
61
|
email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
|
|
@@ -90,17 +92,18 @@ class EpsteinFiles:
|
|
|
90
92
|
self._tally_email_data()
|
|
91
93
|
|
|
92
94
|
@classmethod
|
|
93
|
-
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
|
|
95
|
+
def get_files(cls, timer: Timer | None = None, use_pickled: bool = False) -> 'EpsteinFiles':
|
|
94
96
|
"""Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
|
|
95
97
|
timer = timer or Timer()
|
|
96
98
|
|
|
97
|
-
if (args.pickled and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
99
|
+
if ((args.pickled or use_pickled) and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
98
100
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
99
101
|
epstein_files = pickle.load(file)
|
|
100
102
|
timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
|
|
103
|
+
epstein_files.timer = timer
|
|
101
104
|
return epstein_files
|
|
102
105
|
|
|
103
|
-
epstein_files = EpsteinFiles()
|
|
106
|
+
epstein_files = EpsteinFiles(timer=timer)
|
|
104
107
|
|
|
105
108
|
if args.overwrite_pickle or not PICKLED_PATH.exists():
|
|
106
109
|
with gzip.open(PICKLED_PATH, 'wb') as file:
|
|
@@ -197,37 +200,36 @@ class EpsteinFiles:
|
|
|
197
200
|
|
|
198
201
|
def json_metadata(self) -> str:
|
|
199
202
|
metadata = {
|
|
200
|
-
EMAIL_CLASS: [json_safe(
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
EMAIL_CLASS: [json_safe(d.metadata()) for d in self.emails],
|
|
204
|
+
JSON_FILE_CLASS: [json_safe(d.metadata()) for d in self.json_files],
|
|
205
|
+
MESSENGER_LOG_CLASS: [json_safe(d.metadata()) for d in self.imessage_logs],
|
|
206
|
+
OTHER_FILE_CLASS: [json_safe(d.metadata()) for d in self.other_files if not isinstance(d, JsonFile)],
|
|
203
207
|
}
|
|
204
208
|
|
|
205
209
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
206
210
|
|
|
207
|
-
def
|
|
208
|
-
|
|
209
|
-
dupes = defaultdict(int)
|
|
210
|
-
|
|
211
|
-
for doc in self.all_documents():
|
|
212
|
-
if doc.is_duplicate:
|
|
213
|
-
dupes[doc.class_name()] += 1
|
|
211
|
+
def non_json_other_files(self) -> list[OtherFile]:
|
|
212
|
+
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
214
213
|
|
|
214
|
+
def print_files_summary(self) -> None:
|
|
215
215
|
table = Table(title='Summary of Document Types')
|
|
216
216
|
add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
|
|
217
217
|
|
|
218
|
-
def add_row(label: str, docs: list
|
|
218
|
+
def add_row(label: str, docs: list):
|
|
219
|
+
known = None if isinstance(docs[0], JsonFile) else len([d for d in docs if d.author])
|
|
220
|
+
|
|
219
221
|
table.add_row(
|
|
220
222
|
label,
|
|
221
223
|
f"{len(docs):,}",
|
|
222
|
-
f"{known:,}" if known else NA_TXT,
|
|
223
|
-
f"{len(docs) - known:,}" if known else NA_TXT,
|
|
224
|
-
f"{
|
|
224
|
+
f"{known:,}" if known is not None else NA_TXT,
|
|
225
|
+
f"{len(docs) - known:,}" if known is not None else NA_TXT,
|
|
226
|
+
f"{len([d for d in docs if d.is_duplicate])}",
|
|
225
227
|
)
|
|
226
228
|
|
|
227
|
-
add_row('iMessage Logs', self.imessage_logs
|
|
228
|
-
add_row('Emails', self.emails
|
|
229
|
-
add_row('JSON Data', self.json_files
|
|
230
|
-
add_row('Other',
|
|
229
|
+
add_row('iMessage Logs', self.imessage_logs)
|
|
230
|
+
add_row('Emails', self.emails)
|
|
231
|
+
add_row('JSON Data', self.json_files)
|
|
232
|
+
add_row('Other', self.non_json_other_files())
|
|
231
233
|
console.print(Align.center(table))
|
|
232
234
|
console.line()
|
|
233
235
|
|
|
@@ -357,6 +359,18 @@ def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str]
|
|
|
357
359
|
return Padding(table, DEVICE_SIGNATURE_PADDING)
|
|
358
360
|
|
|
359
361
|
|
|
362
|
+
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
363
|
+
counts: dict[str | None, int] = defaultdict(int)
|
|
364
|
+
|
|
365
|
+
for doc in docs:
|
|
366
|
+
if doc.timestamp:
|
|
367
|
+
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
368
|
+
else:
|
|
369
|
+
counts[None] += 1
|
|
370
|
+
|
|
371
|
+
return counts
|
|
372
|
+
|
|
373
|
+
|
|
360
374
|
def document_cls(document: Document) -> Type[Document]:
|
|
361
375
|
search_area = document.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
362
376
|
|
|
@@ -380,15 +394,3 @@ def is_ok_for_epstein_web(name: str | None) -> bool:
|
|
|
380
394
|
return False
|
|
381
395
|
|
|
382
396
|
return True
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
386
|
-
counts: dict[str | None, int] = defaultdict(int)
|
|
387
|
-
|
|
388
|
-
for doc in docs:
|
|
389
|
-
if doc.timestamp:
|
|
390
|
-
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
391
|
-
else:
|
|
392
|
-
counts[None] += 1
|
|
393
|
-
|
|
394
|
-
return counts
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from epstein_files.util.constant.strings import QUESTION_MARKS, remove_question_marks
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
UNKNOWN = '(unknown)'
|
|
5
4
|
|
|
6
5
|
# Texting Names
|
|
@@ -170,6 +169,7 @@ ZUBAIR_KHAN = 'Zubair Khan'
|
|
|
170
169
|
|
|
171
170
|
# No communications but name is in the files
|
|
172
171
|
BILL_GATES = 'Bill Gates'
|
|
172
|
+
DONALD_TRUMP = 'Donald Trump'
|
|
173
173
|
ELON_MUSK = 'Elon Musk'
|
|
174
174
|
HENRY_HOLT = 'Henry Holt' # Actually a company?
|
|
175
175
|
IVANKA = 'Ivanka'
|
|
@@ -195,6 +195,7 @@ INSIGHTS_POD = f"InsightsPod" # Zubair bots
|
|
|
195
195
|
NEXT_MANAGEMENT = 'Next Management LLC'
|
|
196
196
|
JP_MORGAN = 'JP Morgan'
|
|
197
197
|
OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
|
|
198
|
+
TRUMP_ORG = 'Trump Organization'
|
|
198
199
|
UBS = 'UBS'
|
|
199
200
|
|
|
200
201
|
# Locations
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
4
|
+
|
|
5
|
+
EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
|
|
6
|
+
URLS_ENV = '.urls.env'
|
|
7
|
+
|
|
8
|
+
HTML_DIR = Path('docs')
|
|
9
|
+
ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
|
|
10
|
+
JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
|
|
11
|
+
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
12
|
+
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
|
|
13
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
14
|
+
|
|
15
|
+
BUILD_ARTIFACTS = [
|
|
16
|
+
ALL_EMAILS_PATH,
|
|
17
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH,
|
|
18
|
+
JSON_METADATA_PATH,
|
|
19
|
+
TEXT_MSGS_HTML_PATH,
|
|
20
|
+
WORD_COUNT_HTML_PATH,
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_clean() -> None:
|
|
25
|
+
"""Delete all build artifacts."""
|
|
26
|
+
for build_file in BUILD_ARTIFACTS:
|
|
27
|
+
if build_file.exists():
|
|
28
|
+
print(f"Removing build file '{build_file}'...")
|
|
29
|
+
build_file.unlink()
|
|
@@ -5,8 +5,9 @@ from typing import Literal
|
|
|
5
5
|
from inflection import parameterize
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
|
+
from epstein_files.util.constant.output_files import *
|
|
8
9
|
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
9
|
-
from epstein_files.util.file_helper import
|
|
10
|
+
from epstein_files.util.file_helper import coerce_file_stem
|
|
10
11
|
|
|
11
12
|
# Style stuff
|
|
12
13
|
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
@@ -21,15 +22,17 @@ EPSTEINIFY = 'epsteinify'
|
|
|
21
22
|
JMAIL = 'Jmail'
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
#
|
|
25
|
+
# Deployment URLS
|
|
26
|
+
# NOTE: don't rename these variables without changing deploy.sh!
|
|
25
27
|
GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
|
|
29
|
+
ALL_EMAILS_URL = f'{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}'
|
|
30
|
+
JSON_METADATA_URL = f'{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}'
|
|
31
|
+
WORD_COUNT_URL = f'{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}'
|
|
29
32
|
|
|
30
33
|
SITE_URLS: dict[SiteType, str] = {
|
|
31
|
-
EMAIL:
|
|
32
|
-
TEXT_MESSAGE:
|
|
34
|
+
EMAIL: ALL_EMAILS_URL,
|
|
35
|
+
TEXT_MESSAGE: TEXT_MSGS_URL,
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
|