epstein-files 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +66 -131
- epstein_files/documents/document.py +12 -3
- epstein_files/documents/email.py +33 -13
- epstein_files/documents/imessage/text_message.py +11 -15
- epstein_files/documents/messenger_log.py +15 -11
- epstein_files/documents/other_file.py +13 -8
- epstein_files/epstein_files.py +51 -43
- epstein_files/util/constant/names.py +21 -24
- epstein_files/util/constant/output_files.py +29 -0
- epstein_files/util/constant/strings.py +8 -2
- epstein_files/util/constant/urls.py +11 -7
- epstein_files/util/constants.py +325 -227
- epstein_files/util/data.py +12 -33
- epstein_files/util/doc_cfg.py +7 -14
- epstein_files/util/env.py +5 -3
- epstein_files/util/file_helper.py +0 -22
- epstein_files/util/highlighted_group.py +31 -26
- epstein_files/util/logging.py +7 -0
- epstein_files/util/output.py +179 -0
- epstein_files/util/rich.py +22 -10
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/METADATA +32 -7
- epstein_files-1.0.3.dist-info/RECORD +33 -0
- epstein_files-1.0.3.dist-info/entry_points.txt +7 -0
- epstein_files-1.0.1.dist-info/RECORD +0 -30
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/WHEEL +0 -0
epstein_files/epstein_files.py
CHANGED
|
@@ -19,14 +19,15 @@ from epstein_files.documents.emails.email_header import AUTHOR
|
|
|
19
19
|
from epstein_files.documents.json_file import JsonFile
|
|
20
20
|
from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
|
|
21
21
|
from epstein_files.documents.other_file import OtherFile
|
|
22
|
+
from epstein_files.util.constant.output_files import PICKLED_PATH
|
|
22
23
|
from epstein_files.util.constant.strings import *
|
|
23
|
-
from epstein_files.util.constant.urls import (EPSTEIN_WEB, JMAIL,
|
|
24
|
-
search_jmail_url, search_twitter_url)
|
|
24
|
+
from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
|
|
25
|
+
epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
|
|
25
26
|
from epstein_files.util.constants import *
|
|
26
27
|
from epstein_files.util.data import dict_sets_to_lists, json_safe, sort_dict
|
|
27
|
-
from epstein_files.util.doc_cfg import EmailCfg
|
|
28
|
+
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
28
29
|
from epstein_files.util.env import args, logger
|
|
29
|
-
from epstein_files.util.file_helper import DOCS_DIR,
|
|
30
|
+
from epstein_files.util.file_helper import DOCS_DIR, file_size_str
|
|
30
31
|
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
31
32
|
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table, console, highlighter,
|
|
32
33
|
link_text_obj, link_markup, print_author_header, print_centered, print_other_site_link, print_panel,
|
|
@@ -37,7 +38,7 @@ from epstein_files.util.timer import Timer
|
|
|
37
38
|
DEVICE_SIGNATURE = 'Device Signature'
|
|
38
39
|
DEVICE_SIGNATURE_PADDING = (1, 0)
|
|
39
40
|
NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
|
|
40
|
-
SLOW_FILE_SECONDS = 0
|
|
41
|
+
SLOW_FILE_SECONDS = 1.0
|
|
41
42
|
|
|
42
43
|
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
|
|
43
44
|
'ACT for America',
|
|
@@ -54,6 +55,7 @@ class EpsteinFiles:
|
|
|
54
55
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
55
56
|
json_files: list[JsonFile] = field(default_factory=list)
|
|
56
57
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
58
|
+
timer: Timer = field(default_factory=lambda: Timer())
|
|
57
59
|
|
|
58
60
|
# Analytics / calculations
|
|
59
61
|
email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
|
|
@@ -66,6 +68,7 @@ class EpsteinFiles:
|
|
|
66
68
|
"""Iterate through files and build appropriate objects."""
|
|
67
69
|
self.all_files = [f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')]
|
|
68
70
|
documents = []
|
|
71
|
+
file_type_count = defaultdict(int)
|
|
69
72
|
|
|
70
73
|
# Read through and classify all the files
|
|
71
74
|
for file_arg in self.all_files:
|
|
@@ -73,12 +76,13 @@ class EpsteinFiles:
|
|
|
73
76
|
document = Document(file_arg)
|
|
74
77
|
|
|
75
78
|
if document.length == 0:
|
|
76
|
-
logger.warning(f"Skipping empty file: {document}")
|
|
79
|
+
logger.warning(f"Skipping empty file: {document}]")
|
|
77
80
|
continue
|
|
78
81
|
|
|
79
82
|
cls = document_cls(document)
|
|
80
83
|
documents.append(cls(file_arg, text=document.text))
|
|
81
84
|
logger.info(str(documents[-1]))
|
|
85
|
+
file_type_count[cls.__name__] += 1
|
|
82
86
|
|
|
83
87
|
if doc_timer.seconds_since_start() > SLOW_FILE_SECONDS:
|
|
84
88
|
doc_timer.print_at_checkpoint(f"Slow file: {documents[-1]} processed")
|
|
@@ -90,17 +94,18 @@ class EpsteinFiles:
|
|
|
90
94
|
self._tally_email_data()
|
|
91
95
|
|
|
92
96
|
@classmethod
|
|
93
|
-
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
|
|
97
|
+
def get_files(cls, timer: Timer | None = None, use_pickled: bool = False) -> 'EpsteinFiles':
|
|
94
98
|
"""Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
|
|
95
99
|
timer = timer or Timer()
|
|
96
100
|
|
|
97
|
-
if (args.pickled and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
101
|
+
if ((args.pickled or use_pickled) and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
98
102
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
99
103
|
epstein_files = pickle.load(file)
|
|
100
104
|
timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
|
|
105
|
+
epstein_files.timer = timer
|
|
101
106
|
return epstein_files
|
|
102
107
|
|
|
103
|
-
epstein_files = EpsteinFiles()
|
|
108
|
+
epstein_files = EpsteinFiles(timer=timer)
|
|
104
109
|
|
|
105
110
|
if args.overwrite_pickle or not PICKLED_PATH.exists():
|
|
106
111
|
with gzip.open(PICKLED_PATH, 'wb') as file:
|
|
@@ -192,42 +197,39 @@ class EpsteinFiles:
|
|
|
192
197
|
def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
|
|
193
198
|
return MessengerLog.logs_for(author, self.imessage_logs)
|
|
194
199
|
|
|
195
|
-
def identified_imessage_log_count(self) -> int:
|
|
196
|
-
return len([log for log in self.imessage_logs if log.author])
|
|
197
|
-
|
|
198
200
|
def json_metadata(self) -> str:
|
|
201
|
+
"""Create a JSON string containing metadata for all the files."""
|
|
199
202
|
metadata = {
|
|
200
|
-
EMAIL_CLASS:
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
EMAIL_CLASS: _sorted_metadata(self.emails),
|
|
204
|
+
JSON_FILE_CLASS: _sorted_metadata(self.json_files),
|
|
205
|
+
MESSENGER_LOG_CLASS: _sorted_metadata(self.imessage_logs),
|
|
206
|
+
OTHER_FILE_CLASS: _sorted_metadata(self.non_json_other_files()),
|
|
203
207
|
}
|
|
204
208
|
|
|
205
209
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
206
210
|
|
|
207
|
-
def
|
|
208
|
-
|
|
209
|
-
dupes = defaultdict(int)
|
|
210
|
-
|
|
211
|
-
for doc in self.all_documents():
|
|
212
|
-
if doc.is_duplicate:
|
|
213
|
-
dupes[doc.class_name()] += 1
|
|
211
|
+
def non_json_other_files(self) -> list[OtherFile]:
|
|
212
|
+
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
214
213
|
|
|
214
|
+
def print_files_summary(self) -> None:
|
|
215
215
|
table = Table(title='Summary of Document Types')
|
|
216
216
|
add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
|
|
217
217
|
|
|
218
|
-
def add_row(label: str, docs: list
|
|
218
|
+
def add_row(label: str, docs: list):
|
|
219
|
+
known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
|
|
220
|
+
|
|
219
221
|
table.add_row(
|
|
220
222
|
label,
|
|
221
223
|
f"{len(docs):,}",
|
|
222
|
-
f"{known:,}" if known else NA_TXT,
|
|
223
|
-
f"{len(docs) - known:,}" if known else NA_TXT,
|
|
224
|
-
f"{
|
|
224
|
+
f"{known:,}" if known is not None else NA_TXT,
|
|
225
|
+
f"{len(docs) - known:,}" if known is not None else NA_TXT,
|
|
226
|
+
f"{len([d for d in docs if d.is_duplicate])}",
|
|
225
227
|
)
|
|
226
228
|
|
|
227
|
-
add_row('iMessage Logs', self.imessage_logs
|
|
228
|
-
add_row('Emails', self.emails
|
|
229
|
-
add_row('JSON Data', self.json_files
|
|
230
|
-
add_row('Other',
|
|
229
|
+
add_row('iMessage Logs', self.imessage_logs)
|
|
230
|
+
add_row('Emails', self.emails)
|
|
231
|
+
add_row('JSON Data', self.json_files)
|
|
232
|
+
add_row('Other', self.non_json_other_files())
|
|
231
233
|
console.print(Align.center(table))
|
|
232
234
|
console.line()
|
|
233
235
|
|
|
@@ -272,7 +274,7 @@ class EpsteinFiles:
|
|
|
272
274
|
def print_emailer_counts_table(self) -> None:
|
|
273
275
|
footer = f"Identified authors of {self.attributed_email_count():,} emails out of {len(self.emails):,}."
|
|
274
276
|
counts_table = Table(title=f"Email Counts", caption=footer, header_style="bold")
|
|
275
|
-
add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_WEB, 'Twitter'])
|
|
277
|
+
add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
|
|
276
278
|
|
|
277
279
|
emailer_counts = {
|
|
278
280
|
emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
|
|
@@ -288,7 +290,8 @@ class EpsteinFiles:
|
|
|
288
290
|
str(self.email_author_counts[p]),
|
|
289
291
|
str(self.email_recipient_counts[p]),
|
|
290
292
|
'' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
|
|
291
|
-
'' if not is_ok_for_epstein_web(p) else link_text_obj(
|
|
293
|
+
'' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
|
|
294
|
+
'' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
|
|
292
295
|
'' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
|
|
293
296
|
)
|
|
294
297
|
|
|
@@ -297,7 +300,7 @@ class EpsteinFiles:
|
|
|
297
300
|
def print_imessage_summary(self) -> None:
|
|
298
301
|
"""Print summary table and stats for text messages."""
|
|
299
302
|
console.print(MessengerLog.summary_table(self.imessage_logs))
|
|
300
|
-
text_summary_msg = f"\nDeanonymized {self.
|
|
303
|
+
text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
|
|
301
304
|
text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
|
|
302
305
|
console.print(text_summary_msg)
|
|
303
306
|
imessage_msg_count = sum([len(log.messages()) for log in self.imessage_logs])
|
|
@@ -357,6 +360,18 @@ def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str]
|
|
|
357
360
|
return Padding(table, DEVICE_SIGNATURE_PADDING)
|
|
358
361
|
|
|
359
362
|
|
|
363
|
+
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
364
|
+
counts: dict[str | None, int] = defaultdict(int)
|
|
365
|
+
|
|
366
|
+
for doc in docs:
|
|
367
|
+
if doc.timestamp:
|
|
368
|
+
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
369
|
+
else:
|
|
370
|
+
counts[None] += 1
|
|
371
|
+
|
|
372
|
+
return counts
|
|
373
|
+
|
|
374
|
+
|
|
360
375
|
def document_cls(document: Document) -> Type[Document]:
|
|
361
376
|
search_area = document.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
362
377
|
|
|
@@ -382,13 +397,6 @@ def is_ok_for_epstein_web(name: str | None) -> bool:
|
|
|
382
397
|
return True
|
|
383
398
|
|
|
384
399
|
|
|
385
|
-
def
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
for doc in docs:
|
|
389
|
-
if doc.timestamp:
|
|
390
|
-
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
391
|
-
else:
|
|
392
|
-
counts[None] += 1
|
|
393
|
-
|
|
394
|
-
return counts
|
|
400
|
+
def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
|
|
401
|
+
docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
|
|
402
|
+
return [json_safe(d.metadata()) for d in docs_sorted_by_id]
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from epstein_files.util.constant.strings import QUESTION_MARKS, remove_question_marks
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
UNKNOWN = '(unknown)'
|
|
5
4
|
|
|
6
5
|
# Texting Names
|
|
@@ -170,6 +169,7 @@ ZUBAIR_KHAN = 'Zubair Khan'
|
|
|
170
169
|
|
|
171
170
|
# No communications but name is in the files
|
|
172
171
|
BILL_GATES = 'Bill Gates'
|
|
172
|
+
DONALD_TRUMP = 'Donald Trump'
|
|
173
173
|
ELON_MUSK = 'Elon Musk'
|
|
174
174
|
HENRY_HOLT = 'Henry Holt' # Actually a company?
|
|
175
175
|
IVANKA = 'Ivanka'
|
|
@@ -195,16 +195,13 @@ INSIGHTS_POD = f"InsightsPod" # Zubair bots
|
|
|
195
195
|
NEXT_MANAGEMENT = 'Next Management LLC'
|
|
196
196
|
JP_MORGAN = 'JP Morgan'
|
|
197
197
|
OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
|
|
198
|
+
TRUMP_ORG = 'Trump Organization'
|
|
198
199
|
UBS = 'UBS'
|
|
199
200
|
|
|
200
|
-
# Locations
|
|
201
|
-
PALM_BEACH = 'Palm Beach'
|
|
202
|
-
VIRGIN_ISLANDS = 'Virgin Islands'
|
|
203
|
-
|
|
204
201
|
# First and last names that should be made part of a highlighting regex for emailers
|
|
205
202
|
NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
206
|
-
'Al', 'Alfredo', 'Allen', 'Alex', 'Alexander', 'Amanda', 'Andres', 'Andrew',
|
|
207
|
-
'Bard', 'Barry', 'Bill', 'Black', 'Brad', 'Bruce',
|
|
203
|
+
'Al', 'Alan', 'Alfredo', 'Allen', 'Alex', 'Alexander', 'Amanda', 'Andres', 'Andrew',
|
|
204
|
+
'Bard', 'Barry', 'Bill', 'Black', 'Boris', 'Brad', 'Bruce',
|
|
208
205
|
'Carolyn', 'Chris', 'Christina',
|
|
209
206
|
'Dan', 'Daniel', 'Danny', 'Darren', 'Dave', 'David',
|
|
210
207
|
'Ed', 'Edward', 'Edwards', 'Epstein', 'Eric', 'Erika', 'Etienne',
|
|
@@ -214,10 +211,10 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
|
214
211
|
'Ian',
|
|
215
212
|
'Jack', 'James', 'Jay', 'Jean', 'Jeff', 'Jeffrey', 'Jennifer', 'Jeremy', 'jessica', 'Joel', 'John', 'Jon', 'Jonathan', 'Joseph', 'Jr',
|
|
216
213
|
'Kahn', 'Katherine', 'Ken', 'Kevin',
|
|
217
|
-
'Leon', 'Lesley', 'Linda', 'Link', 'Lisa',
|
|
214
|
+
'Larry', 'Leon', 'Lesley', 'Linda', 'Link', 'Lisa',
|
|
218
215
|
'Mann', 'Marc', 'Marie', 'Mark', 'Martin', 'Melanie', 'Michael', 'Mike', 'Miller', 'Mitchell', 'Miles', 'Morris', 'Moskowitz',
|
|
219
216
|
'Nancy', 'Neal', 'New',
|
|
220
|
-
'Paul', 'Paula', 'Pen', 'Peter', 'Philip',
|
|
217
|
+
'Paul', 'Paula', 'Pen', 'Peter', 'Philip', 'Prince',
|
|
221
218
|
'Randall', 'Reid', 'Richard', 'Robert', 'Rodriguez', 'Roger', 'Rosenberg', 'Ross', 'Roth', 'Rubin',
|
|
222
219
|
'Scott', 'Sean', 'Stanley', 'Stern', 'Stephen', 'Steve', 'Steven', 'Stone', 'Susan',
|
|
223
220
|
'The', 'Thomas', 'Tim', 'Tom', 'Tyler',
|
|
@@ -227,25 +224,25 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
|
227
224
|
]]
|
|
228
225
|
|
|
229
226
|
# Names to color white in the word counts
|
|
230
|
-
OTHER_NAMES = """
|
|
231
|
-
aaron albert alberto alec
|
|
232
|
-
baldwin barack
|
|
233
|
-
chapman charles charlie
|
|
234
|
-
|
|
235
|
-
|
|
227
|
+
OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
228
|
+
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
229
|
+
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
|
|
230
|
+
chapman charles charlie christopher clint cohen colin collins conway
|
|
231
|
+
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
232
|
+
edmond elizabeth emily entwistle erik evelyn
|
|
236
233
|
ferguson flachsbart francis franco frank
|
|
237
|
-
gardner gary geoff geoffrey
|
|
234
|
+
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
238
235
|
hancock harold harrison harry helen hirsch hofstadter horowitz hussein
|
|
239
236
|
isaac isaacson
|
|
240
|
-
jamie
|
|
241
|
-
kate kathy kelly
|
|
242
|
-
leonard lenny lieberman louis lynch lynn
|
|
243
|
-
marcus marianne matt matthew melissa michele michelle
|
|
237
|
+
jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
238
|
+
kate kathy kelly kim kruger kyle
|
|
239
|
+
leo leonard lenny leslie lieberman louis lynch lynn
|
|
240
|
+
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
244
241
|
nicole nussbaum
|
|
245
|
-
|
|
246
|
-
rafael ray richardson rob robin
|
|
247
|
-
sara sarah seligman serge sergey silverman sloman smith snowden sorkin
|
|
248
|
-
ted theresa thompson tiffany
|
|
242
|
+
paulson philippe
|
|
243
|
+
rafael ray richardson rob robin ron rudolph ryan
|
|
244
|
+
sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
245
|
+
ted theresa thompson tiffany timothy
|
|
249
246
|
valeria
|
|
250
247
|
walter warren weinstein weiss william
|
|
251
248
|
zach zack
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
4
|
+
|
|
5
|
+
EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
|
|
6
|
+
URLS_ENV = '.urls.env'
|
|
7
|
+
|
|
8
|
+
HTML_DIR = Path('docs')
|
|
9
|
+
ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
|
|
10
|
+
JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
|
|
11
|
+
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
12
|
+
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
|
|
13
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
14
|
+
|
|
15
|
+
BUILD_ARTIFACTS = [
|
|
16
|
+
ALL_EMAILS_PATH,
|
|
17
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH,
|
|
18
|
+
JSON_METADATA_PATH,
|
|
19
|
+
TEXT_MSGS_HTML_PATH,
|
|
20
|
+
WORD_COUNT_HTML_PATH,
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_clean() -> None:
|
|
25
|
+
"""Delete all build artifacts."""
|
|
26
|
+
for build_file in BUILD_ARTIFACTS:
|
|
27
|
+
if build_file.exists():
|
|
28
|
+
print(f"Removing build file '{build_file}'...")
|
|
29
|
+
build_file.unlink()
|
|
@@ -30,6 +30,10 @@ REPUTATION = 'reputation'
|
|
|
30
30
|
SOCIAL = 'social'
|
|
31
31
|
SPEECH = 'speech'
|
|
32
32
|
|
|
33
|
+
# Locations
|
|
34
|
+
PALM_BEACH = 'Palm Beach'
|
|
35
|
+
VIRGIN_ISLANDS = 'Virgin Islands'
|
|
36
|
+
|
|
33
37
|
# Publications
|
|
34
38
|
BBC = 'BBC'
|
|
35
39
|
BLOOMBERG = 'Bloomberg'
|
|
@@ -38,10 +42,12 @@ DAILY_MAIL = 'Daily Mail'
|
|
|
38
42
|
DAILY_TELEGRAPH = "Daily Telegraph"
|
|
39
43
|
LA_TIMES = 'LA Times'
|
|
40
44
|
MIAMI_HERALD = 'Miami Herald'
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
NYT = "New York Times"
|
|
46
|
+
PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
|
|
47
|
+
PALM_BEACH_POST = f'{PALM_BEACH} Post'
|
|
43
48
|
THE_REAL_DEAL = 'The Real Deal'
|
|
44
49
|
WAPO = 'WaPo'
|
|
50
|
+
VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
|
|
45
51
|
|
|
46
52
|
# Site types
|
|
47
53
|
EMAIL = 'email'
|
|
@@ -5,8 +5,9 @@ from typing import Literal
|
|
|
5
5
|
from inflection import parameterize
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
|
+
from epstein_files.util.constant.output_files import *
|
|
8
9
|
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
9
|
-
from epstein_files.util.file_helper import
|
|
10
|
+
from epstein_files.util.file_helper import coerce_file_stem
|
|
10
11
|
|
|
11
12
|
# Style stuff
|
|
12
13
|
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
@@ -21,15 +22,17 @@ EPSTEINIFY = 'epsteinify'
|
|
|
21
22
|
JMAIL = 'Jmail'
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
#
|
|
25
|
+
# Deployment URLS
|
|
26
|
+
# NOTE: don't rename these variables without changing deploy.sh!
|
|
25
27
|
GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
|
|
29
|
+
ALL_EMAILS_URL = f'{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}'
|
|
30
|
+
JSON_METADATA_URL = f'{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}'
|
|
31
|
+
WORD_COUNT_URL = f'{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}'
|
|
29
32
|
|
|
30
33
|
SITE_URLS: dict[SiteType, str] = {
|
|
31
|
-
EMAIL:
|
|
32
|
-
TEXT_MESSAGE:
|
|
34
|
+
EMAIL: ALL_EMAILS_URL,
|
|
35
|
+
TEXT_MESSAGE: TEXT_MSGS_URL,
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
|
|
@@ -68,6 +71,7 @@ epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(
|
|
|
68
71
|
epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, True)
|
|
69
72
|
epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
|
|
70
73
|
epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
|
|
74
|
+
epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
|
|
71
75
|
|
|
72
76
|
epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{file_stem}.jpg"
|
|
73
77
|
epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
|