epstein-files 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,437 @@
1
+ import gzip
2
+ import pickle
3
+ import re
4
+ from collections import defaultdict
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Literal, Sequence
9
+
10
+ from rich.align import Align
11
+ from rich.console import Group
12
+ from rich.padding import Padding
13
+ from rich.table import Table
14
+ from rich.text import Text
15
+
16
+ from epstein_files.documents.document import Document
17
+ from epstein_files.documents.email import DETECT_EMAIL_REGEX, JUNK_EMAILERS, KRASSNER_RECIPIENTS, USELESS_EMAILERS, Email
18
+ from epstein_files.documents.emails.email_header import AUTHOR
19
+ from epstein_files.documents.json_file import JsonFile
20
+ from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
21
+ from epstein_files.documents.other_file import OtherFile
22
+ from epstein_files.util.constant.strings import *
23
+ from epstein_files.util.constant.urls import (EPSTEIN_WEB, JMAIL, epsteinify_name_url, epstein_web_person_url,
24
+ search_jmail_url, search_twitter_url)
25
+ from epstein_files.util.constants import *
26
+ from epstein_files.util.data import Timer, dict_sets_to_lists, iso_timestamp, sort_dict
27
+ from epstein_files.util.env import args, logger
28
+ from epstein_files.util.file_cfg import MessageCfg
29
+ from epstein_files.util.file_helper import DOCS_DIR, FILENAME_LENGTH, PICKLED_PATH, file_size_str
30
+ from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
31
+ from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, QUESTION_MARK_TXT, add_cols_to_table, console,
32
+ highlighter, link_text_obj, link_markup, print_author_header, print_centered, print_other_site_link, print_panel,
33
+ print_section_header, vertically_pad)
34
+ from epstein_files.util.search_result import SearchResult
35
+
36
# Column header labels shared by the tables below
DEVICE_SIGNATURE = 'Device Signature'
FIRST_FEW_LINES = 'First Few Lines'
# (vertical, horizontal) rich padding around the device signature tables
DEVICE_SIGNATURE_PADDING = (1, 0)
# Lowercased names excluded from all_emailers() by default (Epstein himself plus low-value senders)
NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]

# Names for which EpsteinWeb almost certainly has no person page (used by is_ok_for_epstein_web())
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
    'ACT for America',
    'BS Stern',
    INTELLIGENCE_SQUARED,
    UNKNOWN,
]
47
+
48
+
49
@dataclass
class EpsteinFiles:
    """Scans DOCS_DIR at construction time, classifying every document and accumulating email/iMessage analytics."""
    all_files: list[Path] = field(init=False)  # set in __post_init__; every non-hidden file in DOCS_DIR
    emails: list[Email] = field(default_factory=list)
    imessage_logs: list[MessengerLog] = field(default_factory=list)
    json_files: list[JsonFile] = field(default_factory=list)
    other_files: list[OtherFile] = field(default_factory=list)  # after __post_init__ this also includes json_files

    # Analytics / calculations
    email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
    email_authors_to_device_signatures: dict[str, set] = field(default_factory=lambda: defaultdict(set))  # author -> 'Sent from [DEVICE]' signatures
    email_device_signatures_to_authors: dict[str, set] = field(default_factory=lambda: defaultdict(set))  # signature -> authors seen using it
    email_recipient_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))  # None key counts emails with no known recipient
    _email_unknown_recipient_file_ids: set[str] = field(default_factory=set)  # file_ids of emails with no recipients at all
63
+
64
def __post_init__(self):
    """Scan every file in DOCS_DIR and classify it as JSON / iMessage log / email / other, collecting email stats."""
    self.all_files = [f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')]

    # Read through and classify all the files
    for file_arg in self.all_files:
        logger.info(f"Scanning '{file_arg.name}'...")
        document = Document(file_arg)

        if document.length == 0:
            logger.info(f"Skipping empty file {document.description().plain}")
        elif document.text[0] == '{':
            # Handle JSON files (classification is by leading '{' only)
            self.json_files.append(JsonFile(file_arg, text=document.text))
            logger.info(self.json_files[-1].description().plain)
        elif MSG_REGEX.search(document.text):
            # Handle iMessage log files
            self.imessage_logs.append(MessengerLog(file_arg, text=document.text))
            logger.info(self.imessage_logs[-1].description().plain)
        elif DETECT_EMAIL_REGEX.match(document.text) or isinstance(document.config, MessageCfg):
            # Handle emails
            email = Email(file_arg, text=document.text)
            logger.info(email.description().plain)
            self.emails.append(email)
            self.email_author_counts[email.author] += 1

            # Emails with no recipients are tracked separately (and counted under the None key)
            if len(email.recipients) == 0:
                self._email_unknown_recipient_file_ids.add(email.file_id)
                self.email_recipient_counts[None] += 1
            else:
                for recipient in email.recipients:
                    self.email_recipient_counts[recipient] += 1

            # Record 'Sent from [DEVICE]' signatures in both directions (author -> devices, device -> authors)
            if email.sent_from_device:
                self.email_authors_to_device_signatures[email.author_or_unknown()].add(email.sent_from_device)
                self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
        else:
            # Handle OtherFiles (anything that matched none of the above)
            self.other_files.append(OtherFile(file_arg, text=document.text))
            logger.info(self.other_files[-1].description().plain)

    # Sort everything chronologically; JSON files are folded into other_files from here on
    self.emails = Document.sort_by_timestamp(self.emails)
    self.imessage_logs = Document.sort_by_timestamp(self.imessage_logs)
    self.other_files = Document.sort_by_timestamp(self.other_files + self.json_files)
107
+
108
@classmethod
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
    """Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
    timer = timer or Timer()

    # Fast path: load the gzipped pickle cache when allowed and present.
    # NOTE(review): pickle.load() can execute arbitrary code — safe only because
    # PICKLED_PATH is a locally-written cache, never untrusted input.
    if (args.pickled and PICKLED_PATH.exists()) and not args.overwrite_pickle:
        with gzip.open(PICKLED_PATH, 'rb') as file:
            epstein_files = pickle.load(file)
            timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
            return epstein_files

    # Slow path: rescan DOCS_DIR from scratch
    epstein_files = EpsteinFiles()

    # Refresh the cache when forced or when no cache exists yet
    if args.overwrite_pickle or not PICKLED_PATH.exists():
        with gzip.open(PICKLED_PATH, 'wb') as file:
            pickle.dump(epstein_files, file)
            logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")

    timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
    return epstein_files
128
+
129
def all_documents(self) -> Sequence[Document]:
    """Every classified document: iMessage logs first, then emails, then everything else."""
    return [*self.imessage_logs, *self.emails, *self.other_files]
131
+
132
def all_emailers(self, include_useless: bool = False) -> list[str | None]:
    """Returns all emailers except Epstein and USELESS_EMAILERS, sorted from least frequent to most."""
    candidates = list(self.email_author_counts) + list(self.email_recipient_counts)

    if not include_useless:
        candidates = [name for name in candidates if name is None or name.lower() not in NOT_INCLUDED_EMAILERS]

    def total_mentions(name):
        # Frequency = times seen as author plus times seen as recipient
        return self.email_author_counts[name] + self.email_recipient_counts[name]

    return sorted(set(candidates), key=total_mentions)
137
+
138
def attributed_email_count(self) -> int:
    """Number of emails whose author was identified (i.e. author is not UNKNOWN)."""
    total = 0

    for author, num_emails in self.email_author_counts.items():
        if author != UNKNOWN:
            total += num_emails

    return total
140
+
141
def docs_matching(
    self,
    pattern: re.Pattern | str,
    file_type: Literal['all', 'other'] = 'all',
    names: list[str | None] | None = None
) -> list[SearchResult]:
    """Find documents whose text matches a pattern (file_type and names args limit the documents searched).

    Fix: the 'names' author filter is now applied *before* scanning a document's
    text, so documents excluded by author are no longer searched needlessly.
    Results are unchanged; only wasted work is avoided.
    """
    results: list[SearchResult] = []

    for doc in (self.all_documents() if file_type == 'all' else self.other_files):
        # Only Emails and MessengerLogs have a meaningful author to match against 'names'
        if names and ((not isinstance(doc, (Email, MessengerLog))) or doc.author not in names):
            continue

        lines = doc.lines_matching_txt(pattern)

        if len(lines) > 0:
            results.append(SearchResult(doc, lines))

    return results
160
+
161
def earliest_email_at(self, author: str | None) -> datetime:
    """Timestamp of the chronologically first email to/from 'author'."""
    oldest = self.emails_for(author)[0]
    return oldest.timestamp
163
+
164
def last_email_at(self, author: str | None) -> datetime:
    """Timestamp of the chronologically last email to/from 'author'."""
    newest = self.emails_for(author)[-1]
    return newest.timestamp
166
+
167
def email_conversation_length_in_days(self, author: str | None) -> int:
    """Inclusive span in days between 'author's first and last email (same-day conversation == 1)."""
    span = self.last_email_at(author) - self.earliest_email_at(author)
    return span.days + 1
169
+
170
def email_signature_substitution_counts(self) -> dict[str, int]:
    """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
    totals: dict[str, int] = defaultdict(int)

    for email in self.emails:
        for author, num_snipped in email.signature_substitution_counts.items():
            totals[author] += num_snipped

    return totals
179
+
180
def email_unknown_recipient_file_ids(self) -> list[str]:
    """Sorted file ids of the emails that had no identifiable recipient."""
    return sorted(self._email_unknown_recipient_file_ids)
182
+
183
def emails_by(self, author: str | None) -> list[Email]:
    """All emails whose author field equals 'author' (None matches emails with no known author)."""
    return [email for email in self.emails if email.author == author]
185
+
186
def emails_for(self, author: str | None) -> list[Email]:
    """Returns emails to or from a given 'author' sorted chronologically."""
    if author == EVERYONE:
        emails = self.emails
    else:
        emails = self.emails_by(author) + self.emails_to(author)

    if not emails:
        raise RuntimeError(f"No emails found for '{author}'")

    # De-dupe (a message can be both by and to the author) then sort by timestamp
    return Document.sort_by_timestamp(Document.uniquify(emails))
194
+
195
def emails_to(self, author: str | None) -> list[Email]:
    """Emails addressed to 'author'; None matches emails with missing or unknown recipients."""
    if author is None:
        return [email for email in self.emails if not email.recipients or None in email.recipients]

    return [email for email in self.emails if author in email.recipients]
200
+
201
def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
    """iMessage logs by 'author' (a single name or list of names); EVERYONE/Epstein returns all logs."""
    if author in [EVERYONE, JEFFREY_EPSTEIN]:
        return self.imessage_logs

    # Normalize a single name into a one-element list
    wanted = author if isinstance(author, list) else [author]
    return [log for log in self.imessage_logs if log.author in wanted]
207
+
208
def identified_imessage_log_count(self) -> int:
    """Number of iMessage logs with a known (truthy) author."""
    return sum(1 for log in self.imessage_logs if log.author)
210
+
211
def imessage_sender_counts(self) -> dict[str | None, int]:
    """Count individual text messages per sender across every iMessage log."""
    counts: dict[str | None, int] = defaultdict(int)

    for log in self.imessage_logs:
        for msg in log.messages():
            counts[msg.author] += 1

    return counts
219
+
220
def print_files_summary(self) -> None:
    """Print a table of file counts by type, with known/unknown author and duplicate breakdowns."""
    dupes = defaultdict(int)

    for doc in self.all_documents():
        if doc.is_duplicate:
            dupes[doc.document_type()] += 1

    table = Table()
    add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])

    # NOTE(review): the truthiness checks below mean a genuine count of 0 renders
    # as N/A rather than "0" — presumably intentional, but verify.
    def add_row(label: str, docs: list, known: int | None = None, dupes: int | None = None):
        table.add_row(
            label,
            f"{len(docs):,}",
            f"{known:,}" if known else NA_TXT,
            f"{len(docs) - known:,}" if known else NA_TXT,
            f"{dupes:,}" if dupes else NA_TXT,
        )

    add_row('iMessage Logs', self.imessage_logs, self.identified_imessage_log_count())
    add_row('Emails', self.emails, len([e for e in self.emails if e.author]), dupes[EMAIL_CLASS])
    add_row('JSON Data', self.json_files, dupes=0)
    add_row('Other', self.other_files, dupes=dupes[OTHER_FILE_CLASS])
    console.print(Align.center(table))
    console.line()
245
+
246
def print_emails_for(self, _author: str | None) -> list[Email]:
    """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
    conversation_length = self.email_conversation_length_in_days(_author)
    emails = self.emails_for(_author)
    author = _author or UNKNOWN

    print_author_header(
        f"Found {len(emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
        get_style_for_name(author),
        get_info_for_name(author)
    )

    self.print_emails_table_for(_author)
    last_printed_email_was_duplicate = False

    for email in emails:
        if email.is_duplicate:
            # Duplicates are collapsed to an indented one-line note instead of the full email
            console.print(Padding(email.duplicate_file_txt().append('...'), (0, 0, 0, 4)))
            last_printed_email_was_duplicate = True
        else:
            # Insert a blank line when transitioning from duplicate notes back to full emails
            if last_printed_email_was_duplicate:
                console.line()

            console.print(email)
            last_printed_email_was_duplicate = False

    return emails
273
+
274
def print_emails_table_for(self, _author: str | None) -> None:
    """Print a summary table (From / Timestamp / Subject) of non-duplicate emails to or from '_author'."""
    emails = [email for email in self.emails_for(_author) if not email.is_duplicate]  # Remove dupes
    author = _author or UNKNOWN

    table = Table(
        title=f"Emails to/from {author} starting {emails[0].timestamp.date()}",
        border_style=get_style_for_name(author, allow_bold=False),
        header_style="bold"
    )

    table.add_column('From', justify='left')
    table.add_column('Timestamp', justify='center')
    table.add_column('Subject', justify='left', style='honeydew2', min_width=60)

    for email in emails:
        table.add_row(
            email.author_txt,
            # Timestamp doubles as a link to the document on epstein.media
            email.epstein_media_link(link_txt=email.timestamp_without_seconds()),
            highlighter(email.subject())
        )

    console.print(Align.center(table), '\n')
296
+
297
def print_email_device_info(self) -> None:
    """Print both directions of the 'Sent from [DEVICE]' mapping: author -> devices, then device -> authors."""
    print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
    console.print(build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
    console.print(build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
301
+
302
def print_emailer_counts_table(self) -> None:
    """Print per-person email counts with outbound links to Jmail, EpsteinWeb, and Twitter search."""
    footer = f"Identified authors of {self.attributed_email_count()} emails out of {len(self.emails)} potential email files."
    counts_table = Table(title=f"Email Counts", caption=footer, header_style="bold")
    add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_WEB, 'Twitter'])

    # Total appearances (as author + as recipient) for everyone, including the 'useless' emailers
    emailer_counts = {
        e: self.email_author_counts[e] + self.email_recipient_counts[e]
        for e in self.all_emailers(True)
    }

    for p, count in sort_dict(emailer_counts):
        style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)

        counts_table.add_row(
            Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
            str(count),
            str(self.email_author_counts[p]),
            str(self.email_recipient_counts[p]),
            # Link cells are blank for unknown senders / names EpsteinWeb won't have
            '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
            '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB.lower()),
            '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
        )

    console.print(vertically_pad(counts_table, 2))
326
+
327
def print_imessage_summary(self) -> None:
    """Print summary table and stats for text messages."""
    counts_table = Table(title="Text Message Counts By Author", header_style="bold")
    counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
    counts_table.add_column('Files', justify='right', style='white')
    counts_table.add_column("Msgs", justify='right')
    counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
    counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
    counts_table.add_column('Days', justify='right', style='dim')

    for name, count in sort_dict(self.imessage_sender_counts()):
        logs = self.imessage_logs_for(name)
        first_at = logs[0].first_message_at(name)
        # NOTE(review): 'Last Sent At' uses first_message_at() of the *last* log —
        # i.e. the first message of the most recent conversation, not necessarily
        # the very last message sent. Confirm this isn't meant to be a
        # last-message accessor.
        last_at = logs[-1].first_message_at(name)

        counts_table.add_row(
            Text(name or UNKNOWN,
                get_style_for_name(name)),
            str(len(logs)),
            f"{count:,}",
            iso_timestamp(first_at),
            iso_timestamp(last_at),
            str((last_at - first_at).days + 1),
        )

    console.print(counts_table)
    text_summary_msg = f"\nDeanonymized {self.identified_imessage_log_count()} of "
    text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files)} files."
    console.print(text_summary_msg)
    imessage_msg_count = sum([len(log.messages()) for log in self.imessage_logs])
    console.print(f"Found {imessage_msg_count} total text messages in {len(self.imessage_logs)} conversations.")
    # Sanity-check line comparing against the count observed at the previous deploy
    console.print(f"(Last deploy found 4668 messages in 77 conversations)", style='dim')
359
+
360
def print_other_files_table(self) -> list[OtherFile]:
    """Print a preview table of unclassifiable files. Returns the OtherFiles that were interesting enough to print."""
    interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
    header_pfx = '' if args.all_other_files else 'Selected '
    print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")

    # When showing only the curated subset, point readers at the uncurated mirror site
    if not args.all_other_files:
        print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and all {len(self.emails):,} emails)", style='dim')
        print_other_site_link(False)
        console.line(2)

    table = Table(header_style='bold', show_lines=True)
    table.add_column('File', justify='center', width=FILENAME_LENGTH)
    table.add_column('Date', justify='center')
    table.add_column('Length', justify='center')
    table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')

    for doc in interesting_files:
        link_and_info = [doc.raw_document_link_txt(), *doc.hints()]
        date_str = doc.date_str()

        # Duplicates get a dimmed one-line note instead of a text preview
        if doc.is_duplicate:
            preview_text = doc.duplicate_file_txt()
            row_style = ' dim'
        else:
            preview_text = doc.highlighted_preview_text()
            row_style = ''

        table.add_row(
            Group(*link_and_info),
            Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
            doc.file_size_str(),
            preview_text,
            style=row_style
        )

    console.print(table)
    logger.warning(f"Skipped {len(self.other_files) - len(interesting_files)} uninteresting files...")
    return interesting_files
399
+
400
+
401
def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
    """Render a padded two-column table mapping authors to device signatures (or the reverse)."""
    if cols[0] == AUTHOR:
        title = 'Signatures Used By Authors'
    else:
        title = 'Authors Seen Using Signatures'

    table = Table(header_style="bold reverse", show_lines=True, title=title)

    # Second column header gets pluralized
    for idx, column_name in enumerate(cols):
        table.add_column(column_name.title() + ('s' if idx == 1 else ''))

    listified = dict_sets_to_lists(keyed_sets)

    for key in sorted(listified.keys()):
        table.add_row(highlighter(key or UNKNOWN), highlighter(join_char.join(sorted(listified[key]))))

    return Padding(table, DEVICE_SIGNATURE_PADDING)
414
+
415
+
416
def is_ok_for_epstein_web(name: str | None) -> bool:
    """Return True if it's likely that EpsteinWeb has a page for this name."""
    # Must be a multi-word name (single tokens and None have no person page)
    if name is None or ' ' not in name:
        return False

    # Email addresses, slashed strings, and '??' placeholders are never real person pages
    if any(marker in name for marker in ('@', '/', '??')):
        return False

    return name not in INVALID_FOR_EPSTEIN_WEB
426
+
427
+
428
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
    """Bucket documents by the 'YYYY-MM' month of their timestamp; undated docs land in the None bucket."""
    counts: dict[str | None, int] = defaultdict(int)

    for doc in docs:
        timestamp = doc.timestamp

        if not timestamp:
            counts[None] += 1
        else:
            counts[f"{timestamp.year:04d}-{timestamp.month:02d}"] += 1

    return counts
@@ -0,0 +1,94 @@
1
from epstein_files.util.env import args

# Removed: look, make, no, see, think, up, use, want
# https://www.gonaturalenglish.com/1000-most-common-words-in-the-english-language/
MOST_COMMON_WORDS = """
    a about after all also am an and any are as at
    be because been being but by
    came can can't cannot cant come could couldnt
    day do doing dont did didnt
    even
    find first for from
    get getting got give go going
    had hadnt has hasnt have havent having he hed her here him his how
    i if in into is isnt it its ive
    just
    know
    like
    man many me more my
    new not now
    of on one only or other our out
    people pm
    re
    said say saying says she shed so some subject
    take than that the their them then there these they theyd theyll theyre theyve thing this those through time to too two
    very
    was way we well went were werent weve
    what whatever when whenever where wherever which whichever who whoever why
    will with without wont would wouldnt wouldve
    year you youd youll your youre youve
    """.strip().split()

# Corpus-specific "common" words: month/day names and abbreviations, email
# header tokens (bcc, fwd, sent, ...), French/Swedish quoted-reply boilerplate,
# device names, titles, etc.
OTHER_COMMON_WORDS = """
    january february march april may june july august september october november december
    jan feb mar apr jun jul aug sep sept oct nov dec
    sunday monday tuesday wednesday thursday friday saturday
    sun mon tue tues wed thu thur thurs fri sat
    st nd rd th skrev

    addthis attachments ave
    bcc bst btn
    cc ce cel
    date de des div dont du
    each ecrit edt el email en envoye epstein et
    fa fax fb fw fwd
    herself himself
    id ii iii im iphone iPad BlackBerry
    je jeffrey jr
    kl
    las le les let
    mr mrs ms much
    ne nonus nor
    ou over
    pdt pst
    rss
    sent ses si signature smtp snipped somers
    te tel tenu tho though trimmed
    via vous voye
    was wasnt whether while wrote
    """.strip().split()

# Lookup table of every common word (keys lowercased; the dict doubles as an ordered set)
COMMON_WORDS = {line.lower(): True for line in (MOST_COMMON_WORDS + OTHER_COMMON_WORDS)}
COMMON_WORDS_LIST = sorted([word for word in COMMON_WORDS.keys()])

# Words ending in 's' that must not be "singularized": proper names, places,
# Latin/Greek terms, acronyms, and plural-only nouns.
UNSINGULARIZABLE_WORDS = """
    abbas academia acosta aids alas algeria alice always andres angeles anus apparatus apropos arabia ares asia asus atlanta australia austria avia
    bahamas bata beatles beta betts bias boies bonus brookings brussels
    california campus candia cannes carlos caucus cbs cds census chaos chorus chris christmas clothes cms collins columbia com comms conchita consensus costa csis curves cvs cyprus
    dallas data davis davos dawkins deborah dementia denis dennis des diabetes dis drougas
    emirates emphasis encyclopedia ens eps eta
    facs ferris focus folks forbes francis
    gas gaydos georgia gittes gloria gmt gps gravitas
    halitosis hamas harris has hiatus hillis his hivaids hopkins
    impetus india indonesia ios ips irs isis isosceles
    jacques jános jones josephus jules
    kansas
    las lens les lewis lhs lls los louis luis
    madars malaysia maldives marcus maria massachusetts mbs media melania meta mets meyers mlpf&s mongolia moonves multimedia
    nadia nafta natalie nautilus nas nigeria novartis nucleus nunes
    olas orleans
    pants paris parkes patricia pbs pennsylvania peres perhaps philadelphia physics pls plus potus pres prevus
    rees reis-dennis reuters rodgers rogers russia
    sachs sadis saks santa ses shia simmons slovakia sometimes soros stimulus surplus syria
    tennis texas this thus trans tries tunisia
    ups uterus
    valeria vegas versus via victoria villafaria vinicius virginia vis
    was whereas whoops wikipedia
    yemen yes yikes
    zakaria
    """.strip().split()


# Dump the full common-word list when deep debugging is enabled
if args.deep_debug:
    word_str = '\n'.join(COMMON_WORDS_LIST)
    print(f"common words:\n\n{word_str}")
@@ -0,0 +1,57 @@
1
+ from rich.terminal_theme import TerminalTheme
2
+
3
+ from epstein_files.util.env import args
4
+
5
+
6
PAGE_TITLE = ' ∞ Michel de Cryptadamus ∞ '

# HTML wrapper used when exporting the rich console to a web page.
# '{stylesheet}', '{foreground}', '{background}', and '{code}' are filled in by
# rich at export time; the <title> is baked in at import time from the
# --all-emails flag via the f-string concatenation below.
CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<link rel="icon" type="image/x-icon" href="https://media.universeodon.com/accounts/avatars/109/363/179/904/598/380/original/eecdc2393e75e8bf.jpg" />

<style>
{stylesheet}
body {{
color: {foreground};
background-color: {background};
}}
</style>
""" + f"<title>Epstein {'Emails' if args.all_emails else 'Text Messages'}</title>" + """
</head>
<body>
<pre style="font-family: Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace; white-space: pre-wrap; overflow-wrap: break-word;">
<code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">
{code}
</code>
</pre>
</body>
</html>
"""

# Swap black for white
HTML_TERMINAL_THEME = TerminalTheme(
    (0, 0, 0),        # background (black)
    (255, 255, 255),  # foreground (white)
    [                 # normal ANSI colors 0-7
        (0, 0, 0),
        (128, 0, 0),
        (0, 128, 0),
        (128, 128, 0),
        (0, 0, 128),
        (128, 0, 128),
        (0, 128, 128),
        (192, 192, 192),
    ],
    [                 # bright ANSI colors 8-15
        (128, 128, 128),
        (255, 0, 0),
        (0, 255, 0),
        (255, 255, 0),
        (0, 0, 255),
        (255, 0, 255),
        (0, 255, 255),
        (255, 255, 255),
    ],
)