epstein-files 1.0.5__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.5 → epstein_files-1.0.6}/PKG-INFO +4 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/README.md +3 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/__init__.py +21 -17
- epstein_files-1.0.6/epstein_files/count_words.py +72 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/json_file.py +4 -4
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/messenger_log.py +2 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/other_file.py +2 -2
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/epstein_files.py +22 -22
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/output_files.py +18 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/strings.py +0 -1
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/urls.py +5 -20
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/env.py +4 -3
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/output.py +25 -7
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/rich.py +51 -27
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/word_count.py +10 -10
- {epstein_files-1.0.5 → epstein_files-1.0.6}/pyproject.toml +2 -2
- {epstein_files-1.0.5 → epstein_files-1.0.6}/LICENSE +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/document.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/email.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/imessage/text_message.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/names.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constants.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/data.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/doc_cfg.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/file_helper.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/highlighted_group.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/logging.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/timer.py +0 -0

{epstein_files-1.0.5 → epstein_files-1.0.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.0.5
+Version: 1.0.6
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later
@@ -70,6 +70,9 @@ epstein_show --raw 030999
 # This also works:
 epstein_show HOUSE_OVERSIGHT_030999
 
+# Count words used by Epstein and Bannon
+epstein_word_count --name 'Jeffrey Epstein' --name 'Steve Bannon'
+
 # Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
 epstein_diff 030999 020442
 ```

{epstein_files-1.0.5 → epstein_files-1.0.6}/README.md

@@ -38,6 +38,9 @@ epstein_show --raw 030999
 # This also works:
 epstein_show HOUSE_OVERSIGHT_030999
 
+# Count words used by Epstein and Bannon
+epstein_word_count --name 'Jeffrey Epstein' --name 'Steve Bannon'
+
 # Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
 epstein_diff 030999 020442
 ```

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/__init__.py

@@ -10,11 +10,12 @@ from sys import exit
 
 from dotenv import load_dotenv
 load_dotenv()
-
 from rich.markup import escape
 from rich.padding import Padding
 from rich.panel import Panel
+from rich.text import Text
 
+from epstein_files.count_words import write_word_counts_html
 from epstein_files.epstein_files import EpsteinFiles, document_cls
 from epstein_files.documents.document import INFO_PADDING, Document
 from epstein_files.documents.email import Email
@@ -24,22 +24,25 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import logger
-from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
+from epstein_files.util.output import print_emails, print_json_files, print_json_metadata, print_json_stats, print_text_messages, write_urls
 from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
 from epstein_files.util.timer import Timer
 
+timer = Timer()
+epstein_files = EpsteinFiles.get_files(timer)
+
 
 def generate_html() -> None:
     if args.make_clean:
         make_clean()
+        write_urls()
         exit()
-
-    timer = Timer()
-    epstein_files = EpsteinFiles.get_files(timer)
-
-    if args.json_metadata:
+    elif args.json_metadata:
         print_json_metadata(epstein_files)
         exit()
+    elif args.output_json_files:
+        print_json_files(epstein_files)
+        exit()
 
     print_header(epstein_files)
 
@@ -108,22 +112,22 @@ def epstein_show():
         console.line()
 
     for doc in docs:
-
-
+        if isinstance(doc, Email):
+            doc.truncation_allowed = False
+
+        console.print('\n', doc, '\n')
 
         if args.raw:
-            console.
-            console.print(
-            console.print(escape(doc.raw_text()))
+            console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc._border_style()))
+            console.print(escape(doc.raw_text()), '\n')
 
         if isinstance(doc, Email):
-            console.
-            console.print(
-            console.print(escape(doc._actual_text()))
+            console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc._border_style()))
+            console.print(escape(doc._actual_text()), '\n')
 
 
-def
-
+def epstein_word_count() -> None:
+    write_word_counts_html()
 
 
 def _assert_positional_args():

epstein_files-1.0.6/epstein_files/count_words.py (new file)

@@ -0,0 +1,72 @@
+# Count word usage in emails and texts
+import re
+
+from epstein_files.epstein_files import EpsteinFiles
+from epstein_files.util.constant.common_words import COMMON_WORDS_LIST
+from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
+from epstein_files.util.env import args, specified_names
+from epstein_files.util.logging import logger
+from epstein_files.util.rich import (console, print_centered, print_color_key, print_page_title, print_panel,
+    print_starred_header, write_html)
+from epstein_files.util.search_result import MatchedLine, SearchResult
+from epstein_files.util.timer import Timer
+from epstein_files.util.word_count import WordCount
+
+HTML_REGEX = re.compile(r"^http|#yiv")
+
+
+def write_word_counts_html() -> None:
+    timer = Timer()
+    epstein_files = EpsteinFiles.get_files(timer)
+    email_subjects: set[str] = set()
+    word_count = WordCount()
+
+    # Remove dupes, junk mail, and fwded articles from emails
+    emails = [
+        e for e in epstein_files.emails
+        if not (e.is_duplicate or e.is_junk_mail() or (e.config and e.config.is_fwded_article)) \
+            and (len(specified_names) == 0 or e.author in specified_names)
+    ]
+
+    for email in emails:
+        logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
+        lines = email.actual_text.split('\n')
+
+        if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
+            email_subjects.add(email.subject())
+            lines.append(email.subject())
+
+        for i, line in enumerate(lines):
+            if HTML_REGEX.search(line):
+                continue
+
+            for word in line.split():
+                word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
+
+    # Add in iMessage conversation words
+    imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
+
+    for imessage_log in imessage_logs:
+        logger.info(f"Counting words in {imessage_log}")
+
+        for msg in imessage_log.messages():
+            if len(specified_names) > 0 and msg.author not in specified_names:
+                continue
+            elif HTML_REGEX.search(line):
+                continue
+
+            for word in msg.text.split():
+                word_count.tally_word(word, SearchResult(imessage_log, [msg.text]))
+
+    print_page_title(expand=False)
+    print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
+    print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
+    console.line()
+    print_color_key()
+    console.line()
+    console.print(word_count)
+    console.line(2)
+    print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
+    console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
+    write_html(WORD_COUNT_HTML_PATH)
+    timer.print_at_checkpoint(f"Finished counting words")

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/json_file.py

@@ -21,14 +21,11 @@ class JsonFile(OtherFile):
         if self.url_slug.endswith('.txt') or self.url_slug.endswith('.json'):
             self.url_slug = Path(self.url_slug).stem
 
-        self._set_computed_fields(text=self.
+        self._set_computed_fields(text=self.json_str())
 
     def category(self) -> str:
         return JSON
 
-    def formatted_json(self) -> str:
-        return json.dumps(self.json_data(), indent=4)
-
     def info_txt(self) -> Text | None:
         return Text(f"JSON file, possibly iMessage or similar app metadata", style='white dim italic')
 
@@ -38,3 +35,6 @@ class JsonFile(OtherFile):
     def json_data(self) -> object:
         with open(self.file_path, encoding='utf-8-sig') as f:
             return json.load(f)
+
+    def json_str(self) -> str:
+        return json.dumps(self.json_data(), indent=4)

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/messenger_log.py

@@ -15,6 +15,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
+from epstein_files.util.rich import build_table
 
 CONFIRMED_MSG = 'Found confirmed counterparty'
 GUESSED_MSG = 'This is probably a conversation with'
@@ -111,7 +112,7 @@ class MessengerLog(Communication):
     @classmethod
     def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
         """Build a table summarizing the text messages in 'imessage_logs'."""
-        counts_table =
+        counts_table = build_table("Text Message Counts By Author")
         counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/documents/other_file.py

@@ -20,7 +20,7 @@ from epstein_files.util.data import escape_single_quotes, remove_timezone, uniqu
 from epstein_files.util.file_helper import FILENAME_LENGTH
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import get_style_for_category
-from epstein_files.util.rich import QUESTION_MARK_TXT, highlighter
+from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
 from epstein_files.util.logging import logger
 
 MAX_DAYS_SPANNED_TO_BE_VALID = 10
@@ -233,7 +233,7 @@ class OtherFile(Document):
     @staticmethod
     def build_table(docs: list['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
-        table =
+        table = build_table(None, show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='center')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/epstein_files.py

@@ -28,9 +28,9 @@ from epstein_files.util.doc_cfg import EmailCfg, Metadata
 from epstein_files.util.env import args, logger
 from epstein_files.util.file_helper import DOCS_DIR, file_size_str
 from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT,
-    link_text_obj, link_markup, print_author_header, print_centered,
-    print_section_header, vertically_pad)
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, TABLE_BORDER_STYLE, add_cols_to_table,
+    build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
+    print_other_site_link, print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
 
@@ -212,7 +212,7 @@ class EpsteinFiles:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
 
     def print_files_summary(self) -> None:
-        table =
+        table = build_table('Summary of Document Types')
         add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
 
         def add_row(label: str, docs: list):
@@ -268,12 +268,12 @@ class EpsteinFiles:
 
     def print_email_device_info(self) -> None:
         print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
-        console.print(
-        console.print(
+        console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
+        console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
 
     def print_emailer_counts_table(self) -> None:
         footer = f"Identified authors of {self.attributed_email_count():,} emails out of {len(self.emails):,}."
-        counts_table =
+        counts_table = build_table("Email Counts", caption=footer)
         add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
 
         emailer_counts = {
@@ -345,21 +345,6 @@ class EpsteinFiles:
             self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
 
 
-def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
-    title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
-    table = Table(header_style="bold reverse", show_lines=True, title=title)
-
-    for i, col in enumerate(cols):
-        table.add_column(col.title() + ('s' if i == 1 else ''))
-
-    new_dict = dict_sets_to_lists(keyed_sets)
-
-    for k in sorted(new_dict.keys()):
-        table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
-
-    return Padding(table, DEVICE_SIGNATURE_PADDING)
-
-
 def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
     counts: dict[str | None, int] = defaultdict(int)
 
@@ -397,6 +382,21 @@ def is_ok_for_epstein_web(name: str | None) -> bool:
     return True
 
 
+def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
+    title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
+    table = build_table(title, header_style="bold reverse", show_lines=True)
+
+    for i, col in enumerate(cols):
+        table.add_column(col.title() + ('s' if i == 1 else ''))
+
+    new_dict = dict_sets_to_lists(keyed_sets)
+
+    for k in sorted(new_dict.keys()):
+        table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
+
+    return Padding(table, DEVICE_SIGNATURE_PADDING)
+
+
 def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
     docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
     return [json_safe(d.metadata()) for d in docs_sorted_by_id]

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/output_files.py

@@ -1,19 +1,36 @@
 from pathlib import Path
 
-
+from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
 
 # Files output by the code
 HTML_DIR = Path('docs')
 EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
 ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
+JSON_FILES_JSON_PATH = HTML_DIR.joinpath(f'json_files_from_{EPSTEIN_FILES_NOV_2025}.json')
 JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
 TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
 WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
 # EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
+URLS_ENV = '.urls.env'
+
+# Deployment URLS
+# NOTE: don't rename these variables without changing deploy.sh!
+GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
+JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
+JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
+WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
+
+SITE_URLS: dict[SiteType, str] = {
+    EMAIL: ALL_EMAILS_URL,
+    TEXT_MESSAGE: TEXT_MSGS_URL,
+}
 
 BUILD_ARTIFACTS = [
     ALL_EMAILS_PATH,
     # EPSTEIN_WORD_COUNT_HTML_PATH,
+    JSON_FILES_JSON_PATH,
     JSON_METADATA_PATH,
     TEXT_MSGS_HTML_PATH,
     WORD_COUNT_HTML_PATH,

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/constant/urls.py

@@ -6,7 +6,6 @@ from inflection import parameterize
 from rich.text import Text
 
 from epstein_files.util.constant.output_files import *
-from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
 from epstein_files.util.file_helper import coerce_file_stem
 
 # Style stuff
@@ -15,26 +14,11 @@ TEXT_LINK = 'text_link'
 
 # External site names
 ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
-
 EPSTEIN_MEDIA = 'epstein.media'
 EPSTEIN_WEB = 'EpsteinWeb'
 EPSTEINIFY = 'epsteinify'
 JMAIL = 'Jmail'
 
-
-# Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh!
-GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
-ALL_EMAILS_URL = f'{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}'
-JSON_METADATA_URL = f'{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}'
-WORD_COUNT_URL = f'{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}'
-
-SITE_URLS: dict[SiteType, str] = {
-    EMAIL: ALL_EMAILS_URL,
-    TEXT_MESSAGE: TEXT_MSGS_URL,
-}
-
 GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
@@ -46,13 +30,15 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
 # External URLs
 COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
 COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
+OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
+RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
+SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
+
+# Document source sites
 EPSTEINIFY_URL = 'https://epsteinify.com'
 EPSTEIN_MEDIA_URL = 'https://epstein.media'
 EPSTEIN_WEB_URL = 'https://epsteinweb.org'
 JMAIL_URL = 'https://jmail.world'
-OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
-RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
-SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
 
 DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
     EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
@@ -61,7 +47,6 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
 }
 
 
-# TODO: epsteinify.com seems to be down as of 2025-12-30, switched to epstein.web for links
 epsteinify_api_url = lambda file_id: f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"
 epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
 epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(external_doc_link_markup(filename_or_id, style))

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/env.py

@@ -6,9 +6,9 @@ from sys import argv
 
 from epstein_files.util.logging import datefinder_logger, env_log_level, logger
 
-COUNT_WORDS_SCRIPT = '
+COUNT_WORDS_SCRIPT = 'epstein_word_count'
 DEFAULT_WIDTH = 145
-HTML_SCRIPTS = ['epstein_generate',
+HTML_SCRIPTS = ['epstein_generate', COUNT_WORDS_SCRIPT]
 
 
 parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
@@ -19,8 +19,9 @@ output = parser.add_argument_group('OUTPUT')
 output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
 output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
 output.add_argument('--build', '-b', action='store_true', help='write output to HTML file')
-output.add_argument('--make-clean',
+output.add_argument('--make-clean', action='store_true', help='delete all HTML build artifact and write latest URLs to .urls.env')
 output.add_argument('--output-emails', '-oe', action='store_true', help='generate other files section')
+output.add_argument('--output-json-files', action='store_true', help='pretty print all the raw JSON data files in the collection')
 output.add_argument('--output-other-files', '-oo', action='store_true', help='generate other files section')
 output.add_argument('--output-texts', '-ot', action='store_true', help='generate other files section')
 output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/output.py

@@ -1,12 +1,14 @@
+import json
+
 from rich.padding import Padding
 
 from epstein_files.documents.email import Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.epstein_files import EpsteinFiles, count_by_month
-from epstein_files.util.constant
-from epstein_files.util.constant import urls
+from epstein_files.util.constant import output_files
 from epstein_files.util.constant.html import *
 from epstein_files.util.constant.names import *
+from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
 from epstein_files.util.data import dict_sets_to_lists
 from epstein_files.util.env import args, specified_names
 from epstein_files.util.logging import log_file_write, logger
@@ -107,6 +109,20 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
     return len(already_printed_emails)
 
 
+def print_json_files(epstein_files: EpsteinFiles):
+    if args.build:
+        json_data = {json_file.url_slug: json_file.json_data() for json_file in epstein_files.json_files}
+
+        with open(JSON_FILES_JSON_PATH, 'w') as f:
+            f.write(json.dumps(json_data, sort_keys=True))
+            log_file_write(JSON_FILES_JSON_PATH)
+    else:
+        for json_file in epstein_files.json_files:
+            console.line(2)
+            console.print(json_file.description_panel())
+            console.print_json(json_file.json_str(), indent=4, sort_keys=False)
+
+
 def print_json_metadata(epstein_files: EpsteinFiles) -> None:
     json_str = epstein_files.json_metadata()
 
@@ -146,10 +162,10 @@ def print_text_messages(epstein_files: EpsteinFiles) -> None:
 
 def write_urls() -> None:
     """Write _URL style constant variables to a file bash scripts can load as env vars."""
-    url_vars = {
-
-
-
+    url_vars = {k: v for k, v in vars(output_files).items() if k.endswith('URL') and not k.startswith('GH')}
+
+    if not args.suppress_output:
+        console.line()
 
     with open(URLS_ENV, 'w') as f:
         for var_name, url in url_vars.items():
@@ -160,7 +176,9 @@ def write_urls() -> None:
 
             f.write(f"{key_value}\n")
 
-
+    if not args.suppress_output:
+        console.line()
+
     logger.warning(f"Wrote {len(url_vars)} URL variables to '{URLS_ENV}'\n")
 
 

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/rich.py

@@ -14,7 +14,8 @@ from rich.theme import Theme
 
 from epstein_files.util.constant.html import CONSOLE_HTML_FORMAT, HTML_TERMINAL_THEME, PAGE_TITLE
 from epstein_files.util.constant.names import UNKNOWN
-from epstein_files.util.constant.
+from epstein_files.util.constant.output_files import SITE_URLS
+from epstein_files.util.constant.strings import DEFAULT, EMAIL, NA, QUESTION_MARKS, TEXT_MESSAGE, SiteType
 from epstein_files.util.constant.urls import *
 from epstein_files.util.constants import FALLBACK_TIMESTAMP, HEADER_ABBREVIATIONS
 from epstein_files.util.data import json_safe
@@ -31,11 +32,22 @@ GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
 DEFAULT_NAME_STYLE = 'gray46'
 KEY_STYLE='honeydew2 bold'
 SECTION_HEADER_STYLE = 'bold white on blue3'
-SOCIAL_MEDIA_LINK_STYLE = '
+SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
 SUBSTACK_POST_LINK_STYLE = 'bright_cyan'
 SYMBOL_STYLE = 'grey70'
+TABLE_BORDER_STYLE = 'grey46'
+TABLE_TITLE_STYLE = f"gray85 italic"
 TITLE_STYLE = 'black on bright_white bold'
 
+AUX_SITE_LINK_STYLE = 'dark_orange3'
+OTHER_SITE_LINK_STYLE = 'dark_goldenrod'
+
+DEFAULT_TABLE_KWARGS = {
+    'border_style': TABLE_BORDER_STYLE,
+    'header_style': "bold",
+    'title_style': TABLE_TITLE_STYLE,
+}
+
 HIGHLIGHTED_GROUP_COLOR_KEYS = [
     Text(highlight_group.label.replace('_', ' '), style=highlight_group.style)
     for highlight_group in sorted(HIGHLIGHTED_NAMES, key=lambda hg: hg.label)
@@ -79,7 +91,11 @@ def build_highlighter(pattern: str) -> EpsteinHighlighter:
     return TempHighlighter()
 
 
-def join_texts(txts: list[Text], join: str = ' ', encloser: str = '') -> Text:
+def build_table(title: str | None, **kwargs) -> Table:
+    return Table(title=title, **{**DEFAULT_TABLE_KWARGS, **kwargs})
+
+
+def join_texts(txts: list[Text], join: str = ' ', encloser: str = '', encloser_style: str = 'wheat4') -> Text:
     """Join rich.Text objs into one."""
     if encloser:
         if len(encloser) != 2:
@@ -91,8 +107,9 @@ def join_texts(txts: list[Text], join: str = ' ', encloser: str = '') -> Text:
 
     txt = Text('')
 
-    for i,
-        txt.append(join if i >= 1 else '').append(enclose_start)
+    for i, _txt in enumerate(txts):
+        txt.append(join if i >= 1 else '').append(enclose_start, style=encloser_style)
+        txt.append(_txt).append(enclose_end, style=encloser_style)
 
     return txt
 
@@ -132,7 +149,7 @@ def print_centered_link(url: str, link_text: str, style: str | None = None) -> N
 
 
 def print_color_key() -> None:
-    color_table =
+    color_table = build_table('Rough Guide to Highlighted Colors', show_header=False)
     num_colors = len(HIGHLIGHTED_GROUP_COLOR_KEYS)
     row_number = 0
 
@@ -164,7 +181,7 @@ def print_header(epstein_files: 'EpsteinFiles') -> None:
     print_centered(f"if you think there's an attribution error or can deanonymize an {UNKNOWN} contact {CRYPTADAMUS_TWITTER}", 'grey46')
     print_centered('note this site is based on the OCR text provided by Congress which is not always the greatest', 'grey23')
     print_centered(f"(thanks to {link_markup('https://x.com/ImDrinknWyn', '@ImDrinknWyn', 'dodger_blue3')} + others for help attributing redacted emails)")
-    print_centered_link(
+    print_centered_link(JSON_METADATA_URL, "(explanations of author attributions)", style='magenta')
 
 
 def print_json(label: str, obj: object, skip_falsey: bool = False) -> None:
@@ -233,17 +250,18 @@ def print_other_site_link(is_header: bool = True) -> None:
     print_centered(parenthesize(Text.from_markup(markup_msg)), style='bold')
 
     if is_header:
-
-        print_centered(parenthesize(metadata_link))
-        word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words', OTHER_SITE_LINK_STYLE)
+        word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
         print_centered(parenthesize(word_count_link))
-
+        metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
+        print_centered(parenthesize(metadata_link))
+        json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
+        print_centered(parenthesize(json_link))
 
 
 def print_page_title(expand: bool = True, width: int | None = None) -> None:
     title_panel = Panel(Text(PAGE_TITLE, justify='center'), expand=expand, style=TITLE_STYLE, width=width)
     console.print(Align.center(vertically_pad(title_panel)))
-
+    _print_social_media_links()
     console.line(2)
 
 
@@ -265,19 +283,6 @@ def print_section_header(msg: str, style: str = SECTION_HEADER_STYLE, is_centere
     console.print(Padding(panel, (3, 0, 1, 0)))
 
 
-def print_social_media_links() -> None:
-    print_centered_link(SUBSTACK_URL, "I Made Epstein's Text Messages Great Again (And You Should Read Them)", style=f'{SUBSTACK_POST_LINK_STYLE} bold')
-    print_centered_link(SUBSTACK_URL, SUBSTACK_URL.removeprefix('https://'), style=f'{SUBSTACK_POST_LINK_STYLE} dim')
-
-    social_links = [
-        link_text_obj('https://x.com/Cryptadamist/status/1990866804630036988', '@cryptadamist', style=SOCIAL_MEDIA_LINK_STYLE),
-        link_text_obj('https://cryptadamus.substack.com/', 'substack', style=SOCIAL_MEDIA_LINK_STYLE),
-        link_text_obj('https://universeodon.com/@cryptadamist/115572634993386057', 'mastodon', style=SOCIAL_MEDIA_LINK_STYLE),
-    ]
-
-    print_centered(join_texts(social_links, join=' ', encloser='[]'))
-
-
 def print_starred_header(msg: str, num_stars: int = 7, num_spaces: int = 2, style: str = TITLE_STYLE) -> None:
     stars = '*' * num_stars
     spaces = ' ' * num_spaces
@@ -317,7 +322,7 @@ def write_html(output_path: Path) -> None:
 
 
 def _print_abbreviations_table() -> None:
-    table =
+    table = build_table(title="Abbreviations Used Frequently In These Conversations", show_header=False)
     table.add_column("Abbreviation", justify="center", style='bold')
     table.add_column("Translation", style="white", justify="center")
 
@@ -329,7 +334,7 @@ def _print_abbreviations_table() -> None:
 
 def _print_external_links() -> None:
     console.line()
-
+    print_centered(Text('External Links', style=TABLE_TITLE_STYLE))
     presser_link = link_text_obj(OVERSIGHT_REPUBLICANS_PRESSER_URL, 'Official Oversight Committee Press Release')
     raw_docs_link = join_texts([link_text_obj(RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL, 'raw files', style=f"{ARCHIVE_LINK_COLOR} dim")], encloser='()')
     print_centered(join_texts([presser_link, raw_docs_link]))
@@ -341,5 +346,24 @@ def _print_external_links() -> None:
     print_centered(link_markup(EPSTEIN_MEDIA_URL) + " (raw document images)")
 
 
+def _print_social_media_links() -> None:
+    print_centered_link(
+        SUBSTACK_URL,
+        "I Made Epstein's Text Messages Great Again (And You Should Read Them)",
+        style=f'{SUBSTACK_POST_LINK_STYLE} bold'
+    )
+
+    print_centered_link(SUBSTACK_URL, SUBSTACK_URL.removeprefix('https://'), style=f'{SUBSTACK_POST_LINK_STYLE} dim')
+
+    social_links = [
+        link_text_obj('https://universeodon.com/@cryptadamist/115572634993386057', '@mastodon', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj(SUBSTACK_URL, '@substack', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj('https://x.com/Cryptadamist/status/1990866804630036988', '@twitter', style=SOCIAL_MEDIA_LINK_STYLE),
+        link_text_obj('https://github.com/michelcrypt4d4mus/epstein_text_messages', '@github', style=SOCIAL_MEDIA_LINK_STYLE)
+    ]
+
+    print_centered(join_texts(social_links, join=' / '))#, encloser='()'))#, encloser='‹›'))
+
+
 # if args.deep_debug:
 #     print_json('THEME_STYLES', THEME_STYLES)

{epstein_files-1.0.5 → epstein_files-1.0.6}/epstein_files/util/word_count.py

@@ -20,6 +20,14 @@ from epstein_files.util.search_result import SearchResult
 FIRST_AND_LAST_NAMES = flatten([n.split() for n in ALL_NAMES])
 FIRST_AND_LAST_NAMES = [n.lower() for n in FIRST_AND_LAST_NAMES] + OTHER_NAMES
 
+HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
+HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
+OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
+ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
+SYMBOL_WORD_REGEX = re.compile(r"^[-—–@%/?.,&=]+$")
+SPLIT_WORDS_BY = ['@', '/']
+FLAGGED_WORDS = [] # For debugging, log extra info when one of these is encountered
+
 NON_SINGULARIZABLE = UNSINGULARIZABLE_WORDS + [n for n in FIRST_AND_LAST_NAMES if n.endswith('s')]
 SKIP_WORDS_REGEX = re.compile(r"^(asmallworld@|enwiki|http|imagepng|nymagcomnymetro|addresswww|mailto|www|/font|colordu|classdms|targetdblank|nymagcom|palmbeachdailynews)|jee[vy]acation|fontfamily|(gif|html?|jpe?g|utm)$")
 BAD_CHARS_REGEX = re.compile(r"[-–=+()$€£©°«—^&%!#_`,.;:'‘’\"„“”?\d\\]")
@@ -100,21 +108,13 @@ SINGULARIZATIONS = {
     'twittercom': 'twitter',
 }
 
-HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
-HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
-OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
-SYMBOL_WORD_REGEX = re.compile(r"^[-—–@%/?.,&=]+$")
-ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
-SPLIT_WORDS_BY = ['@', '/']
-FLAGGED_WORDS = [] # For debugging, log extra info when one of these is encountered
-
 
 @dataclass
 class WordCount:
     count: dict[str, int] = field(default_factory=lambda: defaultdict(int))
     singularized: dict[str, int] = field(default_factory=lambda: defaultdict(int))
 
-    def
+    def tally_word(self, word: str, document_line: SearchResult) -> None:
         word = EmailHeader.cleanup_str(word).lower().strip()
         raw_word = word
 
@@ -148,7 +148,7 @@ class WordCount:
                 continue
 
             for w in word.split(symbol):
-                self.
+                self.tally_word(w, document_line)
 
             logger.info(f" Split word with '{symbol}' in it '{word}'...")
             return

{epstein_files-1.0.5 → epstein_files-1.0.6}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "epstein-files"
-version = "1.0.5"
+version = "1.0.6"
 description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
 authors = ["Michel de Cryptadamus"]
 readme = "README.md"
@@ -44,10 +44,10 @@ pytest = "^9.0.1"
 
 [tool.poetry.scripts]
 epstein_diff = 'epstein_files:epstein_diff'
-epstein_dump_urls = 'epstein_files:epstein_dump_urls'
 epstein_generate = 'epstein_files:generate_html'
 epstein_search = 'epstein_files:epstein_search'
 epstein_show = 'epstein_files:epstein_show'
+epstein_word_count = 'epstein_files:epstein_word_count'
 
 
 [tool.poetry.urls]

The 17 remaining files listed above with +0 -0 are unchanged; only their enclosing directory was renamed from epstein_files-1.0.5 to epstein_files-1.0.6.