epstein-files 1.0.1__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.1 → epstein_files-1.0.2}/PKG-INFO +19 -4
- {epstein_files-1.0.1 → epstein_files-1.0.2}/README.md +3 -3
- epstein_files-1.0.2/epstein_files/__init__.py +134 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/document.py +5 -1
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/epstein_files.py +37 -35
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constant/names.py +2 -1
- epstein_files-1.0.2/epstein_files/util/constant/output_files.py +29 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constant/urls.py +10 -7
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constants.py +140 -120
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/data.py +12 -33
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/doc_cfg.py +2 -10
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/env.py +3 -2
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/file_helper.py +0 -22
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/highlighted_group.py +2 -2
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/logging.py +6 -0
- epstein_files-1.0.2/epstein_files/util/output.py +180 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/rich.py +16 -9
- epstein_files-1.0.2/pyproject.toml +67 -0
- epstein_files-1.0.1/epstein_files/__init__.py +0 -202
- epstein_files-1.0.1/pyproject.toml +0 -31
- {epstein_files-1.0.1 → epstein_files-1.0.2}/LICENSE +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/email.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/imessage/text_message.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/json_file.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/messenger_log.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/documents/other_file.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/constant/strings.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/timer.py +0 -0
- {epstein_files-1.0.1 → epstein_files-1.0.2}/epstein_files/util/word_count.py +0 -0
|
@@ -1,26 +1,41 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
|
+
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
|
+
License: GPL-3.0-or-later
|
|
7
|
+
Keywords: Epstein,Jeffrey Epstein
|
|
5
8
|
Author: Michel de Cryptadamus
|
|
6
9
|
Requires-Python: >=3.11,<4.0
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Information Technology
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
14
|
+
Classifier: Programming Language :: Python
|
|
7
15
|
Classifier: Programming Language :: Python :: 3
|
|
8
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
9
20
|
Requires-Dist: datefinder (>=0.7.3,<0.8.0)
|
|
10
21
|
Requires-Dist: inflection (>=0.5.1,<0.6.0)
|
|
11
22
|
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
12
23
|
Requires-Dist: python-dotenv (>=1.2.1,<2.0.0)
|
|
13
24
|
Requires-Dist: requests (>=2.32.5,<3.0.0)
|
|
14
25
|
Requires-Dist: rich (>=14.2.0,<15.0.0)
|
|
26
|
+
Project-URL: Emails, https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html
|
|
27
|
+
Project-URL: Metadata, https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json
|
|
28
|
+
Project-URL: TextMessages, https://michelcrypt4d4mus.github.io/epstein_text_messages
|
|
29
|
+
Project-URL: WordCounts, https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html
|
|
15
30
|
Description-Content-Type: text/markdown
|
|
16
31
|
|
|
17
32
|
# I Made Epstein's Text Messages Great Again
|
|
18
33
|
|
|
19
34
|
* [I Made Epstein's Text Messages Great Again (And You Should Read Them)](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great) post on [Substack](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great)
|
|
20
35
|
* The Epstein text messages (and some of the emails along with summary counts of sent emails to/from Epstein) generated by this code can be viewed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/).
|
|
21
|
-
* All of His Emails can be read at another page also generated by this code [here](https://michelcrypt4d4mus.github.io/
|
|
22
|
-
* Word counts for the emails and text messages are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
23
|
-
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
36
|
+
* All of His Emails can be read at another page also generated by this code [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html).
|
|
37
|
+
* Word counts for the emails and text messages are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html).
|
|
38
|
+
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json)
|
|
24
39
|
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `111111` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py` should also appear in the JSON metadata.
|
|
25
40
|
|
|
26
41
|
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
* [I Made Epstein's Text Messages Great Again (And You Should Read Them)](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great) post on [Substack](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great)
|
|
4
4
|
* The Epstein text messages (and some of the emails along with summary counts of sent emails to/from Epstein) generated by this code can be viewed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/).
|
|
5
|
-
* All of His Emails can be read at another page also generated by this code [here](https://michelcrypt4d4mus.github.io/
|
|
6
|
-
* Word counts for the emails and text messages are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
7
|
-
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
5
|
+
* All of His Emails can be read at another page also generated by this code [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html).
|
|
6
|
+
* Word counts for the emails and text messages are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html).
|
|
7
|
+
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json)
|
|
8
8
|
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `111111` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py` should also appear in the JSON metadata.
|
|
9
9
|
|
|
10
10
|
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Reformat Epstein text message files for readability and count email senders.
|
|
4
|
+
For use with iMessage log files from https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_
|
|
5
|
+
|
|
6
|
+
Install: 'poetry install'
|
|
7
|
+
Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT ./generate.py'
|
|
8
|
+
"""
|
|
9
|
+
from sys import exit
|
|
10
|
+
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
load_dotenv()
|
|
13
|
+
|
|
14
|
+
from rich.markup import escape
|
|
15
|
+
from rich.padding import Padding
|
|
16
|
+
from rich.panel import Panel
|
|
17
|
+
|
|
18
|
+
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
19
|
+
from epstein_files.documents.document import INFO_PADDING, Document
|
|
20
|
+
from epstein_files.documents.email import Email
|
|
21
|
+
from epstein_files.util.constant.html import *
|
|
22
|
+
from epstein_files.util.constant.names import *
|
|
23
|
+
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
|
|
24
|
+
from epstein_files.util.env import args, specified_names
|
|
25
|
+
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
26
|
+
from epstein_files.util.logging import logger
|
|
27
|
+
from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
|
|
28
|
+
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
29
|
+
from epstein_files.util.timer import Timer
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_html() -> None:
|
|
33
|
+
if args.make_clean:
|
|
34
|
+
make_clean()
|
|
35
|
+
exit()
|
|
36
|
+
|
|
37
|
+
timer = Timer()
|
|
38
|
+
epstein_files = EpsteinFiles.get_files(timer)
|
|
39
|
+
|
|
40
|
+
if args.json_metadata:
|
|
41
|
+
print_json_metadata(epstein_files)
|
|
42
|
+
exit()
|
|
43
|
+
|
|
44
|
+
print_header(epstein_files)
|
|
45
|
+
|
|
46
|
+
if args.colors_only:
|
|
47
|
+
exit()
|
|
48
|
+
|
|
49
|
+
if args.output_texts:
|
|
50
|
+
print_text_messages(epstein_files)
|
|
51
|
+
timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
|
|
52
|
+
|
|
53
|
+
if args.output_emails:
|
|
54
|
+
emails_printed = print_emails(epstein_files)
|
|
55
|
+
timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
|
|
56
|
+
|
|
57
|
+
if args.output_other_files:
|
|
58
|
+
files_printed = epstein_files.print_other_files_table()
|
|
59
|
+
timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
|
|
60
|
+
|
|
61
|
+
# Save output
|
|
62
|
+
write_html(ALL_EMAILS_PATH if args.all_emails else TEXT_MSGS_HTML_PATH)
|
|
63
|
+
logger.warning(f"Total time: {timer.seconds_since_start_str()}")
|
|
64
|
+
|
|
65
|
+
# JSON stats (mostly used for building pytest checks)
|
|
66
|
+
if args.json_stats:
|
|
67
|
+
print_json_stats(epstein_files)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def epstein_diff():
|
|
71
|
+
"""Diff the cleaned up text of two files."""
|
|
72
|
+
Document.diff_files(args.positional_args)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def epstein_search():
|
|
76
|
+
"""Search the cleaned up text of the files."""
|
|
77
|
+
_assert_positional_args()
|
|
78
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
79
|
+
|
|
80
|
+
for search_term in args.positional_args:
|
|
81
|
+
temp_highlighter = build_highlighter(search_term)
|
|
82
|
+
search_results = epstein_files.docs_matching(search_term, specified_names)
|
|
83
|
+
console.line(2)
|
|
84
|
+
print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
|
|
85
|
+
|
|
86
|
+
for search_result in search_results:
|
|
87
|
+
console.line()
|
|
88
|
+
|
|
89
|
+
if args.whole_file:
|
|
90
|
+
console.print(search_result.document)
|
|
91
|
+
else:
|
|
92
|
+
console.print(search_result.document.description_panel())
|
|
93
|
+
|
|
94
|
+
for matching_line in search_result.lines:
|
|
95
|
+
line_txt = matching_line.__rich__()
|
|
96
|
+
console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def epstein_show():
|
|
100
|
+
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
101
|
+
_assert_positional_args()
|
|
102
|
+
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
103
|
+
console.line()
|
|
104
|
+
|
|
105
|
+
if args.pickled:
|
|
106
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
107
|
+
docs = epstein_files.get_documents_by_id(ids)
|
|
108
|
+
else:
|
|
109
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
110
|
+
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
111
|
+
|
|
112
|
+
for doc in docs:
|
|
113
|
+
console.line()
|
|
114
|
+
console.print(doc)
|
|
115
|
+
|
|
116
|
+
if args.raw:
|
|
117
|
+
console.line()
|
|
118
|
+
console.print(Panel(f"*** {doc.url_slug} RAW ***", expand=False, style=doc._border_style()))
|
|
119
|
+
console.print(escape(doc.raw_text()))
|
|
120
|
+
|
|
121
|
+
if isinstance(doc, Email):
|
|
122
|
+
console.line()
|
|
123
|
+
console.print(Panel(f"*** {doc.url_slug} actual_text ***", expand=False, style=doc._border_style()))
|
|
124
|
+
console.print(escape(doc._actual_text()))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def epstein_dump_urls() -> None:
|
|
128
|
+
write_urls()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _assert_positional_args():
|
|
132
|
+
if not args.positional_args:
|
|
133
|
+
console.print(f"\n ERROR: No positional args!\n", style='red1')
|
|
134
|
+
exit(1)
|
|
@@ -255,7 +255,11 @@ class Document:
|
|
|
255
255
|
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
256
256
|
|
|
257
257
|
txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
|
|
258
|
-
txt.append(", ").append(key_value_txt('lines',
|
|
258
|
+
txt.append(", ").append(key_value_txt('lines', self.num_lines))
|
|
259
|
+
|
|
260
|
+
if self.config and self.config.dupe_of_id:
|
|
261
|
+
txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.dupe_of_id, style='magenta')))
|
|
262
|
+
|
|
259
263
|
return txt
|
|
260
264
|
|
|
261
265
|
def top_lines(self, n: int = 10) -> str:
|
|
@@ -19,6 +19,7 @@ from epstein_files.documents.emails.email_header import AUTHOR
|
|
|
19
19
|
from epstein_files.documents.json_file import JsonFile
|
|
20
20
|
from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
|
|
21
21
|
from epstein_files.documents.other_file import OtherFile
|
|
22
|
+
from epstein_files.util.constant.output_files import PICKLED_PATH
|
|
22
23
|
from epstein_files.util.constant.strings import *
|
|
23
24
|
from epstein_files.util.constant.urls import (EPSTEIN_WEB, JMAIL, epsteinify_name_url, epstein_web_person_url,
|
|
24
25
|
search_jmail_url, search_twitter_url)
|
|
@@ -26,7 +27,7 @@ from epstein_files.util.constants import *
|
|
|
26
27
|
from epstein_files.util.data import dict_sets_to_lists, json_safe, sort_dict
|
|
27
28
|
from epstein_files.util.doc_cfg import EmailCfg
|
|
28
29
|
from epstein_files.util.env import args, logger
|
|
29
|
-
from epstein_files.util.file_helper import DOCS_DIR,
|
|
30
|
+
from epstein_files.util.file_helper import DOCS_DIR, file_size_str
|
|
30
31
|
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
31
32
|
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table, console, highlighter,
|
|
32
33
|
link_text_obj, link_markup, print_author_header, print_centered, print_other_site_link, print_panel,
|
|
@@ -37,7 +38,7 @@ from epstein_files.util.timer import Timer
|
|
|
37
38
|
DEVICE_SIGNATURE = 'Device Signature'
|
|
38
39
|
DEVICE_SIGNATURE_PADDING = (1, 0)
|
|
39
40
|
NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
|
|
40
|
-
SLOW_FILE_SECONDS = 0
|
|
41
|
+
SLOW_FILE_SECONDS = 1.0
|
|
41
42
|
|
|
42
43
|
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
|
|
43
44
|
'ACT for America',
|
|
@@ -54,6 +55,7 @@ class EpsteinFiles:
|
|
|
54
55
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
55
56
|
json_files: list[JsonFile] = field(default_factory=list)
|
|
56
57
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
58
|
+
timer: Timer = field(default_factory=lambda: Timer())
|
|
57
59
|
|
|
58
60
|
# Analytics / calculations
|
|
59
61
|
email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
|
|
@@ -90,17 +92,18 @@ class EpsteinFiles:
|
|
|
90
92
|
self._tally_email_data()
|
|
91
93
|
|
|
92
94
|
@classmethod
|
|
93
|
-
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
|
|
95
|
+
def get_files(cls, timer: Timer | None = None, use_pickled: bool = False) -> 'EpsteinFiles':
|
|
94
96
|
"""Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
|
|
95
97
|
timer = timer or Timer()
|
|
96
98
|
|
|
97
|
-
if (args.pickled and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
99
|
+
if ((args.pickled or use_pickled) and PICKLED_PATH.exists()) and not args.overwrite_pickle:
|
|
98
100
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
99
101
|
epstein_files = pickle.load(file)
|
|
100
102
|
timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
|
|
103
|
+
epstein_files.timer = timer
|
|
101
104
|
return epstein_files
|
|
102
105
|
|
|
103
|
-
epstein_files = EpsteinFiles()
|
|
106
|
+
epstein_files = EpsteinFiles(timer=timer)
|
|
104
107
|
|
|
105
108
|
if args.overwrite_pickle or not PICKLED_PATH.exists():
|
|
106
109
|
with gzip.open(PICKLED_PATH, 'wb') as file:
|
|
@@ -197,37 +200,36 @@ class EpsteinFiles:
|
|
|
197
200
|
|
|
198
201
|
def json_metadata(self) -> str:
|
|
199
202
|
metadata = {
|
|
200
|
-
EMAIL_CLASS: [json_safe(
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
EMAIL_CLASS: [json_safe(d.metadata()) for d in self.emails],
|
|
204
|
+
JSON_FILE_CLASS: [json_safe(d.metadata()) for d in self.json_files],
|
|
205
|
+
MESSENGER_LOG_CLASS: [json_safe(d.metadata()) for d in self.imessage_logs],
|
|
206
|
+
OTHER_FILE_CLASS: [json_safe(d.metadata()) for d in self.other_files if not isinstance(d, JsonFile)],
|
|
203
207
|
}
|
|
204
208
|
|
|
205
209
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
206
210
|
|
|
207
|
-
def
|
|
208
|
-
|
|
209
|
-
dupes = defaultdict(int)
|
|
210
|
-
|
|
211
|
-
for doc in self.all_documents():
|
|
212
|
-
if doc.is_duplicate:
|
|
213
|
-
dupes[doc.class_name()] += 1
|
|
211
|
+
def non_json_other_files(self) -> list[OtherFile]:
|
|
212
|
+
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
214
213
|
|
|
214
|
+
def print_files_summary(self) -> None:
|
|
215
215
|
table = Table(title='Summary of Document Types')
|
|
216
216
|
add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
|
|
217
217
|
|
|
218
|
-
def add_row(label: str, docs: list
|
|
218
|
+
def add_row(label: str, docs: list):
|
|
219
|
+
known = None if isinstance(docs[0], JsonFile) else len([d for d in docs if d.author])
|
|
220
|
+
|
|
219
221
|
table.add_row(
|
|
220
222
|
label,
|
|
221
223
|
f"{len(docs):,}",
|
|
222
|
-
f"{known:,}" if known else NA_TXT,
|
|
223
|
-
f"{len(docs) - known:,}" if known else NA_TXT,
|
|
224
|
-
f"{
|
|
224
|
+
f"{known:,}" if known is not None else NA_TXT,
|
|
225
|
+
f"{len(docs) - known:,}" if known is not None else NA_TXT,
|
|
226
|
+
f"{len([d for d in docs if d.is_duplicate])}",
|
|
225
227
|
)
|
|
226
228
|
|
|
227
|
-
add_row('iMessage Logs', self.imessage_logs
|
|
228
|
-
add_row('Emails', self.emails
|
|
229
|
-
add_row('JSON Data', self.json_files
|
|
230
|
-
add_row('Other',
|
|
229
|
+
add_row('iMessage Logs', self.imessage_logs)
|
|
230
|
+
add_row('Emails', self.emails)
|
|
231
|
+
add_row('JSON Data', self.json_files)
|
|
232
|
+
add_row('Other', self.non_json_other_files())
|
|
231
233
|
console.print(Align.center(table))
|
|
232
234
|
console.line()
|
|
233
235
|
|
|
@@ -357,6 +359,18 @@ def build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str]
|
|
|
357
359
|
return Padding(table, DEVICE_SIGNATURE_PADDING)
|
|
358
360
|
|
|
359
361
|
|
|
362
|
+
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
363
|
+
counts: dict[str | None, int] = defaultdict(int)
|
|
364
|
+
|
|
365
|
+
for doc in docs:
|
|
366
|
+
if doc.timestamp:
|
|
367
|
+
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
368
|
+
else:
|
|
369
|
+
counts[None] += 1
|
|
370
|
+
|
|
371
|
+
return counts
|
|
372
|
+
|
|
373
|
+
|
|
360
374
|
def document_cls(document: Document) -> Type[Document]:
|
|
361
375
|
search_area = document.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
362
376
|
|
|
@@ -380,15 +394,3 @@ def is_ok_for_epstein_web(name: str | None) -> bool:
|
|
|
380
394
|
return False
|
|
381
395
|
|
|
382
396
|
return True
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
386
|
-
counts: dict[str | None, int] = defaultdict(int)
|
|
387
|
-
|
|
388
|
-
for doc in docs:
|
|
389
|
-
if doc.timestamp:
|
|
390
|
-
counts[doc.timestamp.date().isoformat()[0:7]] += 1
|
|
391
|
-
else:
|
|
392
|
-
counts[None] += 1
|
|
393
|
-
|
|
394
|
-
return counts
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from epstein_files.util.constant.strings import QUESTION_MARKS, remove_question_marks
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
UNKNOWN = '(unknown)'
|
|
5
4
|
|
|
6
5
|
# Texting Names
|
|
@@ -170,6 +169,7 @@ ZUBAIR_KHAN = 'Zubair Khan'
|
|
|
170
169
|
|
|
171
170
|
# No communications but name is in the files
|
|
172
171
|
BILL_GATES = 'Bill Gates'
|
|
172
|
+
DONALD_TRUMP = 'Donald Trump'
|
|
173
173
|
ELON_MUSK = 'Elon Musk'
|
|
174
174
|
HENRY_HOLT = 'Henry Holt' # Actually a company?
|
|
175
175
|
IVANKA = 'Ivanka'
|
|
@@ -195,6 +195,7 @@ INSIGHTS_POD = f"InsightsPod" # Zubair bots
|
|
|
195
195
|
NEXT_MANAGEMENT = 'Next Management LLC'
|
|
196
196
|
JP_MORGAN = 'JP Morgan'
|
|
197
197
|
OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
|
|
198
|
+
TRUMP_ORG = 'Trump Organization'
|
|
198
199
|
UBS = 'UBS'
|
|
199
200
|
|
|
200
201
|
# Locations
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
4
|
+
|
|
5
|
+
EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
|
|
6
|
+
URLS_ENV = '.urls.env'
|
|
7
|
+
|
|
8
|
+
HTML_DIR = Path('docs')
|
|
9
|
+
ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
|
|
10
|
+
JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
|
|
11
|
+
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
12
|
+
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
|
|
13
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
14
|
+
|
|
15
|
+
BUILD_ARTIFACTS = [
|
|
16
|
+
ALL_EMAILS_PATH,
|
|
17
|
+
# EPSTEIN_WORD_COUNT_HTML_PATH,
|
|
18
|
+
JSON_METADATA_PATH,
|
|
19
|
+
TEXT_MSGS_HTML_PATH,
|
|
20
|
+
WORD_COUNT_HTML_PATH,
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_clean() -> None:
|
|
25
|
+
"""Delete all build artifacts."""
|
|
26
|
+
for build_file in BUILD_ARTIFACTS:
|
|
27
|
+
if build_file.exists():
|
|
28
|
+
print(f"Removing build file '{build_file}'...")
|
|
29
|
+
build_file.unlink()
|
|
@@ -5,8 +5,9 @@ from typing import Literal
|
|
|
5
5
|
from inflection import parameterize
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
8
|
+
from epstein_files.util.constant.output_files import *
|
|
8
9
|
from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
|
|
9
|
-
from epstein_files.util.file_helper import
|
|
10
|
+
from epstein_files.util.file_helper import coerce_file_stem
|
|
10
11
|
|
|
11
12
|
# Style stuff
|
|
12
13
|
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
@@ -21,15 +22,17 @@ EPSTEINIFY = 'epsteinify'
|
|
|
21
22
|
JMAIL = 'Jmail'
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
#
|
|
25
|
+
# Deployment URLS
|
|
26
|
+
# NOTE: don't rename these variables without changing deploy.sh!
|
|
25
27
|
GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
|
|
29
|
+
ALL_EMAILS_URL = f'{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}'
|
|
30
|
+
JSON_METADATA_URL = f'{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}'
|
|
31
|
+
WORD_COUNT_URL = f'{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}'
|
|
29
32
|
|
|
30
33
|
SITE_URLS: dict[SiteType, str] = {
|
|
31
|
-
EMAIL:
|
|
32
|
-
TEXT_MESSAGE:
|
|
34
|
+
EMAIL: ALL_EMAILS_URL,
|
|
35
|
+
TEXT_MESSAGE: TEXT_MSGS_URL,
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
|