epstein-files 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +55 -23
- epstein_files/documents/communication.py +9 -5
- epstein_files/documents/document.py +231 -135
- epstein_files/documents/doj_file.py +242 -0
- epstein_files/documents/doj_files/full_text.py +166 -0
- epstein_files/documents/email.py +289 -232
- epstein_files/documents/emails/email_header.py +35 -16
- epstein_files/documents/emails/emailers.py +223 -0
- epstein_files/documents/imessage/text_message.py +2 -3
- epstein_files/documents/json_file.py +18 -14
- epstein_files/documents/messenger_log.py +23 -39
- epstein_files/documents/other_file.py +54 -48
- epstein_files/epstein_files.py +65 -29
- epstein_files/person.py +151 -94
- epstein_files/util/constant/names.py +37 -10
- epstein_files/util/constant/output_files.py +2 -0
- epstein_files/util/constant/strings.py +14 -7
- epstein_files/util/constant/urls.py +17 -0
- epstein_files/util/constants.py +556 -391
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +44 -33
- epstein_files/util/env.py +34 -19
- epstein_files/util/file_helper.py +30 -6
- epstein_files/util/helpers/debugging_helper.py +13 -0
- epstein_files/util/helpers/env_helpers.py +21 -0
- epstein_files/util/highlighted_group.py +121 -37
- epstein_files/util/layout/left_bar_panel.py +26 -0
- epstein_files/util/logging.py +28 -13
- epstein_files/util/output.py +49 -40
- epstein_files/util/rich.py +30 -3
- epstein_files/util/word_count.py +7 -7
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/METADATA +16 -3
- epstein_files-1.5.0.dist-info/RECORD +40 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.5.dist-info/RECORD +0 -34
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@ Reformat Epstein text message files for readability and count email senders.
|
|
|
4
4
|
|
|
5
5
|
Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
|
|
6
6
|
"""
|
|
7
|
+
import re
|
|
7
8
|
from sys import exit
|
|
8
9
|
|
|
9
10
|
from dotenv import load_dotenv
|
|
@@ -15,20 +16,21 @@ from rich.text import Text
|
|
|
15
16
|
|
|
16
17
|
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
17
18
|
from epstein_files.documents.document import INFO_PADDING, Document
|
|
19
|
+
from epstein_files.documents.doj_file import DojFile
|
|
18
20
|
from epstein_files.documents.email import Email
|
|
19
21
|
from epstein_files.documents.messenger_log import MessengerLog
|
|
20
22
|
from epstein_files.documents.other_file import OtherFile
|
|
21
23
|
from epstein_files.util.constant.output_files import make_clean
|
|
22
|
-
from epstein_files.util.constant.strings import
|
|
24
|
+
from epstein_files.util.constant.strings import HOUSE_OVERSIGHT_NOV_2025_ID_REGEX
|
|
23
25
|
from epstein_files.util.data import flatten
|
|
24
26
|
from epstein_files.util.env import args
|
|
25
27
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
26
28
|
from epstein_files.util.logging import exit_with_error, logger
|
|
27
|
-
from epstein_files.util.output import (print_emails_section, print_json_files,
|
|
29
|
+
from epstein_files.util.output import (print_doj_files, print_emails_section, print_json_files, print_stats,
|
|
28
30
|
print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info,
|
|
29
31
|
print_json_metadata, write_urls)
|
|
30
|
-
from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key,
|
|
31
|
-
print_title_page_tables, print_subtitle_panel, write_html)
|
|
32
|
+
from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key, print_json,
|
|
33
|
+
print_title_page_header, print_title_page_tables, print_subtitle_panel, write_html)
|
|
32
34
|
from epstein_files.util.timer import Timer
|
|
33
35
|
from epstein_files.util.word_count import write_word_counts_html
|
|
34
36
|
|
|
@@ -62,6 +64,10 @@ def generate_html() -> None:
|
|
|
62
64
|
if args.colors_only:
|
|
63
65
|
exit()
|
|
64
66
|
|
|
67
|
+
if args.output_doj_files:
|
|
68
|
+
printed_doj_files = print_doj_files(epstein_files)
|
|
69
|
+
timer.log_section_complete('DojFile', epstein_files.doj_files, printed_doj_files)
|
|
70
|
+
|
|
65
71
|
if args.output_texts:
|
|
66
72
|
printed_logs = print_text_messages_section(epstein_files)
|
|
67
73
|
timer.log_section_complete('MessengerLog', epstein_files.imessage_logs, printed_logs)
|
|
@@ -83,9 +89,8 @@ def generate_html() -> None:
|
|
|
83
89
|
if args.debug:
|
|
84
90
|
highlighter.print_highlight_counts(console)
|
|
85
91
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
print_json_stats(epstein_files)
|
|
92
|
+
if args.stats:
|
|
93
|
+
print_stats(epstein_files) # Used for building pytest checks
|
|
89
94
|
|
|
90
95
|
|
|
91
96
|
def epstein_diff():
|
|
@@ -93,11 +98,11 @@ def epstein_diff():
|
|
|
93
98
|
Document.diff_files(args.positional_args)
|
|
94
99
|
|
|
95
100
|
|
|
96
|
-
def
|
|
101
|
+
def epstein_grep():
|
|
97
102
|
"""Search the cleaned up text of the files."""
|
|
98
103
|
epstein_files = EpsteinFiles.get_files()
|
|
99
104
|
|
|
100
|
-
if
|
|
105
|
+
if HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.match(args.positional_args[0]):
|
|
101
106
|
logger.warning(f"'{args.positional_args[0]}' seems to be an ID, running epstein_show instead...")
|
|
102
107
|
epstein_show()
|
|
103
108
|
return
|
|
@@ -106,26 +111,41 @@ def epstein_search():
|
|
|
106
111
|
temp_highlighter = build_highlighter(search_term)
|
|
107
112
|
search_results = epstein_files.docs_matching(search_term, args.names)
|
|
108
113
|
print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
|
|
114
|
+
last_document = None
|
|
109
115
|
|
|
110
116
|
for search_result in search_results:
|
|
111
|
-
|
|
117
|
+
doc = search_result.document
|
|
118
|
+
lines = search_result.lines
|
|
112
119
|
|
|
113
|
-
if (isinstance(
|
|
114
|
-
or (isinstance(
|
|
115
|
-
or (isinstance(
|
|
116
|
-
|
|
120
|
+
if (isinstance(doc, Email) and not args.output_emails) \
|
|
121
|
+
or (isinstance(doc, (DojFile, OtherFile)) and not args.output_other) \
|
|
122
|
+
or (isinstance(doc, MessengerLog) and not args.output_texts):
|
|
123
|
+
doc.log(f"{type(doc).__name__} Skipping search result...")
|
|
117
124
|
continue
|
|
125
|
+
elif isinstance(doc, Email) and args.email_body:
|
|
126
|
+
lines = [l for l in search_result.lines if l.line_number > doc.header.num_header_rows]
|
|
127
|
+
|
|
128
|
+
if not lines:
|
|
129
|
+
doc.log(f"None of the matches for '{search_term}' seem to be in the body of the email")
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
if doc.is_duplicate:
|
|
133
|
+
if last_document and not last_document.is_duplicate:
|
|
134
|
+
console.line()
|
|
118
135
|
|
|
119
|
-
|
|
120
|
-
console.print(
|
|
136
|
+
last_document = doc
|
|
137
|
+
console.print(doc.duplicate_file_txt)
|
|
138
|
+
elif args.whole_file:
|
|
139
|
+
console.print(doc)
|
|
121
140
|
else:
|
|
122
|
-
console.print(
|
|
141
|
+
console.print(doc.summary_panel)
|
|
123
142
|
|
|
124
|
-
for matching_line in
|
|
143
|
+
for matching_line in lines:
|
|
125
144
|
line_txt = matching_line.__rich__()
|
|
126
145
|
console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
|
|
127
146
|
|
|
128
147
|
console.line()
|
|
148
|
+
console.print(doc.local_path_and_url + '\n', style='dim')
|
|
129
149
|
|
|
130
150
|
|
|
131
151
|
def epstein_show():
|
|
@@ -138,23 +158,35 @@ def epstein_show():
|
|
|
138
158
|
people = EpsteinFiles.get_files().person_objs(args.names)
|
|
139
159
|
raw_docs = [doc for doc in flatten([p.emails for p in people])]
|
|
140
160
|
else:
|
|
141
|
-
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
142
|
-
|
|
161
|
+
ids = [extract_file_id(arg.strip().strip('_')) for arg in args.positional_args]
|
|
162
|
+
logger.info(f"extracted IDs: {ids}")
|
|
163
|
+
raw_docs = [Document.from_file_id(id) for id in ids]
|
|
164
|
+
logger.info(f"raw docs: {raw_docs}")
|
|
143
165
|
|
|
166
|
+
# Rebuild the Document objs so we can see result of latest processing
|
|
144
167
|
docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
|
|
168
|
+
logger.info(f"Document types: {[doc._class_name for doc in docs]}")
|
|
145
169
|
except Exception as e:
|
|
170
|
+
console.print_exception()
|
|
146
171
|
exit_with_error(str(e))
|
|
147
172
|
|
|
148
173
|
for doc in docs:
|
|
149
174
|
console.print('\n', doc, '\n')
|
|
150
175
|
|
|
151
176
|
if args.raw:
|
|
152
|
-
console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc.
|
|
177
|
+
console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc.border_style))
|
|
153
178
|
console.print(escape(doc.raw_text()), '\n')
|
|
154
179
|
|
|
155
180
|
if isinstance(doc, Email):
|
|
156
|
-
console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc.
|
|
157
|
-
console.print(escape(doc.
|
|
181
|
+
console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc.border_style))
|
|
182
|
+
console.print(escape(doc._extract_actual_text()), '\n')
|
|
183
|
+
metadata = doc.metadata
|
|
184
|
+
metadata['is_fwded_article'] = doc.is_fwded_article
|
|
185
|
+
metadata['is_word_count_worthy'] = doc.is_word_count_worthy
|
|
186
|
+
metadata['_is_first_for_user'] = doc._is_first_for_user
|
|
187
|
+
print_json(f"{doc.file_id} Metadata", metadata)
|
|
188
|
+
|
|
189
|
+
console.print(doc.local_path_and_url, style='dim')
|
|
158
190
|
|
|
159
191
|
|
|
160
192
|
def epstein_word_count() -> None:
|
|
@@ -21,26 +21,30 @@ class Communication(Document):
|
|
|
21
21
|
config: CommunicationCfg | None = None
|
|
22
22
|
timestamp: datetime = FALLBACK_TIMESTAMP # TODO this default sucks (though it never happens)
|
|
23
23
|
|
|
24
|
+
@property
|
|
24
25
|
def author_or_unknown(self) -> str:
|
|
25
26
|
return self.author or UNKNOWN
|
|
26
27
|
|
|
28
|
+
@property
|
|
27
29
|
def author_style(self) -> str:
|
|
28
30
|
return get_style_for_name(self.author)
|
|
29
31
|
|
|
32
|
+
@property
|
|
30
33
|
def author_txt(self) -> Text:
|
|
31
34
|
return styled_name(self.author)
|
|
32
35
|
|
|
36
|
+
@property
|
|
37
|
+
def timestamp_without_seconds(self) -> str:
|
|
38
|
+
return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
|
|
39
|
+
|
|
33
40
|
def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
|
|
34
41
|
"""Overrides super() method to apply self.author_style."""
|
|
35
|
-
return super().external_links_txt(self.author_style
|
|
42
|
+
return super().external_links_txt(self.author_style, include_alt_links=include_alt_links)
|
|
36
43
|
|
|
37
44
|
def summary(self) -> Text:
|
|
38
45
|
return self._summary().append(CLOSE_PROPERTIES_CHAR)
|
|
39
46
|
|
|
40
|
-
def timestamp_without_seconds(self) -> str:
|
|
41
|
-
return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
|
|
42
|
-
|
|
43
47
|
def _summary(self) -> Text:
|
|
44
48
|
"""One line summary mostly for logging."""
|
|
45
49
|
txt = super().summary().append(', ')
|
|
46
|
-
return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown
|
|
50
|
+
return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown}'", style=self.author_style)))
|