epstein-files 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +75 -135
- epstein_files/documents/communication.py +9 -9
- epstein_files/documents/document.py +115 -87
- epstein_files/documents/email.py +154 -85
- epstein_files/documents/emails/email_header.py +7 -6
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/json_file.py +17 -0
- epstein_files/documents/messenger_log.py +62 -3
- epstein_files/documents/other_file.py +165 -17
- epstein_files/epstein_files.py +128 -169
- epstein_files/util/constant/names.py +8 -1
- epstein_files/util/constant/output_files.py +29 -0
- epstein_files/util/constant/strings.py +27 -0
- epstein_files/util/constant/urls.py +25 -9
- epstein_files/util/constants.py +1018 -1045
- epstein_files/util/data.py +20 -55
- epstein_files/util/{file_cfg.py → doc_cfg.py} +121 -43
- epstein_files/util/env.py +19 -20
- epstein_files/util/file_helper.py +38 -21
- epstein_files/util/highlighted_group.py +229 -177
- epstein_files/util/logging.py +63 -0
- epstein_files/util/output.py +180 -0
- epstein_files/util/rich.py +29 -17
- epstein_files/util/search_result.py +14 -6
- epstein_files/util/timer.py +24 -0
- epstein_files/util/word_count.py +2 -1
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/METADATA +20 -4
- epstein_files-1.0.2.dist-info/RECORD +33 -0
- epstein_files-1.0.2.dist-info/entry_points.txt +7 -0
- epstein_files-1.0.0.dist-info/RECORD +0 -28
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/WHEEL +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -10,185 +10,125 @@ from sys import exit
|
|
|
10
10
|
|
|
11
11
|
from dotenv import load_dotenv
|
|
12
12
|
load_dotenv()
|
|
13
|
+
|
|
14
|
+
from rich.markup import escape
|
|
13
15
|
from rich.padding import Padding
|
|
16
|
+
from rich.panel import Panel
|
|
14
17
|
|
|
18
|
+
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
19
|
+
from epstein_files.documents.document import INFO_PADDING, Document
|
|
15
20
|
from epstein_files.documents.email import Email
|
|
16
|
-
from epstein_files.epstein_files import EpsteinFiles, count_by_month
|
|
17
21
|
from epstein_files.util.constant.html import *
|
|
18
22
|
from epstein_files.util.constant.names import *
|
|
19
|
-
from epstein_files.util.constant.
|
|
20
|
-
from epstein_files.util.
|
|
21
|
-
from epstein_files.util.
|
|
22
|
-
from epstein_files.util.
|
|
23
|
-
from epstein_files.util.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# Order matters (will be order of output)
|
|
28
|
-
PEOPLE_WHOSE_EMAILS_SHOULD_BE_PRINTED: list[str | None] = [
|
|
29
|
-
JEREMY_RUBIN,
|
|
30
|
-
AL_SECKEL,
|
|
31
|
-
JOI_ITO,
|
|
32
|
-
JABOR_Y,
|
|
33
|
-
STEVEN_SINOFSKY,
|
|
34
|
-
DANIEL_SIAD,
|
|
35
|
-
JEAN_LUC_BRUNEL,
|
|
36
|
-
STEVEN_HOFFENBERG,
|
|
37
|
-
EHUD_BARAK,
|
|
38
|
-
MARTIN_NOWAK,
|
|
39
|
-
MASHA_DROKOVA,
|
|
40
|
-
RENATA_BOLOTOVA,
|
|
41
|
-
STEVE_BANNON,
|
|
42
|
-
OLIVIER_COLOM,
|
|
43
|
-
BORIS_NIKOLIC,
|
|
44
|
-
PRINCE_ANDREW,
|
|
45
|
-
JIDE_ZEITLIN,
|
|
46
|
-
DAVID_STERN,
|
|
47
|
-
MOHAMED_WAHEED_HASSAN,
|
|
48
|
-
JENNIFER_JACQUET,
|
|
49
|
-
None,
|
|
50
|
-
]
|
|
51
|
-
|
|
52
|
-
# Order matters (will be order of output)
|
|
53
|
-
PEOPLE_WHOSE_EMAILS_SHOULD_BE_TABLES: list[str | None] = [
|
|
54
|
-
GHISLAINE_MAXWELL,
|
|
55
|
-
LEON_BLACK,
|
|
56
|
-
LANDON_THOMAS,
|
|
57
|
-
KATHRYN_RUEMMLER,
|
|
58
|
-
DARREN_INDYKE,
|
|
59
|
-
RICHARD_KAHN,
|
|
60
|
-
TYLER_SHEARS,
|
|
61
|
-
SULTAN_BIN_SULAYEM,
|
|
62
|
-
DEEPAK_CHOPRA,
|
|
63
|
-
ARIANE_DE_ROTHSCHILD,
|
|
64
|
-
TOM_PRITZKER,
|
|
65
|
-
]
|
|
23
|
+
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
|
|
24
|
+
from epstein_files.util.env import args, specified_names
|
|
25
|
+
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
26
|
+
from epstein_files.util.logging import logger
|
|
27
|
+
from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
|
|
28
|
+
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
29
|
+
from epstein_files.util.timer import Timer
|
|
66
30
|
|
|
67
31
|
|
|
68
32
|
def generate_html() -> None:
|
|
33
|
+
if args.make_clean:
|
|
34
|
+
make_clean()
|
|
35
|
+
exit()
|
|
36
|
+
|
|
69
37
|
timer = Timer()
|
|
70
38
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
39
|
+
|
|
40
|
+
if args.json_metadata:
|
|
41
|
+
print_json_metadata(epstein_files)
|
|
42
|
+
exit()
|
|
43
|
+
|
|
71
44
|
print_header(epstein_files)
|
|
72
45
|
|
|
73
46
|
if args.colors_only:
|
|
74
47
|
exit()
|
|
75
48
|
|
|
76
|
-
# Text messages section
|
|
77
49
|
if args.output_texts:
|
|
78
50
|
print_text_messages(epstein_files)
|
|
79
|
-
timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)
|
|
51
|
+
timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
|
|
80
52
|
|
|
81
|
-
# Emails section
|
|
82
53
|
if args.output_emails:
|
|
83
54
|
emails_printed = print_emails(epstein_files)
|
|
84
55
|
timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
|
|
85
56
|
|
|
86
57
|
if args.output_other_files:
|
|
87
|
-
epstein_files.print_other_files_table()
|
|
88
|
-
timer.print_at_checkpoint(f"Printed {len(
|
|
89
|
-
else:
|
|
90
|
-
logger.warning(f"Skipping other files section...")
|
|
58
|
+
files_printed = epstein_files.print_other_files_table()
|
|
59
|
+
timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
|
|
91
60
|
|
|
92
61
|
# Save output
|
|
93
|
-
write_html(
|
|
94
|
-
logger.warning(f"Total time: {timer.
|
|
62
|
+
write_html(ALL_EMAILS_PATH if args.all_emails else TEXT_MSGS_HTML_PATH)
|
|
63
|
+
logger.warning(f"Total time: {timer.seconds_since_start_str()}")
|
|
95
64
|
|
|
96
65
|
# JSON stats (mostly used for building pytest checks)
|
|
97
66
|
if args.json_stats:
|
|
98
|
-
console.line(5)
|
|
99
67
|
print_json_stats(epstein_files)
|
|
100
68
|
|
|
101
69
|
|
|
102
|
-
def
|
|
103
|
-
"""
|
|
104
|
-
|
|
105
|
-
print_other_site_link(is_header=False)
|
|
106
|
-
|
|
107
|
-
if len(specified_names) == 0:
|
|
108
|
-
epstein_files.print_emailer_counts_table()
|
|
109
|
-
|
|
110
|
-
emailers_to_print: list[str | None]
|
|
111
|
-
emailer_tables: list[str | None] = []
|
|
112
|
-
emails_that_were_printed: list[Email] = []
|
|
113
|
-
num_emails_printed_since_last_color_key = 0
|
|
70
|
+
def epstein_diff():
|
|
71
|
+
"""Diff the cleaned up text of two files."""
|
|
72
|
+
Document.diff_files(args.positional_args)
|
|
114
73
|
|
|
115
|
-
if args.all_emails:
|
|
116
|
-
console.print('Email conversations are sorted chronologically based on time of the first email.')
|
|
117
|
-
emailers_to_print = sorted(epstein_files.all_emailers(), key=lambda e: epstein_files.earliest_email_at(e))
|
|
118
|
-
print_numbered_list_of_emailers(emailers_to_print, epstein_files)
|
|
119
|
-
else:
|
|
120
|
-
if len(specified_names) > 0:
|
|
121
|
-
emailers_to_print = specified_names
|
|
122
|
-
else:
|
|
123
|
-
emailers_to_print = PEOPLE_WHOSE_EMAILS_SHOULD_BE_PRINTED
|
|
124
|
-
|
|
125
|
-
console.print('Email conversations grouped by counterparty can be found in the order listed below.')
|
|
126
|
-
print_numbered_list_of_emailers(emailers_to_print)
|
|
127
|
-
console.print("\nAfter that there's tables linking to (but not displaying) all known emails for each of these people:")
|
|
128
|
-
|
|
129
|
-
if len(specified_names) > 0:
|
|
130
|
-
if args.all_email_tables:
|
|
131
|
-
emailer_tables = sorted(epstein_files.all_emailers(), key=lambda e: epstein_files.earliest_email_at(e))
|
|
132
|
-
else:
|
|
133
|
-
emailer_tables = PEOPLE_WHOSE_EMAILS_SHOULD_BE_TABLES
|
|
134
|
-
|
|
135
|
-
print_numbered_list_of_emailers(emailer_tables)
|
|
136
74
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
75
|
+
def epstein_search():
|
|
76
|
+
"""Search the cleaned up text of the files."""
|
|
77
|
+
_assert_positional_args()
|
|
78
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
141
79
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if len(emailer_tables) > 0 and len(specified_names) == 0:
|
|
148
|
-
print_author_header(f"Email Tables for {len(emailer_tables)} Other People", 'white')
|
|
80
|
+
for search_term in args.positional_args:
|
|
81
|
+
temp_highlighter = build_highlighter(search_term)
|
|
82
|
+
search_results = epstein_files.docs_matching(search_term, specified_names)
|
|
83
|
+
console.line(2)
|
|
84
|
+
print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
|
|
149
85
|
|
|
150
|
-
for
|
|
151
|
-
|
|
86
|
+
for search_result in search_results:
|
|
87
|
+
console.line()
|
|
152
88
|
|
|
153
|
-
|
|
154
|
-
|
|
89
|
+
if args.whole_file:
|
|
90
|
+
console.print(search_result.document)
|
|
91
|
+
else:
|
|
92
|
+
console.print(search_result.document.description_panel())
|
|
155
93
|
|
|
156
|
-
|
|
94
|
+
for matching_line in search_result.lines:
|
|
95
|
+
line_txt = matching_line.__rich__()
|
|
96
|
+
console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
|
|
157
97
|
|
|
158
|
-
if args.all_emails:
|
|
159
|
-
email_ids_that_were_printed = set([email.file_id for email in emails_that_were_printed])
|
|
160
|
-
logger.warning(f"Printed {len(emails_that_were_printed)} emails of {len(email_ids_that_were_printed)} unique file IDs.")
|
|
161
98
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
99
|
+
def epstein_show():
|
|
100
|
+
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
101
|
+
_assert_positional_args()
|
|
102
|
+
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
103
|
+
console.line()
|
|
165
104
|
|
|
166
|
-
|
|
105
|
+
if args.pickled:
|
|
106
|
+
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
107
|
+
docs = epstein_files.get_documents_by_id(ids)
|
|
108
|
+
else:
|
|
109
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
110
|
+
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
167
111
|
|
|
112
|
+
for doc in docs:
|
|
113
|
+
console.line()
|
|
114
|
+
console.print(doc)
|
|
168
115
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
116
|
+
if args.raw:
|
|
117
|
+
console.line()
|
|
118
|
+
console.print(Panel(f"*** {doc.url_slug} RAW ***", expand=False, style=doc._border_style()))
|
|
119
|
+
console.print(escape(doc.raw_text()))
|
|
172
120
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
121
|
+
if isinstance(doc, Email):
|
|
122
|
+
console.line()
|
|
123
|
+
console.print(Panel(f"*** {doc.url_slug} actual_text ***", expand=False, style=doc._border_style()))
|
|
124
|
+
console.print(escape(doc._actual_text()))
|
|
177
125
|
|
|
178
|
-
for log_file in log_files:
|
|
179
|
-
console.print(Padding(log_file))
|
|
180
|
-
console.line(2)
|
|
181
126
|
|
|
182
|
-
|
|
127
|
+
def epstein_dump_urls() -> None:
|
|
128
|
+
write_urls()
|
|
183
129
|
|
|
184
130
|
|
|
185
|
-
def
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
print_json(f"{EMAIL_CLASS} Recipient Counts", epstein_files.email_recipient_counts, skip_falsey=True)
|
|
190
|
-
print_json("Email signature_substitution_countss", epstein_files.email_signature_substitution_counts(), skip_falsey=True)
|
|
191
|
-
print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures))
|
|
192
|
-
print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors))
|
|
193
|
-
print_json("email_unknown_recipient_file_ids", epstein_files.email_unknown_recipient_file_ids())
|
|
194
|
-
print_json("count_by_month", count_by_month(epstein_files.all_documents()))
|
|
131
|
+
def _assert_positional_args():
|
|
132
|
+
if not args.positional_args:
|
|
133
|
+
console.print(f"\n ERROR: No positional args!\n", style='red1')
|
|
134
|
+
exit(1)
|
|
@@ -8,7 +8,7 @@ from rich.text import Text
|
|
|
8
8
|
from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, Document
|
|
9
9
|
from epstein_files.util.constant.names import UNKNOWN
|
|
10
10
|
from epstein_files.util.constants import FALLBACK_TIMESTAMP
|
|
11
|
-
from epstein_files.util.
|
|
11
|
+
from epstein_files.util.doc_cfg import CommunicationCfg
|
|
12
12
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
13
13
|
from epstein_files.util.rich import key_value_txt
|
|
14
14
|
|
|
@@ -20,7 +20,7 @@ class Communication(Document):
|
|
|
20
20
|
"""Superclass for Email and MessengerLog."""
|
|
21
21
|
author_style: str = 'white'
|
|
22
22
|
author_txt: Text = field(init=False)
|
|
23
|
-
config:
|
|
23
|
+
config: CommunicationCfg | None = None
|
|
24
24
|
timestamp: datetime = FALLBACK_TIMESTAMP # TODO this default sucks (though it never happens)
|
|
25
25
|
|
|
26
26
|
def __post_init__(self):
|
|
@@ -31,22 +31,22 @@ class Communication(Document):
|
|
|
31
31
|
def author_or_unknown(self) -> str:
|
|
32
32
|
return self.author or UNKNOWN
|
|
33
33
|
|
|
34
|
-
def
|
|
35
|
-
return self.
|
|
36
|
-
|
|
37
|
-
def is_attribution_uncertain(self) -> bool | None:
|
|
38
|
-
return self.config and self.config.is_attribution_uncertain
|
|
34
|
+
def is_attribution_uncertain(self) -> bool:
|
|
35
|
+
return bool(self.config and self.config.is_attribution_uncertain)
|
|
39
36
|
|
|
40
37
|
def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
|
|
41
38
|
"""Overrides super() method to apply self.author_style."""
|
|
42
39
|
return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
|
|
43
40
|
|
|
41
|
+
def summary(self) -> Text:
|
|
42
|
+
return self._summary().append(CLOSE_PROPERTIES_CHAR)
|
|
43
|
+
|
|
44
44
|
def timestamp_without_seconds(self) -> str:
|
|
45
45
|
return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
|
|
46
46
|
|
|
47
|
-
def
|
|
47
|
+
def _summary(self) -> Text:
|
|
48
48
|
"""One line summary mostly for logging."""
|
|
49
|
-
txt = super().
|
|
49
|
+
txt = super().summary().append(', ')
|
|
50
50
|
return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style)))
|
|
51
51
|
|
|
52
52
|
|