epstein-files 1.0.13__py3-none-any.whl → 1.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +11 -6
- epstein_files/documents/communication.py +2 -2
- epstein_files/documents/document.py +52 -46
- epstein_files/documents/email.py +32 -29
- epstein_files/documents/imessage/text_message.py +4 -4
- epstein_files/documents/json_file.py +9 -3
- epstein_files/documents/messenger_log.py +20 -17
- epstein_files/documents/other_file.py +50 -71
- epstein_files/epstein_files.py +89 -67
- epstein_files/util/constant/names.py +1 -1
- epstein_files/util/constant/strings.py +1 -1
- epstein_files/util/constants.py +62 -44
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +7 -7
- epstein_files/util/env.py +2 -5
- epstein_files/util/highlighted_group.py +7 -15
- epstein_files/util/output.py +15 -30
- epstein_files/util/rich.py +29 -29
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.0.13.dist-info → epstein_files-1.0.14.dist-info}/METADATA +10 -3
- epstein_files-1.0.14.dist-info/RECORD +33 -0
- epstein_files-1.0.13.dist-info/RECORD +0 -33
- {epstein_files-1.0.13.dist-info → epstein_files-1.0.14.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.13.dist-info → epstein_files-1.0.14.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.13.dist-info → epstein_files-1.0.14.dist-info}/entry_points.txt +0 -0
|
@@ -18,7 +18,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_R
|
|
|
18
18
|
from epstein_files.util.constant.strings import *
|
|
19
19
|
from epstein_files.util.constants import *
|
|
20
20
|
from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
|
|
21
|
-
from epstein_files.util.data import escape_single_quotes, remove_timezone, sort_dict, uniquify
|
|
21
|
+
from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
|
|
22
22
|
from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
|
|
23
23
|
from epstein_files.util.env import args
|
|
24
24
|
from epstein_files.util.highlighted_group import styled_category
|
|
@@ -36,94 +36,62 @@ TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
|
|
|
36
36
|
VAST_HOUSE = 'vast house' # Michael Wolff article draft about Epstein indicator
|
|
37
37
|
VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
SKIP_TIMESTAMP_EXTRACT = [
|
|
40
|
+
PALM_BEACH_TSV,
|
|
41
|
+
PALM_BEACH_PROPERTY_INFO,
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
UNINTERESTING_CATEGORIES = [
|
|
45
|
+
ACADEMIA,
|
|
46
|
+
ARTICLE,
|
|
40
47
|
ARTS,
|
|
41
48
|
BOOK,
|
|
49
|
+
CONFERENCE,
|
|
42
50
|
JUNK,
|
|
51
|
+
POLITICS,
|
|
43
52
|
SKYPE_LOG,
|
|
44
|
-
SPEECH,
|
|
45
53
|
]
|
|
46
54
|
|
|
47
55
|
# OtherFiles whose descriptions/info match these prefixes are not displayed unless --all-other-files is used
|
|
48
|
-
UNINTERESTING_PREFIXES =
|
|
56
|
+
UNINTERESTING_PREFIXES = [
|
|
49
57
|
'article about',
|
|
50
|
-
ARTICLE_DRAFT,
|
|
51
|
-
'Aviation International',
|
|
52
|
-
BBC,
|
|
53
|
-
BLOOMBERG,
|
|
54
|
-
'Boston Globe',
|
|
55
58
|
BROCKMAN_INC,
|
|
56
|
-
CHINA_DAILY,
|
|
57
|
-
CNN,
|
|
58
|
-
'completely redacted',
|
|
59
59
|
CVRA,
|
|
60
|
-
DAILY_MAIL,
|
|
61
|
-
DAILY_TELEGRAPH,
|
|
62
|
-
CVRA_LEXIS_SEARCH[0:-12], # Because date at end :(
|
|
63
60
|
DERSH_GIUFFRE_TWEET,
|
|
64
|
-
'Financial Times',
|
|
65
|
-
'Forbes',
|
|
66
|
-
'Frontlines',
|
|
67
|
-
'Future Science',
|
|
68
|
-
'Globe and Mail',
|
|
69
61
|
GORDON_GETTY,
|
|
70
62
|
f"{HARVARD} Econ",
|
|
71
63
|
HARVARD_POETRY,
|
|
72
|
-
'Inference',
|
|
73
64
|
JASTA,
|
|
74
|
-
|
|
75
|
-
JOHN_BOLTON_PRESS_CLIPPING,
|
|
76
|
-
'Journal of Criminal',
|
|
77
|
-
LA_TIMES,
|
|
78
|
-
'Litigation Daily',
|
|
79
|
-
LAWRENCE_KRAUSS,
|
|
80
|
-
LAWRENCE_KRAUSS_ASU_ORIGINS,
|
|
81
|
-
'MarketWatch',
|
|
82
|
-
MARTIN_NOWAK,
|
|
83
|
-
'Morning News',
|
|
65
|
+
LEXIS_NEXIS,
|
|
84
66
|
NOBEL_CHARITABLE_TRUST,
|
|
85
|
-
'Nautilus',
|
|
86
|
-
'New Yorker',
|
|
87
|
-
NYT,
|
|
88
67
|
PALM_BEACH_CODE_ENFORCEMENT,
|
|
89
|
-
PALM_BEACH_DAILY_NEWS,
|
|
90
|
-
PALM_BEACH_POST,
|
|
91
68
|
PALM_BEACH_TSV,
|
|
92
69
|
PALM_BEACH_WATER_COMMITTEE,
|
|
93
|
-
PAUL_KRASSNER,
|
|
94
|
-
PEGGY_SIEGAL,
|
|
95
|
-
'Politifact',
|
|
96
|
-
'Rafanelli',
|
|
97
|
-
ROBERT_LAWRENCE_KUHN,
|
|
98
|
-
ROBERT_TRIVERS,
|
|
99
|
-
'SCMP',
|
|
100
|
-
'SciencExpress',
|
|
101
|
-
'Scowcroft',
|
|
102
|
-
SHIMON_POST_ARTICLE,
|
|
103
|
-
SINGLE_PAGE,
|
|
104
|
-
STACEY_PLASKETT,
|
|
105
|
-
'Tatler',
|
|
106
|
-
TERJE_ROD_LARSEN,
|
|
107
|
-
TEXT_OF_US_LAW,
|
|
108
|
-
TRANSLATION,
|
|
109
70
|
TWEET,
|
|
110
|
-
REAL_DEAL_ARTICLE,
|
|
111
|
-
TRUMP_DISCLOSURES,
|
|
112
|
-
UBS_CIO_REPORT,
|
|
113
71
|
UN_GENERAL_ASSEMBLY,
|
|
114
|
-
'U.S. News',
|
|
115
72
|
'US Office',
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
INTERESTING_AUTHORS = [
|
|
76
|
+
EDWARD_JAY_EPSTEIN,
|
|
77
|
+
EHUD_BARAK,
|
|
78
|
+
JOI_ITO,
|
|
79
|
+
NOAM_CHOMSKY,
|
|
80
|
+
MICHAEL_WOLFF,
|
|
81
|
+
SVETLANA_POZHIDAEVA,
|
|
119
82
|
]
|
|
120
83
|
|
|
121
84
|
|
|
122
85
|
@dataclass
|
|
123
86
|
class OtherFile(Document):
|
|
124
|
-
"""
|
|
87
|
+
"""
|
|
88
|
+
File that is not an email, an iMessage log, or JSON data.
|
|
125
89
|
|
|
126
|
-
|
|
90
|
+
Attributes:
|
|
91
|
+
was_timestamp_extracted (bool): True if the timestamp was programmatically extracted (and could be wrong)
|
|
92
|
+
"""
|
|
93
|
+
was_timestamp_extracted: bool = False
|
|
94
|
+
include_description_in_summary_panel: ClassVar[bool] = True # Class var for logging output
|
|
127
95
|
|
|
128
96
|
def __post_init__(self):
|
|
129
97
|
super().__post_init__()
|
|
@@ -162,11 +130,13 @@ class OtherFile(Document):
|
|
|
162
130
|
elif len(info_sentences) == 0:
|
|
163
131
|
return True
|
|
164
132
|
elif self.config:
|
|
165
|
-
if self.config.is_interesting:
|
|
133
|
+
if self.config.is_interesting is not None:
|
|
134
|
+
return self.config.is_interesting
|
|
135
|
+
elif self.config.author in INTERESTING_AUTHORS:
|
|
166
136
|
return True
|
|
167
137
|
elif self.category() == FINANCE and self.author is not None:
|
|
168
138
|
return False
|
|
169
|
-
elif self.category() in
|
|
139
|
+
elif self.category() in UNINTERESTING_CATEGORIES:
|
|
170
140
|
return False
|
|
171
141
|
|
|
172
142
|
for prefix in UNINTERESTING_PREFIXES:
|
|
@@ -178,6 +148,10 @@ class OtherFile(Document):
|
|
|
178
148
|
def metadata(self) -> Metadata:
|
|
179
149
|
metadata = super().metadata()
|
|
180
150
|
metadata['is_interesting'] = self.is_interesting()
|
|
151
|
+
|
|
152
|
+
if self.was_timestamp_extracted:
|
|
153
|
+
metadata['was_timestamp_extracted'] = self.was_timestamp_extracted
|
|
154
|
+
|
|
181
155
|
return metadata
|
|
182
156
|
|
|
183
157
|
def preview_text(self) -> str:
|
|
@@ -191,6 +165,8 @@ class OtherFile(Document):
|
|
|
191
165
|
"""Return configured timestamp or value extracted by scanning text with datefinder."""
|
|
192
166
|
if self.config and self.config.timestamp:
|
|
193
167
|
return self.config.timestamp
|
|
168
|
+
elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
|
|
169
|
+
return None
|
|
194
170
|
|
|
195
171
|
timestamps: list[datetime] = []
|
|
196
172
|
|
|
@@ -214,7 +190,10 @@ class OtherFile(Document):
|
|
|
214
190
|
self.log_top_lines(15, msg=f"No timestamps found")
|
|
215
191
|
|
|
216
192
|
return None
|
|
217
|
-
|
|
193
|
+
|
|
194
|
+
self.was_timestamp_extracted = True
|
|
195
|
+
|
|
196
|
+
if len(timestamps) == 1:
|
|
218
197
|
return timestamps[0]
|
|
219
198
|
else:
|
|
220
199
|
timestamps = sorted(uniquify(timestamps), reverse=True)
|
|
@@ -222,7 +201,7 @@ class OtherFile(Document):
|
|
|
222
201
|
return timestamps[0] # Most recent timestamp appearing in text is usually the closest
|
|
223
202
|
|
|
224
203
|
def _log_extracted_timestamps_info(self, timestamps: list[datetime]) -> None:
|
|
225
|
-
num_days_spanned = (timestamps[
|
|
204
|
+
num_days_spanned = days_between(timestamps[-1], timestamps[0])
|
|
226
205
|
timestamps_log_msg = f"Extracted {len(timestamps)} timestamps spanning {num_days_spanned} days{TIMESTAMP_LOG_INDENT}"
|
|
227
206
|
timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
|
|
228
207
|
|
|
@@ -230,9 +209,9 @@ class OtherFile(Document):
|
|
|
230
209
|
self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
|
|
231
210
|
|
|
232
211
|
@staticmethod
|
|
233
|
-
def
|
|
212
|
+
def files_preview_table(files: Sequence['OtherFile']) -> Table:
|
|
234
213
|
"""Build a table of OtherFile documents."""
|
|
235
|
-
table = build_table(
|
|
214
|
+
table = build_table('Other Files Details', show_lines=True)
|
|
236
215
|
table.add_column('File', justify='center', width=FILENAME_LENGTH)
|
|
237
216
|
table.add_column('Date', justify='center')
|
|
238
217
|
table.add_column('Size', justify='center')
|
|
@@ -240,7 +219,7 @@ class OtherFile(Document):
|
|
|
240
219
|
table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
|
|
241
220
|
|
|
242
221
|
for file in files:
|
|
243
|
-
link_and_info = [file.
|
|
222
|
+
link_and_info = [file.external_links_txt()]
|
|
244
223
|
date_str = file.date_str()
|
|
245
224
|
|
|
246
225
|
if file.is_duplicate():
|
|
@@ -272,10 +251,10 @@ class OtherFile(Document):
|
|
|
272
251
|
logger.warning(f"file {file.file_id} has no category")
|
|
273
252
|
|
|
274
253
|
counts[file.category()] += 1
|
|
275
|
-
category_bytes[file.category()] += file.
|
|
254
|
+
category_bytes[file.category()] += file.file_size()
|
|
276
255
|
|
|
277
|
-
table = build_table('Other Files Summary')
|
|
278
|
-
|
|
256
|
+
table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
|
|
257
|
+
table.columns[0].min_width = 14
|
|
279
258
|
table.columns[-1].style = 'dim'
|
|
280
259
|
|
|
281
260
|
for (category, count) in sort_dict(counts):
|
epstein_files/epstein_files.py
CHANGED
|
@@ -23,14 +23,14 @@ from epstein_files.util.constant.strings import *
|
|
|
23
23
|
from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
|
|
24
24
|
epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
|
|
25
25
|
from epstein_files.util.constants import *
|
|
26
|
-
from epstein_files.util.data import
|
|
26
|
+
from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
|
|
27
27
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
28
|
-
from epstein_files.util.env import DOCS_DIR, args, logger
|
|
28
|
+
from epstein_files.util.env import DOCS_DIR, args, logger, specified_names
|
|
29
29
|
from epstein_files.util.file_helper import file_size_str
|
|
30
|
-
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
30
|
+
from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
|
|
31
31
|
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
|
|
32
|
-
build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
|
|
33
|
-
|
|
32
|
+
print_all_files_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
|
|
33
|
+
print_panel, print_section_header, vertically_pad)
|
|
34
34
|
from epstein_files.util.search_result import SearchResult
|
|
35
35
|
from epstein_files.util.timer import Timer
|
|
36
36
|
|
|
@@ -72,18 +72,18 @@ class EpsteinFiles:
|
|
|
72
72
|
|
|
73
73
|
# Read through and classify all the files
|
|
74
74
|
for file_arg in self.all_files:
|
|
75
|
-
doc_timer = Timer(decimals=
|
|
75
|
+
doc_timer = Timer(decimals=2)
|
|
76
76
|
document = Document(file_arg)
|
|
77
77
|
cls = document_cls(document)
|
|
78
78
|
|
|
79
|
-
if document.length == 0:
|
|
79
|
+
if document.length() == 0:
|
|
80
80
|
logger.warning(f"Skipping empty file: {document}]")
|
|
81
81
|
continue
|
|
82
82
|
elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
|
|
83
|
-
|
|
83
|
+
document.log(f"Skipping OtherFile...")
|
|
84
84
|
continue
|
|
85
85
|
|
|
86
|
-
documents.append(cls(file_arg, text=document.text))
|
|
86
|
+
documents.append(cls(file_arg, lines=document.lines, text=document.text))
|
|
87
87
|
logger.info(str(documents[-1]))
|
|
88
88
|
file_type_count[cls.__name__] += 1
|
|
89
89
|
|
|
@@ -104,16 +104,20 @@ class EpsteinFiles:
|
|
|
104
104
|
if PICKLED_PATH.exists() and not args.overwrite_pickle:
|
|
105
105
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
106
106
|
epstein_files = pickle.load(file)
|
|
107
|
-
timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
|
|
108
107
|
epstein_files.timer = timer
|
|
108
|
+
timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
|
|
109
|
+
epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
|
|
109
110
|
return epstein_files
|
|
110
111
|
|
|
111
112
|
logger.warning(f"Building new cache file, this will take a few minutes...")
|
|
112
113
|
epstein_files = EpsteinFiles(timer=timer)
|
|
113
114
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
if args.skip_other_files:
|
|
116
|
+
logger.warning(f"Not writing pickled data because --skip-other-files")
|
|
117
|
+
else:
|
|
118
|
+
with gzip.open(PICKLED_PATH, 'wb') as file:
|
|
119
|
+
pickle.dump(epstein_files, file)
|
|
120
|
+
logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
|
|
117
121
|
|
|
118
122
|
timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
|
|
119
123
|
return epstein_files
|
|
@@ -127,9 +131,6 @@ class EpsteinFiles:
|
|
|
127
131
|
names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
|
|
128
132
|
return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
|
|
129
133
|
|
|
130
|
-
def attributed_email_count(self) -> int:
|
|
131
|
-
return sum([i for author, i in self.email_author_counts.items() if author != UNKNOWN])
|
|
132
|
-
|
|
133
134
|
def docs_matching(
|
|
134
135
|
self,
|
|
135
136
|
pattern: re.Pattern | str,
|
|
@@ -156,7 +157,7 @@ class EpsteinFiles:
|
|
|
156
157
|
return self.emails_for(author)[-1].timestamp
|
|
157
158
|
|
|
158
159
|
def email_conversation_length_in_days(self, author: str | None) -> int:
|
|
159
|
-
return (self.
|
|
160
|
+
return days_between(self.earliest_email_at(author), self.last_email_at(author))
|
|
160
161
|
|
|
161
162
|
def email_signature_substitution_counts(self) -> dict[str, int]:
|
|
162
163
|
"""Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
|
|
@@ -172,7 +173,7 @@ class EpsteinFiles:
|
|
|
172
173
|
return sorted(list(self.unknown_recipient_email_ids))
|
|
173
174
|
|
|
174
175
|
def emails_by(self, author: str | None) -> list[Email]:
|
|
175
|
-
return [e for e in self.emails if e.author == author]
|
|
176
|
+
return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
|
|
176
177
|
|
|
177
178
|
def emails_for(self, author: str | None) -> list[Email]:
|
|
178
179
|
"""Returns emails to or from a given 'author' sorted chronologically."""
|
|
@@ -185,9 +186,11 @@ class EpsteinFiles:
|
|
|
185
186
|
|
|
186
187
|
def emails_to(self, author: str | None) -> list[Email]:
|
|
187
188
|
if author is None:
|
|
188
|
-
|
|
189
|
+
emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
|
|
189
190
|
else:
|
|
190
|
-
|
|
191
|
+
emails = [e for e in self.emails if author in e.recipients]
|
|
192
|
+
|
|
193
|
+
return Document.sort_by_timestamp(emails)
|
|
191
194
|
|
|
192
195
|
def get_documents_by_id(self, file_ids: str | list[str]) -> list[Document]:
|
|
193
196
|
file_ids = listify(file_ids)
|
|
@@ -204,14 +207,26 @@ class EpsteinFiles:
|
|
|
204
207
|
def json_metadata(self) -> str:
|
|
205
208
|
"""Create a JSON string containing metadata for all the files."""
|
|
206
209
|
metadata = {
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
210
|
+
'files': {
|
|
211
|
+
Email.__name__: _sorted_metadata(self.emails),
|
|
212
|
+
JsonFile.__name__: _sorted_metadata(self.json_files),
|
|
213
|
+
MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
|
|
214
|
+
OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
|
|
215
|
+
},
|
|
216
|
+
'people': {
|
|
217
|
+
name: highlighted_group.get_info(name)
|
|
218
|
+
for highlighted_group in HIGHLIGHTED_NAMES
|
|
219
|
+
if isinstance(highlighted_group, HighlightedNames)
|
|
220
|
+
for name, description in highlighted_group.emailers.items()
|
|
221
|
+
if description
|
|
222
|
+
}
|
|
211
223
|
}
|
|
212
224
|
|
|
213
225
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
214
226
|
|
|
227
|
+
def non_duplicate_emails(self) -> list[Email]:
|
|
228
|
+
return [email for email in self.emails if not email.is_duplicate()]
|
|
229
|
+
|
|
215
230
|
def non_json_other_files(self) -> list[OtherFile]:
|
|
216
231
|
return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
|
|
217
232
|
|
|
@@ -230,8 +245,8 @@ class EpsteinFiles:
|
|
|
230
245
|
f"{len([d for d in docs if d.is_duplicate()])}",
|
|
231
246
|
)
|
|
232
247
|
|
|
233
|
-
add_row('iMessage Logs', self.imessage_logs)
|
|
234
248
|
add_row('Emails', self.emails)
|
|
249
|
+
add_row('iMessage Logs', self.imessage_logs)
|
|
235
250
|
add_row('JSON Data', self.json_files)
|
|
236
251
|
add_row('Other', self.non_json_other_files())
|
|
237
252
|
console.print(Align.center(table))
|
|
@@ -271,12 +286,51 @@ class EpsteinFiles:
|
|
|
271
286
|
console.print(Align.center(Email.build_table(emails, author)), '\n')
|
|
272
287
|
|
|
273
288
|
def print_email_device_info(self) -> None:
|
|
274
|
-
print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(
|
|
289
|
+
print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
|
|
275
290
|
console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
|
|
276
291
|
console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
|
|
277
292
|
|
|
278
|
-
def
|
|
279
|
-
|
|
293
|
+
def print_other_files_section(self, files: list[OtherFile]) -> None:
|
|
294
|
+
"""Returns the OtherFile objects that were interesting enough to print."""
|
|
295
|
+
category_table = OtherFile.count_by_category_table(files)
|
|
296
|
+
other_files_preview_table = OtherFile.files_preview_table(files)
|
|
297
|
+
header_pfx = '' if args.all_other_files else 'Selected '
|
|
298
|
+
print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
|
|
299
|
+
|
|
300
|
+
if args.all_other_files:
|
|
301
|
+
console.line(1)
|
|
302
|
+
else:
|
|
303
|
+
print_all_files_page_link(self)
|
|
304
|
+
console.line(2)
|
|
305
|
+
|
|
306
|
+
for table in [category_table, other_files_preview_table]:
|
|
307
|
+
table.title = f"{header_pfx}{table.title}"
|
|
308
|
+
|
|
309
|
+
print_centered(category_table)
|
|
310
|
+
console.line(2)
|
|
311
|
+
console.print(other_files_preview_table)
|
|
312
|
+
|
|
313
|
+
def print_text_messages_section(self) -> None:
|
|
314
|
+
"""Print summary table and stats for text messages."""
|
|
315
|
+
print_section_header('All of His Text Messages')
|
|
316
|
+
print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
|
|
317
|
+
authors: list[str | None] = specified_names if specified_names else [JEFFREY_EPSTEIN]
|
|
318
|
+
log_files = self.imessage_logs_for(authors)
|
|
319
|
+
|
|
320
|
+
for log_file in log_files:
|
|
321
|
+
console.print(Padding(log_file))
|
|
322
|
+
console.line(2)
|
|
323
|
+
|
|
324
|
+
print_centered(MessengerLog.summary_table(self.imessage_logs))
|
|
325
|
+
text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
|
|
326
|
+
text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
|
|
327
|
+
console.print(text_summary_msg)
|
|
328
|
+
imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
|
|
329
|
+
console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
|
|
330
|
+
|
|
331
|
+
def table_of_emailers(self) -> Table:
|
|
332
|
+
attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
|
|
333
|
+
footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
|
|
280
334
|
counts_table = build_table("Email Counts", caption=footer)
|
|
281
335
|
|
|
282
336
|
add_cols_to_table(counts_table, [
|
|
@@ -308,49 +362,17 @@ class EpsteinFiles:
|
|
|
308
362
|
str(self.email_recipient_counts[name]),
|
|
309
363
|
emails[0].timestamp_without_seconds(),
|
|
310
364
|
emails[-1].timestamp_without_seconds(),
|
|
311
|
-
|
|
312
|
-
'' if
|
|
313
|
-
'' if
|
|
314
|
-
|
|
365
|
+
link_text_obj(search_jmail_url(name), JMAIL) if name else '',
|
|
366
|
+
link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
|
|
367
|
+
link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
|
|
368
|
+
link_text_obj(search_twitter_url(name), 'search X') if name else '',
|
|
315
369
|
)
|
|
316
370
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
def print_imessage_summary(self) -> None:
|
|
320
|
-
"""Print summary table and stats for text messages."""
|
|
321
|
-
console.print(MessengerLog.summary_table(self.imessage_logs))
|
|
322
|
-
text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
|
|
323
|
-
text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
|
|
324
|
-
console.print(text_summary_msg)
|
|
325
|
-
imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
|
|
326
|
-
console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
|
|
327
|
-
|
|
328
|
-
def print_other_files_table(self) -> list[OtherFile]:
|
|
329
|
-
"""Returns the OtherFile objects that were interesting enough to print."""
|
|
330
|
-
interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
|
|
331
|
-
header_pfx = '' if args.all_other_files else 'Selected '
|
|
332
|
-
print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
|
|
333
|
-
|
|
334
|
-
if not args.all_other_files:
|
|
335
|
-
print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
|
|
336
|
-
print_other_site_link(False)
|
|
337
|
-
console.line(2)
|
|
338
|
-
|
|
339
|
-
console.print(OtherFile.build_table(interesting_files))
|
|
340
|
-
console.print(Padding(OtherFile.count_by_category_table(interesting_files), (2, 0, 2, 2)))
|
|
341
|
-
skipped_file_count = len(self.other_files) - len(interesting_files)
|
|
342
|
-
|
|
343
|
-
if skipped_file_count > 0:
|
|
344
|
-
logger.warning(f"Skipped {skipped_file_count} uninteresting other files...")
|
|
345
|
-
|
|
346
|
-
return interesting_files
|
|
371
|
+
return counts_table
|
|
347
372
|
|
|
348
373
|
def _tally_email_data(self) -> None:
|
|
349
374
|
"""Tally up summary info about Email objects."""
|
|
350
|
-
for email in self.
|
|
351
|
-
if email.is_duplicate():
|
|
352
|
-
continue
|
|
353
|
-
|
|
375
|
+
for email in self.non_duplicate_emails():
|
|
354
376
|
self.email_author_counts[email.author] += 1
|
|
355
377
|
|
|
356
378
|
if len(email.recipients) == 0:
|
|
@@ -380,7 +402,7 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
|
380
402
|
def document_cls(doc: Document) -> Type[Document]:
|
|
381
403
|
search_area = doc.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
382
404
|
|
|
383
|
-
if doc.length == 0:
|
|
405
|
+
if doc.length() == 0:
|
|
384
406
|
return Document
|
|
385
407
|
if doc.text[0] == '{':
|
|
386
408
|
return JsonFile
|
|
@@ -238,7 +238,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
|
238
238
|
ian isaac isaacson
|
|
239
239
|
james jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
240
240
|
karl kate kathy kelly kim kruger kyle
|
|
241
|
-
laurie leo leonard lenny leslie lieberman louis lynch lynn
|
|
241
|
+
laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
|
|
242
242
|
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
243
243
|
nancy nicole nussbaum
|
|
244
244
|
owen
|
|
@@ -22,7 +22,6 @@ PUBLICIST = 'publicist'
|
|
|
22
22
|
REPUTATION = 'reputation'
|
|
23
23
|
SKYPE_LOG = 'Skype log'
|
|
24
24
|
SOCIAL = 'social'
|
|
25
|
-
SPEECH = 'speech'
|
|
26
25
|
|
|
27
26
|
# Locations
|
|
28
27
|
PALM_BEACH = 'Palm Beach'
|
|
@@ -35,6 +34,7 @@ CHINA_DAILY = "China Daily"
|
|
|
35
34
|
DAILY_MAIL = 'Daily Mail'
|
|
36
35
|
DAILY_TELEGRAPH = "Daily Telegraph"
|
|
37
36
|
LA_TIMES = 'LA Times'
|
|
37
|
+
LEXIS_NEXIS = 'Lexis Nexis'
|
|
38
38
|
MIAMI_HERALD = 'Miami Herald'
|
|
39
39
|
NYT = "New York Times"
|
|
40
40
|
PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
|