epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +32 -13
- epstein_files/documents/document.py +8 -1
- epstein_files/documents/email.py +179 -97
- epstein_files/documents/emails/email_header.py +17 -8
- epstein_files/documents/other_file.py +8 -6
- epstein_files/epstein_files.py +16 -1
- epstein_files/person.py +40 -15
- epstein_files/util/constant/names.py +10 -6
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constants.py +463 -225
- epstein_files/util/doc_cfg.py +33 -27
- epstein_files/util/env.py +10 -3
- epstein_files/util/file_helper.py +2 -0
- epstein_files/util/highlighted_group.py +66 -23
- epstein_files/util/output.py +17 -31
- epstein_files/util/rich.py +2 -1
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
- epstein_files-1.4.1.dist-info/RECORD +34 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
- epstein_files-1.2.5.dist-info/RECORD +0 -34
- {epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0
|
@@ -122,8 +122,8 @@ class OtherFile(Document):
|
|
|
122
122
|
|
|
123
123
|
return Text(escape(self.preview_text()))
|
|
124
124
|
|
|
125
|
-
def is_interesting(self):
|
|
126
|
-
"""False for lame prefixes, duplicates, and other boring files."""
|
|
125
|
+
def is_interesting(self) -> bool:
|
|
126
|
+
"""Overloaded. False for lame prefixes, duplicates, and other boring files."""
|
|
127
127
|
info_sentences = self.info()
|
|
128
128
|
|
|
129
129
|
if self.is_duplicate():
|
|
@@ -164,8 +164,8 @@ class OtherFile(Document):
|
|
|
164
164
|
|
|
165
165
|
def _extract_timestamp(self) -> datetime | None:
|
|
166
166
|
"""Return configured timestamp or value extracted by scanning text with datefinder."""
|
|
167
|
-
if self.config and self.config.timestamp:
|
|
168
|
-
return self.config.timestamp
|
|
167
|
+
if self.config and self.config.timestamp():
|
|
168
|
+
return self.config.timestamp()
|
|
169
169
|
elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
|
|
170
170
|
return None
|
|
171
171
|
|
|
@@ -210,9 +210,10 @@ class OtherFile(Document):
|
|
|
210
210
|
self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
|
|
211
211
|
|
|
212
212
|
@classmethod
|
|
213
|
-
def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
|
|
213
|
+
def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '', title: str = '') -> Table:
|
|
214
214
|
"""Build a table of OtherFile documents."""
|
|
215
|
-
|
|
215
|
+
title = title or f'{title_pfx}Other Files Details in Chronological Order'
|
|
216
|
+
table = build_table(title, show_lines=True, title_justify='left' if title else 'center')
|
|
216
217
|
table.add_column('File', justify='center', width=FILENAME_LENGTH)
|
|
217
218
|
table.add_column('Date', justify='center')
|
|
218
219
|
table.add_column('Size', justify='right', style='dim')
|
|
@@ -244,6 +245,7 @@ class OtherFile(Document):
|
|
|
244
245
|
|
|
245
246
|
@classmethod
|
|
246
247
|
def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
|
|
248
|
+
"""Table showing file count by category."""
|
|
247
249
|
categories = uniquify([f.category() for f in files])
|
|
248
250
|
categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
|
|
249
251
|
table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
|
epstein_files/epstein_files.py
CHANGED
|
@@ -84,6 +84,7 @@ class EpsteinFiles:
|
|
|
84
84
|
self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
|
|
85
85
|
self._set_uninteresting_ccs()
|
|
86
86
|
self._copy_duplicate_email_properties()
|
|
87
|
+
self._find_email_attachments_and_set_is_first_for_user()
|
|
87
88
|
|
|
88
89
|
@classmethod
|
|
89
90
|
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
|
|
@@ -123,6 +124,9 @@ class EpsteinFiles:
|
|
|
123
124
|
|
|
124
125
|
lines = doc.matching_lines(pattern)
|
|
125
126
|
|
|
127
|
+
if args.min_line_length:
|
|
128
|
+
lines = [line for line in lines if len(line.line) > args.min_line_length]
|
|
129
|
+
|
|
126
130
|
if len(lines) > 0:
|
|
127
131
|
results.append(SearchResult(doc, lines))
|
|
128
132
|
|
|
@@ -251,7 +255,7 @@ class EpsteinFiles:
|
|
|
251
255
|
name=name,
|
|
252
256
|
emails=self.emails_for(name),
|
|
253
257
|
imessage_logs=self.imessage_logs_for(name),
|
|
254
|
-
|
|
258
|
+
is_uninteresting=name in self.uninteresting_emailers(),
|
|
255
259
|
other_files=[f for f in self.other_files if name and name == f.author]
|
|
256
260
|
)
|
|
257
261
|
for name in names
|
|
@@ -276,6 +280,17 @@ class EpsteinFiles:
|
|
|
276
280
|
|
|
277
281
|
return self._uninteresting_emailers
|
|
278
282
|
|
|
283
|
+
def _find_email_attachments_and_set_is_first_for_user(self) -> None:
|
|
284
|
+
for file in self.other_files:
|
|
285
|
+
if file.config and file.config.attached_to_email_id:
|
|
286
|
+
email = self.email_for_id(file.config.attached_to_email_id)
|
|
287
|
+
file.warn(f"Attaching to {email}")
|
|
288
|
+
email.attached_docs.append(file)
|
|
289
|
+
|
|
290
|
+
for emailer in self.emailers():
|
|
291
|
+
first_email = emailer.emails[0]
|
|
292
|
+
first_email._is_first_for_user = True
|
|
293
|
+
|
|
279
294
|
def _copy_duplicate_email_properties(self) -> None:
|
|
280
295
|
"""Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
|
|
281
296
|
for email in self.emails:
|
epstein_files/person.py
CHANGED
|
@@ -9,13 +9,13 @@ from rich.table import Table
|
|
|
9
9
|
from rich.text import Text
|
|
10
10
|
|
|
11
11
|
from epstein_files.documents.document import Document
|
|
12
|
-
from epstein_files.documents.email import MAILING_LISTS, JUNK_EMAILERS, Email
|
|
12
|
+
from epstein_files.documents.email import TRUNCATE_EMAILS_FROM, MAILING_LISTS, JUNK_EMAILERS, Email
|
|
13
13
|
from epstein_files.documents.messenger_log import MessengerLog
|
|
14
14
|
from epstein_files.documents.other_file import OtherFile
|
|
15
15
|
from epstein_files.util.constant.strings import *
|
|
16
16
|
from epstein_files.util.constant.urls import *
|
|
17
17
|
from epstein_files.util.constants import *
|
|
18
|
-
from epstein_files.util.data import days_between, flatten, without_falsey
|
|
18
|
+
from epstein_files.util.data import days_between, flatten, uniquify, without_falsey
|
|
19
19
|
from epstein_files.util.env import args
|
|
20
20
|
from epstein_files.util.highlighted_group import (QUESTION_MARKS_TXT, HighlightedNames,
|
|
21
21
|
get_highlight_group_for_name, get_style_for_name, styled_category, styled_name)
|
|
@@ -42,7 +42,7 @@ class Person:
|
|
|
42
42
|
emails: list[Email] = field(default_factory=list)
|
|
43
43
|
imessage_logs: list[MessengerLog] = field(default_factory=list)
|
|
44
44
|
other_files: list[OtherFile] = field(default_factory=list)
|
|
45
|
-
|
|
45
|
+
is_uninteresting: bool = False
|
|
46
46
|
|
|
47
47
|
def __post_init__(self):
|
|
48
48
|
self.emails = Document.sort_by_timestamp(self.emails)
|
|
@@ -62,7 +62,7 @@ class Person:
|
|
|
62
62
|
return None
|
|
63
63
|
elif self.category():
|
|
64
64
|
return styled_category(self.category())
|
|
65
|
-
elif self.is_a_mystery() or self.
|
|
65
|
+
elif self.is_a_mystery() or self.is_uninteresting:
|
|
66
66
|
return QUESTION_MARKS_TXT
|
|
67
67
|
|
|
68
68
|
def email_conversation_length_in_days(self) -> int:
|
|
@@ -141,7 +141,7 @@ class Person:
|
|
|
141
141
|
if info:
|
|
142
142
|
return info
|
|
143
143
|
|
|
144
|
-
if self.
|
|
144
|
+
if self.is_uninteresting and len(self.emails_by()) == 0:
|
|
145
145
|
if self.has_any_epstein_emails():
|
|
146
146
|
return UNINTERESTING_CC_INFO
|
|
147
147
|
else:
|
|
@@ -157,8 +157,10 @@ class Person:
|
|
|
157
157
|
return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
|
|
158
158
|
elif self.category() == JUNK:
|
|
159
159
|
return Text(f"({JUNK} mail)", style='bright_black dim')
|
|
160
|
-
elif self.
|
|
161
|
-
if self.
|
|
160
|
+
elif self.is_uninteresting and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
|
|
161
|
+
if self.sole_cc():
|
|
162
|
+
return Text(f"(cc: from {self.sole_cc()} only)", style='wheat4 dim')
|
|
163
|
+
elif self.info_str() == UNINTERESTING_CC_INFO:
|
|
162
164
|
return Text(f"({self.info_str()})", style='wheat4 dim')
|
|
163
165
|
else:
|
|
164
166
|
return Text(f"({self.info_str()})", style='plum4 dim')
|
|
@@ -172,7 +174,7 @@ class Person:
|
|
|
172
174
|
else:
|
|
173
175
|
return None
|
|
174
176
|
else:
|
|
175
|
-
return Text(self.info_str(), style=self.style())
|
|
177
|
+
return Text(self.info_str(), style=self.style(allow_bold=False))
|
|
176
178
|
|
|
177
179
|
def internal_link(self) -> Text:
|
|
178
180
|
"""Kind of like an anchor link to the section of the page containing these emails."""
|
|
@@ -180,7 +182,22 @@ class Person:
|
|
|
180
182
|
|
|
181
183
|
def is_a_mystery(self) -> bool:
|
|
182
184
|
"""Return True if this is someone we theroetically could know more about."""
|
|
183
|
-
return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.
|
|
185
|
+
return self.is_unstyled() and not (self.is_email_address() or self.info_str() or self.is_uninteresting)
|
|
186
|
+
|
|
187
|
+
def sole_cc(self) -> str | None:
|
|
188
|
+
"""Return name if this person sent 0 emails and received CC from only one that name."""
|
|
189
|
+
email_authors = uniquify([e.author for e in self.emails_to()])
|
|
190
|
+
|
|
191
|
+
if len(self.unique_emails()) == 1 and len(email_authors) > 0:
|
|
192
|
+
logger.info(f"sole author of email to '{self.name}' is '{email_authors[0]}'")
|
|
193
|
+
else:
|
|
194
|
+
logger.info(f"'{self.name}' email_authors '{email_authors[0]}'")
|
|
195
|
+
|
|
196
|
+
if len(self.unique_emails_by()) > 0:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
if len(email_authors) == 1:
|
|
200
|
+
return email_authors[0]
|
|
184
201
|
|
|
185
202
|
def is_email_address(self) -> bool:
|
|
186
203
|
return '@' in (self.name or '')
|
|
@@ -196,6 +213,10 @@ class Person:
|
|
|
196
213
|
|
|
197
214
|
return True
|
|
198
215
|
|
|
216
|
+
def should_always_truncate(self) -> bool:
|
|
217
|
+
"""True if we want to truncate all emails to/from this user."""
|
|
218
|
+
return self.name in TRUNCATE_EMAILS_FROM or self.is_uninteresting
|
|
219
|
+
|
|
199
220
|
def is_unstyled(self) -> bool:
|
|
200
221
|
"""True if there's no highlight group for this name."""
|
|
201
222
|
return self.style() == DEFAULT_NAME_STYLE
|
|
@@ -259,8 +280,8 @@ class Person:
|
|
|
259
280
|
else:
|
|
260
281
|
return counts + [self.name_str()]
|
|
261
282
|
|
|
262
|
-
def style(self) -> str:
|
|
263
|
-
return get_style_for_name(self.name)
|
|
283
|
+
def style(self, allow_bold: bool = True) -> str:
|
|
284
|
+
return get_style_for_name(self.name, allow_bold=allow_bold)
|
|
264
285
|
|
|
265
286
|
def unique_emails(self) -> Sequence[Email]:
|
|
266
287
|
return Document.without_dupes(self.emails)
|
|
@@ -290,7 +311,7 @@ class Person:
|
|
|
290
311
|
highlighted = highlighted or people
|
|
291
312
|
highlighted_names = [p.name for p in highlighted]
|
|
292
313
|
is_selection = len(people) != len(highlighted) or args.emailers_info
|
|
293
|
-
all_emails =
|
|
314
|
+
all_emails = Person.emails_from_people(people)
|
|
294
315
|
email_authors = [p for p in people if p.emails_by() and p.name]
|
|
295
316
|
attributed_emails = [email for email in all_emails if email.author]
|
|
296
317
|
footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
|
|
@@ -331,14 +352,18 @@ class Person:
|
|
|
331
352
|
|
|
332
353
|
table.add_row(
|
|
333
354
|
Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
|
|
334
|
-
person.internal_link() if is_on_page and not person.
|
|
355
|
+
person.internal_link() if is_on_page and not person.is_uninteresting else person.name_txt(),
|
|
335
356
|
person.category_txt(),
|
|
336
357
|
f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
|
|
337
|
-
|
|
338
|
-
|
|
358
|
+
str(len(person.unique_emails_by())) if len(person.unique_emails_by()) > 0 else '',
|
|
359
|
+
str(len(person.unique_emails_to())) if len(person.unique_emails_to()) > 0 else '',
|
|
339
360
|
f"{person.email_conversation_length_in_days()}",
|
|
340
361
|
person.info_txt() or '',
|
|
341
362
|
style='' if show_epstein_total or is_on_page else 'dim',
|
|
342
363
|
)
|
|
343
364
|
|
|
344
365
|
return table
|
|
366
|
+
|
|
367
|
+
@staticmethod
|
|
368
|
+
def emails_from_people(people: list['Person']) -> Sequence[Email]:
|
|
369
|
+
return Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
|
|
@@ -218,23 +218,23 @@ UBS = 'UBS'
|
|
|
218
218
|
NAMES_TO_NOT_HIGHLIGHT = """
|
|
219
219
|
al alain alan alfredo allen alex alexander amanda andres andrew anthony
|
|
220
220
|
bard barrett barry bennet bernard bill black bob boris brad brenner bruce
|
|
221
|
-
caroline carolyn chris christina cohen
|
|
221
|
+
cameron caroline carolyn chris christina cohen
|
|
222
222
|
dan daniel danny darren dave david debbie donald
|
|
223
223
|
ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
|
|
224
224
|
faith fisher forget fred friendly frost fuller
|
|
225
225
|
gates gerald george gold gordon
|
|
226
|
-
haddad harry hay heather henry hill hoffman howard
|
|
226
|
+
haddad hanson harry hay heather henry hill hoffman howard
|
|
227
227
|
ian ivan
|
|
228
228
|
jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
|
|
229
|
-
kahn karl kate katherine kelly ken kevin krassner
|
|
230
|
-
larry laurie lawrence leon lesley linda link lisa
|
|
229
|
+
kafka kahn karl kate katherine kelly ken kevin krassner
|
|
230
|
+
larry larsen laurie lawrence leon lesley linda link lisa
|
|
231
231
|
mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
|
|
232
232
|
nancy neal new nicole norman
|
|
233
233
|
owen
|
|
234
234
|
paul paula pen peter philip prince
|
|
235
235
|
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
|
|
236
|
-
scott sean skip stanley stern stephen steve steven stone susan
|
|
237
|
-
the thomas tim tom tony tyler
|
|
236
|
+
scott sean skip smith stanley stern stephen steve steven stone susan
|
|
237
|
+
terry the thomas tim tom tony tyler
|
|
238
238
|
victor
|
|
239
239
|
wade waters
|
|
240
240
|
y
|
|
@@ -269,6 +269,10 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
|
269
269
|
def constantize_name(name: str) -> str:
|
|
270
270
|
if name == 'Andrzej Duda or entourage':
|
|
271
271
|
return 'ANDRZEJ_DUDA'
|
|
272
|
+
elif name == MIROSLAV_LAJCAK:
|
|
273
|
+
return 'MIROSLAV_LAJCAK'
|
|
274
|
+
elif name == 'Paula Heil Fisher (???)':
|
|
275
|
+
return 'PAULA'
|
|
272
276
|
|
|
273
277
|
variable_name = remove_question_marks(name)
|
|
274
278
|
variable_name = variable_name.removesuffix('.').removesuffix('Jr').replace('ź', 'z').replace('ø', 'o').strip()
|
|
@@ -80,6 +80,7 @@ OTHER_FILE_CLASS = 'OtherFile'
|
|
|
80
80
|
remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
|
|
81
81
|
|
|
82
82
|
|
|
83
|
-
def indented(s: str, spaces: int = 4) -> str:
|
|
83
|
+
def indented(s: str, spaces: int = 4, prefix: str = '') -> str:
|
|
84
84
|
indent = ' ' * spaces
|
|
85
|
+
indent += prefix
|
|
85
86
|
return indent + f"\n{indent}".join(s.split('\n'))
|