epstein-files 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +3 -3
- epstein_files/documents/communication.py +2 -2
- epstein_files/documents/document.py +43 -69
- epstein_files/documents/email.py +48 -6
- epstein_files/documents/imessage/text_message.py +1 -1
- epstein_files/documents/json_file.py +1 -1
- epstein_files/documents/messenger_log.py +3 -3
- epstein_files/documents/other_file.py +2 -2
- epstein_files/epstein_files.py +27 -12
- epstein_files/util/constant/names.py +12 -9
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constant/urls.py +13 -8
- epstein_files/util/constants.py +21 -15
- epstein_files/util/data.py +1 -1
- epstein_files/util/doc_cfg.py +20 -42
- epstein_files/util/file_helper.py +3 -9
- epstein_files/util/highlighted_group.py +32 -21
- epstein_files/util/logging.py +1 -1
- epstein_files/util/output.py +1 -1
- epstein_files/util/rich.py +11 -2
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/METADATA +1 -1
- epstein_files-1.0.13.dist-info/RECORD +33 -0
- epstein_files-1.0.11.dist-info/RECORD +0 -33
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.13.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
|
|
|
20
20
|
from epstein_files.util.env import args, specified_names
|
|
21
21
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
22
|
from epstein_files.util.logging import logger
|
|
23
|
-
from epstein_files.util.output import (print_emails, print_json_files,
|
|
24
|
-
print_text_messages, write_urls)
|
|
23
|
+
from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
|
|
24
|
+
print_text_messages, write_json_metadata, write_urls)
|
|
25
25
|
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
26
26
|
from epstein_files.util.timer import Timer
|
|
27
27
|
from epstein_files.util.word_count import write_word_counts_html
|
|
@@ -37,7 +37,7 @@ def generate_html() -> None:
|
|
|
37
37
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
38
38
|
|
|
39
39
|
if args.json_metadata:
|
|
40
|
-
|
|
40
|
+
write_json_metadata(epstein_files)
|
|
41
41
|
exit()
|
|
42
42
|
elif args.json_files:
|
|
43
43
|
print_json_files(epstein_files)
|
|
epstein_files/documents/communication.py
CHANGED
|
@@ -34,9 +34,9 @@ class Communication(Document):
|
|
|
34
34
|
def is_attribution_uncertain(self) -> bool:
|
|
35
35
|
return bool(self.config and self.config.is_attribution_uncertain)
|
|
36
36
|
|
|
37
|
-
def
|
|
37
|
+
def external_links(self, _style: str = '', include_alt_links: bool = True) -> Text:
|
|
38
38
|
"""Overrides super() method to apply self.author_style."""
|
|
39
|
-
return super().
|
|
39
|
+
return super().external_links(self.author_style, include_alt_links=include_alt_links)
|
|
40
40
|
|
|
41
41
|
def summary(self) -> Text:
|
|
42
42
|
return self._summary().append(CLOSE_PROPERTIES_CHAR)
|
|
epstein_files/documents/document.py
CHANGED
|
@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
|
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from subprocess import run
|
|
8
|
-
from typing import ClassVar, Sequence, TypeVar
|
|
8
|
+
from typing import Callable, ClassVar, Sequence, TypeVar
|
|
9
9
|
|
|
10
10
|
from rich.console import Console, ConsoleOptions, Group, RenderResult
|
|
11
11
|
from rich.padding import Padding
|
|
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
|
|
|
16
16
|
from epstein_files.util.constant.strings import *
|
|
17
17
|
from epstein_files.util.constant.urls import *
|
|
18
18
|
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
19
|
-
from epstein_files.util.data import collapse_newlines, date_str,
|
|
20
|
-
from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
|
|
19
|
+
from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
|
|
20
|
+
from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
|
|
21
21
|
from epstein_files.util.env import DOCS_DIR, args
|
|
22
22
|
from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
|
|
23
23
|
file_size_str, is_local_extract_file)
|
|
@@ -31,10 +31,8 @@ INFO_INDENT = 2
|
|
|
31
31
|
INFO_PADDING = (0, 0, 0, INFO_INDENT)
|
|
32
32
|
MAX_TOP_LINES_LEN = 4000 # Only for logging
|
|
33
33
|
MIN_DOCUMENT_ID = 10477
|
|
34
|
-
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
35
34
|
WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
|
|
36
35
|
|
|
37
|
-
EXTRACTED_FROM = 'Extracted from'
|
|
38
36
|
MIN_TIMESTAMP = datetime(1991, 1, 1)
|
|
39
37
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
40
38
|
MAX_TIMESTAMP = datetime(2020, 1, 1)
|
|
@@ -96,15 +94,9 @@ class Document:
|
|
|
96
94
|
def __post_init__(self):
|
|
97
95
|
self.filename = self.file_path.name
|
|
98
96
|
self.file_id = extract_file_id(self.filename)
|
|
99
|
-
self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
97
|
+
self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
100
98
|
|
|
101
|
-
if self
|
|
102
|
-
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
103
|
-
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
104
|
-
|
|
105
|
-
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
106
|
-
self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
|
|
107
|
-
else:
|
|
99
|
+
if 'url_slug' not in vars(self):
|
|
108
100
|
self.url_slug = self.file_path.stem
|
|
109
101
|
|
|
110
102
|
self._set_computed_fields(text=self.text or self._load_file())
|
|
@@ -122,28 +114,51 @@ class Document:
|
|
|
122
114
|
|
|
123
115
|
def duplicate_file_txt(self) -> Text:
|
|
124
116
|
"""If the file is a dupe make a nice message to explain what file it's a duplicate of."""
|
|
125
|
-
if not self.config or not self.config.dupe_of_id:
|
|
117
|
+
if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
|
|
126
118
|
raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
|
|
127
119
|
|
|
128
120
|
txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
|
|
129
|
-
txt.append(f" because it's {self.config.
|
|
121
|
+
txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
|
|
130
122
|
return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
|
|
131
123
|
|
|
132
124
|
def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
133
|
-
|
|
134
|
-
return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
125
|
+
return self.external_url(epsteinify_doc_url, style, link_txt)
|
|
135
126
|
|
|
136
127
|
def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
137
|
-
|
|
138
|
-
return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
128
|
+
return self.external_url(epstein_media_doc_url, style, link_txt)
|
|
139
129
|
|
|
140
130
|
def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
141
|
-
|
|
142
|
-
|
|
131
|
+
return self.external_url(epstein_web_doc_url, style, link_txt)
|
|
132
|
+
|
|
133
|
+
def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
134
|
+
return self.external_url(rollcall_doc_url, style, link_txt)
|
|
135
|
+
|
|
136
|
+
def external_url(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
137
|
+
return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)
|
|
138
|
+
|
|
139
|
+
def external_links(self, style: str = '', include_alt_links: bool = False) -> Text:
|
|
140
|
+
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
141
|
+
txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
|
|
142
|
+
|
|
143
|
+
if args.use_epstein_web:
|
|
144
|
+
txt.append(self.epstein_web_link(style=style))
|
|
145
|
+
alt_link = self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)
|
|
146
|
+
else:
|
|
147
|
+
txt.append(self.epstein_media_link(style=style))
|
|
148
|
+
alt_link = self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)
|
|
149
|
+
|
|
150
|
+
if include_alt_links:
|
|
151
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
152
|
+
txt.append(' (').append(alt_link).append(')')
|
|
153
|
+
|
|
154
|
+
if self._class_name() == 'Email':
|
|
155
|
+
txt.append(' (').append(self.rollcall_link(style='white dim', link_txt=ROLLCALL)).append(')')
|
|
156
|
+
|
|
157
|
+
return txt
|
|
143
158
|
|
|
144
159
|
def file_info_panel(self) -> Group:
|
|
145
160
|
"""Panel with filename linking to raw file plus any additional info about the file."""
|
|
146
|
-
panel = Panel(self.
|
|
161
|
+
panel = Panel(self.external_links(include_alt_links=True), border_style=self._border_style(), expand=False)
|
|
147
162
|
padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
|
|
148
163
|
return Group(*([panel] + padded_info))
|
|
149
164
|
|
|
@@ -155,12 +170,10 @@ class Document:
|
|
|
155
170
|
|
|
156
171
|
def info(self) -> list[Text]:
|
|
157
172
|
"""0 to 2 sentences containing the info_txt() as well as any configured description."""
|
|
158
|
-
|
|
173
|
+
return without_falsey([
|
|
159
174
|
self.info_txt(),
|
|
160
175
|
highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
|
|
161
|
-
]
|
|
162
|
-
|
|
163
|
-
return without_falsey(sentences)
|
|
176
|
+
])
|
|
164
177
|
|
|
165
178
|
def info_txt(self) -> Text | None:
|
|
166
179
|
"""Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
|
|
@@ -197,9 +210,9 @@ class Document:
|
|
|
197
210
|
|
|
198
211
|
if self.is_local_extract_file():
|
|
199
212
|
metadata['extracted_file'] = {
|
|
200
|
-
'explanation': '
|
|
201
|
-
'
|
|
202
|
-
'
|
|
213
|
+
'explanation': 'Manually extracted from one of the court filings.',
|
|
214
|
+
'extracted_from': self.url_slug + '.txt',
|
|
215
|
+
'url': extracted_file_url(self.filename),
|
|
203
216
|
}
|
|
204
217
|
|
|
205
218
|
return metadata
|
|
@@ -208,25 +221,6 @@ class Document:
|
|
|
208
221
|
with open(self.file_path) as f:
|
|
209
222
|
return f.read()
|
|
210
223
|
|
|
211
|
-
def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
|
|
212
|
-
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
213
|
-
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
214
|
-
|
|
215
|
-
if args.use_epstein_web:
|
|
216
|
-
txt.append(self.epstein_web_link(style=style))
|
|
217
|
-
|
|
218
|
-
if include_alt_link:
|
|
219
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
220
|
-
txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
|
|
221
|
-
else:
|
|
222
|
-
txt.append(self.epstein_media_link(style=style))
|
|
223
|
-
|
|
224
|
-
if include_alt_link:
|
|
225
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
226
|
-
txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
|
|
227
|
-
|
|
228
|
-
return txt
|
|
229
|
-
|
|
230
224
|
def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
|
|
231
225
|
"""Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
|
|
232
226
|
for k, v in repairs.items():
|
|
@@ -253,7 +247,7 @@ class Document:
|
|
|
253
247
|
txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
|
|
254
248
|
|
|
255
249
|
if self.timestamp:
|
|
256
|
-
timestamp_str =
|
|
250
|
+
timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
|
|
257
251
|
txt.append(' (', style=SYMBOL_STYLE)
|
|
258
252
|
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
259
253
|
|
|
@@ -327,26 +321,6 @@ class Document:
|
|
|
327
321
|
self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
|
|
328
322
|
self.num_lines = len(self.lines)
|
|
329
323
|
|
|
330
|
-
def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
|
|
331
|
-
"""Copy info from original config for file this document was extracted from."""
|
|
332
|
-
if self.config:
|
|
333
|
-
self.warn(f"Merging existing config with config for file this document was extracted from")
|
|
334
|
-
else:
|
|
335
|
-
self.config = EmailCfg(id=self.file_id)
|
|
336
|
-
|
|
337
|
-
extracted_from_description = doc_cfg.complete_description()
|
|
338
|
-
|
|
339
|
-
if extracted_from_description:
|
|
340
|
-
extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
|
|
341
|
-
|
|
342
|
-
if self.config.description:
|
|
343
|
-
self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
|
|
344
|
-
|
|
345
|
-
self.config.description = extracted_description
|
|
346
|
-
|
|
347
|
-
self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
|
|
348
|
-
self.warn(f"Constructed local config\n{self.config}")
|
|
349
|
-
|
|
350
324
|
def _write_clean_text(self, output_path: Path) -> None:
|
|
351
325
|
"""Write self.text to 'output_path'. Used only for diffing files."""
|
|
352
326
|
if output_path.exists():
|
epstein_files/documents/email.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
from copy import deepcopy
|
|
3
4
|
from dataclasses import asdict, dataclass, field
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from typing import ClassVar, cast
|
|
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
|
|
|
21
22
|
from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
|
|
22
23
|
flatten, remove_timezone, uniquify)
|
|
23
24
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
25
|
+
from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
|
|
24
26
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
25
27
|
from epstein_files.util.logging import logger
|
|
26
28
|
from epstein_files.util.rich import *
|
|
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
|
|
|
35
37
|
BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
|
|
36
38
|
DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
|
|
37
39
|
TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
|
|
40
|
+
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
38
41
|
|
|
39
42
|
SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
|
|
40
43
|
REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
|
|
44
|
+
APPEARS_IN = 'Appears in'
|
|
41
45
|
MAX_CHARS_TO_PRINT = 4000
|
|
42
46
|
MAX_NUM_HEADER_LINES = 14
|
|
43
47
|
MAX_QUOTED_REPLIES = 2
|
|
@@ -128,7 +132,6 @@ JUNK_EMAILERS = [
|
|
|
128
132
|
'How To Academy',
|
|
129
133
|
'Jokeland',
|
|
130
134
|
JP_MORGAN_USGIO,
|
|
131
|
-
'Saved by Internet Explorer 11',
|
|
132
135
|
]
|
|
133
136
|
|
|
134
137
|
MAILING_LISTS = [
|
|
@@ -248,6 +251,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
|
|
|
248
251
|
|
|
249
252
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
250
253
|
USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
|
|
254
|
+
'Alan Dlugash', # CCed with Richard Kahn
|
|
251
255
|
'Alan Rogers', # Random CC
|
|
252
256
|
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
253
257
|
'BS Stern', # A random fwd of email we have
|
|
@@ -264,14 +268,14 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
|
|
|
264
268
|
'Lyn Fontanilla', # Random CC
|
|
265
269
|
'Mark Albert', # Random CC
|
|
266
270
|
'Matthew Schafer', # Random CC
|
|
271
|
+
MICHAEL_BUCHHOLTZ, # Terry Kafka CC
|
|
272
|
+
'Nancy Dahl', # covered by Lawrence Krauss (her husband)
|
|
267
273
|
'Michael Simmons', # Random CC
|
|
268
274
|
'Nancy Portland', # Lawrence Krauss CC
|
|
269
275
|
'Oliver Goodenough', # Robert Trivers CC
|
|
270
|
-
'Owen Blicksilver', # Landon Thomas CC
|
|
271
276
|
'Peter Aldhous', # Lawrence Krauss CC
|
|
272
277
|
'Sam Harris', # Lawrence Krauss CC
|
|
273
278
|
SAMUEL_LEFF, # Random CC
|
|
274
|
-
"Saved by Internet Explorer 11",
|
|
275
279
|
'Sean T Lehane', # Random CC
|
|
276
280
|
'Stephen Rubin', # Random CC
|
|
277
281
|
'Tim Kane', # Random CC
|
|
@@ -318,6 +322,17 @@ class Email(Communication):
|
|
|
318
322
|
rewritten_header_ids: ClassVar[set[str]] = set([])
|
|
319
323
|
|
|
320
324
|
def __post_init__(self):
|
|
325
|
+
self.filename = self.file_path.name
|
|
326
|
+
self.file_id = extract_file_id(self.filename)
|
|
327
|
+
|
|
328
|
+
# Special handling for copying properties out of the config for the document this one was extracted from
|
|
329
|
+
if self.is_local_extract_file():
|
|
330
|
+
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
331
|
+
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
332
|
+
|
|
333
|
+
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
334
|
+
self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
|
|
335
|
+
|
|
321
336
|
super().__post_init__()
|
|
322
337
|
|
|
323
338
|
try:
|
|
@@ -340,8 +355,12 @@ class Email(Communication):
|
|
|
340
355
|
self.actual_text = self._actual_text()
|
|
341
356
|
self.sent_from_device = self._sent_from_device()
|
|
342
357
|
|
|
358
|
+
def attachments(self) -> list[str]:
|
|
359
|
+
return (self.header.attachments or '').split(';')
|
|
360
|
+
|
|
343
361
|
def info_txt(self) -> Text:
|
|
344
|
-
|
|
362
|
+
email_type = 'fwded article' if self.is_fwded_article() else 'email'
|
|
363
|
+
txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
|
|
345
364
|
return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
|
|
346
365
|
|
|
347
366
|
def is_fwded_article(self) -> bool:
|
|
@@ -566,11 +585,11 @@ class Email(Communication):
|
|
|
566
585
|
self._merge_lines(2, 5)
|
|
567
586
|
elif self.file_id in ['029498', '031428']:
|
|
568
587
|
self._merge_lines(2, 4)
|
|
569
|
-
elif self.file_id in ['029976', '023067']:
|
|
588
|
+
elif self.file_id in ['029976', '023067', '033576']:
|
|
570
589
|
self._merge_lines(3) # Merge 4th and 5th rows
|
|
571
590
|
elif self.file_id in '026609 029402 032405 022695'.split():
|
|
572
591
|
self._merge_lines(4) # Merge 5th and 6th rows
|
|
573
|
-
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
|
|
592
|
+
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
|
|
574
593
|
self._merge_lines(2, 4)
|
|
575
594
|
elif self.file_id in ['029154', '029163']:
|
|
576
595
|
self._merge_lines(2, 5)
|
|
@@ -591,6 +610,8 @@ class Email(Communication):
|
|
|
591
610
|
self._merge_lines(7, 9)
|
|
592
611
|
elif self.file_id == '030299':
|
|
593
612
|
self._merge_lines(7, 10)
|
|
613
|
+
elif self.file_id in ['022673', '022684']:
|
|
614
|
+
self._merge_lines(9)
|
|
594
615
|
elif self.file_id == '014860':
|
|
595
616
|
self._merge_lines(3)
|
|
596
617
|
self._merge_lines(4)
|
|
@@ -649,6 +670,27 @@ class Email(Communication):
|
|
|
649
670
|
sent_from = sent_from_match.group(0)
|
|
650
671
|
return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
|
|
651
672
|
|
|
673
|
+
def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
|
|
674
|
+
"""Copy info from original config for file this document was extracted from."""
|
|
675
|
+
if self.file_id in ALL_FILE_CONFIGS:
|
|
676
|
+
self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
|
|
677
|
+
self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
|
|
678
|
+
else:
|
|
679
|
+
self.config = EmailCfg(id=self.file_id)
|
|
680
|
+
|
|
681
|
+
extracted_from_description = extracted_from_doc_cfg.complete_description()
|
|
682
|
+
|
|
683
|
+
if extracted_from_description:
|
|
684
|
+
extracted_description = f"{APPEARS_IN} {extracted_from_description}"
|
|
685
|
+
|
|
686
|
+
if self.config.description:
|
|
687
|
+
self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
|
|
688
|
+
|
|
689
|
+
self.config.description = extracted_description
|
|
690
|
+
|
|
691
|
+
self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
|
|
692
|
+
self.warn(f"Constructed synthetic config: {self.config}")
|
|
693
|
+
|
|
652
694
|
def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
|
|
653
695
|
logger.debug(f"Printing '{self.filename}'...")
|
|
654
696
|
yield self.file_info_panel()
|
|
epstein_files/documents/imessage/text_message.py
CHANGED
|
@@ -45,7 +45,7 @@ class TextMessage:
|
|
|
45
45
|
self.author_str = self.author_str or self.author
|
|
46
46
|
|
|
47
47
|
if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
|
|
48
|
-
self.author_str
|
|
48
|
+
self.author_str += ' (?)'
|
|
49
49
|
|
|
50
50
|
def timestamp(self) -> datetime:
|
|
51
51
|
return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
|
|
epstein_files/documents/json_file.py
CHANGED
|
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
|
|
|
39
39
|
return JSON
|
|
40
40
|
|
|
41
41
|
def info_txt(self) -> Text | None:
|
|
42
|
-
return Text(f"JSON file,
|
|
42
|
+
return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
|
|
43
43
|
|
|
44
44
|
def is_interesting(self):
|
|
45
45
|
return False
|
|
epstein_files/documents/messenger_log.py
CHANGED
|
@@ -16,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
|
|
|
16
16
|
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
17
17
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
18
18
|
from epstein_files.util.logging import logger
|
|
19
|
-
from epstein_files.util.rich import build_table, highlighter
|
|
19
|
+
from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
|
|
20
20
|
|
|
21
21
|
CONFIRMED_MSG = 'Found confirmed counterparty'
|
|
22
22
|
GUESSED_MSG = 'This is probably a conversation with'
|
|
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
|
|
|
76
76
|
is_phone_number = author_str.startswith('+')
|
|
77
77
|
|
|
78
78
|
if is_phone_number:
|
|
79
|
-
logger.
|
|
79
|
+
logger.info(f"{self.summary()} Found phone number: {author_str}")
|
|
80
80
|
self.phone_number = author_str
|
|
81
81
|
|
|
82
82
|
# If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
|
|
@@ -130,7 +130,7 @@ class MessengerLog(Communication):
|
|
|
130
130
|
counts_table.add_column('Files', justify='right', style='white')
|
|
131
131
|
counts_table.add_column("Msgs", justify='right')
|
|
132
132
|
counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
|
|
133
|
-
counts_table.add_column('Last Sent At', justify='center', style=
|
|
133
|
+
counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
|
|
134
134
|
counts_table.add_column('Days', justify='right', style='dim')
|
|
135
135
|
|
|
136
136
|
for name, count in sort_dict(cls.count_authors(imessage_logs)):
|
|
epstein_files/documents/other_file.py
CHANGED
|
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
|
|
|
107
107
|
TEXT_OF_US_LAW,
|
|
108
108
|
TRANSLATION,
|
|
109
109
|
TWEET,
|
|
110
|
-
|
|
110
|
+
REAL_DEAL_ARTICLE,
|
|
111
111
|
TRUMP_DISCLOSURES,
|
|
112
112
|
UBS_CIO_REPORT,
|
|
113
113
|
UN_GENERAL_ASSEMBLY,
|
|
@@ -240,7 +240,7 @@ class OtherFile(Document):
|
|
|
240
240
|
table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
|
|
241
241
|
|
|
242
242
|
for file in files:
|
|
243
|
-
link_and_info = [file.
|
|
243
|
+
link_and_info = [file.external_links()]
|
|
244
244
|
date_str = file.date_str()
|
|
245
245
|
|
|
246
246
|
if file.is_duplicate():
|
epstein_files/epstein_files.py
CHANGED
|
@@ -23,12 +23,12 @@ from epstein_files.util.constant.strings import *
|
|
|
23
23
|
from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
|
|
24
24
|
epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
|
|
25
25
|
from epstein_files.util.constants import *
|
|
26
|
-
from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
|
|
26
|
+
from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
|
|
27
27
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
28
28
|
from epstein_files.util.env import DOCS_DIR, args, logger
|
|
29
29
|
from epstein_files.util.file_helper import file_size_str
|
|
30
30
|
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
31
|
-
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
|
|
31
|
+
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
|
|
32
32
|
build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
|
|
33
33
|
print_other_site_link, print_panel, print_section_header, vertically_pad)
|
|
34
34
|
from epstein_files.util.search_result import SearchResult
|
|
@@ -278,25 +278,40 @@ class EpsteinFiles:
|
|
|
278
278
|
def print_emailer_counts_table(self) -> None:
|
|
279
279
|
footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
|
|
280
280
|
counts_table = build_table("Email Counts", caption=footer)
|
|
281
|
-
|
|
281
|
+
|
|
282
|
+
add_cols_to_table(counts_table, [
|
|
283
|
+
'Name',
|
|
284
|
+
'Num',
|
|
285
|
+
'Sent',
|
|
286
|
+
"Recv",
|
|
287
|
+
{'name': 'First', 'highlight': True},
|
|
288
|
+
{'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
|
|
289
|
+
JMAIL,
|
|
290
|
+
'eMedia',
|
|
291
|
+
'eWeb',
|
|
292
|
+
'Twitter',
|
|
293
|
+
])
|
|
282
294
|
|
|
283
295
|
emailer_counts = {
|
|
284
296
|
emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
|
|
285
297
|
for emailer in self.all_emailers(True)
|
|
286
298
|
}
|
|
287
299
|
|
|
288
|
-
for
|
|
289
|
-
style = get_style_for_name(
|
|
300
|
+
for name, count in sort_dict(emailer_counts):
|
|
301
|
+
style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
|
|
302
|
+
emails = self.emails_for(name)
|
|
290
303
|
|
|
291
304
|
counts_table.add_row(
|
|
292
|
-
Text.from_markup(link_markup(epsteinify_name_url(
|
|
305
|
+
Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
|
|
293
306
|
str(count),
|
|
294
|
-
str(self.email_author_counts[
|
|
295
|
-
str(self.email_recipient_counts[
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
'' if
|
|
299
|
-
'' if
|
|
307
|
+
str(self.email_author_counts[name]),
|
|
308
|
+
str(self.email_recipient_counts[name]),
|
|
309
|
+
emails[0].timestamp_without_seconds(),
|
|
310
|
+
emails[-1].timestamp_without_seconds(),
|
|
311
|
+
'' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
|
|
312
|
+
'' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
|
|
313
|
+
'' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
|
|
314
|
+
'' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
|
|
300
315
|
)
|
|
301
316
|
|
|
302
317
|
console.print(vertically_pad(counts_table, 2))
|
|
epstein_files/util/constant/names.py
CHANGED
|
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
|
|
|
42
42
|
CECILIA_STEEN = 'Cecilia Steen'
|
|
43
43
|
CELINA_DUBIN = 'Celina Dubin'
|
|
44
44
|
CHRISTINA_GALBRAITH = 'Christina Galbraith' # Works with Tyler Shears on reputation stuff
|
|
45
|
+
DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
|
|
45
46
|
DANIEL_SABBA = 'Daniel Sabba'
|
|
46
47
|
DANIEL_SIAD = 'Daniel Siad'
|
|
47
48
|
DANNY_FROST = 'Danny Frost'
|
|
@@ -143,7 +144,7 @@ REID_HOFFMAN = 'Reid Hoffman'
|
|
|
143
144
|
REID_WEINGARTEN = 'Reid Weingarten'
|
|
144
145
|
RENATA_BOLOTOVA = 'Renata Bolotova'
|
|
145
146
|
RICHARD_KAHN = 'Richard Kahn'
|
|
146
|
-
|
|
147
|
+
ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
|
|
147
148
|
ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
|
|
148
149
|
ROBERT_TRIVERS = 'Robert Trivers'
|
|
149
150
|
ROGER_SCHANK = 'Roger Schank'
|
|
@@ -178,6 +179,7 @@ JARED_KUSHNER = 'Jared Kushner'
|
|
|
178
179
|
JULIE_K_BROWN = 'Julie K. Brown'
|
|
179
180
|
KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
|
|
180
181
|
MICHAEL_J_BOCCIO = 'Michael J. Boccio'
|
|
182
|
+
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
181
183
|
PAUL_G_CASSELL = 'Paul G. Cassell'
|
|
182
184
|
RUDY_GIULIANI = 'Rudy Giuliani'
|
|
183
185
|
TULSI_GABBARD = 'Tulsi Gabbard'
|
|
@@ -226,22 +228,23 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
|
226
228
|
# Names to color white in the word counts
|
|
227
229
|
OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
228
230
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
229
|
-
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
|
|
231
|
+
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
230
232
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
231
|
-
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
233
|
+
danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
232
234
|
edmond elizabeth emily entwistle erik evelyn
|
|
233
|
-
ferguson flachsbart francis franco frank
|
|
235
|
+
ferguson flachsbart francis franco frank frost
|
|
234
236
|
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
235
|
-
hancock harold harrison harry helen hirsch hofstadter horowitz hussein
|
|
237
|
+
hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
|
|
236
238
|
ian isaac isaacson
|
|
237
|
-
jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
239
|
+
james jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
238
240
|
karl kate kathy kelly kim kruger kyle
|
|
239
|
-
leo leonard lenny leslie lieberman louis lynch lynn
|
|
241
|
+
laurie leo leonard lenny leslie lieberman louis lynch lynn
|
|
240
242
|
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
241
|
-
nicole nussbaum
|
|
243
|
+
nancy nicole nussbaum
|
|
244
|
+
owen
|
|
242
245
|
paulson philippe
|
|
243
246
|
rafael ray richard richardson rob robin ron rubin rudolph ryan
|
|
244
|
-
sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
247
|
+
sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
245
248
|
ted theresa thompson tiffany timothy tony
|
|
246
249
|
valeria
|
|
247
250
|
walter warren weinstein weiss william
|
|
@@ -20,7 +20,7 @@ POLITICS = 'politics'
|
|
|
20
20
|
PROPERTY = 'property'
|
|
21
21
|
PUBLICIST = 'publicist'
|
|
22
22
|
REPUTATION = 'reputation'
|
|
23
|
-
SKYPE_LOG= '
|
|
23
|
+
SKYPE_LOG = 'Skype log'
|
|
24
24
|
SOCIAL = 'social'
|
|
25
25
|
SPEECH = 'speech'
|
|
26
26
|
|
|
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
|
|
|
39
39
|
NYT = "New York Times"
|
|
40
40
|
PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
|
|
41
41
|
PALM_BEACH_POST = f'{PALM_BEACH} Post'
|
|
42
|
+
SHIMON_POST = 'The Shimon Post'
|
|
42
43
|
THE_REAL_DEAL = 'The Real Deal'
|
|
43
44
|
WAPO = 'WaPo'
|
|
44
45
|
VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
|
|
@@ -13,11 +13,12 @@ ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
|
13
13
|
TEXT_LINK = 'text_link'
|
|
14
14
|
|
|
15
15
|
# External site names
|
|
16
|
-
ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
|
|
16
|
+
ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb', 'RollCall']
|
|
17
17
|
EPSTEIN_MEDIA = 'epstein.media'
|
|
18
18
|
EPSTEIN_WEB = 'EpsteinWeb'
|
|
19
19
|
EPSTEINIFY = 'epsteinify'
|
|
20
20
|
JMAIL = 'Jmail'
|
|
21
|
+
ROLLCALL = 'RollCall'
|
|
21
22
|
|
|
22
23
|
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
|
|
23
24
|
GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
|
|
@@ -41,9 +42,10 @@ EPSTEIN_WEB_URL = 'https://epsteinweb.org'
|
|
|
41
42
|
JMAIL_URL = 'https://jmail.world'
|
|
42
43
|
|
|
43
44
|
DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
|
|
44
|
-
EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
|
|
45
|
-
EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
|
|
46
|
-
EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
|
|
45
|
+
EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files/",
|
|
46
|
+
EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images/',
|
|
47
|
+
EPSTEINIFY: f"{EPSTEINIFY_URL}/document/",
|
|
48
|
+
ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
|
|
47
49
|
}
|
|
48
50
|
|
|
49
51
|
|
|
@@ -53,7 +55,7 @@ epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_ma
|
|
|
53
55
|
epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
|
|
54
56
|
epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(name)}"
|
|
55
57
|
|
|
56
|
-
epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem,
|
|
58
|
+
epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, 'lower')
|
|
57
59
|
epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
|
|
58
60
|
epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
|
|
59
61
|
epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
|
|
@@ -62,16 +64,19 @@ epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{fil
|
|
|
62
64
|
epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
|
|
63
65
|
epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.quote(s)}&ewmfilepp=20"
|
|
64
66
|
|
|
67
|
+
rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
|
|
68
|
+
|
|
65
69
|
search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
|
|
66
70
|
search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
|
|
67
71
|
search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
|
|
68
72
|
search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
|
|
69
73
|
|
|
70
74
|
|
|
71
|
-
def build_doc_url(base_url: str, filename_or_id: int | str,
|
|
75
|
+
def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
|
|
72
76
|
file_stem = coerce_file_stem(filename_or_id)
|
|
73
|
-
file_stem = file_stem.lower() if
|
|
74
|
-
|
|
77
|
+
file_stem = file_stem.lower() if case == 'lower' else file_stem
|
|
78
|
+
file_stem = file_stem.title() if case == 'title' else file_stem
|
|
79
|
+
return f"{base_url}{file_stem}"
|
|
75
80
|
|
|
76
81
|
|
|
77
82
|
def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
|