epstein-files 1.0.11__tar.gz → 1.0.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.11 → epstein_files-1.0.13}/PKG-INFO +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/__init__.py +3 -3
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/communication.py +2 -2
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/document.py +43 -69
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/email.py +48 -6
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/imessage/text_message.py +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/json_file.py +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/messenger_log.py +3 -3
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/other_file.py +2 -2
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/epstein_files.py +27 -12
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/names.py +12 -9
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/strings.py +2 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/urls.py +13 -8
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constants.py +21 -15
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/data.py +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/doc_cfg.py +20 -42
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/file_helper.py +3 -9
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/highlighted_group.py +32 -21
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/logging.py +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/output.py +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/rich.py +11 -2
- {epstein_files-1.0.11 → epstein_files-1.0.13}/pyproject.toml +1 -1
- {epstein_files-1.0.11 → epstein_files-1.0.13}/LICENSE +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/README.md +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/output_files.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/env.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/timer.py +0 -0
- {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/word_count.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.0.11
|
|
3
|
+
Version: 1.0.13
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
5
|
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
|
|
|
20
20
|
from epstein_files.util.env import args, specified_names
|
|
21
21
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
22
|
from epstein_files.util.logging import logger
|
|
23
|
-
from epstein_files.util.output import (print_emails, print_json_files,
|
|
24
|
-
print_text_messages, write_urls)
|
|
23
|
+
from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
|
|
24
|
+
print_text_messages, write_json_metadata, write_urls)
|
|
25
25
|
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
26
26
|
from epstein_files.util.timer import Timer
|
|
27
27
|
from epstein_files.util.word_count import write_word_counts_html
|
|
@@ -37,7 +37,7 @@ def generate_html() -> None:
|
|
|
37
37
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
38
38
|
|
|
39
39
|
if args.json_metadata:
|
|
40
|
-
|
|
40
|
+
write_json_metadata(epstein_files)
|
|
41
41
|
exit()
|
|
42
42
|
elif args.json_files:
|
|
43
43
|
print_json_files(epstein_files)
|
|
@@ -34,9 +34,9 @@ class Communication(Document):
|
|
|
34
34
|
def is_attribution_uncertain(self) -> bool:
|
|
35
35
|
return bool(self.config and self.config.is_attribution_uncertain)
|
|
36
36
|
|
|
37
|
-
def
|
|
37
|
+
def external_links(self, _style: str = '', include_alt_links: bool = True) -> Text:
|
|
38
38
|
"""Overrides super() method to apply self.author_style."""
|
|
39
|
-
return super().
|
|
39
|
+
return super().external_links(self.author_style, include_alt_links=include_alt_links)
|
|
40
40
|
|
|
41
41
|
def summary(self) -> Text:
|
|
42
42
|
return self._summary().append(CLOSE_PROPERTIES_CHAR)
|
|
@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
|
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from subprocess import run
|
|
8
|
-
from typing import ClassVar, Sequence, TypeVar
|
|
8
|
+
from typing import Callable, ClassVar, Sequence, TypeVar
|
|
9
9
|
|
|
10
10
|
from rich.console import Console, ConsoleOptions, Group, RenderResult
|
|
11
11
|
from rich.padding import Padding
|
|
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
|
|
|
16
16
|
from epstein_files.util.constant.strings import *
|
|
17
17
|
from epstein_files.util.constant.urls import *
|
|
18
18
|
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
19
|
-
from epstein_files.util.data import collapse_newlines, date_str,
|
|
20
|
-
from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
|
|
19
|
+
from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
|
|
20
|
+
from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
|
|
21
21
|
from epstein_files.util.env import DOCS_DIR, args
|
|
22
22
|
from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
|
|
23
23
|
file_size_str, is_local_extract_file)
|
|
@@ -31,10 +31,8 @@ INFO_INDENT = 2
|
|
|
31
31
|
INFO_PADDING = (0, 0, 0, INFO_INDENT)
|
|
32
32
|
MAX_TOP_LINES_LEN = 4000 # Only for logging
|
|
33
33
|
MIN_DOCUMENT_ID = 10477
|
|
34
|
-
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
35
34
|
WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
|
|
36
35
|
|
|
37
|
-
EXTRACTED_FROM = 'Extracted from'
|
|
38
36
|
MIN_TIMESTAMP = datetime(1991, 1, 1)
|
|
39
37
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
40
38
|
MAX_TIMESTAMP = datetime(2020, 1, 1)
|
|
@@ -96,15 +94,9 @@ class Document:
|
|
|
96
94
|
def __post_init__(self):
|
|
97
95
|
self.filename = self.file_path.name
|
|
98
96
|
self.file_id = extract_file_id(self.filename)
|
|
99
|
-
self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
97
|
+
self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
100
98
|
|
|
101
|
-
if self
|
|
102
|
-
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
103
|
-
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
104
|
-
|
|
105
|
-
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
106
|
-
self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
|
|
107
|
-
else:
|
|
99
|
+
if 'url_slug' not in vars(self):
|
|
108
100
|
self.url_slug = self.file_path.stem
|
|
109
101
|
|
|
110
102
|
self._set_computed_fields(text=self.text or self._load_file())
|
|
@@ -122,28 +114,51 @@ class Document:
|
|
|
122
114
|
|
|
123
115
|
def duplicate_file_txt(self) -> Text:
|
|
124
116
|
"""If the file is a dupe make a nice message to explain what file it's a duplicate of."""
|
|
125
|
-
if not self.config or not self.config.dupe_of_id:
|
|
117
|
+
if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
|
|
126
118
|
raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
|
|
127
119
|
|
|
128
120
|
txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
|
|
129
|
-
txt.append(f" because it's {self.config.
|
|
121
|
+
txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
|
|
130
122
|
return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
|
|
131
123
|
|
|
132
124
|
def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
133
|
-
|
|
134
|
-
return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
125
|
+
return self.external_url(epsteinify_doc_url, style, link_txt)
|
|
135
126
|
|
|
136
127
|
def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
137
|
-
|
|
138
|
-
return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
128
|
+
return self.external_url(epstein_media_doc_url, style, link_txt)
|
|
139
129
|
|
|
140
130
|
def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
141
|
-
|
|
142
|
-
|
|
131
|
+
return self.external_url(epstein_web_doc_url, style, link_txt)
|
|
132
|
+
|
|
133
|
+
def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
134
|
+
return self.external_url(rollcall_doc_url, style, link_txt)
|
|
135
|
+
|
|
136
|
+
def external_url(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
137
|
+
return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)
|
|
138
|
+
|
|
139
|
+
def external_links(self, style: str = '', include_alt_links: bool = False) -> Text:
|
|
140
|
+
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
141
|
+
txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
|
|
142
|
+
|
|
143
|
+
if args.use_epstein_web:
|
|
144
|
+
txt.append(self.epstein_web_link(style=style))
|
|
145
|
+
alt_link = self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)
|
|
146
|
+
else:
|
|
147
|
+
txt.append(self.epstein_media_link(style=style))
|
|
148
|
+
alt_link = self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)
|
|
149
|
+
|
|
150
|
+
if include_alt_links:
|
|
151
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
152
|
+
txt.append(' (').append(alt_link).append(')')
|
|
153
|
+
|
|
154
|
+
if self._class_name() == 'Email':
|
|
155
|
+
txt.append(' (').append(self.rollcall_link(style='white dim', link_txt=ROLLCALL)).append(')')
|
|
156
|
+
|
|
157
|
+
return txt
|
|
143
158
|
|
|
144
159
|
def file_info_panel(self) -> Group:
|
|
145
160
|
"""Panel with filename linking to raw file plus any additional info about the file."""
|
|
146
|
-
panel = Panel(self.
|
|
161
|
+
panel = Panel(self.external_links(include_alt_links=True), border_style=self._border_style(), expand=False)
|
|
147
162
|
padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
|
|
148
163
|
return Group(*([panel] + padded_info))
|
|
149
164
|
|
|
@@ -155,12 +170,10 @@ class Document:
|
|
|
155
170
|
|
|
156
171
|
def info(self) -> list[Text]:
|
|
157
172
|
"""0 to 2 sentences containing the info_txt() as well as any configured description."""
|
|
158
|
-
|
|
173
|
+
return without_falsey([
|
|
159
174
|
self.info_txt(),
|
|
160
175
|
highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
|
|
161
|
-
]
|
|
162
|
-
|
|
163
|
-
return without_falsey(sentences)
|
|
176
|
+
])
|
|
164
177
|
|
|
165
178
|
def info_txt(self) -> Text | None:
|
|
166
179
|
"""Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
|
|
@@ -197,9 +210,9 @@ class Document:
|
|
|
197
210
|
|
|
198
211
|
if self.is_local_extract_file():
|
|
199
212
|
metadata['extracted_file'] = {
|
|
200
|
-
'explanation': '
|
|
201
|
-
'
|
|
202
|
-
'
|
|
213
|
+
'explanation': 'Manually extracted from one of the court filings.',
|
|
214
|
+
'extracted_from': self.url_slug + '.txt',
|
|
215
|
+
'url': extracted_file_url(self.filename),
|
|
203
216
|
}
|
|
204
217
|
|
|
205
218
|
return metadata
|
|
@@ -208,25 +221,6 @@ class Document:
|
|
|
208
221
|
with open(self.file_path) as f:
|
|
209
222
|
return f.read()
|
|
210
223
|
|
|
211
|
-
def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
|
|
212
|
-
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
213
|
-
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
214
|
-
|
|
215
|
-
if args.use_epstein_web:
|
|
216
|
-
txt.append(self.epstein_web_link(style=style))
|
|
217
|
-
|
|
218
|
-
if include_alt_link:
|
|
219
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
220
|
-
txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
|
|
221
|
-
else:
|
|
222
|
-
txt.append(self.epstein_media_link(style=style))
|
|
223
|
-
|
|
224
|
-
if include_alt_link:
|
|
225
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
226
|
-
txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
|
|
227
|
-
|
|
228
|
-
return txt
|
|
229
|
-
|
|
230
224
|
def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
|
|
231
225
|
"""Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
|
|
232
226
|
for k, v in repairs.items():
|
|
@@ -253,7 +247,7 @@ class Document:
|
|
|
253
247
|
txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
|
|
254
248
|
|
|
255
249
|
if self.timestamp:
|
|
256
|
-
timestamp_str =
|
|
250
|
+
timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
|
|
257
251
|
txt.append(' (', style=SYMBOL_STYLE)
|
|
258
252
|
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
259
253
|
|
|
@@ -327,26 +321,6 @@ class Document:
|
|
|
327
321
|
self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
|
|
328
322
|
self.num_lines = len(self.lines)
|
|
329
323
|
|
|
330
|
-
def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
|
|
331
|
-
"""Copy info from original config for file this document was extracted from."""
|
|
332
|
-
if self.config:
|
|
333
|
-
self.warn(f"Merging existing config with config for file this document was extracted from")
|
|
334
|
-
else:
|
|
335
|
-
self.config = EmailCfg(id=self.file_id)
|
|
336
|
-
|
|
337
|
-
extracted_from_description = doc_cfg.complete_description()
|
|
338
|
-
|
|
339
|
-
if extracted_from_description:
|
|
340
|
-
extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
|
|
341
|
-
|
|
342
|
-
if self.config.description:
|
|
343
|
-
self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
|
|
344
|
-
|
|
345
|
-
self.config.description = extracted_description
|
|
346
|
-
|
|
347
|
-
self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
|
|
348
|
-
self.warn(f"Constructed local config\n{self.config}")
|
|
349
|
-
|
|
350
324
|
def _write_clean_text(self, output_path: Path) -> None:
|
|
351
325
|
"""Write self.text to 'output_path'. Used only for diffing files."""
|
|
352
326
|
if output_path.exists():
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
from copy import deepcopy
|
|
3
4
|
from dataclasses import asdict, dataclass, field
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from typing import ClassVar, cast
|
|
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
|
|
|
21
22
|
from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
|
|
22
23
|
flatten, remove_timezone, uniquify)
|
|
23
24
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
25
|
+
from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
|
|
24
26
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
25
27
|
from epstein_files.util.logging import logger
|
|
26
28
|
from epstein_files.util.rich import *
|
|
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
|
|
|
35
37
|
BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
|
|
36
38
|
DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
|
|
37
39
|
TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
|
|
40
|
+
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
38
41
|
|
|
39
42
|
SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
|
|
40
43
|
REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
|
|
44
|
+
APPEARS_IN = 'Appears in'
|
|
41
45
|
MAX_CHARS_TO_PRINT = 4000
|
|
42
46
|
MAX_NUM_HEADER_LINES = 14
|
|
43
47
|
MAX_QUOTED_REPLIES = 2
|
|
@@ -128,7 +132,6 @@ JUNK_EMAILERS = [
|
|
|
128
132
|
'How To Academy',
|
|
129
133
|
'Jokeland',
|
|
130
134
|
JP_MORGAN_USGIO,
|
|
131
|
-
'Saved by Internet Explorer 11',
|
|
132
135
|
]
|
|
133
136
|
|
|
134
137
|
MAILING_LISTS = [
|
|
@@ -248,6 +251,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
|
|
|
248
251
|
|
|
249
252
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
250
253
|
USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
|
|
254
|
+
'Alan Dlugash', # CCed with Richard Kahn
|
|
251
255
|
'Alan Rogers', # Random CC
|
|
252
256
|
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
253
257
|
'BS Stern', # A random fwd of email we have
|
|
@@ -264,14 +268,14 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
|
|
|
264
268
|
'Lyn Fontanilla', # Random CC
|
|
265
269
|
'Mark Albert', # Random CC
|
|
266
270
|
'Matthew Schafer', # Random CC
|
|
271
|
+
MICHAEL_BUCHHOLTZ, # Terry Kafka CC
|
|
272
|
+
'Nancy Dahl', # covered by Lawrence Krauss (her husband)
|
|
267
273
|
'Michael Simmons', # Random CC
|
|
268
274
|
'Nancy Portland', # Lawrence Krauss CC
|
|
269
275
|
'Oliver Goodenough', # Robert Trivers CC
|
|
270
|
-
'Owen Blicksilver', # Landon Thomas CC
|
|
271
276
|
'Peter Aldhous', # Lawrence Krauss CC
|
|
272
277
|
'Sam Harris', # Lawrence Krauss CC
|
|
273
278
|
SAMUEL_LEFF, # Random CC
|
|
274
|
-
"Saved by Internet Explorer 11",
|
|
275
279
|
'Sean T Lehane', # Random CC
|
|
276
280
|
'Stephen Rubin', # Random CC
|
|
277
281
|
'Tim Kane', # Random CC
|
|
@@ -318,6 +322,17 @@ class Email(Communication):
|
|
|
318
322
|
rewritten_header_ids: ClassVar[set[str]] = set([])
|
|
319
323
|
|
|
320
324
|
def __post_init__(self):
|
|
325
|
+
self.filename = self.file_path.name
|
|
326
|
+
self.file_id = extract_file_id(self.filename)
|
|
327
|
+
|
|
328
|
+
# Special handling for copying properties out of the config for the document this one was extracted from
|
|
329
|
+
if self.is_local_extract_file():
|
|
330
|
+
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
331
|
+
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
332
|
+
|
|
333
|
+
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
334
|
+
self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
|
|
335
|
+
|
|
321
336
|
super().__post_init__()
|
|
322
337
|
|
|
323
338
|
try:
|
|
@@ -340,8 +355,12 @@ class Email(Communication):
|
|
|
340
355
|
self.actual_text = self._actual_text()
|
|
341
356
|
self.sent_from_device = self._sent_from_device()
|
|
342
357
|
|
|
358
|
+
def attachments(self) -> list[str]:
|
|
359
|
+
return (self.header.attachments or '').split(';')
|
|
360
|
+
|
|
343
361
|
def info_txt(self) -> Text:
|
|
344
|
-
|
|
362
|
+
email_type = 'fwded article' if self.is_fwded_article() else 'email'
|
|
363
|
+
txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
|
|
345
364
|
return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
|
|
346
365
|
|
|
347
366
|
def is_fwded_article(self) -> bool:
|
|
@@ -566,11 +585,11 @@ class Email(Communication):
|
|
|
566
585
|
self._merge_lines(2, 5)
|
|
567
586
|
elif self.file_id in ['029498', '031428']:
|
|
568
587
|
self._merge_lines(2, 4)
|
|
569
|
-
elif self.file_id in ['029976', '023067']:
|
|
588
|
+
elif self.file_id in ['029976', '023067', '033576']:
|
|
570
589
|
self._merge_lines(3) # Merge 4th and 5th rows
|
|
571
590
|
elif self.file_id in '026609 029402 032405 022695'.split():
|
|
572
591
|
self._merge_lines(4) # Merge 5th and 6th rows
|
|
573
|
-
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
|
|
592
|
+
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
|
|
574
593
|
self._merge_lines(2, 4)
|
|
575
594
|
elif self.file_id in ['029154', '029163']:
|
|
576
595
|
self._merge_lines(2, 5)
|
|
@@ -591,6 +610,8 @@ class Email(Communication):
|
|
|
591
610
|
self._merge_lines(7, 9)
|
|
592
611
|
elif self.file_id == '030299':
|
|
593
612
|
self._merge_lines(7, 10)
|
|
613
|
+
elif self.file_id in ['022673', '022684']:
|
|
614
|
+
self._merge_lines(9)
|
|
594
615
|
elif self.file_id == '014860':
|
|
595
616
|
self._merge_lines(3)
|
|
596
617
|
self._merge_lines(4)
|
|
@@ -649,6 +670,27 @@ class Email(Communication):
|
|
|
649
670
|
sent_from = sent_from_match.group(0)
|
|
650
671
|
return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
|
|
651
672
|
|
|
673
|
+
def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
|
|
674
|
+
"""Copy info from original config for file this document was extracted from."""
|
|
675
|
+
if self.file_id in ALL_FILE_CONFIGS:
|
|
676
|
+
self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
|
|
677
|
+
self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
|
|
678
|
+
else:
|
|
679
|
+
self.config = EmailCfg(id=self.file_id)
|
|
680
|
+
|
|
681
|
+
extracted_from_description = extracted_from_doc_cfg.complete_description()
|
|
682
|
+
|
|
683
|
+
if extracted_from_description:
|
|
684
|
+
extracted_description = f"{APPEARS_IN} {extracted_from_description}"
|
|
685
|
+
|
|
686
|
+
if self.config.description:
|
|
687
|
+
self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
|
|
688
|
+
|
|
689
|
+
self.config.description = extracted_description
|
|
690
|
+
|
|
691
|
+
self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
|
|
692
|
+
self.warn(f"Constructed synthetic config: {self.config}")
|
|
693
|
+
|
|
652
694
|
def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
|
|
653
695
|
logger.debug(f"Printing '{self.filename}'...")
|
|
654
696
|
yield self.file_info_panel()
|
{epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/imessage/text_message.py
RENAMED
|
@@ -45,7 +45,7 @@ class TextMessage:
|
|
|
45
45
|
self.author_str = self.author_str or self.author
|
|
46
46
|
|
|
47
47
|
if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
|
|
48
|
-
self.author_str
|
|
48
|
+
self.author_str += ' (?)'
|
|
49
49
|
|
|
50
50
|
def timestamp(self) -> datetime:
|
|
51
51
|
return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
|
|
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
|
|
|
39
39
|
return JSON
|
|
40
40
|
|
|
41
41
|
def info_txt(self) -> Text | None:
|
|
42
|
-
return Text(f"JSON file,
|
|
42
|
+
return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
|
|
43
43
|
|
|
44
44
|
def is_interesting(self):
|
|
45
45
|
return False
|
|
@@ -16,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
|
|
|
16
16
|
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
17
17
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
18
18
|
from epstein_files.util.logging import logger
|
|
19
|
-
from epstein_files.util.rich import build_table, highlighter
|
|
19
|
+
from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
|
|
20
20
|
|
|
21
21
|
CONFIRMED_MSG = 'Found confirmed counterparty'
|
|
22
22
|
GUESSED_MSG = 'This is probably a conversation with'
|
|
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
|
|
|
76
76
|
is_phone_number = author_str.startswith('+')
|
|
77
77
|
|
|
78
78
|
if is_phone_number:
|
|
79
|
-
logger.
|
|
79
|
+
logger.info(f"{self.summary()} Found phone number: {author_str}")
|
|
80
80
|
self.phone_number = author_str
|
|
81
81
|
|
|
82
82
|
# If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
|
|
@@ -130,7 +130,7 @@ class MessengerLog(Communication):
|
|
|
130
130
|
counts_table.add_column('Files', justify='right', style='white')
|
|
131
131
|
counts_table.add_column("Msgs", justify='right')
|
|
132
132
|
counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
|
|
133
|
-
counts_table.add_column('Last Sent At', justify='center', style=
|
|
133
|
+
counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
|
|
134
134
|
counts_table.add_column('Days', justify='right', style='dim')
|
|
135
135
|
|
|
136
136
|
for name, count in sort_dict(cls.count_authors(imessage_logs)):
|
|
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
|
|
|
107
107
|
TEXT_OF_US_LAW,
|
|
108
108
|
TRANSLATION,
|
|
109
109
|
TWEET,
|
|
110
|
-
|
|
110
|
+
REAL_DEAL_ARTICLE,
|
|
111
111
|
TRUMP_DISCLOSURES,
|
|
112
112
|
UBS_CIO_REPORT,
|
|
113
113
|
UN_GENERAL_ASSEMBLY,
|
|
@@ -240,7 +240,7 @@ class OtherFile(Document):
|
|
|
240
240
|
table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
|
|
241
241
|
|
|
242
242
|
for file in files:
|
|
243
|
-
link_and_info = [file.
|
|
243
|
+
link_and_info = [file.external_links()]
|
|
244
244
|
date_str = file.date_str()
|
|
245
245
|
|
|
246
246
|
if file.is_duplicate():
|
|
@@ -23,12 +23,12 @@ from epstein_files.util.constant.strings import *
|
|
|
23
23
|
from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
|
|
24
24
|
epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
|
|
25
25
|
from epstein_files.util.constants import *
|
|
26
|
-
from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
|
|
26
|
+
from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
|
|
27
27
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
28
28
|
from epstein_files.util.env import DOCS_DIR, args, logger
|
|
29
29
|
from epstein_files.util.file_helper import file_size_str
|
|
30
30
|
from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
|
|
31
|
-
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
|
|
31
|
+
from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
|
|
32
32
|
build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
|
|
33
33
|
print_other_site_link, print_panel, print_section_header, vertically_pad)
|
|
34
34
|
from epstein_files.util.search_result import SearchResult
|
|
@@ -278,25 +278,40 @@ class EpsteinFiles:
|
|
|
278
278
|
def print_emailer_counts_table(self) -> None:
|
|
279
279
|
footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
|
|
280
280
|
counts_table = build_table("Email Counts", caption=footer)
|
|
281
|
-
|
|
281
|
+
|
|
282
|
+
add_cols_to_table(counts_table, [
|
|
283
|
+
'Name',
|
|
284
|
+
'Num',
|
|
285
|
+
'Sent',
|
|
286
|
+
"Recv",
|
|
287
|
+
{'name': 'First', 'highlight': True},
|
|
288
|
+
{'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
|
|
289
|
+
JMAIL,
|
|
290
|
+
'eMedia',
|
|
291
|
+
'eWeb',
|
|
292
|
+
'Twitter',
|
|
293
|
+
])
|
|
282
294
|
|
|
283
295
|
emailer_counts = {
|
|
284
296
|
emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
|
|
285
297
|
for emailer in self.all_emailers(True)
|
|
286
298
|
}
|
|
287
299
|
|
|
288
|
-
for
|
|
289
|
-
style = get_style_for_name(
|
|
300
|
+
for name, count in sort_dict(emailer_counts):
|
|
301
|
+
style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
|
|
302
|
+
emails = self.emails_for(name)
|
|
290
303
|
|
|
291
304
|
counts_table.add_row(
|
|
292
|
-
Text.from_markup(link_markup(epsteinify_name_url(
|
|
305
|
+
Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
|
|
293
306
|
str(count),
|
|
294
|
-
str(self.email_author_counts[
|
|
295
|
-
str(self.email_recipient_counts[
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
'' if
|
|
299
|
-
'' if
|
|
307
|
+
str(self.email_author_counts[name]),
|
|
308
|
+
str(self.email_recipient_counts[name]),
|
|
309
|
+
emails[0].timestamp_without_seconds(),
|
|
310
|
+
emails[-1].timestamp_without_seconds(),
|
|
311
|
+
'' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
|
|
312
|
+
'' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
|
|
313
|
+
'' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
|
|
314
|
+
'' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
|
|
300
315
|
)
|
|
301
316
|
|
|
302
317
|
console.print(vertically_pad(counts_table, 2))
|
|
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
|
|
|
42
42
|
CECILIA_STEEN = 'Cecilia Steen'
|
|
43
43
|
CELINA_DUBIN = 'Celina Dubin'
|
|
44
44
|
CHRISTINA_GALBRAITH = 'Christina Galbraith' # Works with Tyler Shears on reputation stuff
|
|
45
|
+
DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
|
|
45
46
|
DANIEL_SABBA = 'Daniel Sabba'
|
|
46
47
|
DANIEL_SIAD = 'Daniel Siad'
|
|
47
48
|
DANNY_FROST = 'Danny Frost'
|
|
@@ -143,7 +144,7 @@ REID_HOFFMAN = 'Reid Hoffman'
|
|
|
143
144
|
REID_WEINGARTEN = 'Reid Weingarten'
|
|
144
145
|
RENATA_BOLOTOVA = 'Renata Bolotova'
|
|
145
146
|
RICHARD_KAHN = 'Richard Kahn'
|
|
146
|
-
|
|
147
|
+
ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
|
|
147
148
|
ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
|
|
148
149
|
ROBERT_TRIVERS = 'Robert Trivers'
|
|
149
150
|
ROGER_SCHANK = 'Roger Schank'
|
|
@@ -178,6 +179,7 @@ JARED_KUSHNER = 'Jared Kushner'
|
|
|
178
179
|
JULIE_K_BROWN = 'Julie K. Brown'
|
|
179
180
|
KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
|
|
180
181
|
MICHAEL_J_BOCCIO = 'Michael J. Boccio'
|
|
182
|
+
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
181
183
|
PAUL_G_CASSELL = 'Paul G. Cassell'
|
|
182
184
|
RUDY_GIULIANI = 'Rudy Giuliani'
|
|
183
185
|
TULSI_GABBARD = 'Tulsi Gabbard'
|
|
@@ -226,22 +228,23 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
|
226
228
|
# Names to color white in the word counts
|
|
227
229
|
OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
228
230
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
229
|
-
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
|
|
231
|
+
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
230
232
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
231
|
-
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
233
|
+
danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
232
234
|
edmond elizabeth emily entwistle erik evelyn
|
|
233
|
-
ferguson flachsbart francis franco frank
|
|
235
|
+
ferguson flachsbart francis franco frank frost
|
|
234
236
|
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
235
|
-
hancock harold harrison harry helen hirsch hofstadter horowitz hussein
|
|
237
|
+
hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
|
|
236
238
|
ian isaac isaacson
|
|
237
|
-
jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
239
|
+
james jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
238
240
|
karl kate kathy kelly kim kruger kyle
|
|
239
|
-
leo leonard lenny leslie lieberman louis lynch lynn
|
|
241
|
+
laurie leo leonard lenny leslie lieberman louis lynch lynn
|
|
240
242
|
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
241
|
-
nicole nussbaum
|
|
243
|
+
nancy nicole nussbaum
|
|
244
|
+
owen
|
|
242
245
|
paulson philippe
|
|
243
246
|
rafael ray richard richardson rob robin ron rubin rudolph ryan
|
|
244
|
-
sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
247
|
+
sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
245
248
|
ted theresa thompson tiffany timothy tony
|
|
246
249
|
valeria
|
|
247
250
|
walter warren weinstein weiss william
|
|
@@ -20,7 +20,7 @@ POLITICS = 'politics'
|
|
|
20
20
|
PROPERTY = 'property'
|
|
21
21
|
PUBLICIST = 'publicist'
|
|
22
22
|
REPUTATION = 'reputation'
|
|
23
|
-
SKYPE_LOG= '
|
|
23
|
+
SKYPE_LOG = 'Skype log'
|
|
24
24
|
SOCIAL = 'social'
|
|
25
25
|
SPEECH = 'speech'
|
|
26
26
|
|
|
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
|
|
|
39
39
|
NYT = "New York Times"
|
|
40
40
|
PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
|
|
41
41
|
PALM_BEACH_POST = f'{PALM_BEACH} Post'
|
|
42
|
+
SHIMON_POST = 'The Shimon Post'
|
|
42
43
|
THE_REAL_DEAL = 'The Real Deal'
|
|
43
44
|
WAPO = 'WaPo'
|
|
44
45
|
VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
|
|
@@ -13,11 +13,12 @@ ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
|
13
13
|
TEXT_LINK = 'text_link'
|
|
14
14
|
|
|
15
15
|
# External site names
|
|
16
|
-
ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
|
|
16
|
+
ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb', 'RollCall']
|
|
17
17
|
EPSTEIN_MEDIA = 'epstein.media'
|
|
18
18
|
EPSTEIN_WEB = 'EpsteinWeb'
|
|
19
19
|
EPSTEINIFY = 'epsteinify'
|
|
20
20
|
JMAIL = 'Jmail'
|
|
21
|
+
ROLLCALL = 'RollCall'
|
|
21
22
|
|
|
22
23
|
GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
|
|
23
24
|
GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
|
|
@@ -41,9 +42,10 @@ EPSTEIN_WEB_URL = 'https://epsteinweb.org'
|
|
|
41
42
|
JMAIL_URL = 'https://jmail.world'
|
|
42
43
|
|
|
43
44
|
DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
|
|
44
|
-
EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
|
|
45
|
-
EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
|
|
46
|
-
EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
|
|
45
|
+
EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files/",
|
|
46
|
+
EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images/',
|
|
47
|
+
EPSTEINIFY: f"{EPSTEINIFY_URL}/document/",
|
|
48
|
+
ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
|
|
47
49
|
}
|
|
48
50
|
|
|
49
51
|
|
|
@@ -53,7 +55,7 @@ epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_ma
|
|
|
53
55
|
epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
|
|
54
56
|
epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(name)}"
|
|
55
57
|
|
|
56
|
-
epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem,
|
|
58
|
+
epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, 'lower')
|
|
57
59
|
epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
|
|
58
60
|
epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
|
|
59
61
|
epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
|
|
@@ -62,16 +64,19 @@ epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{fil
|
|
|
62
64
|
epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
|
|
63
65
|
epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.quote(s)}&ewmfilepp=20"
|
|
64
66
|
|
|
67
|
+
rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
|
|
68
|
+
|
|
65
69
|
search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
|
|
66
70
|
search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
|
|
67
71
|
search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
|
|
68
72
|
search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
|
|
69
73
|
|
|
70
74
|
|
|
71
|
-
def build_doc_url(base_url: str, filename_or_id: int | str,
|
|
75
|
+
def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
|
|
72
76
|
file_stem = coerce_file_stem(filename_or_id)
|
|
73
|
-
file_stem = file_stem.lower() if
|
|
74
|
-
|
|
77
|
+
file_stem = file_stem.lower() if case == 'lower' else file_stem
|
|
78
|
+
file_stem = file_stem.title() if case == 'title' else file_stem
|
|
79
|
+
return f"{base_url}{file_stem}"
|
|
75
80
|
|
|
76
81
|
|
|
77
82
|
def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
|
|
@@ -65,8 +65,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
65
65
|
BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
|
|
66
66
|
BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
|
|
67
67
|
BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
|
|
68
|
-
|
|
69
|
-
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
|
|
68
|
+
DANGENE_AND_JENNIE_ENTERPRISE: re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
|
|
69
|
+
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
|
|
70
70
|
DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
|
|
71
71
|
DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
|
|
72
72
|
DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
|
|
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
128
128
|
PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
|
|
129
129
|
REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
|
|
130
130
|
RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
|
|
131
|
-
|
|
131
|
+
ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
|
|
132
132
|
ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
|
|
133
133
|
ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
|
|
134
134
|
ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
|
|
@@ -163,6 +163,7 @@ EMAILERS = [
|
|
|
163
163
|
DEEPAK_CHOPRA,
|
|
164
164
|
GLENN_DUBIN,
|
|
165
165
|
GORDON_GETTY,
|
|
166
|
+
'Kevin Bright',
|
|
166
167
|
'Jack Lang',
|
|
167
168
|
JACK_SCAROLA,
|
|
168
169
|
JAY_LEFKOWITZ,
|
|
@@ -257,7 +258,6 @@ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
|
|
|
257
258
|
LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
|
|
258
259
|
KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
|
|
259
260
|
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
260
|
-
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
261
261
|
NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
|
|
262
262
|
NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
|
|
263
263
|
OBAMA_JOKE = 'joke about Obama'
|
|
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
|
|
|
265
265
|
PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
|
|
266
266
|
PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
|
|
267
267
|
PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
|
|
268
|
-
|
|
268
|
+
REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
269
269
|
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
270
270
|
SINGLE_PAGE = 'single page of'
|
|
271
271
|
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
272
272
|
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
273
|
-
THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
274
273
|
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
275
274
|
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
276
275
|
UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
|
|
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
|
|
|
500
499
|
EmailCfg(
|
|
501
500
|
id='029977',
|
|
502
501
|
author=LAWRANCE_VISOSKI,
|
|
503
|
-
recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
|
|
502
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
|
|
504
503
|
attribution_reason=LARRY_REASON,
|
|
505
504
|
duplicate_ids=['031129'],
|
|
506
505
|
),
|
|
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
|
|
|
508
507
|
EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
|
|
509
508
|
EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
|
|
510
509
|
EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
|
|
511
|
-
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
|
|
510
|
+
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
|
|
512
511
|
EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
|
|
513
512
|
EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
|
|
514
513
|
EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
|
|
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
|
|
|
573
572
|
attribution_reason='ends with "Respectfully, terry"',
|
|
574
573
|
author=TERRY_KAFKA,
|
|
575
574
|
fwded_text_after='From: Mike Cohen',
|
|
576
|
-
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
|
|
575
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
|
|
577
576
|
duplicate_ids=['028482'],
|
|
578
577
|
),
|
|
579
578
|
EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
|
|
@@ -665,6 +664,10 @@ EMAILS_CONFIG = [
|
|
|
665
664
|
EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
|
|
666
665
|
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
|
|
667
666
|
EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
|
|
667
|
+
EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
668
|
+
EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
669
|
+
EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
|
|
670
|
+
EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
|
|
668
671
|
EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
|
|
669
672
|
EmailCfg(id='026755', is_fwded_article=True), # HuffPo
|
|
670
673
|
EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
|
|
@@ -710,6 +713,8 @@ EMAILS_CONFIG = [
|
|
|
710
713
|
EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
|
|
711
714
|
EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
|
|
712
715
|
EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
|
|
716
|
+
EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
|
|
717
|
+
EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
|
|
713
718
|
EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
|
|
714
719
|
EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
|
|
715
720
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
@@ -852,9 +857,9 @@ EMAILS_CONFIG = [
|
|
|
852
857
|
EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
|
|
853
858
|
EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
|
|
854
859
|
EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
|
|
855
|
-
EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
|
|
856
860
|
EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
|
|
857
861
|
EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
|
|
862
|
+
EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
|
|
858
863
|
]
|
|
859
864
|
|
|
860
865
|
|
|
@@ -925,6 +930,7 @@ OTHER_FILES_ARTICLES = [
|
|
|
925
930
|
DocCfg(id='029865', author=LA_TIMES, description=f"front page article about {DEEPAK_CHOPRA} and young Iranians", date='2016-11-05'),
|
|
926
931
|
DocCfg(id='026598', author=LA_TIMES, description=f"op-ed about why America needs a Ministry of Culture"),
|
|
927
932
|
DocCfg(id='027024', author=LA_TIMES, description=f"Scientists Create Human Embryos to Make Stem Cells", date='2013-05-15'),
|
|
933
|
+
DocCfg(id='022811', author='Law.com', description='Sarah Ransome Identifies Herself in Epstein Sex Trafficking Case', date='2018-01-09'),
|
|
928
934
|
DocCfg(id='031776', author='Law360', description=f"article about Michael Avenatti by Andrew Strickler"),
|
|
929
935
|
DocCfg(id='023102', author=f'Litigation Daily', description=f"article about {REID_WEINGARTEN}", date='2015-09-04'),
|
|
930
936
|
DocCfg(id='029340', author=f'MarketWatch', description=f'article about estate taxes, particularly Epstein\'s favoured GRATs'),
|
|
@@ -1186,7 +1192,7 @@ OTHER_FILES_LEGAL = [
|
|
|
1186
1192
|
]
|
|
1187
1193
|
|
|
1188
1194
|
OTHER_FILES_CONFERENCES = [
|
|
1189
|
-
DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference'),
|
|
1195
|
+
DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
|
|
1190
1196
|
DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
|
|
1191
1197
|
DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
|
|
1192
1198
|
DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
|
|
@@ -1335,8 +1341,8 @@ OTHER_FILES_PROPERTY = [
|
|
|
1335
1341
|
DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
|
|
1336
1342
|
DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
|
|
1337
1343
|
DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
|
|
1338
|
-
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{
|
|
1339
|
-
DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{
|
|
1344
|
+
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
|
|
1345
|
+
DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
|
|
1340
1346
|
DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
|
|
1341
1347
|
DocCfg(id='018743', description=f"Las Vegas property listing"),
|
|
1342
1348
|
DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
|
|
@@ -1497,13 +1503,13 @@ OTHER_FILES_MISC = [
|
|
|
1497
1503
|
DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1498
1504
|
DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1499
1505
|
DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1506
|
+
DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
|
|
1500
1507
|
DocCfg(
|
|
1501
1508
|
id='018224',
|
|
1502
1509
|
category=SKYPE_LOG,
|
|
1503
|
-
|
|
1510
|
+
author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
|
|
1504
1511
|
is_interesting=True, # we don't know who linkspirit is yet
|
|
1505
1512
|
),
|
|
1506
|
-
DocCfg(id='032210', category=SKYPE_LOG, description=f'Skype conversation with linkspirit', is_interesting=True),
|
|
1507
1513
|
DocCfg(
|
|
1508
1514
|
id='025147',
|
|
1509
1515
|
author=BROCKMAN_INC,
|
|
@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
|
|
|
21
21
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
22
22
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
23
23
|
|
|
24
|
-
|
|
25
24
|
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
26
25
|
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
27
26
|
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
|
28
27
|
escape_single_quotes = lambda text: text.replace("'", r"\'")
|
|
29
28
|
iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
|
|
29
|
+
remove_zero_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
|
|
30
30
|
uniquify = lambda _list: list(set(_list))
|
|
31
31
|
without_falsey = lambda _list: [e for e in _list if e]
|
|
32
32
|
|
|
@@ -8,7 +8,7 @@ from dateutil.parser import parse
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.util.constant.names import *
|
|
10
10
|
from epstein_files.util.constant.strings import *
|
|
11
|
-
from epstein_files.util.data import without_falsey
|
|
11
|
+
from epstein_files.util.data import remove_zero_time_from_timestamp_str, without_falsey
|
|
12
12
|
|
|
13
13
|
DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
|
|
14
14
|
Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
|
|
@@ -47,12 +47,11 @@ FINANCIAL_REPORTS_AUTHORS = [
|
|
|
47
47
|
]
|
|
48
48
|
|
|
49
49
|
# Fields like timestamp and author are better added from the Document object
|
|
50
|
-
|
|
50
|
+
NON_METADATA_FIELDS = [
|
|
51
51
|
'actual_text',
|
|
52
52
|
'date',
|
|
53
53
|
'id',
|
|
54
|
-
'
|
|
55
|
-
'was_generated',
|
|
54
|
+
'is_synthetic',
|
|
56
55
|
]
|
|
57
56
|
|
|
58
57
|
|
|
@@ -68,10 +67,10 @@ class DocCfg:
|
|
|
68
67
|
date (str | None): If passed will be immediated parsed into the 'timestamp' field
|
|
69
68
|
dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
|
|
70
69
|
dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
|
|
71
|
-
duplicate_ids (list[str]):
|
|
70
|
+
duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
|
|
72
71
|
is_interesting (bool): Override other considerations and always consider this file interesting
|
|
73
72
|
timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
|
|
74
|
-
|
|
73
|
+
is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
|
|
75
74
|
"""
|
|
76
75
|
id: str
|
|
77
76
|
author: str | None = None
|
|
@@ -82,8 +81,8 @@ class DocCfg:
|
|
|
82
81
|
dupe_type: DuplicateType | None = None
|
|
83
82
|
duplicate_ids: list[str] = field(default_factory=list)
|
|
84
83
|
is_interesting: bool = False
|
|
84
|
+
is_synthetic: bool = False
|
|
85
85
|
timestamp: datetime | None = None
|
|
86
|
-
was_generated: bool = False
|
|
87
86
|
|
|
88
87
|
def __post_init__(self):
|
|
89
88
|
if self.date:
|
|
@@ -94,13 +93,17 @@ class DocCfg:
|
|
|
94
93
|
|
|
95
94
|
def complete_description(self) -> str | None:
|
|
96
95
|
"""String that summarizes what is known about this document."""
|
|
97
|
-
if self.category and not self.description:
|
|
96
|
+
if self.category and not self.description and not self.author:
|
|
98
97
|
return self.category
|
|
99
98
|
elif self.category == REPUTATION:
|
|
100
99
|
return f"{REPUTATION_MGMT}: {self.description}"
|
|
100
|
+
elif self.category == SKYPE_LOG:
|
|
101
|
+
msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
|
|
102
|
+
return f"{msg} {self.description}" if self.description else msg
|
|
101
103
|
elif self.author and self.description:
|
|
102
104
|
if self.category in [ACADEMIA, BOOK]:
|
|
103
|
-
|
|
105
|
+
title = self.description if '"' in self.description else f"'{self.description}'"
|
|
106
|
+
return f"{title} by {self.author}"
|
|
104
107
|
elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
|
|
105
108
|
return f"{self.author} report: '{self.description}'"
|
|
106
109
|
elif self.category == LEGAL and 'v.' in self.author:
|
|
@@ -111,10 +114,6 @@ class DocCfg:
|
|
|
111
114
|
pieces = without_falsey([self.author, self.description])
|
|
112
115
|
return ' '.join(pieces) if pieces else None
|
|
113
116
|
|
|
114
|
-
def duplicate_reason(self) -> str | None:
|
|
115
|
-
if self.dupe_type is not None:
|
|
116
|
-
return DUPE_TYPE_STRS[self.dupe_type]
|
|
117
|
-
|
|
118
117
|
def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
|
|
119
118
|
"""Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
|
|
120
119
|
for id in self.duplicate_ids:
|
|
@@ -123,35 +122,17 @@ class DocCfg:
|
|
|
123
122
|
dupe_cfg.dupe_of_id = self.id
|
|
124
123
|
dupe_cfg.duplicate_ids = []
|
|
125
124
|
dupe_cfg.dupe_type = self.dupe_type
|
|
126
|
-
dupe_cfg.
|
|
125
|
+
dupe_cfg.is_synthetic = True
|
|
127
126
|
yield dupe_cfg
|
|
128
127
|
|
|
129
128
|
def metadata(self) -> Metadata:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
if self.category in [EMAIL, TEXT_MESSAGE]:
|
|
133
|
-
del non_null_fields['category']
|
|
134
|
-
|
|
135
|
-
return non_null_fields
|
|
136
|
-
|
|
137
|
-
def non_null_field_names(self) -> list[str]:
|
|
138
|
-
return [f.name for f in self.sorted_fields() if getattr(self, f.name)]
|
|
139
|
-
|
|
140
|
-
def sorted_fields(self) -> list[Field]:
|
|
141
|
-
return sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name))
|
|
142
|
-
|
|
143
|
-
def title_by_author(self) -> str:
|
|
144
|
-
if not (self.author and self.description):
|
|
145
|
-
raise RuntimeError(f"Can't call title_by_author() without author and description!")
|
|
146
|
-
|
|
147
|
-
title = self.description if '"' in self.description else f"'{self.description}'"
|
|
148
|
-
return f"{title} by {self.author}"
|
|
129
|
+
return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
|
|
149
130
|
|
|
150
131
|
def _props_strs(self) -> list[str]:
|
|
151
132
|
props = []
|
|
152
133
|
add_prop = lambda f, value: props.append(f"{f.name}={value}")
|
|
153
134
|
|
|
154
|
-
for _field in self.
|
|
135
|
+
for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
|
|
155
136
|
value = getattr(self, _field.name)
|
|
156
137
|
|
|
157
138
|
if value is None or value is False or (isinstance(value, list) and len(value) == 0):
|
|
@@ -160,13 +141,13 @@ class DocCfg:
|
|
|
160
141
|
add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
|
|
161
142
|
elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
|
|
162
143
|
continue
|
|
163
|
-
elif _field.name == 'recipients' and
|
|
144
|
+
elif _field.name == 'recipients' and value:
|
|
164
145
|
recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
|
|
165
146
|
add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
|
|
166
147
|
elif _field.name == 'timestamp' and self.date is not None:
|
|
167
148
|
continue # Don't print both timestamp and date
|
|
168
149
|
elif isinstance(value, datetime):
|
|
169
|
-
value_str =
|
|
150
|
+
value_str = remove_zero_time_from_timestamp_str(value)
|
|
170
151
|
add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
|
|
171
152
|
elif isinstance(value, str):
|
|
172
153
|
if "'" in value:
|
|
@@ -221,18 +202,15 @@ class EmailCfg(CommunicationCfg):
|
|
|
221
202
|
"""
|
|
222
203
|
Attributes:
|
|
223
204
|
actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
|
|
205
|
+
fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
|
|
224
206
|
is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
|
|
225
207
|
recipients (list[str | None]): Who received the email
|
|
226
208
|
"""
|
|
227
|
-
actual_text: str | None = None
|
|
228
|
-
fwded_text_after: str | None = None
|
|
209
|
+
actual_text: str | None = None
|
|
210
|
+
fwded_text_after: str | None = None
|
|
229
211
|
is_fwded_article: bool = False
|
|
230
212
|
recipients: list[str | None] = field(default_factory=list)
|
|
231
213
|
|
|
232
|
-
@classmethod
|
|
233
|
-
def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
|
|
234
|
-
return cls(**asdict(cfg))
|
|
235
|
-
|
|
236
214
|
# This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
|
|
237
215
|
def __repr__(self) -> str:
|
|
238
216
|
return super().__repr__()
|
|
@@ -11,8 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
|
|
|
11
11
|
KB = 1024
|
|
12
12
|
MB = KB * KB
|
|
13
13
|
|
|
14
|
+
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
15
|
+
file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
|
|
14
16
|
|
|
15
|
-
# Coerce methods
|
|
17
|
+
# Coerce methods handle both string and int arguments.
|
|
16
18
|
coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
|
|
17
19
|
coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
|
|
18
20
|
id_str = lambda id: f"{int(id):06d}"
|
|
@@ -44,14 +46,6 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
|
|
|
44
46
|
return file_match.group(1)
|
|
45
47
|
|
|
46
48
|
|
|
47
|
-
def file_size(file_path: str | Path) -> int:
|
|
48
|
-
return Path(file_path).stat().st_size
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def file_size_str(file_path: str | Path) -> str:
|
|
52
|
-
return file_size_to_str(file_size(file_path))
|
|
53
|
-
|
|
54
|
-
|
|
55
49
|
def file_size_to_str(size: int) -> str:
|
|
56
50
|
digits = 2
|
|
57
51
|
|
|
@@ -223,6 +223,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
223
223
|
'Linda Pinto': 'interior design at Alberto Pinto Cabinet',
|
|
224
224
|
MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
|
|
225
225
|
NADIA_MARCINKO: 'pilot',
|
|
226
|
+
'Sean J. Lancaster': 'airplane reseller',
|
|
226
227
|
}
|
|
227
228
|
),
|
|
228
229
|
HighlightedNames(
|
|
@@ -260,6 +261,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
260
261
|
MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
|
|
261
262
|
MICHAEL_MILLER: 'Steptoe LLP partner',
|
|
262
263
|
REID_WEINGARTEN: 'Steptoe LLP partner',
|
|
264
|
+
ROBERT_D_CRITTON_JR: 'criminal defense attorney',
|
|
265
|
+
'Robert Gold': None,
|
|
263
266
|
'Roy Black': CRIMINAL_DEFENSE_2008,
|
|
264
267
|
SCOTT_J_LINK: None,
|
|
265
268
|
TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
|
|
@@ -297,28 +300,17 @@ HIGHLIGHTED_NAMES = [
|
|
|
297
300
|
}
|
|
298
301
|
),
|
|
299
302
|
HighlightedNames(
|
|
300
|
-
label=
|
|
301
|
-
style='tan',
|
|
302
|
-
pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
|
|
303
|
-
emailers = {
|
|
304
|
-
DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
|
|
305
|
-
JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
|
|
306
|
-
'linkspirit': "Skype username of someone Epstein communicated with",
|
|
307
|
-
'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
|
|
308
|
-
STEPHEN_HANSON: None,
|
|
309
|
-
TOM_BARRACK: 'long time friend of Trump',
|
|
310
|
-
}
|
|
311
|
-
),
|
|
312
|
-
HighlightedNames(
|
|
313
|
-
label='finance',
|
|
303
|
+
label=FINANCE,
|
|
314
304
|
style='green',
|
|
315
|
-
pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
305
|
+
pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
316
306
|
emailers={
|
|
317
307
|
AMANDA_ENS: 'Citigroup',
|
|
308
|
+
BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
|
|
318
309
|
DANIEL_SABBA: 'UBS Investment Bank',
|
|
319
310
|
DAVID_FISZEL: 'CIO Honeycomb Asset Management',
|
|
320
311
|
JES_STALEY: 'former CEO of Barclays',
|
|
321
312
|
JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
|
|
313
|
+
'Laurie Cameron': 'currency trading',
|
|
322
314
|
LEON_BLACK: 'Apollo CEO',
|
|
323
315
|
MARC_LEON: 'Luxury Properties Sari Morrocco',
|
|
324
316
|
MELANIE_SPINELLA: f'representative of {LEON_BLACK}',
|
|
@@ -327,9 +319,23 @@ HIGHLIGHTED_NAMES = [
|
|
|
327
319
|
PAUL_MORRIS: 'Deutsche Bank',
|
|
328
320
|
}
|
|
329
321
|
),
|
|
322
|
+
HighlightedNames(
|
|
323
|
+
label='friend',
|
|
324
|
+
style='tan',
|
|
325
|
+
pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
|
|
326
|
+
emailers = {
|
|
327
|
+
DANGENE_AND_JENNIE_ENTERPRISE: 'founders of the members-only CORE club',
|
|
328
|
+
DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
|
|
329
|
+
JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
|
|
330
|
+
'linkspirit': "Skype username of someone Epstein communicated with",
|
|
331
|
+
'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
|
|
332
|
+
STEPHEN_HANSON: None,
|
|
333
|
+
TOM_BARRACK: 'long time friend of Trump',
|
|
334
|
+
},
|
|
335
|
+
),
|
|
330
336
|
HighlightedNames(
|
|
331
337
|
label=HARVARD.lower(),
|
|
332
|
-
style='
|
|
338
|
+
style='light_goldenrod3',
|
|
333
339
|
pattern=r'Cambridge|(Derek\s*)?Bok|Elisa(\s*New)?|Harvard(\s*(Business|Law|University)(\s*School)?)?|(Jonathan\s*)?Zittrain|(Stephen\s*)?Kosslyn',
|
|
334
340
|
emailers = {
|
|
335
341
|
"Donald Rubin": f"Professor of Statistics",
|
|
@@ -378,7 +384,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
378
384
|
HighlightedNames(
|
|
379
385
|
label=JOURNALIST,
|
|
380
386
|
style='bright_yellow',
|
|
381
|
-
pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
|
|
387
|
+
pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?(Hill|Patterson)|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
|
|
382
388
|
emailers = {
|
|
383
389
|
EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
|
|
384
390
|
'James Hill': 'ABC News',
|
|
@@ -398,7 +404,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
398
404
|
HighlightedNames(
|
|
399
405
|
label='law enforcement',
|
|
400
406
|
style='color(24) bold',
|
|
401
|
-
pattern=r'ag|(Alicia\s*)?Valle|AML|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
|
|
407
|
+
pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
|
|
402
408
|
emailers = {
|
|
403
409
|
ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
|
|
404
410
|
DANNY_FROST: 'Director of Communications at Manhattan DA',
|
|
@@ -457,7 +463,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
457
463
|
CHRISTINA_GALBRAITH: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {TYLER_SHEARS}",
|
|
458
464
|
IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
|
|
459
465
|
MICHAEL_SITRICK: 'crisis PR',
|
|
466
|
+
'Owen Blicksilver': 'OBPR, Inc.',
|
|
460
467
|
PEGGY_SIEGAL: 'socialite',
|
|
468
|
+
'R. Couri Hay': None,
|
|
461
469
|
ROSS_GOW: 'Acuity Reputation Management',
|
|
462
470
|
TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
|
|
463
471
|
}
|
|
@@ -465,7 +473,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
465
473
|
HighlightedNames(
|
|
466
474
|
label='republicans',
|
|
467
475
|
style='bold dark_red',
|
|
468
|
-
pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|McCain|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?(Manafort|Volcker)|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(Rex\s*)?Tillerson|(?<!Cynthia )(Richard\s*)?Nixon|Sasse',
|
|
476
|
+
pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|McCain|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?(Manafort|Volcker)|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(Rex\s*)?Tillerson|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|Tea\s*Party',
|
|
469
477
|
# There's no emails from these people, they're just here to automate the regex creation for both first + last names
|
|
470
478
|
emailers = {
|
|
471
479
|
RUDY_GIULIANI: 'disbarred formed mayor of New York City',
|
|
@@ -485,6 +493,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
485
493
|
style='red bold',
|
|
486
494
|
pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
|
|
487
495
|
emailers = {
|
|
496
|
+
'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
|
|
488
497
|
MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
|
|
489
498
|
RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
|
|
490
499
|
SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
|
|
@@ -493,14 +502,16 @@ HIGHLIGHTED_NAMES = [
|
|
|
493
502
|
HighlightedNames(
|
|
494
503
|
label=ACADEMIA,
|
|
495
504
|
style='light_goldenrod2',
|
|
496
|
-
pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
|
|
505
|
+
pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Norman\s*Finkelstein|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
|
|
497
506
|
emailers = {
|
|
498
507
|
DAVID_HAIG: None,
|
|
499
508
|
JOSCHA_BACH: 'cognitive science / AI research',
|
|
500
509
|
'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
|
|
510
|
+
'Ed Boyden': 'Associate Professor, MIT Media Lab neurobiology',
|
|
501
511
|
LAWRENCE_KRAUSS: 'theoretical physicist',
|
|
502
512
|
LINDA_STONE: 'ex-Microsoft, MIT Media Lab',
|
|
503
513
|
MARK_TRAMO: 'professor of neurology at UCLA',
|
|
514
|
+
'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
|
|
504
515
|
NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
|
|
505
516
|
PETER_ATTIA: 'longevity medicine',
|
|
506
517
|
ROBERT_TRIVERS: 'evolutionary biology',
|
|
@@ -661,7 +672,7 @@ def get_style_for_category(category: str) -> str | None:
|
|
|
661
672
|
elif category in [CONFERENCE, SPEECH]:
|
|
662
673
|
return f"{get_style_for_category(ACADEMIA)} dim"
|
|
663
674
|
elif category == SOCIAL:
|
|
664
|
-
return
|
|
675
|
+
return get_style_for_category(PUBLICIST)
|
|
665
676
|
|
|
666
677
|
category = CATEGORY_STYLE_MAPPING.get(category, category)
|
|
667
678
|
|
|
@@ -32,7 +32,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
|
|
|
32
32
|
# Augment the standard log highlighter with 'epstein_filename' matcher
|
|
33
33
|
class LogHighlighter(ReprHighlighter):
|
|
34
34
|
highlights = ReprHighlighter.highlights + [
|
|
35
|
-
*[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
35
|
+
*[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
36
36
|
"(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
|
|
37
37
|
]
|
|
38
38
|
|
|
@@ -125,7 +125,7 @@ def print_json_files(epstein_files: EpsteinFiles):
|
|
|
125
125
|
console.print_json(json_file.json_str(), indent=4, sort_keys=False)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def
|
|
128
|
+
def write_json_metadata(epstein_files: EpsteinFiles) -> None:
|
|
129
129
|
json_str = epstein_files.json_metadata()
|
|
130
130
|
|
|
131
131
|
if args.build:
|
|
@@ -33,6 +33,7 @@ GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
|
|
|
33
33
|
DEFAULT_NAME_STYLE = 'gray46'
|
|
34
34
|
INFO_STYLE = 'white dim italic'
|
|
35
35
|
KEY_STYLE='honeydew2 bold'
|
|
36
|
+
LAST_TIMESTAMP_STYLE='wheat4'
|
|
36
37
|
SECTION_HEADER_STYLE = 'bold white on blue3'
|
|
37
38
|
SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
|
|
38
39
|
SUBSTACK_POST_LINK_STYLE = 'bright_cyan'
|
|
@@ -79,10 +80,18 @@ console = Console(**CONSOLE_ARGS)
|
|
|
79
80
|
highlighter = CONSOLE_ARGS['highlighter']
|
|
80
81
|
|
|
81
82
|
|
|
82
|
-
def add_cols_to_table(table: Table, col_names: list[str]) -> None:
|
|
83
|
+
def add_cols_to_table(table: Table, col_names: list[str | dict]) -> None:
|
|
83
84
|
"""Left most col will be left justified, rest are center justified."""
|
|
84
85
|
for i, col in enumerate(col_names):
|
|
85
|
-
|
|
86
|
+
if isinstance(col, dict):
|
|
87
|
+
col_name = col['name']
|
|
88
|
+
kwargs = col
|
|
89
|
+
del kwargs['name']
|
|
90
|
+
else:
|
|
91
|
+
col_name = col
|
|
92
|
+
kwargs = {}
|
|
93
|
+
|
|
94
|
+
table.add_column(col_name, justify='left' if i == 0 else 'center', **kwargs)
|
|
86
95
|
|
|
87
96
|
|
|
88
97
|
def build_highlighter(pattern: str) -> EpsteinHighlighter:
|
|
File without changes
|
|
File without changes
|
{epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/emails/email_header.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|