epstein-files 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +3 -3
- epstein_files/documents/communication.py +2 -2
- epstein_files/documents/document.py +32 -62
- epstein_files/documents/email.py +40 -1
- epstein_files/documents/imessage/text_message.py +1 -1
- epstein_files/documents/json_file.py +1 -1
- epstein_files/documents/messenger_log.py +1 -1
- epstein_files/documents/other_file.py +2 -2
- epstein_files/util/constant/names.py +9 -8
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constants.py +17 -13
- epstein_files/util/data.py +1 -1
- epstein_files/util/doc_cfg.py +20 -42
- epstein_files/util/file_helper.py +3 -9
- epstein_files/util/highlighted_group.py +13 -4
- epstein_files/util/logging.py +1 -1
- epstein_files/util/output.py +1 -1
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/METADATA +1 -1
- epstein_files-1.0.12.dist-info/RECORD +33 -0
- epstein_files-1.0.11.dist-info/RECORD +0 -33
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.11.dist-info → epstein_files-1.0.12.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
|
|
|
20
20
|
from epstein_files.util.env import args, specified_names
|
|
21
21
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
22
|
from epstein_files.util.logging import logger
|
|
23
|
-
from epstein_files.util.output import (print_emails, print_json_files,
|
|
24
|
-
print_text_messages, write_urls)
|
|
23
|
+
from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
|
|
24
|
+
print_text_messages, write_json_metadata, write_urls)
|
|
25
25
|
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
26
26
|
from epstein_files.util.timer import Timer
|
|
27
27
|
from epstein_files.util.word_count import write_word_counts_html
|
|
@@ -37,7 +37,7 @@ def generate_html() -> None:
|
|
|
37
37
|
epstein_files = EpsteinFiles.get_files(timer)
|
|
38
38
|
|
|
39
39
|
if args.json_metadata:
|
|
40
|
-
|
|
40
|
+
write_json_metadata(epstein_files)
|
|
41
41
|
exit()
|
|
42
42
|
elif args.json_files:
|
|
43
43
|
print_json_files(epstein_files)
|
|
@@ -34,9 +34,9 @@ class Communication(Document):
|
|
|
34
34
|
def is_attribution_uncertain(self) -> bool:
|
|
35
35
|
return bool(self.config and self.config.is_attribution_uncertain)
|
|
36
36
|
|
|
37
|
-
def
|
|
37
|
+
def external_links(self, _style: str = '', include_alt_link: bool = True) -> Text:
|
|
38
38
|
"""Overrides super() method to apply self.author_style."""
|
|
39
|
-
return super().
|
|
39
|
+
return super().external_links(self.author_style, include_alt_link=include_alt_link)
|
|
40
40
|
|
|
41
41
|
def summary(self) -> Text:
|
|
42
42
|
return self._summary().append(CLOSE_PROPERTIES_CHAR)
|
|
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
|
|
|
16
16
|
from epstein_files.util.constant.strings import *
|
|
17
17
|
from epstein_files.util.constant.urls import *
|
|
18
18
|
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
19
|
-
from epstein_files.util.data import collapse_newlines, date_str,
|
|
20
|
-
from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
|
|
19
|
+
from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_time_from_timestamp_str, without_falsey
|
|
20
|
+
from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
|
|
21
21
|
from epstein_files.util.env import DOCS_DIR, args
|
|
22
22
|
from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
|
|
23
23
|
file_size_str, is_local_extract_file)
|
|
@@ -31,10 +31,8 @@ INFO_INDENT = 2
|
|
|
31
31
|
INFO_PADDING = (0, 0, 0, INFO_INDENT)
|
|
32
32
|
MAX_TOP_LINES_LEN = 4000 # Only for logging
|
|
33
33
|
MIN_DOCUMENT_ID = 10477
|
|
34
|
-
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
35
34
|
WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
|
|
36
35
|
|
|
37
|
-
EXTRACTED_FROM = 'Extracted from'
|
|
38
36
|
MIN_TIMESTAMP = datetime(1991, 1, 1)
|
|
39
37
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
40
38
|
MAX_TIMESTAMP = datetime(2020, 1, 1)
|
|
@@ -96,15 +94,9 @@ class Document:
|
|
|
96
94
|
def __post_init__(self):
|
|
97
95
|
self.filename = self.file_path.name
|
|
98
96
|
self.file_id = extract_file_id(self.filename)
|
|
99
|
-
self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
97
|
+
self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
100
98
|
|
|
101
|
-
if self
|
|
102
|
-
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
103
|
-
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
104
|
-
|
|
105
|
-
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
106
|
-
self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
|
|
107
|
-
else:
|
|
99
|
+
if 'url_slug' not in vars(self):
|
|
108
100
|
self.url_slug = self.file_path.stem
|
|
109
101
|
|
|
110
102
|
self._set_computed_fields(text=self.text or self._load_file())
|
|
@@ -122,11 +114,11 @@ class Document:
|
|
|
122
114
|
|
|
123
115
|
def duplicate_file_txt(self) -> Text:
|
|
124
116
|
"""If the file is a dupe make a nice message to explain what file it's a duplicate of."""
|
|
125
|
-
if not self.config or not self.config.dupe_of_id:
|
|
117
|
+
if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
|
|
126
118
|
raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
|
|
127
119
|
|
|
128
120
|
txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
|
|
129
|
-
txt.append(f" because it's {self.config.
|
|
121
|
+
txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
|
|
130
122
|
return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
|
|
131
123
|
|
|
132
124
|
def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
@@ -141,9 +133,28 @@ class Document:
|
|
|
141
133
|
"""Create a Text obj link to this document on EpsteinWeb."""
|
|
142
134
|
return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
143
135
|
|
|
136
|
+
def external_links(self, style: str = '', include_alt_link: bool = False) -> Text:
|
|
137
|
+
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
138
|
+
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
139
|
+
|
|
140
|
+
if args.use_epstein_web:
|
|
141
|
+
txt.append(self.epstein_web_link(style=style))
|
|
142
|
+
|
|
143
|
+
if include_alt_link:
|
|
144
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
145
|
+
txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
|
|
146
|
+
else:
|
|
147
|
+
txt.append(self.epstein_media_link(style=style))
|
|
148
|
+
|
|
149
|
+
if include_alt_link:
|
|
150
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
151
|
+
txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
|
|
152
|
+
|
|
153
|
+
return txt
|
|
154
|
+
|
|
144
155
|
def file_info_panel(self) -> Group:
|
|
145
156
|
"""Panel with filename linking to raw file plus any additional info about the file."""
|
|
146
|
-
panel = Panel(self.
|
|
157
|
+
panel = Panel(self.external_links(include_alt_link=True), border_style=self._border_style(), expand=False)
|
|
147
158
|
padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
|
|
148
159
|
return Group(*([panel] + padded_info))
|
|
149
160
|
|
|
@@ -155,12 +166,10 @@ class Document:
|
|
|
155
166
|
|
|
156
167
|
def info(self) -> list[Text]:
|
|
157
168
|
"""0 to 2 sentences containing the info_txt() as well as any configured description."""
|
|
158
|
-
|
|
169
|
+
return without_falsey([
|
|
159
170
|
self.info_txt(),
|
|
160
171
|
highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
|
|
161
|
-
]
|
|
162
|
-
|
|
163
|
-
return without_falsey(sentences)
|
|
172
|
+
])
|
|
164
173
|
|
|
165
174
|
def info_txt(self) -> Text | None:
|
|
166
175
|
"""Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
|
|
@@ -197,9 +206,9 @@ class Document:
|
|
|
197
206
|
|
|
198
207
|
if self.is_local_extract_file():
|
|
199
208
|
metadata['extracted_file'] = {
|
|
200
|
-
'explanation': '
|
|
201
|
-
'
|
|
202
|
-
'
|
|
209
|
+
'explanation': 'Manually extracted from one of the court filings.',
|
|
210
|
+
'extracted_from': self.url_slug + '.txt',
|
|
211
|
+
'url': extracted_file_url(self.filename),
|
|
203
212
|
}
|
|
204
213
|
|
|
205
214
|
return metadata
|
|
@@ -208,25 +217,6 @@ class Document:
|
|
|
208
217
|
with open(self.file_path) as f:
|
|
209
218
|
return f.read()
|
|
210
219
|
|
|
211
|
-
def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
|
|
212
|
-
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
213
|
-
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
214
|
-
|
|
215
|
-
if args.use_epstein_web:
|
|
216
|
-
txt.append(self.epstein_web_link(style=style))
|
|
217
|
-
|
|
218
|
-
if include_alt_link:
|
|
219
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
220
|
-
txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
|
|
221
|
-
else:
|
|
222
|
-
txt.append(self.epstein_media_link(style=style))
|
|
223
|
-
|
|
224
|
-
if include_alt_link:
|
|
225
|
-
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
226
|
-
txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
|
|
227
|
-
|
|
228
|
-
return txt
|
|
229
|
-
|
|
230
220
|
def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
|
|
231
221
|
"""Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
|
|
232
222
|
for k, v in repairs.items():
|
|
@@ -253,7 +243,7 @@ class Document:
|
|
|
253
243
|
txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
|
|
254
244
|
|
|
255
245
|
if self.timestamp:
|
|
256
|
-
timestamp_str =
|
|
246
|
+
timestamp_str = remove_time_from_timestamp_str(self.timestamp)
|
|
257
247
|
txt.append(' (', style=SYMBOL_STYLE)
|
|
258
248
|
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
259
249
|
|
|
@@ -327,26 +317,6 @@ class Document:
|
|
|
327
317
|
self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
|
|
328
318
|
self.num_lines = len(self.lines)
|
|
329
319
|
|
|
330
|
-
def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
|
|
331
|
-
"""Copy info from original config for file this document was extracted from."""
|
|
332
|
-
if self.config:
|
|
333
|
-
self.warn(f"Merging existing config with config for file this document was extracted from")
|
|
334
|
-
else:
|
|
335
|
-
self.config = EmailCfg(id=self.file_id)
|
|
336
|
-
|
|
337
|
-
extracted_from_description = doc_cfg.complete_description()
|
|
338
|
-
|
|
339
|
-
if extracted_from_description:
|
|
340
|
-
extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
|
|
341
|
-
|
|
342
|
-
if self.config.description:
|
|
343
|
-
self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
|
|
344
|
-
|
|
345
|
-
self.config.description = extracted_description
|
|
346
|
-
|
|
347
|
-
self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
|
|
348
|
-
self.warn(f"Constructed local config\n{self.config}")
|
|
349
|
-
|
|
350
320
|
def _write_clean_text(self, output_path: Path) -> None:
|
|
351
321
|
"""Write self.text to 'output_path'. Used only for diffing files."""
|
|
352
322
|
if output_path.exists():
|
epstein_files/documents/email.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
from copy import deepcopy
|
|
3
4
|
from dataclasses import asdict, dataclass, field
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from typing import ClassVar, cast
|
|
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
|
|
|
21
22
|
from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
|
|
22
23
|
flatten, remove_timezone, uniquify)
|
|
23
24
|
from epstein_files.util.doc_cfg import EmailCfg, Metadata
|
|
25
|
+
from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
|
|
24
26
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
25
27
|
from epstein_files.util.logging import logger
|
|
26
28
|
from epstein_files.util.rich import *
|
|
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
|
|
|
35
37
|
BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
|
|
36
38
|
DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
|
|
37
39
|
TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
|
|
40
|
+
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
38
41
|
|
|
39
42
|
SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
|
|
40
43
|
REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
|
|
44
|
+
APPEARS_IN = 'Appears in'
|
|
41
45
|
MAX_CHARS_TO_PRINT = 4000
|
|
42
46
|
MAX_NUM_HEADER_LINES = 14
|
|
43
47
|
MAX_QUOTED_REPLIES = 2
|
|
@@ -248,6 +252,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
|
|
|
248
252
|
|
|
249
253
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
250
254
|
USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
|
|
255
|
+
'Alan Dlugash', # CCed with Richard Kahn
|
|
251
256
|
'Alan Rogers', # Random CC
|
|
252
257
|
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
253
258
|
'BS Stern', # A random fwd of email we have
|
|
@@ -264,6 +269,8 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
|
|
|
264
269
|
'Lyn Fontanilla', # Random CC
|
|
265
270
|
'Mark Albert', # Random CC
|
|
266
271
|
'Matthew Schafer', # Random CC
|
|
272
|
+
MICHAEL_BUCHHOLTZ, # Terry Kafka CC
|
|
273
|
+
'Nancy Dahl', # covered by Lawrence Krauss (her husband)
|
|
267
274
|
'Michael Simmons', # Random CC
|
|
268
275
|
'Nancy Portland', # Lawrence Krauss CC
|
|
269
276
|
'Oliver Goodenough', # Robert Trivers CC
|
|
@@ -318,6 +325,17 @@ class Email(Communication):
|
|
|
318
325
|
rewritten_header_ids: ClassVar[set[str]] = set([])
|
|
319
326
|
|
|
320
327
|
def __post_init__(self):
|
|
328
|
+
self.filename = self.file_path.name
|
|
329
|
+
self.file_id = extract_file_id(self.filename)
|
|
330
|
+
|
|
331
|
+
# Special handling for copying properties out of the config for the document this one was extracted from
|
|
332
|
+
if self.is_local_extract_file():
|
|
333
|
+
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
334
|
+
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
335
|
+
|
|
336
|
+
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
337
|
+
self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
|
|
338
|
+
|
|
321
339
|
super().__post_init__()
|
|
322
340
|
|
|
323
341
|
try:
|
|
@@ -570,7 +588,7 @@ class Email(Communication):
|
|
|
570
588
|
self._merge_lines(3) # Merge 4th and 5th rows
|
|
571
589
|
elif self.file_id in '026609 029402 032405 022695'.split():
|
|
572
590
|
self._merge_lines(4) # Merge 5th and 6th rows
|
|
573
|
-
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
|
|
591
|
+
elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
|
|
574
592
|
self._merge_lines(2, 4)
|
|
575
593
|
elif self.file_id in ['029154', '029163']:
|
|
576
594
|
self._merge_lines(2, 5)
|
|
@@ -649,6 +667,27 @@ class Email(Communication):
|
|
|
649
667
|
sent_from = sent_from_match.group(0)
|
|
650
668
|
return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
|
|
651
669
|
|
|
670
|
+
def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
|
|
671
|
+
"""Copy info from original config for file this document was extracted from."""
|
|
672
|
+
if self.file_id in ALL_FILE_CONFIGS:
|
|
673
|
+
self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
|
|
674
|
+
self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
|
|
675
|
+
else:
|
|
676
|
+
self.config = EmailCfg(id=self.file_id)
|
|
677
|
+
|
|
678
|
+
extracted_from_description = extracted_from_doc_cfg.complete_description()
|
|
679
|
+
|
|
680
|
+
if extracted_from_description:
|
|
681
|
+
extracted_description = f"{APPEARS_IN} {extracted_from_description}"
|
|
682
|
+
|
|
683
|
+
if self.config.description:
|
|
684
|
+
self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
|
|
685
|
+
|
|
686
|
+
self.config.description = extracted_description
|
|
687
|
+
|
|
688
|
+
self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
|
|
689
|
+
self.warn(f"Constructed synthetic config: {self.config}")
|
|
690
|
+
|
|
652
691
|
def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
|
|
653
692
|
logger.debug(f"Printing '{self.filename}'...")
|
|
654
693
|
yield self.file_info_panel()
|
|
@@ -45,7 +45,7 @@ class TextMessage:
|
|
|
45
45
|
self.author_str = self.author_str or self.author
|
|
46
46
|
|
|
47
47
|
if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
|
|
48
|
-
self.author_str
|
|
48
|
+
self.author_str += ' (?)'
|
|
49
49
|
|
|
50
50
|
def timestamp(self) -> datetime:
|
|
51
51
|
return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
|
|
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
|
|
|
39
39
|
return JSON
|
|
40
40
|
|
|
41
41
|
def info_txt(self) -> Text | None:
|
|
42
|
-
return Text(f"JSON file,
|
|
42
|
+
return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
|
|
43
43
|
|
|
44
44
|
def is_interesting(self):
|
|
45
45
|
return False
|
|
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
|
|
|
76
76
|
is_phone_number = author_str.startswith('+')
|
|
77
77
|
|
|
78
78
|
if is_phone_number:
|
|
79
|
-
logger.
|
|
79
|
+
logger.info(f"{self.summary()} Found phone number: {author_str}")
|
|
80
80
|
self.phone_number = author_str
|
|
81
81
|
|
|
82
82
|
# If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
|
|
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
|
|
|
107
107
|
TEXT_OF_US_LAW,
|
|
108
108
|
TRANSLATION,
|
|
109
109
|
TWEET,
|
|
110
|
-
|
|
110
|
+
REAL_DEAL_ARTICLE,
|
|
111
111
|
TRUMP_DISCLOSURES,
|
|
112
112
|
UBS_CIO_REPORT,
|
|
113
113
|
UN_GENERAL_ASSEMBLY,
|
|
@@ -240,7 +240,7 @@ class OtherFile(Document):
|
|
|
240
240
|
table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
|
|
241
241
|
|
|
242
242
|
for file in files:
|
|
243
|
-
link_and_info = [file.
|
|
243
|
+
link_and_info = [file.external_links()]
|
|
244
244
|
date_str = file.date_str()
|
|
245
245
|
|
|
246
246
|
if file.is_duplicate():
|
|
@@ -143,7 +143,7 @@ REID_HOFFMAN = 'Reid Hoffman'
|
|
|
143
143
|
REID_WEINGARTEN = 'Reid Weingarten'
|
|
144
144
|
RENATA_BOLOTOVA = 'Renata Bolotova'
|
|
145
145
|
RICHARD_KAHN = 'Richard Kahn'
|
|
146
|
-
|
|
146
|
+
ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
|
|
147
147
|
ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
|
|
148
148
|
ROBERT_TRIVERS = 'Robert Trivers'
|
|
149
149
|
ROGER_SCHANK = 'Roger Schank'
|
|
@@ -178,6 +178,7 @@ JARED_KUSHNER = 'Jared Kushner'
|
|
|
178
178
|
JULIE_K_BROWN = 'Julie K. Brown'
|
|
179
179
|
KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
|
|
180
180
|
MICHAEL_J_BOCCIO = 'Michael J. Boccio'
|
|
181
|
+
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
181
182
|
PAUL_G_CASSELL = 'Paul G. Cassell'
|
|
182
183
|
RUDY_GIULIANI = 'Rudy Giuliani'
|
|
183
184
|
TULSI_GABBARD = 'Tulsi Gabbard'
|
|
@@ -226,22 +227,22 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
|
|
|
226
227
|
# Names to color white in the word counts
|
|
227
228
|
OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
|
|
228
229
|
aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
|
|
229
|
-
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
|
|
230
|
+
baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
|
|
230
231
|
chapman charles charlie christopher clint cohen colin collins conway
|
|
231
|
-
davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
232
|
+
danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
|
|
232
233
|
edmond elizabeth emily entwistle erik evelyn
|
|
233
|
-
ferguson flachsbart francis franco frank
|
|
234
|
+
ferguson flachsbart francis franco frank frost
|
|
234
235
|
gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
|
|
235
|
-
hancock harold harrison harry helen hirsch hofstadter horowitz hussein
|
|
236
|
+
hancock harold harrison harry hay helen hirsch hofstadter horowitz hussein
|
|
236
237
|
ian isaac isaacson
|
|
237
238
|
jamie jane janet jason jen jim joe johnson jones josh julie justin
|
|
238
239
|
karl kate kathy kelly kim kruger kyle
|
|
239
|
-
leo leonard lenny leslie lieberman louis lynch lynn
|
|
240
|
+
laurie leo leonard lenny leslie lieberman louis lynch lynn
|
|
240
241
|
marcus marianne matt matthew melissa michele michelle moore moscowitz
|
|
241
|
-
nicole nussbaum
|
|
242
|
+
nancy nicole nussbaum
|
|
242
243
|
paulson philippe
|
|
243
244
|
rafael ray richard richardson rob robin ron rubin rudolph ryan
|
|
244
|
-
sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
245
|
+
sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
|
|
245
246
|
ted theresa thompson tiffany timothy tony
|
|
246
247
|
valeria
|
|
247
248
|
walter warren weinstein weiss william
|
|
@@ -20,7 +20,7 @@ POLITICS = 'politics'
|
|
|
20
20
|
PROPERTY = 'property'
|
|
21
21
|
PUBLICIST = 'publicist'
|
|
22
22
|
REPUTATION = 'reputation'
|
|
23
|
-
SKYPE_LOG= '
|
|
23
|
+
SKYPE_LOG = 'Skype log'
|
|
24
24
|
SOCIAL = 'social'
|
|
25
25
|
SPEECH = 'speech'
|
|
26
26
|
|
|
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
|
|
|
39
39
|
NYT = "New York Times"
|
|
40
40
|
PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
|
|
41
41
|
PALM_BEACH_POST = f'{PALM_BEACH} Post'
|
|
42
|
+
SHIMON_POST = 'The Shimon Post'
|
|
42
43
|
THE_REAL_DEAL = 'The Real Deal'
|
|
43
44
|
WAPO = 'WaPo'
|
|
44
45
|
VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
|
epstein_files/util/constants.py
CHANGED
|
@@ -66,7 +66,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
66
66
|
BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
|
|
67
67
|
BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
|
|
68
68
|
'Dangene and Jennie Enterprise': re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
|
|
69
|
-
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
|
|
69
|
+
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
|
|
70
70
|
DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
|
|
71
71
|
DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
|
|
72
72
|
DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
|
|
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
128
128
|
PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
|
|
129
129
|
REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
|
|
130
130
|
RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
|
|
131
|
-
|
|
131
|
+
ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
|
|
132
132
|
ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
|
|
133
133
|
ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
|
|
134
134
|
ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
|
|
@@ -163,6 +163,7 @@ EMAILERS = [
|
|
|
163
163
|
DEEPAK_CHOPRA,
|
|
164
164
|
GLENN_DUBIN,
|
|
165
165
|
GORDON_GETTY,
|
|
166
|
+
'Kevin Bright',
|
|
166
167
|
'Jack Lang',
|
|
167
168
|
JACK_SCAROLA,
|
|
168
169
|
JAY_LEFKOWITZ,
|
|
@@ -257,7 +258,6 @@ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
|
|
|
257
258
|
LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
|
|
258
259
|
KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
|
|
259
260
|
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
260
|
-
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
261
261
|
NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
|
|
262
262
|
NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
|
|
263
263
|
OBAMA_JOKE = 'joke about Obama'
|
|
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
|
|
|
265
265
|
PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
|
|
266
266
|
PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
|
|
267
267
|
PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
|
|
268
|
-
|
|
268
|
+
REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
269
269
|
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
270
270
|
SINGLE_PAGE = 'single page of'
|
|
271
271
|
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
272
272
|
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
273
|
-
THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
274
273
|
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
275
274
|
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
276
275
|
UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
|
|
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
|
|
|
500
499
|
EmailCfg(
|
|
501
500
|
id='029977',
|
|
502
501
|
author=LAWRANCE_VISOSKI,
|
|
503
|
-
recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
|
|
502
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
|
|
504
503
|
attribution_reason=LARRY_REASON,
|
|
505
504
|
duplicate_ids=['031129'],
|
|
506
505
|
),
|
|
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
|
|
|
508
507
|
EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
|
|
509
508
|
EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
|
|
510
509
|
EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
|
|
511
|
-
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
|
|
510
|
+
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
|
|
512
511
|
EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
|
|
513
512
|
EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
|
|
514
513
|
EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
|
|
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
|
|
|
573
572
|
attribution_reason='ends with "Respectfully, terry"',
|
|
574
573
|
author=TERRY_KAFKA,
|
|
575
574
|
fwded_text_after='From: Mike Cohen',
|
|
576
|
-
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
|
|
575
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
|
|
577
576
|
duplicate_ids=['028482'],
|
|
578
577
|
),
|
|
579
578
|
EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
|
|
@@ -665,6 +664,10 @@ EMAILS_CONFIG = [
|
|
|
665
664
|
EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
|
|
666
665
|
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
|
|
667
666
|
EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
|
|
667
|
+
EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
668
|
+
EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
669
|
+
EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
|
|
670
|
+
EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
|
|
668
671
|
EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
|
|
669
672
|
EmailCfg(id='026755', is_fwded_article=True), # HuffPo
|
|
670
673
|
EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
|
|
@@ -710,6 +713,7 @@ EMAILS_CONFIG = [
|
|
|
710
713
|
EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
|
|
711
714
|
EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
|
|
712
715
|
EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
|
|
716
|
+
EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
|
|
713
717
|
EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
|
|
714
718
|
EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
|
|
715
719
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
@@ -852,9 +856,9 @@ EMAILS_CONFIG = [
|
|
|
852
856
|
EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
|
|
853
857
|
EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
|
|
854
858
|
EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
|
|
855
|
-
EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
|
|
856
859
|
EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
|
|
857
860
|
EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
|
|
861
|
+
EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
|
|
858
862
|
]
|
|
859
863
|
|
|
860
864
|
|
|
@@ -1335,8 +1339,8 @@ OTHER_FILES_PROPERTY = [
|
|
|
1335
1339
|
DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
|
|
1336
1340
|
DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
|
|
1337
1341
|
DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
|
|
1338
|
-
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{
|
|
1339
|
-
DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{
|
|
1342
|
+
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
|
|
1343
|
+
DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
|
|
1340
1344
|
DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
|
|
1341
1345
|
DocCfg(id='018743', description=f"Las Vegas property listing"),
|
|
1342
1346
|
DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
|
|
@@ -1497,13 +1501,13 @@ OTHER_FILES_MISC = [
|
|
|
1497
1501
|
DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1498
1502
|
DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1499
1503
|
DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1504
|
+
DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
|
|
1500
1505
|
DocCfg(
|
|
1501
1506
|
id='018224',
|
|
1502
1507
|
category=SKYPE_LOG,
|
|
1503
|
-
|
|
1508
|
+
author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
|
|
1504
1509
|
is_interesting=True, # we don't know who linkspirit is yet
|
|
1505
1510
|
),
|
|
1506
|
-
DocCfg(id='032210', category=SKYPE_LOG, description=f'Skype conversation with linkspirit', is_interesting=True),
|
|
1507
1511
|
DocCfg(
|
|
1508
1512
|
id='025147',
|
|
1509
1513
|
author=BROCKMAN_INC,
|
epstein_files/util/data.py
CHANGED
|
@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
|
|
|
21
21
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
22
22
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
23
23
|
|
|
24
|
-
|
|
25
24
|
def collapse_newlines(text):
    """Return text with every MULTINEWLINE_REGEX match replaced by two newline characters."""
    replacement = '\n\n'
    return MULTINEWLINE_REGEX.sub(replacement, text)
|
|
26
25
|
def date_str(dt):
    """Return the first ten characters of dt.isoformat(), or None when dt is falsy (e.g. None)."""
    if not dt:
        return None

    return dt.isoformat()[:10]
|
|
27
26
|
def escape_double_quotes(text: str) -> str:
    """Return text with a backslash inserted before every double quote character."""
    escaped_quote = r'\"'
    return text.replace('"', escaped_quote)
|
|
28
27
|
def escape_single_quotes(text: str) -> str:
    """Return text with a backslash inserted before every single quote character."""
    escaped_quote = r"\'"
    return text.replace("'", escaped_quote)
|
|
29
28
|
def iso_timestamp(dt):
    """Return dt.isoformat() with each 'T' replaced by a space, e.g. '2017-02-15 13:31:25'."""
    iso_text = dt.isoformat()
    return iso_text.replace('T', ' ')
|
|
29
|
+
def remove_time_from_timestamp_str(dt):
    """Return dt.isoformat(), dropping a trailing 'T00:00:00' so midnight values show only the date."""
    midnight_suffix = 'T00:00:00'
    iso_text = dt.isoformat()

    if iso_text.endswith(midnight_suffix):
        return iso_text[:-len(midnight_suffix)]

    return iso_text
|
|
30
30
|
def uniquify(_list):
    """Return the distinct elements of _list as a new list, in first-seen order.

    Elements must be hashable. dict.fromkeys() is used instead of set() so the
    result order is deterministic: iterating a set of strings can yield a
    different order on every interpreter run because of hash randomization.
    """
    return list(dict.fromkeys(_list))
|
|
31
31
|
def without_falsey(_list):
    """Return a new list holding only the truthy elements of _list, in their original order."""
    return list(filter(None, _list))
|
|
32
32
|
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -8,7 +8,7 @@ from dateutil.parser import parse
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.util.constant.names import *
|
|
10
10
|
from epstein_files.util.constant.strings import *
|
|
11
|
-
from epstein_files.util.data import without_falsey
|
|
11
|
+
from epstein_files.util.data import remove_time_from_timestamp_str, without_falsey
|
|
12
12
|
|
|
13
13
|
DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
|
|
14
14
|
Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
|
|
@@ -47,12 +47,11 @@ FINANCIAL_REPORTS_AUTHORS = [
|
|
|
47
47
|
]
|
|
48
48
|
|
|
49
49
|
# Fields like timestamp and author are better added from the Document object
|
|
50
|
-
|
|
50
|
+
NON_METADATA_FIELDS = [
|
|
51
51
|
'actual_text',
|
|
52
52
|
'date',
|
|
53
53
|
'id',
|
|
54
|
-
'
|
|
55
|
-
'was_generated',
|
|
54
|
+
'is_synthetic',
|
|
56
55
|
]
|
|
57
56
|
|
|
58
57
|
|
|
@@ -68,10 +67,10 @@ class DocCfg:
|
|
|
68
67
|
date (str | None): If passed will be immediately parsed into the 'timestamp' field
|
|
69
68
|
dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
|
|
70
69
|
dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
|
|
71
|
-
duplicate_ids (list[str]):
|
|
70
|
+
duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
|
|
72
71
|
is_interesting (bool): Override other considerations and always consider this file interesting
|
|
73
72
|
timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
|
|
74
|
-
|
|
73
|
+
is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
|
|
75
74
|
"""
|
|
76
75
|
id: str
|
|
77
76
|
author: str | None = None
|
|
@@ -82,8 +81,8 @@ class DocCfg:
|
|
|
82
81
|
dupe_type: DuplicateType | None = None
|
|
83
82
|
duplicate_ids: list[str] = field(default_factory=list)
|
|
84
83
|
is_interesting: bool = False
|
|
84
|
+
is_synthetic: bool = False
|
|
85
85
|
timestamp: datetime | None = None
|
|
86
|
-
was_generated: bool = False
|
|
87
86
|
|
|
88
87
|
def __post_init__(self):
|
|
89
88
|
if self.date:
|
|
@@ -94,13 +93,17 @@ class DocCfg:
|
|
|
94
93
|
|
|
95
94
|
def complete_description(self) -> str | None:
|
|
96
95
|
"""String that summarizes what is known about this document."""
|
|
97
|
-
if self.category and not self.description:
|
|
96
|
+
if self.category and not self.description and not self.author:
|
|
98
97
|
return self.category
|
|
99
98
|
elif self.category == REPUTATION:
|
|
100
99
|
return f"{REPUTATION_MGMT}: {self.description}"
|
|
100
|
+
elif self.category == SKYPE_LOG:
|
|
101
|
+
msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
|
|
102
|
+
return f"{msg} {self.description}" if self.description else msg
|
|
101
103
|
elif self.author and self.description:
|
|
102
104
|
if self.category in [ACADEMIA, BOOK]:
|
|
103
|
-
|
|
105
|
+
title = self.description if '"' in self.description else f"'{self.description}'"
|
|
106
|
+
return f"{title} by {self.author}"
|
|
104
107
|
elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
|
|
105
108
|
return f"{self.author} report: '{self.description}'"
|
|
106
109
|
elif self.category == LEGAL and 'v.' in self.author:
|
|
@@ -111,10 +114,6 @@ class DocCfg:
|
|
|
111
114
|
pieces = without_falsey([self.author, self.description])
|
|
112
115
|
return ' '.join(pieces) if pieces else None
|
|
113
116
|
|
|
114
|
-
def duplicate_reason(self) -> str | None:
|
|
115
|
-
if self.dupe_type is not None:
|
|
116
|
-
return DUPE_TYPE_STRS[self.dupe_type]
|
|
117
|
-
|
|
118
117
|
def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
|
|
119
118
|
"""Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
|
|
120
119
|
for id in self.duplicate_ids:
|
|
@@ -123,35 +122,17 @@ class DocCfg:
|
|
|
123
122
|
dupe_cfg.dupe_of_id = self.id
|
|
124
123
|
dupe_cfg.duplicate_ids = []
|
|
125
124
|
dupe_cfg.dupe_type = self.dupe_type
|
|
126
|
-
dupe_cfg.
|
|
125
|
+
dupe_cfg.is_synthetic = True
|
|
127
126
|
yield dupe_cfg
|
|
128
127
|
|
|
129
128
|
def metadata(self) -> Metadata:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
if self.category in [EMAIL, TEXT_MESSAGE]:
|
|
133
|
-
del non_null_fields['category']
|
|
134
|
-
|
|
135
|
-
return non_null_fields
|
|
136
|
-
|
|
137
|
-
def non_null_field_names(self) -> list[str]:
|
|
138
|
-
return [f.name for f in self.sorted_fields() if getattr(self, f.name)]
|
|
139
|
-
|
|
140
|
-
def sorted_fields(self) -> list[Field]:
|
|
141
|
-
return sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name))
|
|
142
|
-
|
|
143
|
-
def title_by_author(self) -> str:
|
|
144
|
-
if not (self.author and self.description):
|
|
145
|
-
raise RuntimeError(f"Can't call title_by_author() without author and description!")
|
|
146
|
-
|
|
147
|
-
title = self.description if '"' in self.description else f"'{self.description}'"
|
|
148
|
-
return f"{title} by {self.author}"
|
|
129
|
+
return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
|
|
149
130
|
|
|
150
131
|
def _props_strs(self) -> list[str]:
|
|
151
132
|
props = []
|
|
152
133
|
add_prop = lambda f, value: props.append(f"{f.name}={value}")
|
|
153
134
|
|
|
154
|
-
for _field in self.
|
|
135
|
+
for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
|
|
155
136
|
value = getattr(self, _field.name)
|
|
156
137
|
|
|
157
138
|
if value is None or value is False or (isinstance(value, list) and len(value) == 0):
|
|
@@ -160,13 +141,13 @@ class DocCfg:
|
|
|
160
141
|
add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
|
|
161
142
|
elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
|
|
162
143
|
continue
|
|
163
|
-
elif _field.name == 'recipients' and
|
|
144
|
+
elif _field.name == 'recipients' and value:
|
|
164
145
|
recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
|
|
165
146
|
add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
|
|
166
147
|
elif _field.name == 'timestamp' and self.date is not None:
|
|
167
148
|
continue # Don't print both timestamp and date
|
|
168
149
|
elif isinstance(value, datetime):
|
|
169
|
-
value_str =
|
|
150
|
+
value_str = remove_time_from_timestamp_str(value)
|
|
170
151
|
add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
|
|
171
152
|
elif isinstance(value, str):
|
|
172
153
|
if "'" in value:
|
|
@@ -221,18 +202,15 @@ class EmailCfg(CommunicationCfg):
|
|
|
221
202
|
"""
|
|
222
203
|
Attributes:
|
|
223
204
|
actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
|
|
205
|
+
fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
|
|
224
206
|
is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
|
|
225
207
|
recipients (list[str | None]): Who received the email
|
|
226
208
|
"""
|
|
227
|
-
actual_text: str | None = None
|
|
228
|
-
fwded_text_after: str | None = None
|
|
209
|
+
actual_text: str | None = None
|
|
210
|
+
fwded_text_after: str | None = None
|
|
229
211
|
is_fwded_article: bool = False
|
|
230
212
|
recipients: list[str | None] = field(default_factory=list)
|
|
231
213
|
|
|
232
|
-
@classmethod
|
|
233
|
-
def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
|
|
234
|
-
return cls(**asdict(cfg))
|
|
235
|
-
|
|
236
214
|
# This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
|
|
237
215
|
def __repr__(self) -> str:
|
|
238
216
|
return super().__repr__()
|
|
@@ -11,8 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
|
|
|
11
11
|
KB = 1024
|
|
12
12
|
MB = KB * KB
|
|
13
13
|
|
|
14
|
+
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
15
|
+
def file_size_str(file_path: str | Path) -> str:
    """Return the size of the file at file_path as formatted by file_size_to_str()."""
    size_in_bytes = file_size(file_path)
    return file_size_to_str(size_in_bytes)
|
|
14
16
|
|
|
15
|
-
# Coerce methods
|
|
17
|
+
# Coerce methods handle both string and int arguments.
|
|
16
18
|
def coerce_file_name(filename_or_id):
    """Return coerce_file_stem(filename_or_id) with a '.txt' extension appended."""
    stem = coerce_file_stem(filename_or_id)
    return stem + '.txt'
|
|
17
19
|
def coerce_file_path(filename_or_id):
    """Return the location under DOCS_DIR of the file named by coerce_file_name(filename_or_id)."""
    file_name = coerce_file_name(filename_or_id)
    return DOCS_DIR.joinpath(file_name)
|
|
18
20
|
def id_str(id):
    """Return id converted with int() and zero-padded to a width of at least six characters."""
    numeric_id = int(id)
    return format(numeric_id, '06d')
|
|
@@ -44,14 +46,6 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
|
|
|
44
46
|
return file_match.group(1)
|
|
45
47
|
|
|
46
48
|
|
|
47
|
-
def file_size(file_path: str | Path) -> int:
|
|
48
|
-
return Path(file_path).stat().st_size
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def file_size_str(file_path: str | Path) -> str:
|
|
52
|
-
return file_size_to_str(file_size(file_path))
|
|
53
|
-
|
|
54
|
-
|
|
55
49
|
def file_size_to_str(size: int) -> str:
|
|
56
50
|
digits = 2
|
|
57
51
|
|
|
@@ -223,6 +223,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
223
223
|
'Linda Pinto': 'interior design at Alberto Pinto Cabinet',
|
|
224
224
|
MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
|
|
225
225
|
NADIA_MARCINKO: 'pilot',
|
|
226
|
+
'Sean J. Lancaster': 'airplane reseller',
|
|
226
227
|
}
|
|
227
228
|
),
|
|
228
229
|
HighlightedNames(
|
|
@@ -260,6 +261,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
260
261
|
MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
|
|
261
262
|
MICHAEL_MILLER: 'Steptoe LLP partner',
|
|
262
263
|
REID_WEINGARTEN: 'Steptoe LLP partner',
|
|
264
|
+
ROBERT_D_CRITTON_JR: 'criminal defense attorney',
|
|
265
|
+
'Robert Gold': None,
|
|
263
266
|
'Roy Black': CRIMINAL_DEFENSE_2008,
|
|
264
267
|
SCOTT_J_LINK: None,
|
|
265
268
|
TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
|
|
@@ -310,15 +313,17 @@ HIGHLIGHTED_NAMES = [
|
|
|
310
313
|
}
|
|
311
314
|
),
|
|
312
315
|
HighlightedNames(
|
|
313
|
-
label=
|
|
316
|
+
label=FINANCE,
|
|
314
317
|
style='green',
|
|
315
318
|
pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
316
319
|
emailers={
|
|
317
320
|
AMANDA_ENS: 'Citigroup',
|
|
321
|
+
BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
|
|
318
322
|
DANIEL_SABBA: 'UBS Investment Bank',
|
|
319
323
|
DAVID_FISZEL: 'CIO Honeycomb Asset Management',
|
|
320
324
|
JES_STALEY: 'former CEO of Barclays',
|
|
321
325
|
JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
|
|
326
|
+
'Laurie Cameron': 'currency trading',
|
|
322
327
|
LEON_BLACK: 'Apollo CEO',
|
|
323
328
|
MARC_LEON: 'Luxury Properties Sari Morrocco',
|
|
324
329
|
MELANIE_SPINELLA: f'representative of {LEON_BLACK}',
|
|
@@ -378,7 +383,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
378
383
|
HighlightedNames(
|
|
379
384
|
label=JOURNALIST,
|
|
380
385
|
style='bright_yellow',
|
|
381
|
-
pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
|
|
386
|
+
pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?(Hill|Patterson)|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
|
|
382
387
|
emailers = {
|
|
383
388
|
EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
|
|
384
389
|
'James Hill': 'ABC News',
|
|
@@ -458,6 +463,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
458
463
|
IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
|
|
459
464
|
MICHAEL_SITRICK: 'crisis PR',
|
|
460
465
|
PEGGY_SIEGAL: 'socialite',
|
|
466
|
+
'R. Couri Hay': None,
|
|
461
467
|
ROSS_GOW: 'Acuity Reputation Management',
|
|
462
468
|
TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
|
|
463
469
|
}
|
|
@@ -485,6 +491,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
485
491
|
style='red bold',
|
|
486
492
|
pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
|
|
487
493
|
emailers = {
|
|
494
|
+
'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
|
|
488
495
|
MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
|
|
489
496
|
RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
|
|
490
497
|
SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
|
|
@@ -493,14 +500,16 @@ HIGHLIGHTED_NAMES = [
|
|
|
493
500
|
HighlightedNames(
|
|
494
501
|
label=ACADEMIA,
|
|
495
502
|
style='light_goldenrod2',
|
|
496
|
-
pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
|
|
503
|
+
pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Norman\s*Finkelstein|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
|
|
497
504
|
emailers = {
|
|
498
505
|
DAVID_HAIG: None,
|
|
499
506
|
JOSCHA_BACH: 'cognitive science / AI research',
|
|
500
507
|
'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
|
|
508
|
+
'Ed Boyden': 'Associate Professor, MIT Media Lab neurobiology',
|
|
501
509
|
LAWRENCE_KRAUSS: 'theoretical physicist',
|
|
502
510
|
LINDA_STONE: 'ex-Microsoft, MIT Media Lab',
|
|
503
511
|
MARK_TRAMO: 'professor of neurology at UCLA',
|
|
512
|
+
'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
|
|
504
513
|
NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
|
|
505
514
|
PETER_ATTIA: 'longevity medicine',
|
|
506
515
|
ROBERT_TRIVERS: 'evolutionary biology',
|
|
@@ -661,7 +670,7 @@ def get_style_for_category(category: str) -> str | None:
|
|
|
661
670
|
elif category in [CONFERENCE, SPEECH]:
|
|
662
671
|
return f"{get_style_for_category(ACADEMIA)} dim"
|
|
663
672
|
elif category == SOCIAL:
|
|
664
|
-
return
|
|
673
|
+
return get_style_for_category(PUBLICIST)
|
|
665
674
|
|
|
666
675
|
category = CATEGORY_STYLE_MAPPING.get(category, category)
|
|
667
676
|
|
epstein_files/util/logging.py
CHANGED
|
@@ -32,7 +32,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
|
|
|
32
32
|
# Augment the standard log highlighter with 'epstein_filename' matcher
|
|
33
33
|
class LogHighlighter(ReprHighlighter):
|
|
34
34
|
highlights = ReprHighlighter.highlights + [
|
|
35
|
-
*[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
35
|
+
*[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
36
36
|
"(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
|
|
37
37
|
]
|
|
38
38
|
|
epstein_files/util/output.py
CHANGED
|
@@ -125,7 +125,7 @@ def print_json_files(epstein_files: EpsteinFiles):
|
|
|
125
125
|
console.print_json(json_file.json_str(), indent=4, sort_keys=False)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def
|
|
128
|
+
def write_json_metadata(epstein_files: EpsteinFiles) -> None:
|
|
129
129
|
json_str = epstein_files.json_metadata()
|
|
130
130
|
|
|
131
131
|
if args.build:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.12
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
5
|
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
epstein_files/__init__.py,sha256=qVFB7sS6XSlZX-ByyDwdbGSn2h06aoX2Mx8WcgRb-To,4710
|
|
2
|
+
epstein_files/documents/communication.py,sha256=XapJlNfcaww3TpSkZIBE5c1Skqv_pFEFlIVi06V7k3E,2046
|
|
3
|
+
epstein_files/documents/document.py,sha256=2FxyqWKROi7w2SmaQ493oGKekNvYAHSuv2YsDhPNQBU,16987
|
|
4
|
+
epstein_files/documents/email.py,sha256=y8QTq349LWlm2LLUJ8rGcdkDbaGYJCV99wJytPcEMew,40587
|
|
5
|
+
epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
|
|
6
|
+
epstein_files/documents/imessage/text_message.py,sha256=4gFvTfulj_Su10lNQl6Hq_p9ArTrSmn5pfC22YRJXjI,2794
|
|
7
|
+
epstein_files/documents/json_file.py,sha256=tIYTwA3FYkwVZSpXvFYyUoH9m2sGYCD1U0ttamH6r1o,1306
|
|
8
|
+
epstein_files/documents/messenger_log.py,sha256=yT4WQyTE_W6yelug_YGpBMRJ0YxWNtX4rKoEj8n5TMA,6260
|
|
9
|
+
epstein_files/documents/other_file.py,sha256=pnl_q1o7ur3eeqGPwsYL2qbM3Y8O9LX6j6LbWnoxAiE,9939
|
|
10
|
+
epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
|
|
11
|
+
epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
|
|
12
|
+
epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
|
|
13
|
+
epstein_files/util/constant/names.py,sha256=CLWXrln8J-Dth6C-YF7Wdy7UoA8dybKJyqOLETrBeek,10284
|
|
14
|
+
epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
|
|
15
|
+
epstein_files/util/constant/strings.py,sha256=02DwbhAe8qBRq5HOUFx5FafXJ1P2-RJf9TCVu2b7UDQ,1932
|
|
16
|
+
epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
|
|
17
|
+
epstein_files/util/constants.py,sha256=BpPRivoDYFI0uLU35kKpOdrSI6Rr9cmcrRj9-kANVrs,111834
|
|
18
|
+
epstein_files/util/data.py,sha256=X3AutdW-ascIlE2bz1BtN0Bywqpe4OwYzJ-diEpfogI,2992
|
|
19
|
+
epstein_files/util/doc_cfg.py,sha256=_f03qtA7qVbViHwqMXC4O5nfNbh90zDSq6El9Ior6f0,8996
|
|
20
|
+
epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
|
|
21
|
+
epstein_files/util/file_helper.py,sha256=tacTe1GcAnckPFvjMgxRRSLnFgr2aVIYsgfDR_C9uXk,2780
|
|
22
|
+
epstein_files/util/highlighted_group.py,sha256=xrDLB05YUYGsU6vDvhvENMvIyjEz-9eb9xN-RjfCQbQ,36531
|
|
23
|
+
epstein_files/util/logging.py,sha256=fuREq06xUUI3DfCV2JE-8QM-sQKxpLDj0_AYFO6qR1M,1983
|
|
24
|
+
epstein_files/util/output.py,sha256=XcflgSOlzUGj6FsFaK6j4Dljld8A0h_uVV7ERcI_EYw,8120
|
|
25
|
+
epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
|
|
26
|
+
epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
|
|
27
|
+
epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
|
|
28
|
+
epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
|
|
29
|
+
epstein_files-1.0.12.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
30
|
+
epstein_files-1.0.12.dist-info/METADATA,sha256=imTDdrHjWC-bWuw58SAyjYyiziZsqHkO7ODQUntw6YQ,5480
|
|
31
|
+
epstein_files-1.0.12.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
32
|
+
epstein_files-1.0.12.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
|
|
33
|
+
epstein_files-1.0.12.dist-info/RECORD,,
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
epstein_files/__init__.py,sha256=4zxX1tw-0xMwpM-Sbq7PezV0YNS9zN-P6gc9BQ1BqKU,4710
|
|
2
|
-
epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
|
|
3
|
-
epstein_files/documents/document.py,sha256=dECV0bSnOJzPfOIHyHeG5rNxKd6uwuiso35-sQZg9No,18353
|
|
4
|
-
epstein_files/documents/email.py,sha256=yXiW7mB4myU8G9DY7PnnqazaCqeAR3dHr35NfBplfRU,38519
|
|
5
|
-
epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
|
|
6
|
-
epstein_files/documents/imessage/text_message.py,sha256=3HlNp75JIoMlWj7PaUWIFry3qlGEmpGu5OmdmsBYS34,2807
|
|
7
|
-
epstein_files/documents/json_file.py,sha256=HsnVWPZXVxTF_DadL2YtJtsiXKXOd18PUs05O33tjNc,1317
|
|
8
|
-
epstein_files/documents/messenger_log.py,sha256=uSPlg85jGTwod1cV9f7MtxSNqmMZ61JBFzoiRNqg52M,6263
|
|
9
|
-
epstein_files/documents/other_file.py,sha256=S_Y-SxYYYXtx42JHmhFWl5BbTduNI7cwQjeYHBJA7sc,9950
|
|
10
|
-
epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
|
|
11
|
-
epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
|
|
12
|
-
epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
|
|
13
|
-
epstein_files/util/constant/names.py,sha256=KKJEYFpdOp4xDwXe5dhrqYgF12oJODvVSFpAB28Q76A,10153
|
|
14
|
-
epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
|
|
15
|
-
epstein_files/util/constant/strings.py,sha256=FDtksfH50PSxtSBw9XhmqxtrgRgGxdIvGiAR2bbPpu4,1899
|
|
16
|
-
epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
|
|
17
|
-
epstein_files/util/constants.py,sha256=LPSI6Z0n3ChFDnMGYVO80cGuSKZf0OoyUzLih_jlRKI,111434
|
|
18
|
-
epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
|
|
19
|
-
epstein_files/util/doc_cfg.py,sha256=OZlocAWldfR8Nomiad4FxQeyhNMbd0PQ-rumKn2nWBg,9641
|
|
20
|
-
epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
|
|
21
|
-
epstein_files/util/file_helper.py,sha256=-higKqc9J5IfNpzMzg-9j1ps3beV4N2cw8kdAxfm7NA,2835
|
|
22
|
-
epstein_files/util/highlighted_group.py,sha256=fU-8ns50uUolzPEAxadF5AnPLjn383KpEeyRXfFbv_U,35971
|
|
23
|
-
epstein_files/util/logging.py,sha256=8e22WaBfDAKEmkcr3Gb4TdqtFSkU4FQDpk3Z6hfSzbw,1977
|
|
24
|
-
epstein_files/util/output.py,sha256=UzTU0mNHEmeJr3w2TXAp19X497GB6_-HyW0mfztI1jk,8120
|
|
25
|
-
epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
|
|
26
|
-
epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
|
|
27
|
-
epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
|
|
28
|
-
epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
|
|
29
|
-
epstein_files-1.0.11.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
30
|
-
epstein_files-1.0.11.dist-info/METADATA,sha256=HBW3t1F9lkoN6GIR7ySV2kBYnJhNEF9otDZWnf03jUo,5480
|
|
31
|
-
epstein_files-1.0.11.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
32
|
-
epstein_files-1.0.11.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
|
|
33
|
-
epstein_files-1.0.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|