epstein-files 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +66 -131
- epstein_files/documents/document.py +12 -3
- epstein_files/documents/email.py +33 -13
- epstein_files/documents/imessage/text_message.py +11 -15
- epstein_files/documents/messenger_log.py +15 -11
- epstein_files/documents/other_file.py +13 -8
- epstein_files/epstein_files.py +51 -43
- epstein_files/util/constant/names.py +21 -24
- epstein_files/util/constant/output_files.py +29 -0
- epstein_files/util/constant/strings.py +8 -2
- epstein_files/util/constant/urls.py +11 -7
- epstein_files/util/constants.py +325 -227
- epstein_files/util/data.py +12 -33
- epstein_files/util/doc_cfg.py +7 -14
- epstein_files/util/env.py +5 -3
- epstein_files/util/file_helper.py +0 -22
- epstein_files/util/highlighted_group.py +31 -26
- epstein_files/util/logging.py +7 -0
- epstein_files/util/output.py +179 -0
- epstein_files/util/rich.py +22 -10
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/METADATA +32 -7
- epstein_files-1.0.3.dist-info/RECORD +33 -0
- epstein_files-1.0.3.dist-info/entry_points.txt +7 -0
- epstein_files-1.0.1.dist-info/RECORD +0 -30
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.1.dist-info → epstein_files-1.0.3.dist-info}/WHEEL +0 -0
epstein_files/util/data.py
CHANGED
|
@@ -3,15 +3,10 @@ Helpers for dealing with various kinds of data.
|
|
|
3
3
|
"""
|
|
4
4
|
import itertools
|
|
5
5
|
import re
|
|
6
|
-
import time
|
|
7
|
-
from dataclasses import dataclass, field
|
|
8
6
|
from datetime import datetime, timezone
|
|
9
7
|
from dateutil import tz
|
|
10
8
|
from typing import TypeVar
|
|
11
9
|
|
|
12
|
-
from dateutil.parser import parse
|
|
13
|
-
from rich.text import Text
|
|
14
|
-
|
|
15
10
|
from epstein_files.util.constant import names
|
|
16
11
|
from epstein_files.util.env import args
|
|
17
12
|
from epstein_files.util.logging import logger
|
|
@@ -24,27 +19,20 @@ CONSTANT_VAR_REGEX = re.compile(r"^[A-Z_]+$")
|
|
|
24
19
|
ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTANT_VAR_REGEX.match(k)]
|
|
25
20
|
|
|
26
21
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
27
|
-
TIMEZONE_INFO = {"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
|
|
31
|
-
return {k: sorted(list(v)) for k, v in d.items()}
|
|
32
|
-
|
|
22
|
+
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
33
23
|
|
|
34
|
-
def extract_datetime(s: str) -> datetime | None:
|
|
35
|
-
match = ISO_DATE_REGEX.search(s)
|
|
36
24
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
25
|
+
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
26
|
+
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
27
|
+
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
|
28
|
+
escape_single_quotes = lambda text: text.replace("'", r"\'")
|
|
29
|
+
iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
|
|
30
|
+
uniquify = lambda _list: list(set(_list))
|
|
31
|
+
without_falsey = lambda _list: [e for e in _list if e]
|
|
41
32
|
|
|
42
|
-
if len(date_str) == 4:
|
|
43
|
-
date_str += '-01-01'
|
|
44
|
-
elif len(date_str) == 7:
|
|
45
|
-
date_str += '-01'
|
|
46
33
|
|
|
47
|
-
|
|
34
|
+
def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
|
|
35
|
+
return {k: sorted(list(v)) for k, v in d.items()}
|
|
48
36
|
|
|
49
37
|
|
|
50
38
|
def extract_last_name(name: str) -> str:
|
|
@@ -91,8 +79,8 @@ def ordinal_str(n: int) -> str:
|
|
|
91
79
|
return str(n) + suffix
|
|
92
80
|
|
|
93
81
|
|
|
94
|
-
def patternize(_pattern: str | re.Pattern):
|
|
95
|
-
return _pattern if isinstance(_pattern, re.Pattern) else re.compile(
|
|
82
|
+
def patternize(_pattern: str | re.Pattern) -> re.Pattern:
|
|
83
|
+
return _pattern if isinstance(_pattern, re.Pattern) else re.compile(fr"({_pattern})", re.IGNORECASE)
|
|
96
84
|
|
|
97
85
|
|
|
98
86
|
def remove_timezone(timestamp: datetime) -> datetime:
|
|
@@ -106,12 +94,3 @@ def remove_timezone(timestamp: datetime) -> datetime:
|
|
|
106
94
|
def sort_dict(d: dict[str | None, int] | dict[str, int]) -> list[tuple[str | None, int]]:
|
|
107
95
|
sort_key = lambda e: (e[0] or '').lower() if args.sort_alphabetical else [-e[1], (e[0] or '').lower()]
|
|
108
96
|
return sorted(d.items(), key=sort_key)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
112
|
-
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
113
|
-
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
|
114
|
-
escape_single_quotes = lambda text: text.replace("'", r"\'")
|
|
115
|
-
iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
|
|
116
|
-
uniquify = lambda _list: list(set(_list))
|
|
117
|
-
without_nones = lambda _list: [e for e in _list if e]
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -8,7 +8,7 @@ from dateutil.parser import parse
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.util.constant.names import *
|
|
10
10
|
from epstein_files.util.constant.strings import *
|
|
11
|
-
from epstein_files.util.data import
|
|
11
|
+
from epstein_files.util.data import without_falsey
|
|
12
12
|
|
|
13
13
|
DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
|
|
14
14
|
Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
|
|
@@ -116,10 +116,12 @@ class DocCfg:
|
|
|
116
116
|
return self.title_by_author()
|
|
117
117
|
elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
|
|
118
118
|
return f"{self.author} report: '{self.description}'"
|
|
119
|
+
elif self.category == LEGAL and 'v.' in self.author:
|
|
120
|
+
return f"{self.author}: '{self.description}'"
|
|
119
121
|
elif self.category and self.author is None and self.description is None:
|
|
120
122
|
return self.category
|
|
121
123
|
|
|
122
|
-
pieces =
|
|
124
|
+
pieces = without_falsey([self.author, self.description])
|
|
123
125
|
return ' '.join(pieces) if pieces else None
|
|
124
126
|
|
|
125
127
|
def metadata(self) -> Metadata:
|
|
@@ -176,16 +178,6 @@ class DocCfg:
|
|
|
176
178
|
|
|
177
179
|
return props
|
|
178
180
|
|
|
179
|
-
def __eq__(self, other: 'DocCfg') -> bool:
|
|
180
|
-
"""Return True if everything matches other than the two 'dupe_' fields ('duplicate_ids' is compared)."""
|
|
181
|
-
for _field in self.sorted_fields():
|
|
182
|
-
if _field.name == 'id' or _field.name.startswith('dupe'):
|
|
183
|
-
continue
|
|
184
|
-
elif getattr(self, _field.name) != getattr(other, _field.name):
|
|
185
|
-
return False
|
|
186
|
-
|
|
187
|
-
return True
|
|
188
|
-
|
|
189
181
|
def __repr__(self) -> str:
|
|
190
182
|
props = self._props_strs()
|
|
191
183
|
type_str = f"{type(self).__name__}("
|
|
@@ -231,6 +223,7 @@ class EmailCfg(CommunicationCfg):
|
|
|
231
223
|
recipients (list[str | None]): Who received the email
|
|
232
224
|
"""
|
|
233
225
|
actual_text: str | None = None # Override for the Email._actual_text() method for particularly broken emails
|
|
226
|
+
fwded_text_after: str | None = None # If set, any text after this is a fwd of an article or similar
|
|
234
227
|
is_fwded_article: bool = False
|
|
235
228
|
recipients: list[str | None] = field(default_factory=list)
|
|
236
229
|
|
|
@@ -242,7 +235,7 @@ class EmailCfg(CommunicationCfg):
|
|
|
242
235
|
def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
|
|
243
236
|
return cls(**asdict(cfg))
|
|
244
237
|
|
|
245
|
-
# This is necessary for some dumb reason
|
|
238
|
+
# This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
|
|
246
239
|
def __repr__(self) -> str:
|
|
247
240
|
return super().__repr__()
|
|
248
241
|
|
|
@@ -253,6 +246,6 @@ class TextCfg(CommunicationCfg):
|
|
|
253
246
|
super().__post_init__()
|
|
254
247
|
self.category = TEXT_MESSAGE
|
|
255
248
|
|
|
256
|
-
# This is necessary for some dumb reason
|
|
249
|
+
# This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
|
|
257
250
|
def __repr__(self) -> str:
|
|
258
251
|
return super().__repr__()
|
epstein_files/util/env.py
CHANGED
|
@@ -6,16 +6,18 @@ from sys import argv
|
|
|
6
6
|
|
|
7
7
|
from epstein_files.util.logging import datefinder_logger, env_log_level, logger
|
|
8
8
|
|
|
9
|
+
COUNT_WORDS_SCRIPT = 'count_words.py'
|
|
9
10
|
DEFAULT_WIDTH = 154
|
|
10
|
-
HTML_SCRIPTS = ['
|
|
11
|
+
HTML_SCRIPTS = ['epstein_generate', 'generate_html.py', COUNT_WORDS_SCRIPT]
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
|
|
14
|
-
parser.add_argument('--build', '-b', action='store_true', help='write
|
|
15
|
+
parser.add_argument('--build', '-b', action='store_true', help='write output to file')
|
|
15
16
|
parser.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
|
|
16
17
|
parser.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just interesting ones')
|
|
17
18
|
parser.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
|
|
18
19
|
parser.add_argument('--name', '-n', action='append', dest='names', help='specify the name(s) whose communications should be output')
|
|
20
|
+
parser.add_argument('--output-file', '-out', metavar='FILE', default='index.html', help='write output to FILE in docs/ (default=index.html)')
|
|
19
21
|
parser.add_argument('--output-emails', '-oe', action='store_true', help='generate other files section')
|
|
20
22
|
parser.add_argument('--output-other-files', '-oo', action='store_true', help='generate other files section')
|
|
21
23
|
parser.add_argument('--output-texts', '-ot', action='store_true', help='generate other files section')
|
|
@@ -64,7 +66,7 @@ datefinder_logger.setLevel(logger.level)
|
|
|
64
66
|
|
|
65
67
|
# Massage args that depend on other args to the appropriate state
|
|
66
68
|
if not (args.json_metadata or args.output_texts or args.output_emails or args.output_other_files):
|
|
67
|
-
if is_html_script:
|
|
69
|
+
if is_html_script and current_script != COUNT_WORDS_SCRIPT and not args.make_clean:
|
|
68
70
|
logger.warning(f"No output section chosen; outputting default of texts, selected emails, and other files...")
|
|
69
71
|
|
|
70
72
|
args.output_texts = True
|
epstein_files/util/file_helper.py
CHANGED
|
@@ -8,7 +8,6 @@ from epstein_files.util.constant.strings import FILE_NAME_REGEX, FILE_STEM_REGEX
|
|
|
8
8
|
EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
|
|
9
9
|
DOCS_DIR_ENV = environ[EPSTEIN_DOCS_DIR_ENV_VAR_NAME]
|
|
10
10
|
DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
|
|
11
|
-
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
12
11
|
|
|
13
12
|
if not DOCS_DIR_ENV:
|
|
14
13
|
print(f"ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!")
|
|
@@ -17,20 +16,7 @@ elif not DOCS_DIR.exists():
|
|
|
17
16
|
print(f"ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!")
|
|
18
17
|
exit(1)
|
|
19
18
|
|
|
20
|
-
HTML_DIR = Path('docs')
|
|
21
19
|
EXTRACTED_EMAILS_DIR = Path('emails_extracted_from_legal_filings')
|
|
22
|
-
EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
23
|
-
GH_PAGES_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
24
|
-
JSON_METADATA_PATH = HTML_DIR.joinpath('epstein_files_nov_2025_cryptadamus_metadata.json')
|
|
25
|
-
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_emails_word_count.html')
|
|
26
|
-
|
|
27
|
-
BUILD_ARTIFACTS = [
|
|
28
|
-
EPSTEIN_WORD_COUNT_HTML_PATH,
|
|
29
|
-
GH_PAGES_HTML_PATH,
|
|
30
|
-
JSON_METADATA_PATH,
|
|
31
|
-
WORD_COUNT_HTML_PATH,
|
|
32
|
-
]
|
|
33
|
-
|
|
34
20
|
FILE_ID_REGEX = re.compile(fr".*{FILE_NAME_REGEX.pattern}")
|
|
35
21
|
FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
|
|
36
22
|
KB = 1024
|
|
@@ -110,11 +96,3 @@ def is_local_extract_file(filename) -> bool:
|
|
|
110
96
|
"""Return true if filename is of form 'HOUSE_OVERSIGHT_029835_1.txt'."""
|
|
111
97
|
file_match = FILE_ID_REGEX.match(str(filename))
|
|
112
98
|
return True if file_match and file_match.group(2) else False
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def make_clean() -> None:
|
|
116
|
-
"""Delete all build artifacts."""
|
|
117
|
-
for build_file in BUILD_ARTIFACTS:
|
|
118
|
-
if build_file.exists():
|
|
119
|
-
print(f"Removing build file '{build_file}'...")
|
|
120
|
-
build_file.unlink()
|
epstein_files/util/highlighted_group.py
CHANGED
|
@@ -2,7 +2,6 @@ import re
|
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
3
|
|
|
4
4
|
from rich.highlighter import RegexHighlighter
|
|
5
|
-
from rich.text import Text
|
|
6
5
|
|
|
7
6
|
from epstein_files.util.constant.names import *
|
|
8
7
|
from epstein_files.util.constant.strings import *
|
|
@@ -10,7 +9,7 @@ from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
|
|
|
10
9
|
from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS, HEADER_ABBREVIATIONS,
|
|
11
10
|
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX, VIRGIN_ISLANDS)
|
|
12
11
|
from epstein_files.util.doc_cfg import *
|
|
13
|
-
from epstein_files.util.data import extract_last_name, listify
|
|
12
|
+
from epstein_files.util.data import extract_last_name, listify, without_falsey
|
|
14
13
|
|
|
15
14
|
CIVIL_ATTORNEY = 'civil attorney'
|
|
16
15
|
CRIMINAL_DEFENSE_ATTORNEY = 'criminal defense attorney'
|
|
@@ -48,7 +47,6 @@ class HighlightedText:
|
|
|
48
47
|
label: str = ''
|
|
49
48
|
pattern: str = ''
|
|
50
49
|
style: str
|
|
51
|
-
# Computed fields
|
|
52
50
|
regex: re.Pattern = field(init=False)
|
|
53
51
|
theme_style_name: str = field(init=False)
|
|
54
52
|
_capture_group_label: str = field(init=False)
|
|
@@ -76,7 +74,7 @@ class HighlightedNames(HighlightedText):
|
|
|
76
74
|
Attributes:
|
|
77
75
|
category (str): optional string to use as an override for self.label in some contexts
|
|
78
76
|
emailers (dict[str, str | None]): optional names to construct regexes for (values are descriptions)
|
|
79
|
-
_pattern (str):
|
|
77
|
+
_pattern (str): regex pattern combining 'pattern' with first & last names of all 'emailers'
|
|
80
78
|
"""
|
|
81
79
|
category: str = ''
|
|
82
80
|
emailers: dict[str, str | None] = field(default_factory=dict)
|
|
@@ -102,7 +100,7 @@ class HighlightedNames(HighlightedText):
|
|
|
102
100
|
self.emailers.get(name),
|
|
103
101
|
]
|
|
104
102
|
|
|
105
|
-
info_pieces =
|
|
103
|
+
info_pieces = without_falsey(info_pieces)
|
|
106
104
|
return ', '.join(info_pieces) if info_pieces else None
|
|
107
105
|
|
|
108
106
|
def _emailer_pattern(self, name: str) -> str:
|
|
@@ -114,10 +112,10 @@ class HighlightedNames(HighlightedText):
|
|
|
114
112
|
if name in EMAILER_ID_REGEXES:
|
|
115
113
|
pattern = EMAILER_ID_REGEXES[name].pattern
|
|
116
114
|
|
|
117
|
-
# Include regex for last
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
115
|
+
# Include regex for first and last names
|
|
116
|
+
for partial_name in [first_name, last_name]:
|
|
117
|
+
if SIMPLE_NAME_REGEX.match(partial_name) and partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT:
|
|
118
|
+
pattern += fr"|{partial_name}"
|
|
121
119
|
|
|
122
120
|
return pattern
|
|
123
121
|
elif ' ' not in name:
|
|
@@ -163,7 +161,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
163
161
|
ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
|
|
164
162
|
BARBRO_C_EHNBOM: 'Swedish pharmaceuticals',
|
|
165
163
|
FRED_HADDAD: "co-founder of Heck's in West Virginia",
|
|
166
|
-
GERALD_BARTON: "Maryland property developer, fan of Trump's Irish golf course",
|
|
164
|
+
GERALD_BARTON: "Maryland property developer Landmark Land Company, fan of Trump's Irish golf course",
|
|
167
165
|
GORDON_GETTY: 'heir of oil tycoon J. Paul Getty',
|
|
168
166
|
NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
|
|
169
167
|
'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
|
|
@@ -272,7 +270,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
272
270
|
HighlightedNames(
|
|
273
271
|
label='europe',
|
|
274
272
|
style='light_sky_blue3',
|
|
275
|
-
pattern=r'(Angela )?Merk(el|le)|Austria|(Benjamin\s*)?Harnwell|Berlin|Brexit(eers?)?|Brit(ain|ish)|Brussels|Cannes|(Caroline|Jack)?\s*Lang(, Caroline)?|Cypr(iot|us)|Davos|ECB|EU|Europe(an)?(\s*Union)?|
|
|
273
|
+
pattern=r'(Angela )?Merk(el|le)|Austria|(Benjamin\s*)?Harnwell|Berlin|Borge|Boris\s*Johnson|Brexit(eers?)?|Brit(ain|ish)|Brussels|Cannes|(Caroline|Jack)?\s*Lang(, Caroline)?|Cypr(iot|us)|Davos|ECB|England|EU|Europe(an)?(\s*Union)?|Fr(ance|ench)|Geneva|Germany?|Gillard|Gree(ce|k)|Ital(ian|y)|Jacques|(Kevin\s*)?Rudd|Le\s*Pen|London|Macron|Melusine|Munich|(Natalia\s*)?Veselnitskaya|(Nicholas\s*)?Sarkozy|Nigel(\s*Farage)?|Norw(ay|egian)|Oslo|Paris|Polish|(Sebastian )?Kurz|(Vi(c|k)tor\s+)?Orbah?n|Edward Rod Larsen|Strasbourg|Strauss[- ]?Kahn|Swed(en|ish)(?![-\s]+America)|Switzerland|(Tony\s)?Blair|Ukrain(e|ian)|Vienna|(Vitaly\s*)?Churkin|Zug',
|
|
276
274
|
emailers = {
|
|
277
275
|
ANDRZEJ_DUDA: 'former president of Poland',
|
|
278
276
|
MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
|
|
@@ -306,7 +304,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
306
304
|
HighlightedNames(
|
|
307
305
|
label='finance',
|
|
308
306
|
style='green',
|
|
309
|
-
pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|BofA|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
307
|
+
pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|BofA|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
310
308
|
emailers={
|
|
311
309
|
AMANDA_ENS: 'Citigroup',
|
|
312
310
|
DANIEL_SABBA: 'UBS Investment Bank',
|
|
@@ -342,7 +340,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
342
340
|
emailers = {
|
|
343
341
|
ANIL_AMBANI: 'chairman of Reliance Group',
|
|
344
342
|
VINIT_SAHNI: None,
|
|
345
|
-
ZUBAIR_KHAN: 'Tranchulas CEO, InsightsPod founder',
|
|
343
|
+
ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, based in Islamabad and Dubai',
|
|
346
344
|
}
|
|
347
345
|
),
|
|
348
346
|
HighlightedNames(
|
|
@@ -391,7 +389,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
391
389
|
HighlightedNames(
|
|
392
390
|
label='law enforcement',
|
|
393
391
|
style='color(24) bold',
|
|
394
|
-
pattern=r'ag|(Alicia\s*)?Valle|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
|
|
392
|
+
pattern=r'ag|(Alicia\s*)?Valle|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
|
|
395
393
|
emailers = {
|
|
396
394
|
ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
|
|
397
395
|
DANNY_FROST: 'Director of Communications at Manhattan DA',
|
|
@@ -426,7 +424,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
426
424
|
HighlightedNames(
|
|
427
425
|
label='modeling',
|
|
428
426
|
style='pale_violet_red1',
|
|
429
|
-
pattern=r'\w+@mc2mm.com|(Nicole\s*)?Junkerman',
|
|
427
|
+
pattern=r'\w+@mc2mm.com|model(ed|ing)|(Nicole\s*)?Junkerman',
|
|
430
428
|
emailers = {
|
|
431
429
|
'Abi Schwinck': 'MC2 Model Management (?)',
|
|
432
430
|
DANIEL_SIAD: None,
|
|
@@ -458,7 +456,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
458
456
|
HighlightedNames(
|
|
459
457
|
label='republicans',
|
|
460
458
|
style='bold dark_red',
|
|
461
|
-
pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?Manafort|(Peter\s)?Navarro|Pompeo|Reagan|Republican|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|(Rex\s*)?Tillerson',
|
|
459
|
+
pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?Manafort|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|(Rex\s*)?Tillerson',
|
|
460
|
+
# There's no emails from these people, they're just here to automate the regex creation for both first + last names
|
|
462
461
|
emailers = {
|
|
463
462
|
RUDY_GIULIANI: 'disbarred formed mayor of New York City',
|
|
464
463
|
TULSI_GABBARD: None,
|
|
@@ -475,7 +474,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
475
474
|
HighlightedNames(
|
|
476
475
|
label='russia',
|
|
477
476
|
style='red bold',
|
|
478
|
-
pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
|
|
477
|
+
pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
|
|
479
478
|
emailers = {
|
|
480
479
|
MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
|
|
481
480
|
RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
|
|
@@ -519,7 +518,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
519
518
|
HighlightedNames(
|
|
520
519
|
label='trump',
|
|
521
520
|
style='red3 bold',
|
|
522
|
-
pattern=r"@?realDonaldTrump|(Alan\s*)?Weiss?elberg|\bDJ?T\b|Donald J. Tramp|(Donald\s+(J\.\s+)?)?Trump(ism|\s*Properties)?|Don(ald| *Jr)(?! Rubin)|Ivana|(Madeleine\s*)?Westerhout|Mar[-\s]*a[-\s]*Lago|(Marla\s*)?Maples|(Matt(hew)? )?Calamari|\bMatt C\b|Melania|(Michael (J.? )?)?Boccio|Roger\s+Stone|rona|(The\s*)?Art\s*of\s*the\s*Deal",
|
|
521
|
+
pattern=r"@?realDonaldTrump|(Alan\s*)?Weiss?elberg|\bDJ?T\b|Donald J. Tramp|(Donald\s+(J\.\s+)?)?Trump(ism|\s*Properties)?|Don(ald| *Jr)(?! Rubin)|Ivana|(Madeleine\s*)?Westerhout|Mar[-\s]*a[-\s]*Lago|(Marla\s*)?Maples|(Matt(hew)? )?Calamari|\bMatt C\b|Melania|(Michael (J.? )?)?Boccio|Rebekah\s*Mercer|Roger\s+Stone|rona|(The\s*)?Art\s*of\s*the\s*Deal",
|
|
523
522
|
emailers = {
|
|
524
523
|
'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
|
|
525
524
|
},
|
|
@@ -541,7 +540,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
541
540
|
HighlightedNames(
|
|
542
541
|
label=VIRGIN_ISLANDS,
|
|
543
542
|
style='sea_green1',
|
|
544
|
-
pattern=r'Bahamas|Caribb?ean|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News xists
|
|
543
|
+
pattern=r'Antigua|Bahamas|Caribb?ean|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News xists
|
|
545
544
|
emailers = {
|
|
546
545
|
CECILE_DE_JONGH: f'First lady 2007-2015',
|
|
547
546
|
STACEY_PLASKETT: 'non-voting member of Congress',
|
|
@@ -561,7 +560,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
561
560
|
HighlightedNames(
|
|
562
561
|
label=STEVE_BANNON,
|
|
563
562
|
style='color(58)',
|
|
564
|
-
pattern=r'((Steve|Sean)\s*)?Bannon?',
|
|
563
|
+
pattern=r'((Steve|Sean)\s*)?Bannon?|(American\s*)?Dharma',
|
|
565
564
|
),
|
|
566
565
|
HighlightedNames(
|
|
567
566
|
emailers={STEVEN_HOFFENBERG: HEADER_ABBREVIATIONS['Hoffenberg']},
|
|
@@ -578,7 +577,18 @@ HIGHLIGHTED_NAMES = [
|
|
|
578
577
|
HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
|
|
579
578
|
HighlightedNames(emailers={SOON_YI_PREVIN: "wife of Woody Allen"}, style='hot_pink'),
|
|
580
579
|
HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
|
|
581
|
-
|
|
580
|
+
|
|
581
|
+
# HighlightedText not HighlightedNames bc of word boundary issue
|
|
582
|
+
HighlightedText(
|
|
583
|
+
label='unknown',
|
|
584
|
+
style='cyan',
|
|
585
|
+
pattern=r'\(unknown\)'
|
|
586
|
+
),
|
|
587
|
+
HighlightedText(
|
|
588
|
+
label='phone_number',
|
|
589
|
+
style='bright_green',
|
|
590
|
+
pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|[\d+]{10,12}",
|
|
591
|
+
),
|
|
582
592
|
]
|
|
583
593
|
|
|
584
594
|
# Highlight regexes for things other than names, only used by RegexHighlighter pattern matching
|
|
@@ -593,11 +603,6 @@ HIGHLIGHTED_TEXTS = [
|
|
|
593
603
|
style=f'{ARCHIVE_LINK_COLOR} underline',
|
|
594
604
|
pattern=r"https?:[^\s]+",
|
|
595
605
|
),
|
|
596
|
-
HighlightedText(
|
|
597
|
-
label='phone_number',
|
|
598
|
-
style='bright_green',
|
|
599
|
-
pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|[\d+]{10,12}",
|
|
600
|
-
),
|
|
601
606
|
HighlightedText(
|
|
602
607
|
label='quoted_reply_line',
|
|
603
608
|
style='dim',
|
epstein_files/util/logging.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from os import environ
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
from rich.console import Console
|
|
5
6
|
from rich.highlighter import ReprHighlighter
|
|
@@ -7,6 +8,7 @@ from rich.logging import RichHandler
|
|
|
7
8
|
from rich.theme import Theme
|
|
8
9
|
|
|
9
10
|
from epstein_files.util.constant.strings import *
|
|
11
|
+
from epstein_files.util.file_helper import file_size_str
|
|
10
12
|
|
|
11
13
|
FILENAME_STYLE = 'gray27'
|
|
12
14
|
|
|
@@ -27,6 +29,7 @@ LOG_THEME[f"{ReprHighlighter.base_style}epstein_filename"] = FILENAME_STYLE
|
|
|
27
29
|
LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
|
|
28
30
|
|
|
29
31
|
|
|
32
|
+
# Augment the standard log highlighter with 'epstein_filename' matcher
|
|
30
33
|
class LogHighlighter(ReprHighlighter):
|
|
31
34
|
highlights = ReprHighlighter.highlights + [
|
|
32
35
|
*[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
@@ -55,3 +58,7 @@ if env_log_level_str:
|
|
|
55
58
|
logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
|
|
56
59
|
logger.setLevel(env_log_level)
|
|
57
60
|
datefinder_logger.setLevel(env_log_level)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def log_file_write(file_path: str | Path) -> None:
|
|
64
|
+
logger.warning(f"Wrote {file_size_str(file_path)} to '{file_path}'")
|
epstein_files/util/output.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from rich.padding import Padding
|
|
2
|
+
|
|
3
|
+
from epstein_files.documents.email import Email
|
|
4
|
+
from epstein_files.documents.messenger_log import MessengerLog
|
|
5
|
+
from epstein_files.epstein_files import EpsteinFiles, count_by_month
|
|
6
|
+
from epstein_files.util.constant.output_files import JSON_METADATA_PATH
|
|
7
|
+
from epstein_files.util.constant import urls
|
|
8
|
+
from epstein_files.util.constant.html import *
|
|
9
|
+
from epstein_files.util.constant.names import *
|
|
10
|
+
from epstein_files.util.constant.strings import EMAIL_CLASS, MESSENGER_LOG_CLASS
|
|
11
|
+
from epstein_files.util.data import dict_sets_to_lists
|
|
12
|
+
from epstein_files.util.env import args, specified_names
|
|
13
|
+
from epstein_files.util.logging import log_file_write, logger
|
|
14
|
+
from epstein_files.util.rich import *
|
|
15
|
+
|
|
16
|
+
PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
|
|
17
|
+
|
|
18
|
+
# Order matters. Default names to print emails for.
|
|
19
|
+
DEFAULT_EMAILERS = [
|
|
20
|
+
JEREMY_RUBIN,
|
|
21
|
+
AL_SECKEL,
|
|
22
|
+
JOI_ITO,
|
|
23
|
+
JABOR_Y,
|
|
24
|
+
STEVEN_SINOFSKY,
|
|
25
|
+
DANIEL_SIAD,
|
|
26
|
+
JEAN_LUC_BRUNEL,
|
|
27
|
+
STEVEN_HOFFENBERG,
|
|
28
|
+
EHUD_BARAK,
|
|
29
|
+
MARTIN_NOWAK,
|
|
30
|
+
MASHA_DROKOVA,
|
|
31
|
+
RENATA_BOLOTOVA,
|
|
32
|
+
STEVE_BANNON,
|
|
33
|
+
OLIVIER_COLOM,
|
|
34
|
+
BORIS_NIKOLIC,
|
|
35
|
+
PRINCE_ANDREW,
|
|
36
|
+
JIDE_ZEITLIN,
|
|
37
|
+
DAVID_STERN,
|
|
38
|
+
MOHAMED_WAHEED_HASSAN,
|
|
39
|
+
JENNIFER_JACQUET,
|
|
40
|
+
TYLER_SHEARS,
|
|
41
|
+
CHRISTINA_GALBRAITH,
|
|
42
|
+
None,
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
# Order matters. Default names to print tables w/email subject, timestamp, etc for. # TODO: get rid of this ?
|
|
46
|
+
DEFAULT_EMAILER_TABLES: list[str | None] = [
|
|
47
|
+
GHISLAINE_MAXWELL,
|
|
48
|
+
LEON_BLACK,
|
|
49
|
+
SULTAN_BIN_SULAYEM,
|
|
50
|
+
DEEPAK_CHOPRA,
|
|
51
|
+
ARIANE_DE_ROTHSCHILD,
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
if len(set(DEFAULT_EMAILERS).intersection(set(DEFAULT_EMAILER_TABLES))) > 0:
|
|
55
|
+
raise RuntimeError(f"Some names appear in both DEFAULT_EMAILERS and DEFAULT_EMAILER_TABLES")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def print_emails(epstein_files: EpsteinFiles) -> int:
    """
    Print the email section and return the number of emails printed.

    The emailers whose conversations are printed are chosen as follows:
      * `specified_names`, if any were given;
      * otherwise every emailer, ordered by the time of their earliest email, when `args.all_emails` is set;
      * otherwise `DEFAULT_EMAILERS`, followed by email tables for `DEFAULT_EMAILER_TABLES`.

    The color key is re-printed whenever more than `PRINT_COLOR_KEY_EVERY_N_EMAILS` emails have
    been printed since the last key.
    """
    print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
    print_other_site_link(is_header=False)

    emailers_to_print: list[str | None]
    emailer_tables: list[str | None] = []
    already_printed_emails: list[Email] = []
    num_emails_printed_since_last_color_key = 0

    if specified_names:
        emailers_to_print = specified_names
    else:
        epstein_files.print_emailer_counts_table()

        if args.all_emails:
            emailers_to_print = sorted(epstein_files.all_emailers(), key=epstein_files.earliest_email_at)
            console.print('Email conversations are sorted chronologically based on time of the first email.')
            print_numbered_list_of_emailers(emailers_to_print, epstein_files)
        else:
            emailers_to_print = DEFAULT_EMAILERS
            emailer_tables = DEFAULT_EMAILER_TABLES
            console.print('Email conversations grouped by counterparty can be found in the order listed below.')
            print_numbered_list_of_emailers(emailers_to_print)
            console.print("\nAfter that there's tables linking to (but not displaying) all known emails for each of these people:")
            print_numbered_list_of_emailers(emailer_tables)

    for author in emailers_to_print:
        author_emails = epstein_files.print_emails_for(author)
        already_printed_emails.extend(author_emails)
        num_emails_printed_since_last_color_key += len(author_emails)

        # Print color key every once in a while
        if num_emails_printed_since_last_color_key > PRINT_COLOR_KEY_EVERY_N_EMAILS:
            print_color_key()
            num_emails_printed_since_last_color_key = 0

    if emailer_tables:
        print_author_header(f"Email Tables for {len(emailer_tables)} Other People", 'white')

        # Iterate the same list the header counted so the two can never disagree.
        for name in emailer_tables:
            epstein_files.print_emails_table_for(name)

    if not specified_names:
        epstein_files.print_email_device_info()

    if args.all_emails:
        _verify_all_emails_were_printed(epstein_files, already_printed_emails)

    logger.warning(f"Rewrote {len(Email.rewritten_header_ids)} headers of {len(epstein_files.emails)} emails")
    return len(already_printed_emails)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def print_json_metadata(epstein_files: EpsteinFiles) -> None:
    """
    Emit the JSON metadata string produced by `epstein_files.json_metadata()`.

    When `args.build` is set the string is written to `JSON_METADATA_PATH`; otherwise it is
    pretty-printed to the console (4-space indent, sorted keys).
    """
    json_str = epstein_files.json_metadata()

    if not args.build:
        console.print_json(json_str, indent=4, sort_keys=True)
        return

    # Explicit encoding so the file's bytes don't depend on the platform's default locale.
    with open(JSON_METADATA_PATH, 'w', encoding='utf-8') as f:
        f.write(json_str)

    # Log only once the file has been closed (and therefore fully flushed to disk).
    log_file_write(JSON_METADATA_PATH)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def print_json_stats(epstein_files: EpsteinFiles) -> None:
    """Print a 'JSON Stats Dump' panel followed by a series of labeled JSON dumps of counts and lookups."""
    console.line(5)
    console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')

    # Sender / author / recipient tallies; falsey entries are skipped.
    print_json(f"{MESSENGER_LOG_CLASS} Sender Counts", MessengerLog.count_authors(epstein_files.imessage_logs), skip_falsey=True)
    print_json(f"{EMAIL_CLASS} Author Counts", epstein_files.email_author_counts, skip_falsey=True)
    print_json(f"{EMAIL_CLASS} Recipient Counts", epstein_files.email_recipient_counts, skip_falsey=True)
    print_json("Email signature_substitution_counts", epstein_files.email_signature_substitution_counts(), skip_falsey=True)

    # Device signature lookups hold sets, which are converted to lists before being dumped.
    print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures))
    print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors))

    print_json("email_unknown_recipient_file_ids", epstein_files.email_unknown_recipient_file_ids())
    print_json("count_by_month", count_by_month(epstein_files.all_documents()))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def print_text_messages(epstein_files: EpsteinFiles) -> None:
    """Print the 'Text Messages' section: each matching iMessage log, then the iMessage summary."""
    print_section_header('Text Messages')
    print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')

    # Fall back to JEFFREY_EPSTEIN when no names were specified.
    authors: list[str | None] = specified_names or [JEFFREY_EPSTEIN]

    for imessage_log in epstein_files.imessage_logs_for(authors):
        console.print(Padding(imessage_log))
        console.line(2)

    epstein_files.print_imessage_summary()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def write_urls() -> None:
    """
    Write _URL style constant variables to a file bash scripts can load as env vars.

    Only string attributes of the `urls` module are written, and only those whose name ends in the
    `URL` token, whose name does not contain 'BASE', and whose value contains 'github.io'.
    Each is written as a `NAME='value'` line. If `args.output_file` is 'index.html' it is
    replaced (on `args` itself) with `URLS_ENV` before writing.
    """
    if args.output_file == 'index.html':
        logger.warning(f"Can't write env vars to '{args.output_file}', writing to '{URLS_ENV}' instead.\n")
        args.output_file = URLS_ENV

    url_vars = {
        name: value
        for name, value in vars(urls).items()
        if isinstance(value, str) and name.split('_')[-1] == 'URL' and 'github.io' in value and 'BASE' not in name
    }

    # Explicit encoding so the file's bytes don't depend on the platform's default locale.
    with open(args.output_file, 'w', encoding='utf-8') as f:
        for var_name, url in url_vars.items():
            key_value = f"{var_name}='{url}'"

            if not args.suppress_output:
                console.print(key_value, style='dim')

            f.write(f"{key_value}\n")

    console.line()
    logger.warning(f"Wrote {len(url_vars)} URL variables to '{args.output_file}'\n")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
    """Log a warning for each non-duplicate email whose file ID is not among the printed emails."""
    printed_file_ids = {printed.file_id for printed in already_printed_emails}
    logger.warning(f"Printed {len(already_printed_emails)} emails of {len(printed_file_ids)} unique file IDs.")

    for email in epstein_files.emails:
        # Nothing to report for emails that were printed or that are flagged as duplicates.
        if email.file_id in printed_file_ids or email.is_duplicate:
            continue

        logger.warning(f"Failed to print {email.summary()}")
|