epstein-files 1.1.2.tar.gz → 1.1.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.1.2 → epstein_files-1.1.3}/PKG-INFO +4 -1
- {epstein_files-1.1.2 → epstein_files-1.1.3}/README.md +3 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/__init__.py +7 -14
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/email.py +0 -9
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/epstein_files.py +2 -2
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/names.py +9 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constants.py +1 -1
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/env.py +11 -1
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/highlighted_group.py +19 -7
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/output.py +95 -88
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/rich.py +3 -3
- {epstein_files-1.1.2 → epstein_files-1.1.3}/pyproject.toml +1 -1
- {epstein_files-1.1.2 → epstein_files-1.1.3}/LICENSE +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/document.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/imessage/text_message.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/json_file.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/messenger_log.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/other_file.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/output_files.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/strings.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/urls.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/data.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/doc_cfg.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/file_helper.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/logging.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/timer.py +0 -0
- {epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/word_count.py +0 -0

{epstein_files-1.1.2 → epstein_files-1.1.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: epstein-files
-Version: 1.1.2
+Version: 1.1.3
 Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
 Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
 License: GPL-3.0-or-later
@@ -81,6 +81,9 @@ epstein_diff 030999 020442
 ```
 
 The first time you run anything it will take a few minutes to fix all the janky OCR text, attribute the redacted emails, etc. After that things will be quick.
+
+The commands used to build the various sites that are deployed on Github Pages can be found in [`deploy.sh`](./deploy.sh).
+
 Run `epstein_generate --help` for command line option assistance.
 
 **Optional:** There are a handful of emails that I extracted from the legal filings they were contained in. If you want to include these files in your local analysis you'll need to copy those files from the repo into your local document directory. Something like:

{epstein_files-1.1.2 → epstein_files-1.1.3}/README.md

@@ -48,6 +48,9 @@ epstein_diff 030999 020442
 ```
 
 The first time you run anything it will take a few minutes to fix all the janky OCR text, attribute the redacted emails, etc. After that things will be quick.
+
+The commands used to build the various sites that are deployed on Github Pages can be found in [`deploy.sh`](./deploy.sh).
+
 Run `epstein_generate --help` for command line option assistance.
 
 **Optional:** There are a handful of emails that I extracted from the legal filings they were contained in. If you want to include these files in your local analysis you'll need to copy those files from the repo into your local document directory. Something like:

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/__init__.py

@@ -16,13 +16,14 @@ from rich.text import Text
 from epstein_files.epstein_files import EpsteinFiles, document_cls
 from epstein_files.documents.document import INFO_PADDING, Document
 from epstein_files.documents.email import Email
-from epstein_files.util.constant.output_files import
+from epstein_files.util.constant.output_files import make_clean
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_path, extract_file_id
 from epstein_files.util.logging import exit_with_error, logger
 from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
-    print_other_files_section, print_text_messages_section,
-from epstein_files.util.rich import build_highlighter, console, print_color_key, print_title_page_header,
+    print_other_files_section, print_text_messages_section, print_email_timeline, print_json_metadata, write_urls)
+from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
+    print_title_page_tables, print_subtitle_panel, write_html)
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
 
@@ -37,7 +38,7 @@ def generate_html() -> None:
     epstein_files = EpsteinFiles.get_files(timer)
 
     if args.json_metadata:
-
+        print_json_metadata(epstein_files)
         exit()
     elif args.json_files:
         print_json_files(epstein_files)
@@ -62,7 +63,7 @@ def generate_html() -> None:
         emails_that_were_printed = print_emails_section(epstein_files)
         timer.print_at_checkpoint(f"Printed {len(emails_that_were_printed):,} emails")
     elif args.email_timeline:
-
+        print_email_timeline(epstein_files)
         timer.print_at_checkpoint(f"Printed chronological emails table")
 
     if args.output_other:
@@ -74,15 +75,7 @@ def generate_html() -> None:
         print_other_files_section(files, epstein_files)
         timer.print_at_checkpoint(f"Printed {len(files)} other files (skipped {len(epstein_files.other_files) - len(files)})")
 
-
-    if args.all_emails:
-        output_path = ALL_EMAILS_PATH
-    elif args.email_timeline:
-        output_path = CHRONOLOGICAL_EMAILS_PATH
-    else:
-        output_path = TEXT_MSGS_HTML_PATH
-
-    write_html(output_path)
+    write_html(args.build)
     logger.warning(f"Total time: {timer.seconds_since_start_str()}")
 
 # JSON stats (mostly used for building pytest checks)

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/documents/email.py

@@ -126,15 +126,6 @@ EMAIL_SIGNATURE_REGEXES = {
     UNKNOWN: re.compile(r"(This message is directed to and is for the use of the above-noted addressee only.*\nhereon\.)", re.DOTALL),
 }
 
-# Invalid for links to EpsteinWeb
-JUNK_EMAILERS = [
-    'asmallworld@travel.asmallworld.net',
-    "digest-noreply@quora.com",
-    'editorialstaff@flipboard.com',
-    'How To Academy',
-    'Jokeland',
-]
-
 MAILING_LISTS = [
     CAROLYN_RANGEL,
     INTELLIGENCE_SQUARED,

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/epstein_files.py

@@ -218,8 +218,8 @@ class EpsteinFiles:
         return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
 
     def print_files_summary(self) -> None:
-        table = build_table('
-        add_cols_to_table(table, ['File Type', '
+        table = build_table('File Overview')
+        add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
         table.columns[1].justify = 'right'
 
         def add_row(label: str, docs: list):

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constant/names.py

@@ -206,6 +206,15 @@ ROTHSTEIN_ROSENFELDT_ADLER = 'Rothstein Rosenfeldt Adler (Rothstein was a crook
 TRUMP_ORG = 'Trump Organization'
 UBS = 'UBS'
 
+# Invalid for links to EpsteinWeb
+JUNK_EMAILERS = [
+    'asmallworld@travel.asmallworld.net',
+    "digest-noreply@quora.com",
+    'editorialstaff@flipboard.com',
+    'How To Academy',
+    'Jokeland',
+]
+
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
     'Al', 'Alan', 'Alfredo', 'Allen', 'Alex', 'Alexander', 'Amanda', 'Andres', 'Andrew',

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/constants.py

@@ -93,7 +93,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
     KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
     LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
-    LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|
+    LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|[Il]hsofficel?', re.IGNORECASE),
     LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
     LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
     LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/env.py

@@ -5,9 +5,11 @@ from pathlib import Path
 
 from rich_argparse_plus import RichHelpFormatterPlus
 
+from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
 from epstein_files.util.logging import env_log_level, exit_with_error, logger
 
 DEFAULT_WIDTH = 145
+DEFAULT_FILE = 'default_file'
 EPSTEIN_GENERATE = 'epstein_generate'
 HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
 
@@ -34,7 +36,7 @@ parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='re-p
 output = parser.add_argument_group('OUTPUT', 'Options used by epstein_generate.')
 output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
 output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
-
+parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
 output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
 output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
 output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
@@ -82,6 +84,14 @@ if is_html_script:
     elif not args.email_timeline:
         logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
         args.output_texts = args.output_emails = args.output_other = True
+
+    if args.build == DEFAULT_FILE:
+        if args.all_emails:
+            args.build = ALL_EMAILS_PATH
+        elif args.email_timeline:
+            args.build = CHRONOLOGICAL_EMAILS_PATH
+        else:
+            args.build = TEXT_MSGS_HTML_PATH
 elif parser.prog.startswith('epstein_') and not args.positional_args:
     exit_with_error(f"{parser.prog} requires positional arguments but got none!")
 

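The `--build` flag added above takes an optional value: when it appears with no argument, argparse stores the `DEFAULT_FILE` sentinel, which is then resolved to a concrete output path after parsing based on which output section was requested. A minimal standalone sketch of that pattern (the path values here are hypothetical stand-ins, not the package's real `output_files` constants):

```python
import argparse
from pathlib import Path

# Hypothetical stand-ins for the ALL_EMAILS_PATH / CHRONOLOGICAL_EMAILS_PATH / TEXT_MSGS_HTML_PATH constants.
ALL_EMAILS_PATH = Path('all_emails.html')
CHRONOLOGICAL_EMAILS_PATH = Path('chronological_emails.html')
TEXT_MSGS_HTML_PATH = Path('text_messages.html')
DEFAULT_FILE = 'default_file'  # sentinel meaning "--build was passed without an explicit value"

parser = argparse.ArgumentParser()
parser.add_argument('--all-emails', action='store_true')
parser.add_argument('--email-timeline', action='store_true')
# nargs='?' makes the value optional; const is stored when the flag appears bare;
# default=None means the flag was absent entirely (so no HTML file is written).
parser.add_argument('--build', '-b', nargs='?', default=None, const=DEFAULT_FILE)

args = parser.parse_args(['--email-timeline', '--build'])

# Resolve the sentinel to a concrete path, mirroring the post-parse block added to env.py.
if args.build == DEFAULT_FILE:
    if args.all_emails:
        args.build = ALL_EMAILS_PATH
    elif args.email_timeline:
        args.build = CHRONOLOGICAL_EMAILS_PATH
    else:
        args.build = TEXT_MSGS_HTML_PATH

print(args.build)  # -> chronological_emails.html
```

With this in place, `generate_html()` can simply call `write_html(args.build)`: a falsy value means no `--build` was given and nothing is written.
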
{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/highlighted_group.py

@@ -221,6 +221,7 @@ HIGHLIGHTED_NAMES = [
             JOSCHA_BACH: 'cognitive science / AI research',
             'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
             'Ed Boyden': f'Associate Professor, {MIT_MEDIA_LAB} neurobiology',
+            'Harry Fisch': "men's health expert at New York-Presbyterian / Weill Cornell (?)",
             LAWRENCE_KRAUSS: 'theoretical physicist',
             LINDA_STONE: f'ex-Microsoft, {MIT_MEDIA_LAB}',
             MARK_TRAMO: 'professor of neurology at UCLA',
@@ -384,7 +385,6 @@ HIGHLIGHTED_NAMES = [
             r"PRC",
             r"Pyongyang",
             r"SCMP",
-            r"Tai(pei|wan)",
             r"Xi(aomi)?",
             r"Jinping",
         ],
@@ -400,6 +400,9 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(
         label='Democrat',
         style='sky_blue1',
+        emailers={
+            PAUL_PROSPERI: 'friend of Bill Clinton',
+        },
         patterns=[
             r"(Al\s*)?Franken",
             r"(Barac?k )?Obama",
@@ -630,10 +633,12 @@ HIGHLIGHTED_NAMES = [
             MARC_LEON: 'Luxury Properties Sari Morrocco',
             MELANIE_SPINELLA: 'representative of Leon Black',
             MORTIMER_ZUCKERMAN: 'business partner of Epstein, newspaper publisher',
+            NORMAN_D_RAU: 'managing director at Morgan Stanley',
             PAUL_BARRETT: None,
             PAUL_MORRIS: DEUTSCHE_BANK,
             'Skip Rimer': 'Milken Institute (Michael Milken)',
             'Steven Elkman': DEUTSCHE_BANK,
+            'Vahe Stepanian': 'Cetera Financial Group',
         },
         patterns=[
             r"((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?",
@@ -810,6 +815,7 @@ HIGHLIGHTED_NAMES = [
             r"Ed\s*Krassenstein",
             r"(Emily\s*)?Michot",
             r"Ezra\s*Klein",
+            r"FrontPage Magazine",
             r"FT",
             r"(George\s*)?Stephanopoulus",
             r"Globe\s*and\s*Mail",
@@ -1004,7 +1010,7 @@ HIGHLIGHTED_NAMES = [
             r"HBJ",
             r"Houthi",
             r"Imran\s+Khan",
-            r"Iran(ian)?",
+            r"Iran(ian)?([-\s]Contra)?",
             r"Isi[ls]",
             r"Islam(abad|ic|ist)?",
             r"Istanbul",
@@ -1047,6 +1053,7 @@ HIGHLIGHTED_NAMES = [
             r"Syrian?",
             r"(Tarek\s*)?El\s*Sayed",
             r"Tehran",
+            r"Tripoli",
             r"Tunisian?",
             r"Turk(ey|ish)",
             r"UAE",
@@ -1203,7 +1210,7 @@ HIGHLIGHTED_NAMES = [
     ),
 
     HighlightedNames(
-        label='
+        label='Southeast Asia',
         style='light_salmon3 bold',
         patterns=[
             r"Bangkok",
@@ -1212,6 +1219,9 @@ HIGHLIGHTED_NAMES = [
             r"Laos",
             r"Malaysian?",
             r"Myan?mar",
+            r"Philippines",
+            r"South\s*Korea",
+            r"Tai(pei|wan)",
             r"Thai(land)?",
             r"Vietnam(ese)?",
         ],
@@ -1252,7 +1262,7 @@ HIGHLIGHTED_NAMES = [
         ],
     ),
     HighlightedNames(
-        label='
+        label='Trump',
         style='red3 bold',
         emailers={
             'Bruce Moskowitz': "'Trump's health guy' according to Epstein",
@@ -1262,7 +1272,7 @@ HIGHLIGHTED_NAMES = [
             r"(Alan\s*)?Weiss?elberg",
             r"\bDJ?T\b",
             r"Donald J. Tramp",
-            r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*Properties)?",
+            r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
             r"Don(ald| *Jr)(?! Rubin)",
             r"Ivank?a",
             r"Jared",
@@ -1274,6 +1284,7 @@ HIGHLIGHTED_NAMES = [
             r"\bMatt C\b",
             r"Melania",
             r"(Michael (J.? )?)?Boccio",
+            r"Paul Rampell",
             r"Rebekah\s*Mercer",
             r"Roger\s+Stone",
             r"rona",
@@ -1326,6 +1337,7 @@ HIGHLIGHTED_NAMES = [
             r"(John\s*)deJongh(\s*Jr\.?)",
             r"(Kenneth E\. )?Mapp",
             r"PBI",
+            r"Puerto\s*Ric(an|o)",
             r"S(ain)?t.?\s*Thomas",
             r"USVI",
             r"(?<!Epstein )VI",
@@ -1371,7 +1383,7 @@ HIGHLIGHTED_NAMES = [
     HighlightedNames(emailers={JEFFREY_EPSTEIN: None}, patterns=[r"JEGE", r"LSJ", r"Mark (L. )?Epstein"], style='blue1'),
     HighlightedNames(emailers={KATHRYN_RUEMMLER: 'former Obama legal counsel'}, style='magenta2'),
     HighlightedNames(emailers={MELANIE_WALKER: 'doctor'}, style='pale_violet_red1'),
-    HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='
+    HighlightedNames(emailers={PAULA: "Epstein's ex-girlfriend who is now in the opera world"}, label='paula', style='pink1'),
     HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
     HighlightedNames(emailers={SOON_YI_PREVIN: 'wife of Woody Allen'}, style='hot_pink'),
     HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
@@ -1452,7 +1464,7 @@ class EpsteinHighlighter(RegexHighlighter):
     highlights = [highlight_group.regex for highlight_group in ALL_HIGHLIGHTS]
 
 
-def
+def get_category_txt_for_name(name: str | None) -> Text | None:
     highlight_group = _get_highlight_group_for_name(name)
 
     if highlight_group and isinstance(highlight_group, HighlightedNames):

{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/output.py

@@ -3,7 +3,7 @@ import json
 from rich.padding import Padding
 
 from epstein_files.documents.document import Document
-from epstein_files.documents.email import
+from epstein_files.documents.email import KRASSNER_RECIPIENTS, Email
 from epstein_files.documents.messenger_log import MessengerLog
 from epstein_files.documents.other_file import FIRST_FEW_LINES, OtherFile
 from epstein_files.epstein_files import EpsteinFiles, count_by_month
@@ -15,6 +15,7 @@ from epstein_files.util.constant.strings import TIMESTAMP_DIM, TIMESTAMP_STYLE
 from epstein_files.util.data import dict_sets_to_lists, sort_dict
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import log_file_write
+from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT
 from epstein_files.util.logging import logger
 from epstein_files.util.rich import *
 
@@ -53,6 +54,34 @@ INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
 ]
 
 
+def print_email_timeline(epstein_files: EpsteinFiles) -> None:
+    """Print a table of all emails in chronological order."""
+    emails = [email for email in epstein_files.non_duplicate_emails() if not email.is_junk_mail()]
+    table = build_table(f'All {len(emails):,} Non-Junk Emails in Chronological Order', highlight=True)
+    table.add_column('ID', style=TIMESTAMP_DIM)
+    table.add_column('Sent At', style='dim')
+    table.add_column('Author', max_width=20)
+    table.add_column('Recipients', max_width=22)
+    table.add_column('Length', justify='right', style='wheat4')
+    table.add_column('Subject')
+
+    for email in Document.sort_by_timestamp(emails):
+        if email.is_junk_mail():
+            continue
+
+        table.add_row(
+            email.epstein_media_link(link_txt=email.source_file_id()),
+            email.timestamp_without_seconds(),
+            email.author_txt(),
+            email.recipients_txt(max_full_names=1),
+            f"{email.length()}",
+            email.subject(),
+        )
+
+    console.line(2)
+    console.print(table)
+
+
 def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
     """Returns emails that were printed (may contain dupes if printed for both author and recipient)."""
     print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
@@ -70,7 +99,7 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
 
     print_other_page_link(epstein_files)
     console.line(2)
-    console.print(
+    console.print(_table_of_selected_emailers(emailers_to_print, epstein_files))
     console.print(Padding(_all_emailers_table(epstein_files), (2, 0)))
 
     for author in emailers_to_print:
@@ -96,8 +125,9 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
 
 
 def print_json_files(epstein_files: EpsteinFiles):
+    """Print all the JsonFile objects"""
     if args.build:
-        json_data = {
+        json_data = {jf.url_slug: jf.json_data() for jf in epstein_files.json_files}
 
         with open(JSON_FILES_JSON_PATH, 'w') as f:
             f.write(json.dumps(json_data, sort_keys=True))
@@ -109,6 +139,17 @@ def print_json_files(epstein_files: EpsteinFiles):
         console.print_json(json_file.json_str(), indent=4, sort_keys=False)
 
 
+def print_json_metadata(epstein_files: EpsteinFiles) -> None:
+    json_str = epstein_files.json_metadata()
+
+    if args.build:
+        with open(JSON_METADATA_PATH, 'w') as f:
+            f.write(json_str)
+        log_file_write(JSON_METADATA_PATH)
+    else:
+        console.print_json(json_str, indent=4, sort_keys=True)
+
+
 def print_json_stats(epstein_files: EpsteinFiles) -> None:
     console.line(5)
     console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')
@@ -152,91 +193,6 @@ def print_text_messages_section(imessage_logs: list[MessengerLog]) -> None:
     console.line(2)
 
 
-def table_of_selected_emailers(_list: list[str | None], epstein_files: EpsteinFiles) -> Table:
-    """Add the first emailed_at timestamp for each emailer if 'epstein_files' provided."""
-    header_pfx = '' if args.all_emails else 'Selected '
-    table = build_table(f'{header_pfx}Email Conversations Grouped by Counterparty Will Appear in this Order')
-    table.add_column('Start Date')
-    table.add_column('Name', max_width=25, no_wrap=True)
-    table.add_column('Category', justify='center', style='dim italic')
-    table.add_column('Num', justify='right', style='wheat4')
-    table.add_column('Info', style='white italic')
-    current_year = 1990
-    current_year_month = current_year * 12
-    grey_idx = 0
-
-    for i, name in enumerate(_list):
-        earliest_email_date = (epstein_files.earliest_email_at(name) or FALLBACK_TIMESTAMP).date()
-        year_months = (earliest_email_date.year * 12) + earliest_email_date.month
-
-        # Color year rollovers more brightly
-        if current_year != earliest_email_date.year:
-            grey_idx = 0
-        elif current_year_month != year_months:
-            grey_idx = ((current_year_month - 1) % 12) + 1
-
-        current_year_month = year_months
-        current_year = earliest_email_date.year
-        category = get_category_for_name(name)
-        info = get_info_for_name(name)
-
-        if category and category.plain == 'paula_heil_fisher': # TODO: hacky
-            category = None
-        elif category and info:
-            info = info.removeprefix(f"{category.plain}, ")
-        elif not name:
-            info = Text('(emails whose author or recipient could not be determined)', style='medium_purple4')
-
-        table.add_row(
-            Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[grey_idx]}"),
-            Text(name or UNKNOWN, style=get_style_for_name(name or UNKNOWN, default_style='dim')),
-            category,
-            f"{len(epstein_files.emails_for(name)):,}",
-            info or '',
-        )
-
-    return table
-
-
-def write_complete_emails_timeline(epstein_files: EpsteinFiles) -> None:
-    """Print a table of all emails in chronological order."""
-    emails = [email for email in epstein_files.non_duplicate_emails() if not email.is_junk_mail()]
-    table = build_table(f'All {len(emails):,} Non-Junk Emails in Chronological Order', highlight=True)
-    table.add_column('ID', style=TIMESTAMP_DIM)
-    table.add_column('Sent At', style='dim')
-    table.add_column('Author', max_width=20)
-    table.add_column('Recipients', max_width=22)
-    table.add_column('Length', justify='right', style='wheat4')
-    table.add_column('Subject')
-
-    for email in Document.sort_by_timestamp(emails):
-        if email.is_junk_mail():
-            continue
-
-        table.add_row(
-            email.epstein_media_link(link_txt=email.source_file_id()),
-            email.timestamp_without_seconds(),
-            email.author_txt(),
-            email.recipients_txt(max_full_names=1),
-            f"{email.length()}",
-            email.subject(),
-        )
-
-    console.line(2)
-    console.print(table)
-
-
-def write_json_metadata(epstein_files: EpsteinFiles) -> None:
-    json_str = epstein_files.json_metadata()
-
-    if args.build:
-        with open(JSON_METADATA_PATH, 'w') as f:
-            f.write(json_str)
-        log_file_write(JSON_METADATA_PATH)
-    else:
-        console.print_json(json_str, indent=4, sort_keys=True)
-
-
 def write_urls() -> None:
     """Write _URL style constant variables to URLS_ENV file so bash scripts can load as env vars."""
     url_vars = {k: v for k, v in vars(output_files).items() if k.endswith('URL') and not k.startswith('GH')}
@@ -317,6 +273,57 @@ def _is_ok_for_epstein_web(name: str | None) -> bool:
     return True
 
 
+def _table_of_selected_emailers(_list: list[str | None], epstein_files: EpsteinFiles) -> Table:
+    """Add the first emailed_at timestamp for each emailer if 'epstein_files' provided."""
+    header_pfx = '' if args.all_emails else 'Selected '
+    table = build_table(f'{header_pfx}Email Conversations Grouped by Counterparty Will Appear in this Order')
+    table.add_column('Start Date')
+    table.add_column('Name', max_width=25, no_wrap=True)
+    table.add_column('Category', justify='center', style='dim italic')
+    table.add_column('Num', justify='right', style='wheat4')
+    table.add_column('Info', style='white italic')
+    current_year = 1990
+    current_year_month = current_year * 12
+    grey_idx = 0
+
+    for i, name in enumerate(_list):
+        earliest_email_date = (epstein_files.earliest_email_at(name) or FALLBACK_TIMESTAMP).date()
+        year_months = (earliest_email_date.year * 12) + earliest_email_date.month
+
+        # Color year rollovers more brightly
+        if current_year != earliest_email_date.year:
+            grey_idx = 0
+        elif current_year_month != year_months:
+            grey_idx = ((current_year_month - 1) % 12) + 1
+
+        current_year_month = year_months
+        current_year = earliest_email_date.year
+        category = get_category_txt_for_name(name)
+        info = get_info_for_name(name)
+        style = get_style_for_name(name, default_style='none')
+
+        if category and category.plain == 'paula': # TODO: hacky
+            category = None
+        elif category and info:
+            info = info.removeprefix(f"{category.plain}, ").removeprefix(category.plain)
+        elif not name:
+            info = Text('(emails whose author or recipient could not be determined)', style='medium_purple4')
+        elif name in JUNK_EMAILERS:
+            category = Text('junk', style='gray30')
+        elif style == 'none' and '@' not in name and not (category or info):
+            info = QUESTION_MARKS_TXT
+
+        table.add_row(
+            Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[grey_idx]}"),
+            Text(name or UNKNOWN, style=get_style_for_name(name or UNKNOWN, default_style='dim')),
+            category,
+            f"{len(epstein_files.emails_for(name)):,}",
+            info or '',
+        )
+
+    return table
+
+
 def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
    """Log warnings if some emails were never printed."""
    email_ids_that_were_printed = set([email.file_id for email in already_printed_emails])

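The chronological email table that used to be produced by `write_complete_emails_timeline()` is now rendered by the new `print_email_timeline()` shown above. For readers unfamiliar with the underlying library, here is a minimal, self-contained sketch of the same rich table pattern; it uses `rich.table.Table` directly rather than the package's `build_table()` helper, and the row values are made-up placeholders rather than real `Email` accessors:

```python
from rich.console import Console
from rich.table import Table

console = Console()

# Same column layout as the new print_email_timeline(), built with plain rich.
table = Table(title='All Non-Junk Emails in Chronological Order', highlight=True)
table.add_column('ID', style='dim')
table.add_column('Sent At', style='dim')
table.add_column('Author', max_width=20)
table.add_column('Recipients', max_width=22)
table.add_column('Length', justify='right')
table.add_column('Subject')

# Placeholder row standing in for the values pulled from each Email object.
table.add_row('030999', '2015-06-01 09:15', 'Unknown', 'Jeffrey Epstein', '812', 'Re: schedule')

console.line(2)
console.print(table)
```
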
{epstein_files-1.1.2 → epstein_files-1.1.3}/epstein_files/util/rich.py

@@ -21,7 +21,7 @@ from epstein_files.util.data import json_safe
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import log_file_write
 from epstein_files.util.highlighted_group import (ALL_HIGHLIGHTS, HIGHLIGHTED_NAMES, EpsteinHighlighter,
-
+    get_category_txt_for_name, get_info_for_name, get_style_for_name)
 from epstein_files.util.logging import logger
 
 TITLE_WIDTH = 50
@@ -306,8 +306,8 @@ def wrap_in_markup_style(msg: str, style: str | None = None) -> str:
     return msg
 
 
-def write_html(output_path: Path) -> None:
-    if not
+def write_html(output_path: Path | None) -> None:
+    if not output_path:
         logger.warning(f"Not writing HTML because args.build={args.build}.")
         return
 