epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +31 -18
- epstein_files/documents/communication.py +9 -5
- epstein_files/documents/document.py +225 -136
- epstein_files/documents/doj_file.py +242 -0
- epstein_files/documents/doj_files/full_text.py +166 -0
- epstein_files/documents/email.py +138 -163
- epstein_files/documents/emails/email_header.py +21 -11
- epstein_files/documents/emails/emailers.py +223 -0
- epstein_files/documents/imessage/text_message.py +2 -3
- epstein_files/documents/json_file.py +18 -14
- epstein_files/documents/messenger_log.py +23 -39
- epstein_files/documents/other_file.py +48 -44
- epstein_files/epstein_files.py +54 -33
- epstein_files/person.py +142 -110
- epstein_files/util/constant/names.py +29 -6
- epstein_files/util/constant/output_files.py +2 -0
- epstein_files/util/constant/strings.py +12 -6
- epstein_files/util/constant/urls.py +17 -0
- epstein_files/util/constants.py +101 -174
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +20 -15
- epstein_files/util/env.py +24 -16
- epstein_files/util/file_helper.py +28 -6
- epstein_files/util/helpers/debugging_helper.py +13 -0
- epstein_files/util/helpers/env_helpers.py +21 -0
- epstein_files/util/highlighted_group.py +57 -16
- epstein_files/util/layout/left_bar_panel.py +26 -0
- epstein_files/util/logging.py +28 -13
- epstein_files/util/output.py +33 -10
- epstein_files/util/rich.py +28 -2
- epstein_files/util/word_count.py +7 -7
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
- epstein_files-1.5.0.dist-info/RECORD +40 -0
- epstein_files-1.4.1.dist-info/RECORD +0 -34
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
epstein_files/util/env.py
CHANGED
|
@@ -5,23 +5,24 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
from rich_argparse_plus import RichHelpFormatterPlus
|
|
7
7
|
|
|
8
|
-
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH,
|
|
9
|
-
|
|
8
|
+
from epstein_files.util.constant.output_files import (ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH,
|
|
9
|
+
DOJ_2026_HTML_PATH, TEXT_MSGS_HTML_PATH)
|
|
10
|
+
from epstein_files.util.helpers.env_helpers import get_env_dir
|
|
11
|
+
from epstein_files.util.logging import env_log_level, exit_with_error, logger, set_log_level
|
|
10
12
|
|
|
11
13
|
DEFAULT_WIDTH = 155
|
|
12
14
|
DEFAULT_FILE = 'default_file'
|
|
13
15
|
EPSTEIN_GENERATE = 'epstein_generate'
|
|
14
16
|
HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
|
|
15
17
|
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
# Get dirs from Env vars
|
|
19
|
+
DOCS_DIR_ENV_VAR = 'EPSTEIN_DOCS_DIR'
|
|
20
|
+
DOJ_PDFS_20260130_DIR_ENV_VAR = 'EPSTEIN_DOJ_PDFS_20260130_DIR'
|
|
21
|
+
DOJ_TXTS_20260130_DIR_ENV_VAR = 'EPSTEIN_DOJ_TXTS_20260130_DIR'
|
|
20
22
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
|
|
23
|
+
# The main documents dir is requested with must_exist=True (i.e. it is required).
DOCS_DIR: Path = get_env_dir(DOCS_DIR_ENV_VAR, must_exist=True)
# The DOJ 2026-01-30 dirs are optional: get_env_dir() returns None for them when the env var
# is unset or does not point at a directory, so the annotations must admit None.
DOJ_PDFS_20260130_DIR: Path | None = get_env_dir(DOJ_PDFS_20260130_DIR_ENV_VAR, must_exist=False)
DOJ_TXTS_20260130_DIR: Path | None = get_env_dir(DOJ_TXTS_20260130_DIR_ENV_VAR, must_exist=False)
|
|
25
26
|
|
|
26
27
|
is_env_var_set = lambda s: len(environ.get(s) or '') > 0
|
|
27
28
|
is_output_arg = lambda arg: any([arg.startswith(pfx) for pfx in ['colors_only', 'json', 'make_clean', 'output']])
|
|
@@ -41,6 +42,7 @@ output.add_argument('--email-timeline', action='store_true', help='print a table
|
|
|
41
42
|
output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the emailers info table')
|
|
42
43
|
output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
|
|
43
44
|
output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
|
|
45
|
+
output.add_argument('--output-doj-files', '-od', action='store_true', help='generate the DOJ files from 2026-01-30')
|
|
44
46
|
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
45
47
|
output.add_argument('--output-other', '-oo', action='store_true', help='generate other files section')
|
|
46
48
|
output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
|
|
@@ -61,7 +63,7 @@ debug.add_argument('--colors-only', '-c', action='store_true', help='print heade
|
|
|
61
63
|
debug.add_argument('--constantize', action='store_true', help='constantize names when printing repr() of objects')
|
|
62
64
|
debug.add_argument('--debug', '-d', action='store_true', help='set debug level to INFO')
|
|
63
65
|
debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug level to DEBUG')
|
|
64
|
-
debug.add_argument('--
|
|
66
|
+
debug.add_argument('--stats', '-j', action='store_true', help='print JSON formatted stats about the files')
|
|
65
67
|
debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
|
|
66
68
|
debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
67
69
|
debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
|
|
@@ -69,7 +71,11 @@ debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plai
|
|
|
69
71
|
|
|
70
72
|
|
|
71
73
|
# Parse args
|
|
72
|
-
|
|
74
|
+
if environ.get('INVOKED_BY_PYTEST'):
|
|
75
|
+
args = parser.parse_args([EPSTEIN_GENERATE])
|
|
76
|
+
else:
|
|
77
|
+
args = parser.parse_args()
|
|
78
|
+
|
|
73
79
|
is_html_script = parser.prog in HTML_SCRIPTS
|
|
74
80
|
|
|
75
81
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
@@ -100,6 +106,8 @@ if is_html_script:
|
|
|
100
106
|
args.build = ALL_EMAILS_PATH
|
|
101
107
|
elif args.email_timeline:
|
|
102
108
|
args.build = CHRONOLOGICAL_EMAILS_PATH
|
|
109
|
+
elif args.output_doj_files:
|
|
110
|
+
args.build = DOJ_2026_HTML_PATH
|
|
103
111
|
else:
|
|
104
112
|
args.build = TEXT_MSGS_HTML_PATH
|
|
105
113
|
elif parser.prog.startswith('epstein_') and not args.positional_args and not args.names:
|
|
@@ -114,13 +122,13 @@ if args.truncate and args.whole_file:
|
|
|
114
122
|
|
|
115
123
|
# Log level args
|
|
116
124
|
if args.deep_debug:
|
|
117
|
-
|
|
125
|
+
set_log_level(logging.DEBUG)
|
|
118
126
|
elif args.debug:
|
|
119
|
-
|
|
127
|
+
set_log_level(logging.INFO)
|
|
120
128
|
elif args.suppress_logs:
|
|
121
|
-
|
|
129
|
+
set_log_level(logging.FATAL)
|
|
122
130
|
elif not env_log_level:
|
|
123
|
-
|
|
131
|
+
set_log_level(logging.WARNING)
|
|
124
132
|
|
|
125
133
|
logger.debug(f'Log level set to {logger.level}...')
|
|
126
134
|
args_str = ',\n'.join([f"{k}={v}" for k, v in vars(args).items() if v])
|
|
@@ -1,38 +1,60 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
from epstein_files.util.constant.strings import
|
|
5
|
-
|
|
4
|
+
from epstein_files.util.constant.strings import (DOJ_FILE_NAME_REGEX, EFTA_PREFIX,
|
|
5
|
+
HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX, HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX,
|
|
6
|
+
HOUSE_OVERSIGHT_PREFIX)
|
|
7
|
+
from epstein_files.util.env import DOCS_DIR, DOJ_TXTS_20260130_DIR
|
|
6
8
|
from epstein_files.util.logging import logger
|
|
7
9
|
|
|
8
10
|
EXTRACTED_EMAILS_DIR = Path('emails_extracted_from_legal_filings')
|
|
9
|
-
FILE_ID_REGEX = re.compile(fr".*{
|
|
11
|
+
FILE_ID_REGEX = re.compile(fr".*{HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX.pattern}")
|
|
10
12
|
FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
|
|
11
13
|
KB = 1024
|
|
12
14
|
MB = KB * KB
|
|
13
15
|
|
|
14
16
|
# Coerce methods handle both string and int arguments.
|
|
15
17
|
coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
|
|
16
|
-
coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
|
|
17
18
|
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
18
19
|
id_str = lambda id: f"{int(id):06d}"
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
def coerce_file_path(filename_or_id: int | str) -> Path:
    """
    Return the `Path` of the .txt file identified by `filename_or_id`.

    String arguments matching `DOJ_FILE_NAME_REGEX` are looked up by file name anywhere beneath
    `DOJ_TXTS_20260130_DIR` (searched recursively). Everything else resolves to a file directly
    inside `DOCS_DIR`; that path is built without checking that the file exists.

    Raises:
        RuntimeError: if the argument looks like a DOJ file but the DOJ text dir is unavailable
            or holds no file with the coerced name.
    """
    filename = coerce_file_name(filename_or_id)

    # NOTE(review): coerce_file_stem() tests this regex with .search() while this uses .match();
    # confirm the difference is intended.
    if not (isinstance(filename_or_id, str) and DOJ_FILE_NAME_REGEX.match(filename_or_id)):
        return DOCS_DIR.joinpath(filename)

    # The DOJ dir is optional configuration and is None when unavailable. Fail with a clear
    # message instead of an AttributeError from calling .glob() on None.
    if DOJ_TXTS_20260130_DIR is None:
        raise RuntimeError(f"'{filename_or_id}' looks like DOJ file but the DOJ text files directory is not available")

    for txt_file in DOJ_TXTS_20260130_DIR.glob('**/*.txt'):
        if txt_file.name == filename:
            return txt_file

    raise RuntimeError(f"'{filename_or_id}' looks like DOJ file but no file named {filename} in '{DOJ_TXTS_20260130_DIR}'")
|
|
34
|
+
|
|
35
|
+
|
|
21
36
|
def coerce_file_stem(filename_or_id: int | str) -> str:
    """
    Return the file stem for `filename_or_id`, whatever form the argument takes.

    Strings in which `DOJ_FILE_NAME_REGEX` is found are returned as their own `Path` stem.
    Strings starting with `HOUSE_OVERSIGHT_PREFIX` first have their ID pulled out with
    `extract_file_id()`; any other argument is handed to `file_stem_for_id()` as is.

    Raises:
        RuntimeError: if the computed stem doesn't match `HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX`.
    """
    is_string = isinstance(filename_or_id, str)

    if is_string and DOJ_FILE_NAME_REGEX.search(filename_or_id):
        return Path(filename_or_id).stem

    id_source = filename_or_id

    if is_string and filename_or_id.startswith(HOUSE_OVERSIGHT_PREFIX):
        id_source = extract_file_id(filename_or_id)

    stem = file_stem_for_id(id_source)

    if HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.match(stem):
        return stem

    raise RuntimeError(f"Invalid stem '{stem}' from '{filename_or_id}'")
|
|
33
51
|
|
|
34
52
|
|
|
35
53
|
def extract_file_id(filename_or_id: int | str | Path) -> str:
|
|
54
|
+
# DOJ 2026-01 files have different pattern
|
|
55
|
+
if isinstance(filename_or_id, str) and filename_or_id.startswith(EFTA_PREFIX):
|
|
56
|
+
return Path(filename_or_id).stem
|
|
57
|
+
|
|
36
58
|
if isinstance(filename_or_id, str):
|
|
37
59
|
filename_or_id = filename_or_id.removesuffix(',')
|
|
38
60
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
def _show_timestamps(epstein_files):
    """Debugging aid: call `warn()` on every document in `doj_files` with its timestamp."""
    for doj_doc in epstein_files.doj_files:
        message = f"timestamp: {doj_doc.timestamp}"
        doj_doc.warn(message)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _verify_filenames(epstein_files):
    """Debugging aid: print each path in `all_files` whose name no document in `all_documents` has."""
    known_names = {document.file_path.name for document in epstein_files.all_documents}
    orphans = (path for path in epstein_files.all_files if path.name not in known_names)

    for path in orphans:
        print(f"'{path}' is not in list of {len(known_names)} Document obj filenames!")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Helpers for dealing with environment variables."""
|
|
2
|
+
from os import environ
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from epstein_files.util.logging import exit_with_error, logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_env_dir(env_var_name: str, must_exist: bool = True) -> Path | None:
|
|
9
|
+
if (dir := environ.get(env_var_name)):
|
|
10
|
+
dir = Path(dir)
|
|
11
|
+
error_msg = f"env var {env_var_name} set to '{dir}' but that's not a directory"
|
|
12
|
+
|
|
13
|
+
if dir.is_dir():
|
|
14
|
+
return dir.resolve()
|
|
15
|
+
elif must_exist:
|
|
16
|
+
exit_with_error(f"Required {error_msg}.\n")
|
|
17
|
+
else:
|
|
18
|
+
logger.warning(f"Optional {error_msg}. Some features will be unavailable.")
|
|
19
|
+
return None
|
|
20
|
+
else:
|
|
21
|
+
logger.warning(f"Optional env var {env_var_name} not set. Some features will be unavailable.")
|
|
@@ -7,11 +7,11 @@ from rich.console import Console
|
|
|
7
7
|
from rich.highlighter import RegexHighlighter
|
|
8
8
|
from rich.text import Text
|
|
9
9
|
|
|
10
|
+
from epstein_files.documents.emails.emailers import EMAILER_ID_REGEXES
|
|
10
11
|
from epstein_files.util.constant.names import *
|
|
11
12
|
from epstein_files.util.constant.strings import *
|
|
12
13
|
from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
|
|
13
|
-
from epstein_files.util.constants import
|
|
14
|
-
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
|
|
14
|
+
from epstein_files.util.constants import EPSTEIN_V_ROTHSTEIN_EDWARDS, OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX
|
|
15
15
|
from epstein_files.util.data import sort_dict, without_falsey
|
|
16
16
|
from epstein_files.util.doc_cfg import *
|
|
17
17
|
from epstein_files.util.env import args
|
|
@@ -372,6 +372,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
372
372
|
label='bitcoin',
|
|
373
373
|
style='orange1 bold',
|
|
374
374
|
emailers={
|
|
375
|
+
AMIR_TAAKI: f"bitcoin bro, partner of {BROCK_PIERCE} (?)",
|
|
376
|
+
BROCK_PIERCE: 'crypto bro, Bannon business partner, Tether co-founder, arrested in house full of machine guns & "8,000 fotografias de pornopgraphia infantil"',
|
|
375
377
|
JEFFREY_WERNICK: 'former COO of Parler, involved in numerous crypto companies like Bitforex',
|
|
376
378
|
JEREMY_RUBIN: 'developer/researcher',
|
|
377
379
|
JOI_ITO: f"former head of {MIT_MEDIA_LAB} and MIT Digital Currency Initiative",
|
|
@@ -379,11 +381,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
379
381
|
},
|
|
380
382
|
patterns=[
|
|
381
383
|
r"Balaji",
|
|
384
|
+
r"Bart\s*Stephens", # co-founder, Blockchain Capital
|
|
385
|
+
r"Bioptix", # Now RIOT Blockchain
|
|
382
386
|
r"bitcoin(\s*Foundation)?",
|
|
383
|
-
r"block ?chain(\s*capital)?",
|
|
384
387
|
r"Brian Forde",
|
|
385
388
|
r"Brock(\s*Pierce)?",
|
|
386
|
-
r"coins?",
|
|
389
|
+
r"coins?(\s*Center)?",
|
|
387
390
|
r"Cory\s*Fields", # bitcoin dev
|
|
388
391
|
r"cr[iy]?pto(currenc(y|ies))?",
|
|
389
392
|
r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
|
|
@@ -395,12 +398,16 @@ HIGHLIGHTED_NAMES = [
|
|
|
395
398
|
r"Madars",
|
|
396
399
|
r"Mi(chael|ke)\s*Novogratz",
|
|
397
400
|
r"(Patrick\s*)?Murck",
|
|
398
|
-
r"
|
|
401
|
+
r"(Hester\s*)?Peirce",
|
|
402
|
+
r"(?-i:RIOT)", # (?-i:) makes it case sensitive
|
|
403
|
+
r"Ron\s*Rivest",
|
|
404
|
+
r"block ?chain(\s*capital)?",
|
|
399
405
|
r"(Ross\s*)?Ulbricht",
|
|
400
406
|
r"Silk\s*Road",
|
|
401
407
|
r"SpanCash",
|
|
402
408
|
r"Tether",
|
|
403
409
|
r"virtual\s*currenc(ies|y)",
|
|
410
|
+
r"Wire\s*ca\n?rd",
|
|
404
411
|
r"Wladimir( van der Laan)?", # bitcoin dev
|
|
405
412
|
r"(zero\s+knowledge\s+|zk)pro(of|tocols?)",
|
|
406
413
|
],
|
|
@@ -422,6 +429,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
422
429
|
},
|
|
423
430
|
patterns=[
|
|
424
431
|
r"Arthur Klein",
|
|
432
|
+
r"(Barry\s*)?Honig",
|
|
425
433
|
r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
|
|
426
434
|
r"Gruterite",
|
|
427
435
|
r"((John|Patricia)\s*)?Kluge",
|
|
@@ -575,6 +583,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
575
583
|
patterns=[
|
|
576
584
|
r"JEGE(\s*Inc)?",
|
|
577
585
|
r"LSJ",
|
|
586
|
+
r"Zorro(\s*Ranch)?",
|
|
578
587
|
],
|
|
579
588
|
),
|
|
580
589
|
HighlightedNames(
|
|
@@ -586,6 +595,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
586
595
|
ARDA_BESKARDES: 'NYC immigration attorney allegedly involved in sex-trafficking operations',
|
|
587
596
|
BENNET_MOSKOWITZ: f'represented the {EPSTEIN_ESTATE_EXECUTOR}s',
|
|
588
597
|
BRAD_KARP: 'head of the law firm Paul Weiss',
|
|
598
|
+
CHRISTIAN_EVERDELL: f"{GHISLAINE_MAXWELL}'s lawyer ca. 2021, Cohen & Gresser",
|
|
589
599
|
'Connie Zaguirre': f"office of {ROBERT_D_CRITTON_JR}",
|
|
590
600
|
DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
|
|
591
601
|
DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
|
|
@@ -606,6 +616,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
606
616
|
'Robert Gold': 'helped Epstein track down money belonging to Spanish families',
|
|
607
617
|
'Roy Black': CRIMINAL_DEFENSE_2008,
|
|
608
618
|
SCOTT_J_LINK: CRIMINAL_DEFENSE_ATTORNEY,
|
|
619
|
+
STACEY_RICHMAN: f"New York {CRIMINAL_DEFENSE_ATTORNEY}",
|
|
609
620
|
TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}', # relation of Fred Haddad?
|
|
610
621
|
},
|
|
611
622
|
patterns=[
|
|
@@ -617,6 +628,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
617
628
|
r"Michael J. Pike",
|
|
618
629
|
r"Paul,?\s*Weiss",
|
|
619
630
|
r"Steptoe(\s*& Johnson)?(\s*LLP)?",
|
|
631
|
+
r"Sull(ivan)?\s*(&|and)?\s*Crom(well)?",
|
|
620
632
|
r"Wein(berg|garten)",
|
|
621
633
|
],
|
|
622
634
|
),
|
|
@@ -645,6 +657,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
645
657
|
},
|
|
646
658
|
patterns=[
|
|
647
659
|
r"AfD",
|
|
660
|
+
r"Alfa(\s*Bank)",
|
|
648
661
|
r"(Angela )?Merk(el|le)",
|
|
649
662
|
r"Austria",
|
|
650
663
|
r"Belgi(an|um)",
|
|
@@ -776,8 +789,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
776
789
|
r"(Ray\s*)?Dalio",
|
|
777
790
|
r"(Richard\s*)?LeFrak",
|
|
778
791
|
r"Rockefeller(?! University)(\s*Foundation)?",
|
|
779
|
-
r"
|
|
792
|
+
r"SBNY",
|
|
780
793
|
r"Serageldin",
|
|
794
|
+
r"Signature\s*Bank",
|
|
795
|
+
r"(Ste(phen|ve)\s*)?Schwart?z?man",
|
|
796
|
+
r"Susquehanna",
|
|
781
797
|
r"UBS",
|
|
782
798
|
r"us.gio@jpmorgan.com",
|
|
783
799
|
r"Wall\s*Street(?!\s*Jour)",
|
|
@@ -805,7 +821,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
805
821
|
label='government',
|
|
806
822
|
style='color(24) bold',
|
|
807
823
|
emailers={
|
|
824
|
+
ALISON_J_NATHAN: "judge in New York's Southern District",
|
|
808
825
|
ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
|
|
826
|
+
CHRISTOPHER_DILORIO: 'self described whistleblower',
|
|
809
827
|
DANNY_FROST: 'Director of Communications at Manhattan D.A.',
|
|
810
828
|
'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
|
|
811
829
|
},
|
|
@@ -815,7 +833,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
815
833
|
r'Alice\s*Fisher|Fisher, Alice',
|
|
816
834
|
r"AML",
|
|
817
835
|
r"(Andrew\s*)?(McCabe|Natsios)",
|
|
836
|
+
r"(Assistant\s+)?State\s*Attorney",
|
|
818
837
|
r"Attorney General",
|
|
838
|
+
r'Barbara\s*Burns', # AUSA
|
|
819
839
|
r"((Bob|Robert)\s*)?Mueller",
|
|
820
840
|
r"(Byung\s)?Pak",
|
|
821
841
|
r"Case 1:19-cv-03377(-LAP)?",
|
|
@@ -828,13 +848,14 @@ HIGHLIGHTED_NAMES = [
|
|
|
828
848
|
r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
|
|
829
849
|
r"DHS",
|
|
830
850
|
r"DOJ",
|
|
851
|
+
r"EDGAR (Filing|Search)", # SEC database is EDGAR
|
|
831
852
|
r"FBI",
|
|
832
853
|
r"FCPA",
|
|
833
854
|
r"FDIC",
|
|
834
855
|
r"FDLE",
|
|
835
856
|
r"Federal\s*Bureau\s*of\s*Investigation",
|
|
836
857
|
r"FinCEN",
|
|
837
|
-
r"FINRA",
|
|
858
|
+
r"(www\.)?FINRA(\.org)?",
|
|
838
859
|
r"FOIA",
|
|
839
860
|
r"FTC",
|
|
840
861
|
r"(General\s*)?P(a|e)traeus",
|
|
@@ -857,11 +878,12 @@ HIGHLIGHTED_NAMES = [
|
|
|
857
878
|
r"(Michael\s*)?Reiter",
|
|
858
879
|
r"OGE",
|
|
859
880
|
r"Office\s*of\s*Government\s*Ethics",
|
|
881
|
+
r"PBPD",
|
|
860
882
|
r"police",
|
|
861
883
|
r"(Preet\s*)?Bharara",
|
|
862
884
|
r"SCOTUS",
|
|
863
885
|
r"SD(FL|NY)",
|
|
864
|
-
r"SEC",
|
|
886
|
+
r"SEC(\.gov)?",
|
|
865
887
|
r"Secret\s*Service",
|
|
866
888
|
r"Securities\s*and\s*Exchange\s*Commission",
|
|
867
889
|
r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
|
|
@@ -936,6 +958,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
936
958
|
r"(eh|(Ehud|Nili Priell)\s*)?barak",
|
|
937
959
|
r"EB",
|
|
938
960
|
r"Ehud\s*Barack",
|
|
961
|
+
r"Hapoalim",
|
|
939
962
|
r"Israeli?",
|
|
940
963
|
r"Jerusalem",
|
|
941
964
|
r"J\s*Street",
|
|
@@ -1143,6 +1166,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1143
1166
|
r"Colorado",
|
|
1144
1167
|
r"Connecticut",
|
|
1145
1168
|
r"Florida",
|
|
1169
|
+
r"Jersey\s*City",
|
|
1146
1170
|
r"Los Angeles",
|
|
1147
1171
|
r"Loudoun\s*County?",
|
|
1148
1172
|
r"Martha's\s*Vineyard",
|
|
@@ -1174,7 +1198,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1174
1198
|
style='dark_sea_green4',
|
|
1175
1199
|
emailers={
|
|
1176
1200
|
ANAS_ALRASHEED: 'former information minister of Kuwait (???)',
|
|
1177
|
-
AZIZA_ALAHMADI: 'Abu Dhabi Department of Culture & Tourism',
|
|
1201
|
+
AZIZA_ALAHMADI: 'Abu Dhabi Department of Culture & Tourism, assistant of Al Sabbagh',
|
|
1178
1202
|
RAAFAT_ALSABBAGH: 'Saudi royal advisor',
|
|
1179
1203
|
SHAHER_ABDULHAK_BESHER: 'Yemeni billionaire',
|
|
1180
1204
|
},
|
|
@@ -1348,6 +1372,8 @@ HIGHLIGHTED_NAMES = [
|
|
|
1348
1372
|
r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
|
|
1349
1373
|
r"(George\s*)?Nader",
|
|
1350
1374
|
r"GOP",
|
|
1375
|
+
r"((Chair|Jay|Joseph)\s*)?Clayton", # SEC chair, now SDNY
|
|
1376
|
+
r"((Bill|William)\s*)?Hinman",
|
|
1351
1377
|
r"Jeff(rey)?\s*Sessions",
|
|
1352
1378
|
r"(John\s*(R.?\s*)?)?Bolton",
|
|
1353
1379
|
r"Kasich",
|
|
@@ -1399,7 +1425,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1399
1425
|
emailers={
|
|
1400
1426
|
'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
|
|
1401
1427
|
MASHA_DROKOVA: 'silicon valley VC, former Putin Youth member',
|
|
1402
|
-
RENATA_BOLOTOVA: 'former model, fund manager at New York State Insurance Fund',
|
|
1428
|
+
RENATA_BOLOTOVA: 'former model, fund manager at New York State Insurance Fund, Рената Болотова',
|
|
1403
1429
|
SVETLANA_POZHIDAEVA: "Epstein's Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and David Blaine",
|
|
1404
1430
|
},
|
|
1405
1431
|
patterns=[
|
|
@@ -1418,17 +1444,19 @@ HIGHLIGHTED_NAMES = [
|
|
|
1418
1444
|
r"(Anastasia\s*)?Kuznetsova",
|
|
1419
1445
|
r"Lavrov",
|
|
1420
1446
|
r"Lukoil",
|
|
1447
|
+
r'(Semion\s*)?Mogilevich',
|
|
1421
1448
|
r"Moscow",
|
|
1422
1449
|
r"(Natalia\s*)?Veselnitskaya",
|
|
1423
1450
|
r"(Oleg\s*)?Deripaska",
|
|
1424
1451
|
r"Oleksandr Vilkul",
|
|
1425
1452
|
r"Onexim", # Prokhorov investment vehicle
|
|
1426
1453
|
r"Prokhorov",
|
|
1454
|
+
r"Rakishev",
|
|
1427
1455
|
r"Rosneft",
|
|
1428
1456
|
r"RT",
|
|
1429
1457
|
r"St.?\s*?Petersburg",
|
|
1430
1458
|
r'Svet',
|
|
1431
|
-
r"
|
|
1459
|
+
r"Russ?ian?",
|
|
1432
1460
|
r"Sberbank",
|
|
1433
1461
|
r"Soviet(\s*Union)?",
|
|
1434
1462
|
r"USSR",
|
|
@@ -1513,6 +1541,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1513
1541
|
r"@?realDonaldTrump",
|
|
1514
1542
|
r"(Alan\s*)?Weiss?elberg",
|
|
1515
1543
|
r"Alex\s*Jones",
|
|
1544
|
+
r"(Brad(ley)?\s*)?Parscale",
|
|
1516
1545
|
r"\bDJ?T\b",
|
|
1517
1546
|
r"Donald J. Tramp",
|
|
1518
1547
|
r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
|
|
@@ -1559,7 +1588,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1559
1588
|
r"S(ain)?t.?\s*Thomas",
|
|
1560
1589
|
r"USVI",
|
|
1561
1590
|
r"(?<!stein |vis-a-)VI(?!s-a-)",
|
|
1562
|
-
r"(The\s*)?Virgin\s*Is(al|la)
|
|
1591
|
+
r"(The\s*)?Virgin\s*Is(al|la)nd?s(\s*Daily\s*News)?", # Hard to make this work right
|
|
1563
1592
|
r"(West\s*)?Palm\s*Beach(\s*County)?(?!\s*(Daily|Post))",
|
|
1564
1593
|
],
|
|
1565
1594
|
),
|
|
@@ -1572,6 +1601,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1572
1601
|
r"(Gloria\s*)?Allred",
|
|
1573
1602
|
r"(Jane|Tiffany)\s*Doe",
|
|
1574
1603
|
r"Katie\s*Johnson",
|
|
1604
|
+
r"Minor\s*Victim",
|
|
1575
1605
|
r"pedophile",
|
|
1576
1606
|
r"Stephanie\s*Clifford",
|
|
1577
1607
|
r"Stormy\s*Daniels",
|
|
@@ -1584,7 +1614,9 @@ HIGHLIGHTED_NAMES = [
|
|
|
1584
1614
|
style='medium_orchid1',
|
|
1585
1615
|
emailers={
|
|
1586
1616
|
BRAD_EDWARDS: ROTHSTEIN_ROSENFELDT_ADLER,
|
|
1617
|
+
'Douglas Wigdor': f'lawsuit against {LEON_BLACK}, Wigdor LLP',
|
|
1587
1618
|
'Grant J. Smith': ROTHSTEIN_ROSENFELDT_ADLER,
|
|
1619
|
+
'Jeanne M. Christensen': f'lawsuit against {LEON_BLACK}, Wigdor LLP',
|
|
1588
1620
|
JACK_SCAROLA: 'Searcy Denney Scarola Barnhart & Shipley',
|
|
1589
1621
|
KEN_JENNE: ROTHSTEIN_ROSENFELDT_ADLER,
|
|
1590
1622
|
},
|
|
@@ -1595,6 +1627,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1595
1627
|
r"Paul\s*(G.\s*)?Cassell",
|
|
1596
1628
|
r"Rothstein\s*Rosenfeldt\s*Adler",
|
|
1597
1629
|
r"(Scott\s*)?Rothstein",
|
|
1630
|
+
r"Wigdor(Law)?",
|
|
1598
1631
|
],
|
|
1599
1632
|
),
|
|
1600
1633
|
HighlightedNames(
|
|
@@ -1621,7 +1654,11 @@ HIGHLIGHTED_NAMES = [
|
|
|
1621
1654
|
HighlightedNames(
|
|
1622
1655
|
emailers={GHISLAINE_MAXWELL: "Epstein's girlfriend, daughter of the spy Robert Maxwell"},
|
|
1623
1656
|
category='Epstein',
|
|
1624
|
-
patterns=[
|
|
1657
|
+
patterns=[
|
|
1658
|
+
r"gmax(1@ellmax.com)?",
|
|
1659
|
+
r"(The )?TerraMar Project",
|
|
1660
|
+
r"(Scott\s*)?Borgenson",
|
|
1661
|
+
],
|
|
1625
1662
|
style='deep_pink3',
|
|
1626
1663
|
),
|
|
1627
1664
|
HighlightedNames(emailers={JABOR_Y: '"an influential man in Qatar"'}, category=MIDEAST, style='spring_green1'),
|
|
@@ -1658,7 +1695,7 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1658
1695
|
HighlightedText(
|
|
1659
1696
|
label='header_field',
|
|
1660
1697
|
style='plum4',
|
|
1661
|
-
patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
|
|
1698
|
+
patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|[Il]nline-[Il]mages|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
|
|
1662
1699
|
),
|
|
1663
1700
|
HighlightedText(
|
|
1664
1701
|
label='http_links',
|
|
@@ -1668,7 +1705,11 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1668
1705
|
HighlightedText(
|
|
1669
1706
|
label='quoted_reply_line',
|
|
1670
1707
|
style='dim',
|
|
1671
|
-
patterns=[
|
|
1708
|
+
patterns=[
|
|
1709
|
+
REPLY_REGEX.pattern, r"^(> )?wrote:$",
|
|
1710
|
+
r"CONFIDENTIAL FOR ATTORNEY'S EYES ONLY(\nDO NOT COPY)?",
|
|
1711
|
+
r"PRIVILEGED ?- ?ATTORNEY WORK.*(\nCONFIDENTIAL - SUBJECT TO.*)?",
|
|
1712
|
+
],
|
|
1672
1713
|
),
|
|
1673
1714
|
HighlightedText(
|
|
1674
1715
|
label='redacted',
|
|
@@ -1695,7 +1736,7 @@ HIGHLIGHTED_TEXTS = [
|
|
|
1695
1736
|
ManualHighlight(
|
|
1696
1737
|
label='email_attachments',
|
|
1697
1738
|
style='gray30 italic',
|
|
1698
|
-
pattern=r"^(> )?Attachments: (?P<email_attachments>.*)",
|
|
1739
|
+
pattern=r"^(> )?(Attachments|[Il]nline-[Il]mages): (?P<email_attachments>.*)",
|
|
1699
1740
|
),
|
|
1700
1741
|
ManualHighlight(
|
|
1701
1742
|
label='email_timestamp',
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from rich.table import Table
|
|
2
|
+
from rich.text import Text
|
|
3
|
+
|
|
4
|
+
from epstein_files.util.rich import highlighter
|
|
5
|
+
|
|
6
|
+
HEADER_INDENT = Text(' ')
|
|
7
|
+
VERTICAL_BAR = '┃ ' # ⎹┃┇┋❘⦀🁢⏐┃⎹
|
|
8
|
+
TOP_BAR = '🁢 '
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LeftBarPanel(Table):
    """A stand-in for `Panel` whose only border is a vertical line down the left side."""

    @classmethod
    def build(cls, text: str | Text, bar_style: str, header: str | Text = ''):
        """
        Build a two column grid: a bar column styled with `bar_style` and a content column.

        A truthy `header` gets its own row next to `TOP_BAR`, followed by an empty spacer row.
        `text` is run through `highlighter` and split on newlines, one grid row per line.
        """
        panel = cls.grid(padding=0)
        panel.add_column(justify='left', style=bar_style)  # The bar itself
        panel.add_column(justify='left')  # The content

        rows = [(TOP_BAR, header), (VERTICAL_BAR, '')] if header else []
        rows.extend((VERTICAL_BAR, line) for line in highlighter(text).split('\n'))

        for bar, content in rows:
            panel.add_row(bar, content)

        return panel
|
epstein_files/util/logging.py
CHANGED
|
@@ -8,6 +8,7 @@ from rich.console import Console
|
|
|
8
8
|
from rich.highlighter import ReprHighlighter
|
|
9
9
|
from rich.logging import RichHandler
|
|
10
10
|
from rich.theme import Theme
|
|
11
|
+
from yaralyzer.util.helpers.env_helper import console_width_possibilities
|
|
11
12
|
|
|
12
13
|
from epstein_files.util.constant.strings import *
|
|
13
14
|
|
|
@@ -15,6 +16,7 @@ FILENAME_STYLE = 'gray27'
|
|
|
15
16
|
|
|
16
17
|
DOC_TYPE_STYLES = {
|
|
17
18
|
DOCUMENT_CLASS: 'grey69',
|
|
19
|
+
DOJ_FILE_CLASS: 'magenta',
|
|
18
20
|
EMAIL_CLASS: 'dark_orange3',
|
|
19
21
|
JSON_FILE_CLASS: 'sandy_brown',
|
|
20
22
|
MESSENGER_LOG_CLASS: 'deep_pink4',
|
|
@@ -27,29 +29,48 @@ LOG_THEME = {
|
|
|
27
29
|
}
|
|
28
30
|
|
|
29
31
|
LOG_THEME[f"{ReprHighlighter.base_style}epstein_filename"] = FILENAME_STYLE
|
|
30
|
-
LOG_LEVEL_ENV_VAR = '
|
|
32
|
+
LOG_LEVEL_ENV_VAR = 'EPSTEIN_LOG_LEVEL'
|
|
31
33
|
|
|
32
34
|
|
|
33
35
|
# Augment the standard log highlighter with 'epstein_filename' matcher
|
|
34
36
|
class LogHighlighter(ReprHighlighter):
    """`ReprHighlighter` extended with patterns for document type names and Epstein file names."""
    highlights = [
        *ReprHighlighter.highlights,
        # One named group per document type, also matching its 'Cfg' and plural 's' suffixed forms
        *(fr"(?P<{doc_type}>{doc_type}(Cfg|s)?)" for doc_type in DOC_TYPE_STYLES),
        # House Oversight or DOJ file names
        f"(?P<epstein_filename>{HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX.pattern}|{DOJ_FILE_NAME_REGEX.pattern})",
    ]
|
|
39
41
|
|
|
42
|
+
log_console = Console(
|
|
43
|
+
color_system='256',
|
|
44
|
+
stderr=True,
|
|
45
|
+
theme=Theme(LOG_THEME),
|
|
46
|
+
width=max(console_width_possibilities())
|
|
47
|
+
)
|
|
40
48
|
|
|
41
|
-
|
|
42
|
-
log_handler = RichHandler(console=log_console, highlighter=LogHighlighter())
|
|
49
|
+
|
|
50
|
+
log_handler = RichHandler(console=log_console, highlighter=LogHighlighter(), show_path=False)
|
|
43
51
|
logging.basicConfig(level="NOTSET", format="%(message)s", datefmt=" ", handlers=[log_handler])
|
|
44
|
-
logger = logging.getLogger(
|
|
52
|
+
# The logger other modules import from here. A preceding `logging.getLogger(__name__)`
# assignment was immediately overwritten by this one, so it has been dropped as dead code.
logger = logging.getLogger("epstein_text_files")
|
|
45
54
|
|
|
46
55
|
|
|
47
|
-
# Set log levels to suppress annoying output
|
|
56
|
+
# Set log levels to suppress annoying output from other packages
|
|
48
57
|
logging.getLogger('datefinder').setLevel(logging.FATAL)
|
|
49
58
|
logging.getLogger('rich_argparse').setLevel(logging.FATAL)
|
|
50
59
|
env_log_level_str = environ.get(LOG_LEVEL_ENV_VAR) or None
|
|
51
60
|
env_log_level = None
|
|
52
61
|
|
|
62
|
+
|
|
63
|
+
def exit_with_error(msg: str) -> None:
    """
    Log `msg` at ERROR level and terminate the process with exit status 1.

    Does not return normally: `SystemExit` is raised.
    """
    import sys  # Local import so this function doesn't depend on a module-level `import sys`

    print('')  # Emit a blank line on stdout first
    logger.error(msg + '\n')
    # The bare `exit` builtin is injected by the `site` module for interactive use and is
    # absent under e.g. `python -S`; sys.exit() is always available.
    sys.exit(1)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def set_log_level(log_level: int | str) -> None:
    """Apply `log_level` to the package logger and to every handler attached directly to it."""
    logger.setLevel(log_level)

    for handler in logger.handlers:
        handler.setLevel(log_level)
|
|
72
|
+
|
|
73
|
+
|
|
53
74
|
if env_log_level_str:
|
|
54
75
|
try:
|
|
55
76
|
env_log_level = getattr(logging, env_log_level_str)
|
|
@@ -58,10 +79,4 @@ if env_log_level_str:
|
|
|
58
79
|
env_log_level = logging.DEBUG
|
|
59
80
|
|
|
60
81
|
logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def exit_with_error(msg: str) -> None:
|
|
65
|
-
print('')
|
|
66
|
-
logger.error(msg + '\n')
|
|
67
|
-
exit(1)
|
|
82
|
+
set_log_level(env_log_level)
|