epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. epstein_files/__init__.py +31 -18
  2. epstein_files/documents/communication.py +9 -5
  3. epstein_files/documents/document.py +225 -136
  4. epstein_files/documents/doj_file.py +242 -0
  5. epstein_files/documents/doj_files/full_text.py +166 -0
  6. epstein_files/documents/email.py +138 -163
  7. epstein_files/documents/emails/email_header.py +21 -11
  8. epstein_files/documents/emails/emailers.py +223 -0
  9. epstein_files/documents/imessage/text_message.py +2 -3
  10. epstein_files/documents/json_file.py +18 -14
  11. epstein_files/documents/messenger_log.py +23 -39
  12. epstein_files/documents/other_file.py +48 -44
  13. epstein_files/epstein_files.py +54 -33
  14. epstein_files/person.py +142 -110
  15. epstein_files/util/constant/names.py +29 -6
  16. epstein_files/util/constant/output_files.py +2 -0
  17. epstein_files/util/constant/strings.py +12 -6
  18. epstein_files/util/constant/urls.py +17 -0
  19. epstein_files/util/constants.py +101 -174
  20. epstein_files/util/data.py +2 -0
  21. epstein_files/util/doc_cfg.py +20 -15
  22. epstein_files/util/env.py +24 -16
  23. epstein_files/util/file_helper.py +28 -6
  24. epstein_files/util/helpers/debugging_helper.py +13 -0
  25. epstein_files/util/helpers/env_helpers.py +21 -0
  26. epstein_files/util/highlighted_group.py +57 -16
  27. epstein_files/util/layout/left_bar_panel.py +26 -0
  28. epstein_files/util/logging.py +28 -13
  29. epstein_files/util/output.py +33 -10
  30. epstein_files/util/rich.py +28 -2
  31. epstein_files/util/word_count.py +7 -7
  32. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
  33. epstein_files-1.5.0.dist-info/RECORD +40 -0
  34. epstein_files-1.4.1.dist-info/RECORD +0 -34
  35. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
  36. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
  37. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
epstein_files/util/env.py CHANGED
@@ -5,23 +5,24 @@ from pathlib import Path
5
5
 
6
6
  from rich_argparse_plus import RichHelpFormatterPlus
7
7
 
8
- from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
9
- from epstein_files.util.logging import env_log_level, exit_with_error, logger
8
+ from epstein_files.util.constant.output_files import (ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH,
9
+ DOJ_2026_HTML_PATH, TEXT_MSGS_HTML_PATH)
10
+ from epstein_files.util.helpers.env_helpers import get_env_dir
11
+ from epstein_files.util.logging import env_log_level, exit_with_error, logger, set_log_level
10
12
 
11
13
  DEFAULT_WIDTH = 155
12
14
  DEFAULT_FILE = 'default_file'
13
15
  EPSTEIN_GENERATE = 'epstein_generate'
14
16
  HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
15
17
 
16
- # Verify Epstein docs dir exists
17
- EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
18
- DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
19
- DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
18
+ # Get dirs from Env vars
19
+ DOCS_DIR_ENV_VAR = 'EPSTEIN_DOCS_DIR'
20
+ DOJ_PDFS_20260130_DIR_ENV_VAR = 'EPSTEIN_DOJ_PDFS_20260130_DIR'
21
+ DOJ_TXTS_20260130_DIR_ENV_VAR = 'EPSTEIN_DOJ_TXTS_20260130_DIR'
20
22
 
21
- if not DOCS_DIR_ENV:
22
- exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
23
- elif not DOCS_DIR.exists():
24
- exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
23
+ DOCS_DIR: Path = get_env_dir(DOCS_DIR_ENV_VAR, must_exist=True)
24
+ DOJ_PDFS_20260130_DIR: Path = get_env_dir(DOJ_PDFS_20260130_DIR_ENV_VAR, must_exist=False)
25
+ DOJ_TXTS_20260130_DIR: Path = get_env_dir(DOJ_TXTS_20260130_DIR_ENV_VAR, must_exist=False)
25
26
 
26
27
  is_env_var_set = lambda s: len(environ.get(s) or '') > 0
27
28
  is_output_arg = lambda arg: any([arg.startswith(pfx) for pfx in ['colors_only', 'json', 'make_clean', 'output']])
@@ -41,6 +42,7 @@ output.add_argument('--email-timeline', action='store_true', help='print a table
41
42
  output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the eeailers info table')
42
43
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
43
44
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
45
+ output.add_argument('--output-doj-files', '-od', action='store_true', help='generate the DOJ files from 2026-01-30')
44
46
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
45
47
  output.add_argument('--output-other', '-oo', action='store_true', help='generate other files section')
46
48
  output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
@@ -61,7 +63,7 @@ debug.add_argument('--colors-only', '-c', action='store_true', help='print heade
61
63
  debug.add_argument('--constantize', action='store_true', help='constantize names when printing repr() of objects')
62
64
  debug.add_argument('--debug', '-d', action='store_true', help='set debug level to INFO')
63
65
  debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug level to DEBUG')
64
- debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
66
+ debug.add_argument('--stats', '-j', action='store_true', help='print JSON formatted stats about the files')
65
67
  debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
66
68
  debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
67
69
  debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
@@ -69,7 +71,11 @@ debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plai
69
71
 
70
72
 
71
73
  # Parse args
72
- args = parser.parse_args()
74
+ if environ.get('INVOKED_BY_PYTEST'):
75
+ args = parser.parse_args([EPSTEIN_GENERATE])
76
+ else:
77
+ args = parser.parse_args()
78
+
73
79
  is_html_script = parser.prog in HTML_SCRIPTS
74
80
 
75
81
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
@@ -100,6 +106,8 @@ if is_html_script:
100
106
  args.build = ALL_EMAILS_PATH
101
107
  elif args.email_timeline:
102
108
  args.build = CHRONOLOGICAL_EMAILS_PATH
109
+ elif args.output_doj_files:
110
+ args.build = DOJ_2026_HTML_PATH
103
111
  else:
104
112
  args.build = TEXT_MSGS_HTML_PATH
105
113
  elif parser.prog.startswith('epstein_') and not args.positional_args and not args.names:
@@ -114,13 +122,13 @@ if args.truncate and args.whole_file:
114
122
 
115
123
  # Log level args
116
124
  if args.deep_debug:
117
- logger.setLevel(logging.DEBUG)
125
+ set_log_level(logging.DEBUG)
118
126
  elif args.debug:
119
- logger.setLevel(logging.INFO)
127
+ set_log_level(logging.INFO)
120
128
  elif args.suppress_logs:
121
- logger.setLevel(logging.FATAL)
129
+ set_log_level(logging.FATAL)
122
130
  elif not env_log_level:
123
- logger.setLevel(logging.WARNING)
131
+ set_log_level(logging.WARNING)
124
132
 
125
133
  logger.debug(f'Log level set to {logger.level}...')
126
134
  args_str = ',\n'.join([f"{k}={v}" for k, v in vars(args).items() if v])
@@ -1,38 +1,60 @@
1
1
  import re
2
2
  from pathlib import Path
3
3
 
4
- from epstein_files.util.constant.strings import FILE_NAME_REGEX, FILE_STEM_REGEX, HOUSE_OVERSIGHT_PREFIX
5
- from epstein_files.util.env import DOCS_DIR
4
+ from epstein_files.util.constant.strings import (DOJ_FILE_NAME_REGEX, EFTA_PREFIX,
5
+ HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX, HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX,
6
+ HOUSE_OVERSIGHT_PREFIX)
7
+ from epstein_files.util.env import DOCS_DIR, DOJ_TXTS_20260130_DIR
6
8
  from epstein_files.util.logging import logger
7
9
 
8
10
  EXTRACTED_EMAILS_DIR = Path('emails_extracted_from_legal_filings')
9
- FILE_ID_REGEX = re.compile(fr".*{FILE_NAME_REGEX.pattern}")
11
+ FILE_ID_REGEX = re.compile(fr".*{HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX.pattern}")
10
12
  FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
11
13
  KB = 1024
12
14
  MB = KB * KB
13
15
 
14
16
  # Coerce methods handle both string and int arguments.
15
17
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
16
- coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
17
18
  file_size = lambda file_path: Path(file_path).stat().st_size
18
19
  id_str = lambda id: f"{int(id):06d}"
19
20
 
20
21
 
22
def coerce_file_path(filename_or_id: int | str) -> Path:
    """
    Return the `Path` of the text file identified by `filename_or_id`.

    String arguments matching `DOJ_FILE_NAME_REGEX` are searched for by file name in every
    subdirectory of `DOJ_TXTS_20260130_DIR`. Anything else resolves to a file directly inside
    `DOCS_DIR` (the returned path is not checked for existence).

    Raises:
        RuntimeError: if the argument looks like a DOJ file but the DOJ text dir is not
            configured, or no file with the coerced name exists below it.
    """
    filename = coerce_file_name(filename_or_id)

    if not (isinstance(filename_or_id, str) and DOJ_FILE_NAME_REGEX.match(filename_or_id)):
        return DOCS_DIR.joinpath(filename)

    # The DOJ dir is optional configuration so it can be None; fail with a readable error
    # instead of an AttributeError on `None.glob`.
    if DOJ_TXTS_20260130_DIR is None:
        raise RuntimeError(
            f"'{filename_or_id}' looks like DOJ file but EPSTEIN_DOJ_TXTS_20260130_DIR is not set to a valid directory"
        )

    for txt_file in DOJ_TXTS_20260130_DIR.glob('**/*.txt'):
        if txt_file.name == filename:
            return txt_file

    raise RuntimeError(f"'{filename_or_id}' looks like DOJ file but no file named {filename} in '{DOJ_TXTS_20260130_DIR}'")
34
+
35
+
21
36
  def coerce_file_stem(filename_or_id: int | str) -> str:
22
- """Generate a valid file_stem no matter what form the argument comes in."""
37
+ """Generate a valid file stem no matter what form the argument comes in."""
38
+ if isinstance(filename_or_id, str) and DOJ_FILE_NAME_REGEX.search(filename_or_id):
39
+ return Path(filename_or_id).stem
40
+
23
41
  if isinstance(filename_or_id, str) and filename_or_id.startswith(HOUSE_OVERSIGHT_PREFIX):
24
42
  file_id = extract_file_id(filename_or_id)
25
43
  file_stem = file_stem_for_id(file_id)
26
44
  else:
27
45
  file_stem = file_stem_for_id(filename_or_id)
28
46
 
29
- if not FILE_STEM_REGEX.match(file_stem):
47
+ if not HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.match(file_stem):
30
48
  raise RuntimeError(f"Invalid stem '{file_stem}' from '{filename_or_id}'")
31
49
 
32
50
  return file_stem
33
51
 
34
52
 
35
53
  def extract_file_id(filename_or_id: int | str | Path) -> str:
54
+ # DOJ 2026-01 files have different pattern
55
+ if isinstance(filename_or_id, str) and filename_or_id.startswith(EFTA_PREFIX):
56
+ return Path(filename_or_id).stem
57
+
36
58
  if isinstance(filename_or_id, str):
37
59
  filename_or_id = filename_or_id.removesuffix(',')
38
60
 
@@ -0,0 +1,13 @@
1
+
2
+
3
def _show_timestamps(epstein_files):
    """Debugging helper: have every DOJ file log a warning showing its `timestamp`."""
    for doj_file in epstein_files.doj_files:
        timestamp_msg = f"timestamp: {doj_file.timestamp}"
        doj_file.warn(timestamp_msg)
6
+
7
+
8
def _verify_filenames(epstein_files):
    """Debugging helper: print every path in `all_files` whose name no document object has."""
    known_names = {document.file_path.name for document in epstein_files.all_documents}
    num_known = len(known_names)

    for path in epstein_files.all_files:
        if path.name in known_names:
            continue

        print(f"'{path}' is not in list of {num_known} Document obj filenames!")
@@ -0,0 +1,21 @@
1
+ """Helpers for dealing with environment variables."""
2
+ from os import environ
3
+ from pathlib import Path
4
+
5
+ from epstein_files.util.logging import exit_with_error, logger
6
+
7
+
8
+ def get_env_dir(env_var_name: str, must_exist: bool = True) -> Path | None:
9
+ if (dir := environ.get(env_var_name)):
10
+ dir = Path(dir)
11
+ error_msg = f"env var {env_var_name} set to '{dir}' but that's not a directory"
12
+
13
+ if dir.is_dir():
14
+ return dir.resolve()
15
+ elif must_exist:
16
+ exit_with_error(f"Required {error_msg}.\n")
17
+ else:
18
+ logger.warning(f"Optional {error_msg}. Some features will be unavailable.")
19
+ return None
20
+ else:
21
+ logger.warning(f"Optional env var {env_var_name} not set. Some features will be unavailable.")
@@ -7,11 +7,11 @@ from rich.console import Console
7
7
  from rich.highlighter import RegexHighlighter
8
8
  from rich.text import Text
9
9
 
10
+ from epstein_files.documents.emails.emailers import EMAILER_ID_REGEXES
10
11
  from epstein_files.util.constant.names import *
11
12
  from epstein_files.util.constant.strings import *
12
13
  from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
13
- from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
14
- OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
14
+ from epstein_files.util.constants import EPSTEIN_V_ROTHSTEIN_EDWARDS, OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX
15
15
  from epstein_files.util.data import sort_dict, without_falsey
16
16
  from epstein_files.util.doc_cfg import *
17
17
  from epstein_files.util.env import args
@@ -372,6 +372,8 @@ HIGHLIGHTED_NAMES = [
372
372
  label='bitcoin',
373
373
  style='orange1 bold',
374
374
  emailers={
375
+ AMIR_TAAKI: f"bitcoin bro, partner of {BROCK_PIERCE} (?)",
376
+ BROCK_PIERCE: 'crypto bro, Bannon business partner, Tether co-founder, arrested in house full of machine guns & "8,000 fotografias de pornopgraphia infantil"',
375
377
  JEFFREY_WERNICK: 'former COO of Parler, involved in numerous crypto companies like Bitforex',
376
378
  JEREMY_RUBIN: 'developer/researcher',
377
379
  JOI_ITO: f"former head of {MIT_MEDIA_LAB} and MIT Digital Currency Initiative",
@@ -379,11 +381,12 @@ HIGHLIGHTED_NAMES = [
379
381
  },
380
382
  patterns=[
381
383
  r"Balaji",
384
+ r"Bart\s*Stephens", # co-founder, Blockchain Capital
385
+ r"Bioptix", # Now RIOT Blockchain
382
386
  r"bitcoin(\s*Foundation)?",
383
- r"block ?chain(\s*capital)?",
384
387
  r"Brian Forde",
385
388
  r"Brock(\s*Pierce)?",
386
- r"coins?",
389
+ r"coins?(\s*Center)?",
387
390
  r"Cory\s*Fields", # bitcoin dev
388
391
  r"cr[iy]?pto(currenc(y|ies))?",
389
392
  r"Digital\s*Currenc(ies|y)(\s*Initiative)?",
@@ -395,12 +398,16 @@ HIGHLIGHTED_NAMES = [
395
398
  r"Madars",
396
399
  r"Mi(chael|ke)\s*Novogratz",
397
400
  r"(Patrick\s*)?Murck",
398
- r"Ron Rivest",
401
+ r"(Hester\s*)?Peirce",
402
+ r"(?-i:RIOT)", # (?-i:) makes it case sensitive
403
+ r"Ron\s*Rivest",
404
+ r"block ?chain(\s*capital)?",
399
405
  r"(Ross\s*)?Ulbricht",
400
406
  r"Silk\s*Road",
401
407
  r"SpanCash",
402
408
  r"Tether",
403
409
  r"virtual\s*currenc(ies|y)",
410
+ r"Wire\s*ca\n?rd",
404
411
  r"Wladimir( van der Laan)?", # bitcoin dev
405
412
  r"(zero\s+knowledge\s+|zk)pro(of|tocols?)",
406
413
  ],
@@ -422,6 +429,7 @@ HIGHLIGHTED_NAMES = [
422
429
  },
423
430
  patterns=[
424
431
  r"Arthur Klein",
432
+ r"(Barry\s*)?Honig",
425
433
  r"((Bill|David)\s*)?Koch(\s*(Bro(s|thers)|Industries))?",
426
434
  r"Gruterite",
427
435
  r"((John|Patricia)\s*)?Kluge",
@@ -575,6 +583,7 @@ HIGHLIGHTED_NAMES = [
575
583
  patterns=[
576
584
  r"JEGE(\s*Inc)?",
577
585
  r"LSJ",
586
+ r"Zorro(\s*Ranch)?",
578
587
  ],
579
588
  ),
580
589
  HighlightedNames(
@@ -586,6 +595,7 @@ HIGHLIGHTED_NAMES = [
586
595
  ARDA_BESKARDES: 'NYC immigration attorney allegedly involved in sex-trafficking operations',
587
596
  BENNET_MOSKOWITZ: f'represented the {EPSTEIN_ESTATE_EXECUTOR}s',
588
597
  BRAD_KARP: 'head of the law firm Paul Weiss',
598
+ CHRISTIAN_EVERDELL: f"{GHISLAINE_MAXWELL}'s lawyer ca. 2021, Cohen & Gresser",
589
599
  'Connie Zaguirre': f"office of {ROBERT_D_CRITTON_JR}",
590
600
  DAVID_SCHOEN: f"{CRIMINAL_DEFENSE_ATTORNEY} after 2019 arrest",
591
601
  DEBBIE_FEIN: EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY,
@@ -606,6 +616,7 @@ HIGHLIGHTED_NAMES = [
606
616
  'Robert Gold': 'helped Epstein track down money belonging to Spanish families',
607
617
  'Roy Black': CRIMINAL_DEFENSE_2008,
608
618
  SCOTT_J_LINK: CRIMINAL_DEFENSE_ATTORNEY,
619
+ STACEY_RICHMAN: f"New York {CRIMINAL_DEFENSE_ATTORNEY}",
609
620
  TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}', # relation of Fred Haddad?
610
621
  },
611
622
  patterns=[
@@ -617,6 +628,7 @@ HIGHLIGHTED_NAMES = [
617
628
  r"Michael J. Pike",
618
629
  r"Paul,?\s*Weiss",
619
630
  r"Steptoe(\s*& Johnson)?(\s*LLP)?",
631
+ r"Sull(ivan)?\s*(&|and)?\s*Crom(well)?",
620
632
  r"Wein(berg|garten)",
621
633
  ],
622
634
  ),
@@ -645,6 +657,7 @@ HIGHLIGHTED_NAMES = [
645
657
  },
646
658
  patterns=[
647
659
  r"AfD",
660
+ r"Alfa(\s*Bank)",
648
661
  r"(Angela )?Merk(el|le)",
649
662
  r"Austria",
650
663
  r"Belgi(an|um)",
@@ -776,8 +789,11 @@ HIGHLIGHTED_NAMES = [
776
789
  r"(Ray\s*)?Dalio",
777
790
  r"(Richard\s*)?LeFrak",
778
791
  r"Rockefeller(?! University)(\s*Foundation)?",
779
- r"(Ste(phen|ve)\s*)?Schwart?z?man",
792
+ r"SBNY",
780
793
  r"Serageldin",
794
+ r"Signature\s*Bank",
795
+ r"(Ste(phen|ve)\s*)?Schwart?z?man",
796
+ r"Susquehanna",
781
797
  r"UBS",
782
798
  r"us.gio@jpmorgan.com",
783
799
  r"Wall\s*Street(?!\s*Jour)",
@@ -805,7 +821,9 @@ HIGHLIGHTED_NAMES = [
805
821
  label='government',
806
822
  style='color(24) bold',
807
823
  emailers={
824
+ ALISON_J_NATHAN: "judge in New York's Southern District",
808
825
  ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
826
+ CHRISTOPHER_DILORIO: 'self described whistleblower',
809
827
  DANNY_FROST: 'Director of Communications at Manhattan D.A.',
810
828
  'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
811
829
  },
@@ -815,7 +833,9 @@ HIGHLIGHTED_NAMES = [
815
833
  r'Alice\s*Fisher|Fisher, Alice',
816
834
  r"AML",
817
835
  r"(Andrew\s*)?(McCabe|Natsios)",
836
+ r"(Assistant\s+)?State\s*Attorney",
818
837
  r"Attorney General",
838
+ r'Barbara\s*Burns', # AUSA
819
839
  r"((Bob|Robert)\s*)?Mueller",
820
840
  r"(Byung\s)?Pak",
821
841
  r"Case 1:19-cv-03377(-LAP)?",
@@ -828,13 +848,14 @@ HIGHLIGHTED_NAMES = [
828
848
  r"Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)",
829
849
  r"DHS",
830
850
  r"DOJ",
851
+ r"EDGAR (Filing|Search)", # SEC database is EDGAR
831
852
  r"FBI",
832
853
  r"FCPA",
833
854
  r"FDIC",
834
855
  r"FDLE",
835
856
  r"Federal\s*Bureau\s*of\s*Investigation",
836
857
  r"FinCEN",
837
- r"FINRA",
858
+ r"(www\.)?FINRA(\.org)?",
838
859
  r"FOIA",
839
860
  r"FTC",
840
861
  r"(General\s*)?P(a|e)traeus",
@@ -857,11 +878,12 @@ HIGHLIGHTED_NAMES = [
857
878
  r"(Michael\s*)?Reiter",
858
879
  r"OGE",
859
880
  r"Office\s*of\s*Government\s*Ethics",
881
+ r"PBPD",
860
882
  r"police",
861
883
  r"(Preet\s*)?Bharara",
862
884
  r"SCOTUS",
863
885
  r"SD(FL|NY)",
864
- r"SEC",
886
+ r"SEC(\.gov)?",
865
887
  r"Secret\s*Service",
866
888
  r"Securities\s*and\s*Exchange\s*Commission",
867
889
  r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
@@ -936,6 +958,7 @@ HIGHLIGHTED_NAMES = [
936
958
  r"(eh|(Ehud|Nili Priell)\s*)?barak",
937
959
  r"EB",
938
960
  r"Ehud\s*Barack",
961
+ r"Hapoalim",
939
962
  r"Israeli?",
940
963
  r"Jerusalem",
941
964
  r"J\s*Street",
@@ -1143,6 +1166,7 @@ HIGHLIGHTED_NAMES = [
1143
1166
  r"Colorado",
1144
1167
  r"Connecticut",
1145
1168
  r"Florida",
1169
+ r"Jersey\s*City",
1146
1170
  r"Los Angeles",
1147
1171
  r"Loudoun\s*County?",
1148
1172
  r"Martha's\s*Vineyard",
@@ -1174,7 +1198,7 @@ HIGHLIGHTED_NAMES = [
1174
1198
  style='dark_sea_green4',
1175
1199
  emailers={
1176
1200
  ANAS_ALRASHEED: 'former information minister of Kuwait (???)',
1177
- AZIZA_ALAHMADI: 'Abu Dhabi Department of Culture & Tourism',
1201
+ AZIZA_ALAHMADI: 'Abu Dhabi Department of Culture & Tourism, assistant of Al Sabbagh',
1178
1202
  RAAFAT_ALSABBAGH: 'Saudi royal advisor',
1179
1203
  SHAHER_ABDULHAK_BESHER: 'Yemeni billionaire',
1180
1204
  },
@@ -1348,6 +1372,8 @@ HIGHLIGHTED_NAMES = [
1348
1372
  r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
1349
1373
  r"(George\s*)?Nader",
1350
1374
  r"GOP",
1375
+ r"((Chair|Jay|Joseph)\s*)?Clayton", # SEC chair, now SDNY
1376
+ r"((Bill|William)\s*)?Hinman",
1351
1377
  r"Jeff(rey)?\s*Sessions",
1352
1378
  r"(John\s*(R.?\s*)?)?Bolton",
1353
1379
  r"Kasich",
@@ -1399,7 +1425,7 @@ HIGHLIGHTED_NAMES = [
1399
1425
  emailers={
1400
1426
  'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
1401
1427
  MASHA_DROKOVA: 'silicon valley VC, former Putin Youth member',
1402
- RENATA_BOLOTOVA: 'former model, fund manager at New York State Insurance Fund',
1428
+ RENATA_BOLOTOVA: 'former model, fund manager at New York State Insurance Fund, Рената Болотова',
1403
1429
  SVETLANA_POZHIDAEVA: "Epstein's Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and David Blaine",
1404
1430
  },
1405
1431
  patterns=[
@@ -1418,17 +1444,19 @@ HIGHLIGHTED_NAMES = [
1418
1444
  r"(Anastasia\s*)?Kuznetsova",
1419
1445
  r"Lavrov",
1420
1446
  r"Lukoil",
1447
+ r'(Semion\s*)?Mogilevich',
1421
1448
  r"Moscow",
1422
1449
  r"(Natalia\s*)?Veselnitskaya",
1423
1450
  r"(Oleg\s*)?Deripaska",
1424
1451
  r"Oleksandr Vilkul",
1425
1452
  r"Onexim", # Prokhorov investment vehicle
1426
1453
  r"Prokhorov",
1454
+ r"Rakishev",
1427
1455
  r"Rosneft",
1428
1456
  r"RT",
1429
1457
  r"St.?\s*?Petersburg",
1430
1458
  r'Svet',
1431
- r"Russian?",
1459
+ r"Russ?ian?",
1432
1460
  r"Sberbank",
1433
1461
  r"Soviet(\s*Union)?",
1434
1462
  r"USSR",
@@ -1513,6 +1541,7 @@ HIGHLIGHTED_NAMES = [
1513
1541
  r"@?realDonaldTrump",
1514
1542
  r"(Alan\s*)?Weiss?elberg",
1515
1543
  r"Alex\s*Jones",
1544
+ r"(Brad(ley)?\s*)Parscale",
1516
1545
  r"\bDJ?T\b",
1517
1546
  r"Donald J. Tramp",
1518
1547
  r"(Donald\s+(J\.\s+)?)?Trump(ism|\s*(Org(anization)?|Properties)(\s*LLC)?)?",
@@ -1559,7 +1588,7 @@ HIGHLIGHTED_NAMES = [
1559
1588
  r"S(ain)?t.?\s*Thomas",
1560
1589
  r"USVI",
1561
1590
  r"(?<!stein |vis-a-)VI(?!s-a-)",
1562
- r"(The\s*)?Virgin\s*Is(al|la)nds(\s*Daily\s*News)?", # Hard to make this work right
1591
+ r"(The\s*)?Virgin\s*Is(al|la)nd?s(\s*Daily\s*News)?", # Hard to make this work right
1563
1592
  r"(West\s*)?Palm\s*Beach(\s*County)?(?!\s*(Daily|Post))",
1564
1593
  ],
1565
1594
  ),
@@ -1572,6 +1601,7 @@ HIGHLIGHTED_NAMES = [
1572
1601
  r"(Gloria\s*)?Allred",
1573
1602
  r"(Jane|Tiffany)\s*Doe",
1574
1603
  r"Katie\s*Johnson",
1604
+ r"Minor\s*Victim",
1575
1605
  r"pedophile",
1576
1606
  r"Stephanie\s*Clifford",
1577
1607
  r"Stormy\s*Daniels",
@@ -1584,7 +1614,9 @@ HIGHLIGHTED_NAMES = [
1584
1614
  style='medium_orchid1',
1585
1615
  emailers={
1586
1616
  BRAD_EDWARDS: ROTHSTEIN_ROSENFELDT_ADLER,
1617
+ 'Douglas Wigdor': f'lawsuit against {LEON_BLACK}, Wigdor LLP',
1587
1618
  'Grant J. Smith': ROTHSTEIN_ROSENFELDT_ADLER,
1619
+ 'Jeanne M. Christensen': f'lawsuit against {LEON_BLACK}, Wigdor LLP',
1588
1620
  JACK_SCAROLA: 'Searcy Denney Scarola Barnhart & Shipley',
1589
1621
  KEN_JENNE: ROTHSTEIN_ROSENFELDT_ADLER,
1590
1622
  },
@@ -1595,6 +1627,7 @@ HIGHLIGHTED_NAMES = [
1595
1627
  r"Paul\s*(G.\s*)?Cassell",
1596
1628
  r"Rothstein\s*Rosenfeldt\s*Adler",
1597
1629
  r"(Scott\s*)?Rothstein",
1630
+ r"Wigdor(Law)?",
1598
1631
  ],
1599
1632
  ),
1600
1633
  HighlightedNames(
@@ -1621,7 +1654,11 @@ HIGHLIGHTED_NAMES = [
1621
1654
  HighlightedNames(
1622
1655
  emailers={GHISLAINE_MAXWELL: "Epstein's girlfriend, daughter of the spy Robert Maxwell"},
1623
1656
  category='Epstein',
1624
- patterns=[r"gmax(1@ellmax.com)?", r"(The )?TerraMar Project"],
1657
+ patterns=[
1658
+ r"gmax(1@ellmax.com)?",
1659
+ r"(The )?TerraMar Project",
1660
+ r"(Scott\s*)?Borgenson",
1661
+ ],
1625
1662
  style='deep_pink3',
1626
1663
  ),
1627
1664
  HighlightedNames(emailers={JABOR_Y: '"an influential man in Qatar"'}, category=MIDEAST, style='spring_green1'),
@@ -1658,7 +1695,7 @@ HIGHLIGHTED_TEXTS = [
1658
1695
  HighlightedText(
1659
1696
  label='header_field',
1660
1697
  style='plum4',
1661
- patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
1698
+ patterns=[r'^[>• ]{,4}(Date ?|From|Sent|To|C[cC]|Importance|Reply[- ]?To|Subject|Bee|B[cC]{2}|Attachments|Flag|Classification|[Il]nline-[Il]mages|((A|Debut du message transfer[&e]|De(stinataire)?|Envoye|Expe(cl|d)iteur|Objet|Q|Sujet) ?)):|^on behalf of'],
1662
1699
  ),
1663
1700
  HighlightedText(
1664
1701
  label='http_links',
@@ -1668,7 +1705,11 @@ HIGHLIGHTED_TEXTS = [
1668
1705
  HighlightedText(
1669
1706
  label='quoted_reply_line',
1670
1707
  style='dim',
1671
- patterns=[REPLY_REGEX.pattern, r"^(> )?wrote:$"],
1708
+ patterns=[
1709
+ REPLY_REGEX.pattern, r"^(> )?wrote:$",
1710
+ r"CONFIDENTIAL FOR ATTORNEY'S EYES ONLY(\nDO NOT COPY)?",
1711
+ r"PRIVILEGED ?- ?ATTORNEY WORK.*(\nCONFIDENTIAL - SUBJECT TO.*)?",
1712
+ ],
1672
1713
  ),
1673
1714
  HighlightedText(
1674
1715
  label='redacted',
@@ -1695,7 +1736,7 @@ HIGHLIGHTED_TEXTS = [
1695
1736
  ManualHighlight(
1696
1737
  label='email_attachments',
1697
1738
  style='gray30 italic',
1698
- pattern=r"^(> )?Attachments: (?P<email_attachments>.*)",
1739
+ pattern=r"^(> )?(Attachments|[Il]nline-[Il]mages): (?P<email_attachments>.*)",
1699
1740
  ),
1700
1741
  ManualHighlight(
1701
1742
  label='email_timestamp',
@@ -0,0 +1,26 @@
1
+ from rich.table import Table
2
+ from rich.text import Text
3
+
4
+ from epstein_files.util.rich import highlighter
5
+
6
+ HEADER_INDENT = Text(' ')
7
+ VERTICAL_BAR = '┃ ' # ⎹┃┇┋❘⦀🁢⏐┃⎹
8
+ TOP_BAR = '🁢 '
9
+
10
+
11
class LeftBarPanel(Table):
    """A grid `Table` standing in for a `Panel`: content with one vertical line down its left side."""

    @classmethod
    def build(cls, text: str | Text, bar_style: str, header: str | Text = ''):
        """
        Build a two column grid: a bar column styled with `bar_style` and a content column.

        If `header` is truthy it goes on a first row next to `TOP_BAR`, followed by an empty
        spacer row. Then `text` is run through `highlighter` and split on newlines, with each
        resulting line getting its own row next to `VERTICAL_BAR`.
        """
        panel = cls.grid(padding=0)
        panel.add_column(justify='left', style=bar_style)  # The bar itself
        panel.add_column(justify='left')  # The actual content

        header_rows = ((TOP_BAR, header), (VERTICAL_BAR, '')) if header else ()

        for bar, content in header_rows:
            panel.add_row(bar, content)

        for content_line in highlighter(text).split('\n'):
            panel.add_row(VERTICAL_BAR, content_line)

        return panel
@@ -8,6 +8,7 @@ from rich.console import Console
8
8
  from rich.highlighter import ReprHighlighter
9
9
  from rich.logging import RichHandler
10
10
  from rich.theme import Theme
11
+ from yaralyzer.util.helpers.env_helper import console_width_possibilities
11
12
 
12
13
  from epstein_files.util.constant.strings import *
13
14
 
@@ -15,6 +16,7 @@ FILENAME_STYLE = 'gray27'
15
16
 
16
17
  DOC_TYPE_STYLES = {
17
18
  DOCUMENT_CLASS: 'grey69',
19
+ DOJ_FILE_CLASS: 'magenta',
18
20
  EMAIL_CLASS: 'dark_orange3',
19
21
  JSON_FILE_CLASS: 'sandy_brown',
20
22
  MESSENGER_LOG_CLASS: 'deep_pink4',
@@ -27,29 +29,48 @@ LOG_THEME = {
27
29
  }
28
30
 
29
31
  LOG_THEME[f"{ReprHighlighter.base_style}epstein_filename"] = FILENAME_STYLE
30
- LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
32
+ LOG_LEVEL_ENV_VAR = 'EPSTEIN_LOG_LEVEL'
31
33
 
32
34
 
33
35
  # Augment the standard log highlighter with 'epstein_filename' matcher
34
36
  class LogHighlighter(ReprHighlighter):
35
37
  highlights = ReprHighlighter.highlights + [
36
38
  *[fr"(?P<{doc_type}>{doc_type}(Cfg|s)?)" for doc_type in DOC_TYPE_STYLES.keys()],
37
- "(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
39
+ "(?P<epstein_filename>" + '|'.join([HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX.pattern, DOJ_FILE_NAME_REGEX.pattern]) + ')',
38
40
  ]
39
41
 
42
+ log_console = Console(
43
+ color_system='256',
44
+ stderr=True,
45
+ theme=Theme(LOG_THEME),
46
+ width=max(console_width_possibilities())
47
+ )
40
48
 
41
- log_console = Console(color_system='256', theme=Theme(LOG_THEME))
42
- log_handler = RichHandler(console=log_console, highlighter=LogHighlighter())
49
+
50
+ log_handler = RichHandler(console=log_console, highlighter=LogHighlighter(), show_path=False)
43
51
  logging.basicConfig(level="NOTSET", format="%(message)s", datefmt=" ", handlers=[log_handler])
44
- logger = logging.getLogger("rich")
52
+ logger = logging.getLogger(__name__)
53
+ logger = logging.getLogger("epstein_text_files")
45
54
 
46
55
 
47
- # Set log levels to suppress annoying output
56
+ # Set log levels to suppress annoying output from other packages
48
57
  logging.getLogger('datefinder').setLevel(logging.FATAL)
49
58
  logging.getLogger('rich_argparse').setLevel(logging.FATAL)
50
59
  env_log_level_str = environ.get(LOG_LEVEL_ENV_VAR) or None
51
60
  env_log_level = None
52
61
 
62
+
63
def exit_with_error(msg: str) -> None:
    """
    Print a blank line, log `msg` (plus a trailing newline) at ERROR level, then exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print('')
    logger.error(msg + '\n')
    # Raise SystemExit directly: the `exit()` builtin is injected by the `site` module for
    # interactive use and is not guaranteed to exist (e.g. when running with `python -S`).
    raise SystemExit(1)
67
+
68
+
69
def set_log_level(log_level: int | str) -> None:
    """Apply `log_level` to the module `logger` and then to each handler attached directly to it."""
    logger.setLevel(log_level)

    for handler in logger.handlers:
        handler.setLevel(log_level)
+ lg.setLevel(log_level)
72
+
73
+
53
74
  if env_log_level_str:
54
75
  try:
55
76
  env_log_level = getattr(logging, env_log_level_str)
@@ -58,10 +79,4 @@ if env_log_level_str:
58
79
  env_log_level = logging.DEBUG
59
80
 
60
81
  logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
61
- logger.setLevel(env_log_level)
62
-
63
-
64
- def exit_with_error(msg: str) -> None:
65
- print('')
66
- logger.error(msg + '\n')
67
- exit(1)
82
+ set_log_level(env_log_level)