epstein-files 1.2.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. epstein_files/__init__.py +55 -23
  2. epstein_files/documents/communication.py +9 -5
  3. epstein_files/documents/document.py +231 -135
  4. epstein_files/documents/doj_file.py +242 -0
  5. epstein_files/documents/doj_files/full_text.py +166 -0
  6. epstein_files/documents/email.py +289 -232
  7. epstein_files/documents/emails/email_header.py +35 -16
  8. epstein_files/documents/emails/emailers.py +223 -0
  9. epstein_files/documents/imessage/text_message.py +2 -3
  10. epstein_files/documents/json_file.py +18 -14
  11. epstein_files/documents/messenger_log.py +23 -39
  12. epstein_files/documents/other_file.py +54 -48
  13. epstein_files/epstein_files.py +65 -29
  14. epstein_files/person.py +151 -94
  15. epstein_files/util/constant/names.py +37 -10
  16. epstein_files/util/constant/output_files.py +2 -0
  17. epstein_files/util/constant/strings.py +14 -7
  18. epstein_files/util/constant/urls.py +17 -0
  19. epstein_files/util/constants.py +556 -391
  20. epstein_files/util/data.py +2 -0
  21. epstein_files/util/doc_cfg.py +44 -33
  22. epstein_files/util/env.py +34 -19
  23. epstein_files/util/file_helper.py +30 -6
  24. epstein_files/util/helpers/debugging_helper.py +13 -0
  25. epstein_files/util/helpers/env_helpers.py +21 -0
  26. epstein_files/util/highlighted_group.py +121 -37
  27. epstein_files/util/layout/left_bar_panel.py +26 -0
  28. epstein_files/util/logging.py +28 -13
  29. epstein_files/util/output.py +49 -40
  30. epstein_files/util/rich.py +30 -3
  31. epstein_files/util/word_count.py +7 -7
  32. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/METADATA +16 -3
  33. epstein_files-1.5.0.dist-info/RECORD +40 -0
  34. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +1 -1
  35. epstein_files-1.2.5.dist-info/RECORD +0 -34
  36. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
  37. {epstein_files-1.2.5.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,26 @@
1
+ from rich.table import Table
2
+ from rich.text import Text
3
+
4
+ from epstein_files.util.rich import highlighter
5
+
6
+ HEADER_INDENT = Text(' ')
7
+ VERTICAL_BAR = '┃ ' # ⎹┃┇┋❘⦀🁢⏐┃⎹
8
+ TOP_BAR = '🁢 '
9
+
10
+
11
+ class LeftBarPanel(Table):
12
+ """Create a faux `Panel` that just has a single vertical line down the left side."""
13
+ @classmethod
14
+ def build(cls, text: str | Text, bar_style: str, header: str | Text = ''):
15
+ table = cls.grid(padding=0)
16
+ table.add_column(justify='left', style=bar_style) # Column for the line
17
+ table.add_column(justify='left') # Column for content
18
+
19
+ if header:
20
+ table.add_row(TOP_BAR, header)
21
+ table.add_row(VERTICAL_BAR, '')
22
+
23
+ for txt_line in highlighter(text).split('\n'):
24
+ table.add_row(VERTICAL_BAR, txt_line)
25
+
26
+ return table
@@ -8,6 +8,7 @@ from rich.console import Console
8
8
  from rich.highlighter import ReprHighlighter
9
9
  from rich.logging import RichHandler
10
10
  from rich.theme import Theme
11
+ from yaralyzer.util.helpers.env_helper import console_width_possibilities
11
12
 
12
13
  from epstein_files.util.constant.strings import *
13
14
 
@@ -15,6 +16,7 @@ FILENAME_STYLE = 'gray27'
15
16
 
16
17
  DOC_TYPE_STYLES = {
17
18
  DOCUMENT_CLASS: 'grey69',
19
+ DOJ_FILE_CLASS: 'magenta',
18
20
  EMAIL_CLASS: 'dark_orange3',
19
21
  JSON_FILE_CLASS: 'sandy_brown',
20
22
  MESSENGER_LOG_CLASS: 'deep_pink4',
@@ -27,29 +29,48 @@ LOG_THEME = {
27
29
  }
28
30
 
29
31
  LOG_THEME[f"{ReprHighlighter.base_style}epstein_filename"] = FILENAME_STYLE
30
- LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
32
+ LOG_LEVEL_ENV_VAR = 'EPSTEIN_LOG_LEVEL'
31
33
 
32
34
 
33
35
  # Augment the standard log highlighter with 'epstein_filename' matcher
34
36
  class LogHighlighter(ReprHighlighter):
35
37
  highlights = ReprHighlighter.highlights + [
36
38
  *[fr"(?P<{doc_type}>{doc_type}(Cfg|s)?)" for doc_type in DOC_TYPE_STYLES.keys()],
37
- "(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
39
+ "(?P<epstein_filename>" + '|'.join([HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX.pattern, DOJ_FILE_NAME_REGEX.pattern]) + ')',
38
40
  ]
39
41
 
42
+ log_console = Console(
43
+ color_system='256',
44
+ stderr=True,
45
+ theme=Theme(LOG_THEME),
46
+ width=max(console_width_possibilities())
47
+ )
40
48
 
41
- log_console = Console(color_system='256', theme=Theme(LOG_THEME))
42
- log_handler = RichHandler(console=log_console, highlighter=LogHighlighter())
49
+
50
+ log_handler = RichHandler(console=log_console, highlighter=LogHighlighter(), show_path=False)
43
51
  logging.basicConfig(level="NOTSET", format="%(message)s", datefmt=" ", handlers=[log_handler])
44
- logger = logging.getLogger("rich")
52
+ logger = logging.getLogger(__name__)
53
+ logger = logging.getLogger("epstein_text_files")
45
54
 
46
55
 
47
- # Set log levels to suppress annoying output
56
+ # Set log levels to suppress annoying output from other packages
48
57
  logging.getLogger('datefinder').setLevel(logging.FATAL)
49
58
  logging.getLogger('rich_argparse').setLevel(logging.FATAL)
50
59
  env_log_level_str = environ.get(LOG_LEVEL_ENV_VAR) or None
51
60
  env_log_level = None
52
61
 
62
+
63
+ def exit_with_error(msg: str) -> None:
64
+ print('')
65
+ logger.error(msg + '\n')
66
+ exit(1)
67
+
68
+
69
+ def set_log_level(log_level: int | str) -> None:
70
+ for lg in [logger] + logger.handlers:
71
+ lg.setLevel(log_level)
72
+
73
+
53
74
  if env_log_level_str:
54
75
  try:
55
76
  env_log_level = getattr(logging, env_log_level_str)
@@ -58,10 +79,4 @@ if env_log_level_str:
58
79
  env_log_level = logging.DEBUG
59
80
 
60
81
  logger.warning(f"Setting log level to {env_log_level} based on {LOG_LEVEL_ENV_VAR} env var...")
61
- logger.setLevel(env_log_level)
62
-
63
-
64
- def exit_with_error(msg: str) -> None:
65
- print('')
66
- logger.error(msg + '\n')
67
- exit(1)
82
+ set_log_level(env_log_level)
@@ -1,11 +1,13 @@
1
1
  import json
2
2
  from os import unlink
3
+ from subprocess import CalledProcessError, check_output
3
4
  from typing import cast
4
5
 
5
6
  from rich.padding import Padding
6
7
 
7
8
  from epstein_files.documents.document import Document
8
- from epstein_files.documents.email import INTERESTING_TRUNCATION_LENGTHS, Email
9
+ from epstein_files.documents.doj_file import DojFile
10
+ from epstein_files.documents.email import Email
9
11
  from epstein_files.documents.messenger_log import MessengerLog
10
12
  from epstein_files.documents.other_file import FIRST_FEW_LINES, OtherFile
11
13
  from epstein_files.epstein_files import EpsteinFiles, count_by_month
@@ -29,6 +31,8 @@ PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
29
31
 
30
32
  # Order matters. Default names to print emails for.
31
33
  DEFAULT_EMAILERS = [
34
+ BROCK_PIERCE,
35
+ AMIR_TAAKI,
32
36
  JEREMY_RUBIN,
33
37
  JABOR_Y,
34
38
  JOI_ITO,
@@ -42,7 +46,6 @@ DEFAULT_EMAILERS = [
42
46
  EHUD_BARAK,
43
47
  STEVE_BANNON,
44
48
  TYLER_SHEARS,
45
- JIDE_ZEITLIN,
46
49
  CHRISTINA_GALBRAITH,
47
50
  MOHAMED_WAHEED_HASSAN,
48
51
  JENNIFER_JACQUET,
@@ -53,40 +56,35 @@ DEFAULT_EMAILERS = [
53
56
  JEFFREY_EPSTEIN,
54
57
  ]
55
58
 
56
- INTERESTING_EMAIL_IDS = [
57
- '032229', # Michael Wolff on strategy
58
- '028784', # seminars: Money / Power
59
- '030630', # 'What happens with zubair's project?'
60
- '033178', # 'How is it going with Zubair?'
61
- '022396', # Ukraine friend
62
- '026505', # I know how dirty trump is
63
- '029679', # Trump's driver was the bag man
64
- '026258', '026260', # Bannon cripto coin issues
65
- '032359', # Jabor e-currency
66
- '031451', '031596', # "would you like photso of donald and girls in bikinis in my kitchen."
67
- '031601', # Old gf i gave to donald
68
- '030727', # David Stern "Death of chinese shareholder quite an issue. What can we do with Qataris here?"
69
- '030725', # David Stern in Moscow
70
- '029098', # Nowak, "her Skype contact is in moscow."
71
- '030714', # Bannon, Russian Dugan shout out
72
- '031659', # "i have met some very bad people „ none as bad as trump"
73
- ]
74
-
75
- for id in INTERESTING_TRUNCATION_LENGTHS:
76
- if id not in INTERESTING_EMAIL_IDS:
77
- logger.debug(f"Adding INTERESTING_TRUNCATION_LENGTHS ID {id} to INTERESTING_EMAIL_IDS")
78
- INTERESTING_EMAIL_IDS.append(id)
79
-
80
-
81
59
  INTERESTING_TEXT_IDS = [
82
60
  '027275', # "Crypto- Kerry- Qatar -sessions"
83
61
  '027165', # melaniee walker crypto health
84
62
  ]
85
63
 
86
64
 
65
+ def print_doj_files(epstein_files: EpsteinFiles) -> list[DojFile | Email]:
66
+ last_was_empty = False
67
+ printed_doj_files: list[DojFile | Email] = []
68
+
69
+ for doj_file in Document.sort_by_timestamp(epstein_files.all_doj_files):
70
+ if isinstance(doj_file, DojFile) and (doj_file.is_empty or doj_file.is_bad_ocr):
71
+ console.print(doj_file.image_with_no_text_msg(), style='dim')
72
+ last_was_empty = True
73
+ continue
74
+
75
+ if last_was_empty:
76
+ console.line()
77
+
78
+ console.print(doj_file)
79
+ last_was_empty = False
80
+ printed_doj_files.append(doj_file)
81
+
82
+ return printed_doj_files
83
+
84
+
87
85
  def print_email_timeline(epstein_files: EpsteinFiles) -> None:
88
86
  """Print a table of all emails in chronological order."""
89
- emails = Document.sort_by_timestamp([e for e in epstein_files.non_duplicate_emails() if not e.is_mailing_list()])
87
+ emails = Document.sort_by_timestamp([e for e in epstein_files.non_duplicate_emails() if not e.is_mailing_list])
90
88
  title = f'Table of All {len(emails):,} Non-Junk Emails in Chronological Order (actual emails below)'
91
89
  table = Email.build_emails_table(emails, title=title, show_length=True)
92
90
  console.print(Padding(table, (2, 0)))
@@ -101,7 +99,7 @@ def print_emailers_info(epstein_files: EpsteinFiles) -> None:
101
99
  """Print tbe summary table of everyone in the files to an image."""
102
100
  print_color_key()
103
101
  console.line()
104
- all_emailers = sorted(epstein_files.emailers(), key=lambda person: person.sort_key())
102
+ all_emailers = sorted(epstein_files.emailers(), key=lambda person: person.sort_key)
105
103
  console.print(Person.emailer_info_table(all_emailers, show_epstein_total=True))
106
104
 
107
105
  if not args.build:
@@ -111,8 +109,17 @@ def print_emailers_info(epstein_files: EpsteinFiles) -> None:
111
109
  svg_path = f"{EMAILERS_TABLE_PNG_PATH}.svg"
112
110
  console.save_svg(svg_path, theme=HTML_TERMINAL_THEME, title="Epstein Emailers")
113
111
  log_file_write(svg_path)
114
- import cairosvg
115
- cairosvg.svg2png(url=svg_path, write_to=str(EMAILERS_TABLE_PNG_PATH))
112
+
113
+ try:
114
+ # Inkscape is better at converting svg to png
115
+ inkscape_cmd_args = ['inkscape', f'--export-filename={EMAILERS_TABLE_PNG_PATH}', svg_path]
116
+ logger.warning(f"Running inkscape cmd: {' '.join(inkscape_cmd_args)}")
117
+ check_output(inkscape_cmd_args)
118
+ except (CalledProcessError, FileNotFoundError) as e:
119
+ logger.error(f"Failed to convert svg to png with inkscape, falling back to cairosvg: {e}")
120
+ import cairosvg
121
+ cairosvg.svg2png(url=svg_path, write_to=str(EMAILERS_TABLE_PNG_PATH))
122
+
116
123
  log_file_write(EMAILERS_TABLE_PNG_PATH)
117
124
  unlink(svg_path)
118
125
 
@@ -120,7 +127,8 @@ def print_emailers_info(epstein_files: EpsteinFiles) -> None:
120
127
  def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
121
128
  """Returns emails that were printed (may contain dupes if printed for both author and recipient)."""
122
129
  print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
123
- all_emailers = sorted(epstein_files.emailers(), key=lambda person: person.earliest_email_at())
130
+ all_emailers = sorted(epstein_files.emailers(), key=lambda person: person.earliest_email_at)
131
+ all_emails = Person.emails_from_people(all_emailers)
124
132
  num_emails_printed_since_last_color_key = 0
125
133
  printed_emails: list[Email] = []
126
134
  people_to_print: list[Person]
@@ -157,7 +165,8 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
157
165
 
158
166
  # Print other interesting emails
159
167
  printed_email_ids = [email.file_id for email in printed_emails]
160
- extra_emails = [e for e in epstein_files.for_ids(INTERESTING_EMAIL_IDS) if e.file_id not in printed_email_ids]
168
+ extra_emails = [e for e in all_emails if e.is_interesting and e.file_id not in printed_email_ids]
169
+ logger.warning(f"Found {len(extra_emails)} extra_emails...")
161
170
 
162
171
  if len(extra_emails) > 0:
163
172
  print_subtitle_panel(OTHER_INTERESTING_EMAILS_SUBTITLE)
@@ -171,7 +180,7 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
171
180
  _verify_all_emails_were_printed(epstein_files, printed_emails)
172
181
 
173
182
  _print_email_device_signature_info(epstein_files)
174
- fwded_articles = [e for e in printed_emails if e.config and e.is_fwded_article()]
183
+ fwded_articles = [e for e in printed_emails if e.config and e.is_fwded_article]
175
184
  log_msg = f"Rewrote {len(Email.rewritten_header_ids)} of {len(printed_emails)} email headers"
176
185
  logger.warning(f" -> {log_msg}, {len(fwded_articles)} of the Emails printed were forwarded articles.")
177
186
  return printed_emails
@@ -188,7 +197,7 @@ def print_json_files(epstein_files: EpsteinFiles):
188
197
  else:
189
198
  for json_file in epstein_files.json_files:
190
199
  console.line(2)
191
- console.print(json_file.summary_panel())
200
+ console.print(json_file.summary_panel)
192
201
  console.print_json(json_file.json_str(), indent=4, sort_keys=False)
193
202
 
194
203
 
@@ -203,7 +212,7 @@ def print_json_metadata(epstein_files: EpsteinFiles) -> None:
203
212
  console.print_json(json_str, indent=4, sort_keys=True)
204
213
 
205
214
 
206
- def print_json_stats(epstein_files: EpsteinFiles) -> None:
215
+ def print_stats(epstein_files: EpsteinFiles) -> None:
207
216
  console.line(5)
208
217
  console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')
209
218
  print_json(f"MessengerLog Sender Counts", MessengerLog.count_authors(epstein_files.imessage_logs), skip_falsey=True)
@@ -213,15 +222,15 @@ def print_json_stats(epstein_files: EpsteinFiles) -> None:
213
222
  print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures()))
214
223
  print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors()))
215
224
  print_json("unknown_recipient_ids", epstein_files.unknown_recipient_ids())
216
- print_json("count_by_month", count_by_month(epstein_files.all_documents()))
225
+ print_json("count_by_month", count_by_month(epstein_files.all_documents))
217
226
 
218
227
 
219
228
  def print_other_files_section(epstein_files: EpsteinFiles) -> list[OtherFile]:
220
229
  """Returns the OtherFile objects that were interesting enough to print."""
221
230
  if args.uninteresting:
222
- files = [f for f in epstein_files.other_files if not f.is_interesting()]
231
+ files = [f for f in epstein_files.other_files if not f.is_interesting]
223
232
  else:
224
- files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting()]
233
+ files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting]
225
234
 
226
235
  title_pfx = '' if args.all_other_files else 'Selected '
227
236
  category_table = OtherFile.summary_table(files, title_pfx=title_pfx)
@@ -308,7 +317,7 @@ def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed
308
317
 
309
318
  for email in epstein_files.non_duplicate_emails():
310
319
  if email.file_id not in email_ids_that_were_printed:
311
- logger.warning(f"Failed to print {email.summary()}")
320
+ logger.error(f"Failed to print {email.summary()}")
312
321
  missed_an_email = True
313
322
 
314
323
  if not missed_an_email:
@@ -26,11 +26,13 @@ from epstein_files.util.logging import logger
26
26
 
27
27
  TITLE_WIDTH = 50
28
28
  SUBTITLE_WIDTH = 110
29
- NUM_COLOR_KEY_COLS = 4
29
+ NUM_COLOR_KEY_COLS = 6
30
30
  NA_TXT = Text(NA, style='dim')
31
+ SKIPPED_FILE_MSG_PADDING = (0, 0, 0, 4)
31
32
  SUBTITLE_PADDING = (2, 0, 1, 0)
32
33
  GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
33
34
  VALID_GREYS = [0, 3, 7, 11, 15, 19, 23, 27, 30, 35, 37, 39, 42, 46, 50, 53, 54, 58, 62, 63, 66, 69, 70, 74, 78, 82, 84, 85, 89, 93]
35
+ DOJ_PAGE_LINK_MSG = 'WIP page with documents from the Epstein Files Transparency Act'
34
36
 
35
37
  INFO_STYLE = 'white dim italic'
36
38
  KEY_STYLE = 'honeydew2 bold'
@@ -65,6 +67,19 @@ THEME_STYLES = {
65
67
  **{hg.theme_style_name: hg.style for hg in ALL_HIGHLIGHTS},
66
68
  }
67
69
 
70
+ RAINBOW = [
71
+ 'royal_blue1',
72
+ 'medium_purple',
73
+ 'light_coral',
74
+ 'light_slate_gray',
75
+ 'dark_goldenrod',
76
+ 'wheat4',
77
+ 'white',
78
+ 'medium_orchid',
79
+ 'deep_pink1',
80
+ 'navajo_white1',
81
+ ]
82
+
68
83
  # Instantiate console object
69
84
  CONSOLE_ARGS = {
70
85
  'color_system': '256',
@@ -177,14 +192,22 @@ def print_title_page_header() -> None:
177
192
  """Top half of the title page."""
178
193
  print_page_title(width=TITLE_WIDTH)
179
194
  site_type = EMAIL if (args.all_emails or args.email_timeline) else TEXT_MESSAGE
180
- title = f"This is the " + ('chronological ' if args.email_timeline else '') + f"Epstein {site_type.title()}s Page"
195
+ title = f"This is the "
196
+
197
+ if args.output_doj_files:
198
+ title += "DOJ 2026-01-30 Document Dump"
199
+ else:
200
+ title += ('Chronological ' if args.email_timeline else '') + f"Epstein {site_type.title()}s Page"
201
+
181
202
  print_starred_header(title, num_spaces=9 if args.all_emails else 6, num_stars=14)
203
+ #print_centered(f"This page contains all of the text messages and a curated selection of emails and other files.", style='gray74')
182
204
  print_centered(f"These documents come from the Nov. 2025 House Oversight Committee release.\n", style='gray74')
183
205
  other_site_msg = "another page with" + (' all of' if other_site_type() == EMAIL else '')
184
206
  other_site_msg += f" Epstein's {other_site_type()}s also generated by this code"
185
207
 
186
208
  links = [
187
209
  Text.from_markup(link_markup(other_site_url(), other_site_msg, f"{OTHER_SITE_LINK_STYLE} bold")),
210
+ Text.from_markup(link_markup(DOJ_2026_URL, DOJ_PAGE_LINK_MSG, f"{OTHER_SITE_LINK_STYLE} bold")),
188
211
  link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE),
189
212
  link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE),
190
213
  link_text_obj(JSON_FILES_URL, "epstein's json files", AUX_SITE_LINK_STYLE),
@@ -314,8 +337,12 @@ def _print_external_links() -> None:
314
337
  console.line()
315
338
  print_centered(Text('External Links', style=TABLE_TITLE_STYLE))
316
339
  presser_link = link_text_obj(OVERSIGHT_REPUBLICANS_PRESSER_URL, 'Official Oversight Committee Press Release')
317
- raw_docs_link = join_texts([link_text_obj(RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL, 'raw files', style=f"{ARCHIVE_LINK_COLOR} dim")], encloser='()')
340
+ raw_docs_link = link_text_obj(RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL, 'raw files', style=ARCHIVE_ALT_LINK_STYLE)
341
+ raw_docs_link = join_texts([raw_docs_link], encloser='()')
318
342
  print_centered(join_texts([presser_link, raw_docs_link]))
343
+ doj_docs_link = link_text_obj(DOJ_2026_URL, 'Epstein Files Transparency Act Disclosures')
344
+ doj_search_link = join_texts([link_text_obj(DOJ_SEARCH_URL, 'search', style=ARCHIVE_ALT_LINK_STYLE)], encloser='()')
345
+ print_centered(join_texts([doj_docs_link, doj_search_link]))
319
346
  print_centered(link_markup(JMAIL_URL, JMAIL) + " (read His Emails via Gmail interface)")
320
347
  print_centered(link_markup(EPSTEIN_DOCS_URL) + " (searchable archive)")
321
348
  print_centered(link_markup(EPSTEINIFY_URL) + " (raw document images)")
@@ -8,7 +8,7 @@ from rich.console import Console, ConsoleOptions, RenderResult
8
8
  from rich.padding import Padding
9
9
  from rich.text import Text
10
10
 
11
- from epstein_files.documents.emails.email_header import EmailHeader
11
+ from epstein_files.documents.emails.emailers import cleanup_str
12
12
  from epstein_files.epstein_files import EpsteinFiles
13
13
  from epstein_files.util.constant.common_words import COMMON_WORDS_LIST, COMMON_WORDS, UNSINGULARIZABLE_WORDS
14
14
  from epstein_files.util.constant.names import OTHER_NAMES
@@ -119,7 +119,7 @@ class WordCount:
119
119
  singularized: dict[str, int] = field(default_factory=lambda: defaultdict(int))
120
120
 
121
121
  def tally_word(self, word: str, document_line: SearchResult) -> None:
122
- word = EmailHeader.cleanup_str(word).lower().strip()
122
+ word = cleanup_str(word).lower().strip()
123
123
  raw_word = word
124
124
 
125
125
  if HTML_REGEX.search(word):
@@ -197,18 +197,18 @@ def write_word_counts_html() -> None:
197
197
  email_subjects: set[str] = set()
198
198
  word_count = WordCount()
199
199
  # Remove dupes, junk mail, and fwded articles from emails
200
- emails = [e for e in epstein_files.non_duplicate_emails() if not (e.is_mailing_list() or e.is_fwded_article())]
200
+ emails = [e for e in epstein_files.non_duplicate_emails() if e.is_word_count_worthy]
201
201
 
202
202
  for email in emails:
203
203
  if args.names and email.author not in args.names:
204
204
  continue
205
205
 
206
- logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
206
+ logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject}")
207
207
  lines = email.actual_text.split('\n')
208
208
 
209
- if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
210
- email_subjects.add(email.subject())
211
- lines.append(email.subject())
209
+ if email.subject not in email_subjects and f'Re: {email.subject}' not in email_subjects:
210
+ email_subjects.add(email.subject)
211
+ lines.append(email.subject)
212
212
 
213
213
  for i, line in enumerate(lines):
214
214
  if HTML_REGEX.search(line):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.2.5
3
+ Version: 1.5.0
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.13
20
20
  Requires-Dist: cairosvg (>=2.8.2,<3.0.0)
21
21
  Requires-Dist: datefinder (>=0.7.3,<0.8.0)
22
22
  Requires-Dist: inflection (>=0.5.1,<0.6.0)
23
+ Requires-Dist: pdfalyzer[extract] (>=1.19.6,<2.0.0)
23
24
  Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
24
25
  Requires-Dist: python-dotenv (>=1.2.1,<2.0.0)
25
26
  Requires-Dist: requests (>=2.32.5,<3.0.0)
@@ -47,6 +48,7 @@ Description-Content-Type: text/markdown
47
48
  #### Installation
48
49
  1. You need a local copy of the OCR text files from the House Oversight document release in a directory such as `/path/to/epstein/ocr_txt_files`. You can download those OCR text files from [the Congressional Google Drive folder](https://drive.google.com/drive/folders/1ldncvdqIf6miiskDp_EDuGSDAaI_fJx8) (make sure you grab both the `001/` and `002/` folders).
49
50
  1. Use `poetry install` for the easiest installation. `pip install epstein-files` should also work, though `pipx install epstein-files` is usually better.
51
+ 1. (Optional) If you want to work with the documents released by the DOJ on January 30th, 2026, you'll also need to download the PDF collections from [the DOJ site](https://www.justice.gov/epstein/doj-disclosures) (they're in the "Epstein Files Transparency Act" section) and either OCR them or find another way to get the OCR text.
50
52
 
51
53
 
52
54
  #### Command Line Tools
@@ -56,6 +58,13 @@ You need to set the `EPSTEIN_DOCS_DIR` environment variable with the path to the
56
58
  EPSTEIN_DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_generate --help
57
59
  ```
58
60
 
61
+ To work with the January 2026 DOJ documents, you'll also need to set the `EPSTEIN_DOJ_TXTS_20260130_DIR` environment variable to point at folders full of OCR-extracted text from the raw DOJ PDFs. If you have the PDFs but not the text files, there's [a script](scripts/extract_doj_pdfs.py) that can help you take care of that.
62
+
63
+ ```bash
64
+ EPSTEIN_DOCS_DIR=/path/to/epstein/ocr_txt_files EPSTEIN_DOJ_TXTS_20260130_DIR=/path/to/doj/files epstein_generate --help
65
+ ```
66
+
67
+
59
68
  All the tools that come with the package require `EPSTEIN_DOCS_DIR` to be set. These are the available tools:
60
69
 
61
70
  ```bash
@@ -63,9 +72,9 @@ All the tools that come with the package require `EPSTEIN_DOCS_DIR` to be set. T
63
72
  epstein_generate
64
73
 
65
74
  # Search for a string:
66
- epstein_search Bannon
75
+ epstein_grep Bannon
67
76
  # Or a regex:
68
- epstein_search '\bSteve\s*Bannon|Jeffrey\s*Epstein\b'
77
+ epstein_grep '\bSteve\s*Bannon|Jeffrey\s*Epstein\b'
69
78
 
70
79
  # Show a file with color highlighting of keywords:
71
80
  epstein_show 030999
@@ -123,3 +132,7 @@ for file in epstein_files.other_files:
123
132
  # Everyone Who Sent or Received an Email in the November Document Dump
124
133
  ![emails](https://github.com/michelcrypt4d4mus/epstein_text_messages/raw/master/docs/emailers_info_table.png)
125
134
 
135
+
136
+ # TODO List
137
+ See [TODO.md](TODO.md).
138
+
@@ -0,0 +1,40 @@
1
+ epstein_files/__init__.py,sha256=5YJvr8sk9Q84g4GMgto_-DkWRcOzSp1LCq6Bw9NM8z0,7518
2
+ epstein_files/documents/communication.py,sha256=_gGv6FPytDtKNRXfnb_N1pAuGb7IC-Cfnv3xzJNmfZY,1802
3
+ epstein_files/documents/document.py,sha256=QTbeDCQFovopIpl0emyOv4etI6Ki7TepSOxAqlKgMc0,23525
4
+ epstein_files/documents/doj_file.py,sha256=_dphYix5ShdflqKl-gwleyYLQWWex6TPtHFmoFKYfEY,9142
5
+ epstein_files/documents/doj_files/full_text.py,sha256=N6AN75_C5da8b93bL36Zzhn2FrHTSYp21mXJmevkaxU,4357
6
+ epstein_files/documents/email.py,sha256=OXGS3a5OEKfhnG-TuveoScJw4cRmFr4OarULJzq8PIc,47472
7
+ epstein_files/documents/emails/email_header.py,sha256=c_ipCy4FC2PIuSLpJhztGnd9Fkkoh24W3_iiwVyD9f4,7789
8
+ epstein_files/documents/emails/emailers.py,sha256=a9U8FjYVDm1wejpJHcJuN8ktUCFZgYQDLTbTd6aQfhg,9445
9
+ epstein_files/documents/imessage/text_message.py,sha256=8f17D7cN-_SY8YQpEz9FGuEGVMLpOtCHIIGTgGoP4pY,3379
10
+ epstein_files/documents/json_file.py,sha256=fPe6wPaqEbw8tIIZnJCmur2FB5K0Wi41BH2nuN8lgGM,1558
11
+ epstein_files/documents/messenger_log.py,sha256=iBT5nSDgNfYIBhqs0VJSTrZ6_lzFkRU-1jZLW_2UB2I,6484
12
+ epstein_files/documents/other_file.py,sha256=8YjmiyfCBmm3sMmsNi_VySXJA2EC6c5APo8OZfPY7FA,9694
13
+ epstein_files/epstein_files.py,sha256=wVx9QEe8REmayQBFtpNhIoM8UWSP3uPe82Ix6HBfizU,15722
14
+ epstein_files/person.py,sha256=sn-_DS9khyr_7_22jQSUbsdqGu5NoFHhJuBkTShz8cs,16249
15
+ epstein_files/util/constant/common_words.py,sha256=C1JERPnOGHV2UMC71aEue1i9QTQog-RfT3IzdcYQOYQ,3702
16
+ epstein_files/util/constant/html.py,sha256=MFooFV8KfFBCm9hL1u6A3hi_u37i7lL6UKAYoKQj3PI,1505
17
+ epstein_files/util/constant/names.py,sha256=FXFCVQv5cRmeAgGSDAgiDs0tOmQoKTUF1mPom9vJByQ,12317
18
+ epstein_files/util/constant/output_files.py,sha256=sy_NtnWnMZJ2KGSFNP4zIC7u_rJOF94znkiZKou84_A,2251
19
+ epstein_files/util/constant/strings.py,sha256=R48MBo3YcPUbaxLLWq_4gueLucLvRD9PZPT0slv20nE,2379
20
+ epstein_files/util/constant/urls.py,sha256=S5nxO5UNjbJOnwkrgZcNB1vb36zqjNJyaq4dU7BIoRA,6887
21
+ epstein_files/util/constants.py,sha256=xir4Raog7z407oPXioYPoMI3efqvRB3ySy1RREEWEmY,133262
22
+ epstein_files/util/data.py,sha256=d9MeDxXkbtoqCeNEhExqvtJUfTJCQPE6N7SO7QZfLic,3201
23
+ epstein_files/util/doc_cfg.py,sha256=6gfcEvPRfPir0twvtqF-rTl9I_AkrK__CHggk-i5AfE,9551
24
+ epstein_files/util/env.py,sha256=7vKzxk5-w-pCFBQ1HVj-qem4lUe2jPQENf2hjEQh0OQ,7872
25
+ epstein_files/util/file_helper.py,sha256=XmUuqYiGfUMkXnYwEjqPw7P-qGi0KBN7KRYtAZTlND8,4098
26
+ epstein_files/util/helpers/debugging_helper.py,sha256=gDqCZVIuenFX5oqk2l5Ue_CffhgL_FcqeZYPMR8gU-o,446
27
+ epstein_files/util/helpers/env_helpers.py,sha256=BdMyndPVrLxzYluIh9NT-xfZMuzfUQySEIU8vEzIr4I,778
28
+ epstein_files/util/highlighted_group.py,sha256=HSJe0V48s4Uj7DMIADS3qPQmctq0LRCU-CbEtu_1qVg,66090
29
+ epstein_files/util/layout/left_bar_panel.py,sha256=lAw8YGfTcYACupaZaoExeAPGmgQdjtbH9fz7gLibFTw,860
30
+ epstein_files/util/logging.py,sha256=0mKk8QQDOrQtKcZrfzU4tR3gbktM0r-FKStUB3dVqo0,2563
31
+ epstein_files/util/output.py,sha256=TCRT4CzOnfyEwyHZcZ25P14W08Q6mF7TT9_9bxdMr_Y,13372
32
+ epstein_files/util/rich.py,sha256=NmKemFaUYMQAA5JxS7f3-p_Hta5EpXcflPKZULJTqRc,14726
33
+ epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
34
+ epstein_files/util/timer.py,sha256=GZHefXiPA-prK-1szC1cebl44Y-i4Wd3K8zedFpcggg,1373
35
+ epstein_files/util/word_count.py,sha256=nfYJIfHO2H8N-csDCd13ET3exH2jPJDvI6lFeg-kqmc,9173
36
+ epstein_files-1.5.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
37
+ epstein_files-1.5.0.dist-info/METADATA,sha256=9x6OnF7XLqEENlkQag8SCFpj3G3aA5cFY8UtzOieEYM,7076
38
+ epstein_files-1.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
39
+ epstein_files-1.5.0.dist-info/entry_points.txt,sha256=hyNFwhAw8zyGlBOxFu9beFGqfK3l8hOJvCK1DjUw1rA,236
40
+ epstein_files-1.5.0.dist-info/RECORD,,
@@ -1,7 +1,7 @@
1
1
  [console_scripts]
2
2
  epstein_diff=epstein_files:epstein_diff
3
3
  epstein_generate=epstein_files:generate_html
4
- epstein_search=epstein_files:epstein_search
4
+ epstein_grep=epstein_files:epstein_grep
5
5
  epstein_show=epstein_files:epstein_show
6
6
  epstein_word_count=epstein_files:epstein_word_count
7
7
 
@@ -1,34 +0,0 @@
1
- epstein_files/__init__.py,sha256=_7YTeRPEpqnY7XBBGvjLpk-G287D1NrYjldTinsKeoI,5899
2
- epstein_files/documents/communication.py,sha256=QiCZ35R2ttlqcdovm5LBdGqgoj4xCcpl9ANqwlA9XGU,1752
3
- epstein_files/documents/document.py,sha256=cJxL0UF8mtUvkUx1YGa62J24336DULVKLv1j6_d39Gg,19668
4
- epstein_files/documents/email.py,sha256=ZylzikpeuiwhZsWVnfaYlO8llMjIfvAS2hQuuQELl6g,42343
5
- epstein_files/documents/emails/email_header.py,sha256=3UD2pXMS9bRsFP4L5RSP2tSjI8OR6lq6gPKKao0DYLY,7739
6
- epstein_files/documents/imessage/text_message.py,sha256=IexwwVeF14FZqD7IuK47bIHYam07ZcD3rxheVNULNkc,3394
7
- epstein_files/documents/json_file.py,sha256=WcZW5NNqA67rHTdopbOGtup00muNaLlvrNgKb-K4zO8,1504
8
- epstein_files/documents/messenger_log.py,sha256=1bv62WoQMKR3gYDrK9W3Xm7cqLbKrkRBV7NTFL2cexE,7349
9
- epstein_files/documents/other_file.py,sha256=xZSILlnYlX2QIYCQCm6WNHvNqhF5do6AmxT7qxw-yog,9417
10
- epstein_files/epstein_files.py,sha256=fHa50Xdxgc2Vw7YgcgS3vAjTqDp1L5yPWOWQ3sb4RlM,14021
11
- epstein_files/person.py,sha256=lrBPCg1LFfd8koytEL0AOb8CcFKmJjLV9kAFqzGHHdU,14700
12
- epstein_files/util/constant/common_words.py,sha256=C1JERPnOGHV2UMC71aEue1i9QTQog-RfT3IzdcYQOYQ,3702
13
- epstein_files/util/constant/html.py,sha256=MFooFV8KfFBCm9hL1u6A3hi_u37i7lL6UKAYoKQj3PI,1505
14
- epstein_files/util/constant/names.py,sha256=exsR_dtwVEANYFJSU1wk50hiqzAtfc8zQbaKKwgX42A,11408
15
- epstein_files/util/constant/output_files.py,sha256=vxof85L-1GqSwss8XvLS3HZGguyWSzJkoJm1PrYYJ7g,2123
16
- epstein_files/util/constant/strings.py,sha256=jh_ou1VoA-6T1ej9l7CFfETEOCuSCectqqK7ppBQz2I,2030
17
- epstein_files/util/constant/urls.py,sha256=rFVU7824M1gAsmWLnAr-BBVOstlVwIsgyOlUWd1cdDk,6181
18
- epstein_files/util/constants.py,sha256=ANm_qMYK-dpgg02W9i9PUYiOI2fqr_5aoa50AHOOKIo,123568
19
- epstein_files/util/data.py,sha256=oZSrjzQSnXHtOlqxisodpH65cTGe57TfA3sZyQQzT7Q,3051
20
- epstein_files/util/doc_cfg.py,sha256=gIBAoeheQYqqrCtVRU2ytYu799LyCiPxPyHDgWUad3c,9471
21
- epstein_files/util/env.py,sha256=506dRXdetaKAHZ8vIacxA5x2Cv-IGBNYS8A0NQfMdUM,6961
22
- epstein_files/util/file_helper.py,sha256=MpG1hI7DGs05fV9KSVb_ltc98DC8tv1E_TTo5X_E7Js,3010
23
- epstein_files/util/highlighted_group.py,sha256=6EwCRVysWRzQOYu12ZKvVapKHTveSmnF-5bCZTsJtHk,62411
24
- epstein_files/util/logging.py,sha256=pSLHGrUnPQGFMGYNIOTbZbCyy5dDOCbNrbNz22wIcZI,2099
25
- epstein_files/util/output.py,sha256=ZNkx6iSJhaaeE3WkG3SDjgATL6pGkJpqzOCov7hZQW8,13221
26
- epstein_files/util/rich.py,sha256=pp_rIfy8rrxDONvpcizQ6r-x3oeJXEqgCgaA3FNDIHI,13739
27
- epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
28
- epstein_files/util/timer.py,sha256=GZHefXiPA-prK-1szC1cebl44Y-i4Wd3K8zedFpcggg,1373
29
- epstein_files/util/word_count.py,sha256=Ab0KSBv5oaee40h9MpGIlMNWRH4nAWDcMAq4A1TdpvA,9226
30
- epstein_files-1.2.5.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
31
- epstein_files-1.2.5.dist-info/METADATA,sha256=9cKpM9bg6-9MbKQ-EcX_S4oPBfS1a01FADl_2LbRvec,6222
32
- epstein_files-1.2.5.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
33
- epstein_files-1.2.5.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
34
- epstein_files-1.2.5.dist-info/RECORD,,