epstein-files 1.0.14__tar.gz → 1.0.15__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (32)
  1. {epstein_files-1.0.14 → epstein_files-1.0.15}/PKG-INFO +1 -1
  2. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/__init__.py +8 -8
  3. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/document.py +7 -5
  4. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/email.py +2 -1
  5. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/messenger_log.py +9 -10
  6. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/other_file.py +33 -32
  7. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/epstein_files.py +1 -42
  8. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/names.py +2 -0
  9. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/strings.py +0 -2
  10. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/urls.py +1 -7
  11. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constants.py +68 -74
  12. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/doc_cfg.py +4 -3
  13. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/env.py +10 -8
  14. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/file_helper.py +8 -4
  15. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/highlighted_group.py +2 -2
  16. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/output.py +45 -10
  17. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/word_count.py +6 -8
  18. {epstein_files-1.0.14 → epstein_files-1.0.15}/pyproject.toml +1 -1
  19. {epstein_files-1.0.14 → epstein_files-1.0.15}/LICENSE +0 -0
  20. {epstein_files-1.0.14 → epstein_files-1.0.15}/README.md +0 -0
  21. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/communication.py +0 -0
  22. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/emails/email_header.py +0 -0
  23. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/imessage/text_message.py +0 -0
  24. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/documents/json_file.py +0 -0
  25. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/common_words.py +0 -0
  26. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/html.py +0 -0
  27. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/constant/output_files.py +0 -0
  28. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/data.py +0 -0
  29. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/logging.py +0 -0
  30. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/rich.py +0 -0
  31. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/search_result.py +0 -0
  32. {epstein_files-1.0.14 → epstein_files-1.0.15}/epstein_files/util/timer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.14
3
+ Version: 1.0.15
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -17,11 +17,11 @@ from epstein_files.epstein_files import EpsteinFiles, document_cls
17
17
  from epstein_files.documents.document import INFO_PADDING, Document
18
18
  from epstein_files.documents.email import Email
19
19
  from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
20
- from epstein_files.util.env import args, specified_names
20
+ from epstein_files.util.env import args
21
21
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
22
22
  from epstein_files.util.logging import logger
23
- from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
24
- write_json_metadata, write_urls)
23
+ from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
24
+ print_other_files_section, print_text_messages_section, write_json_metadata, write_urls)
25
25
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
26
26
  from epstein_files.util.timer import Timer
27
27
  from epstein_files.util.word_count import write_word_counts_html
@@ -49,12 +49,12 @@ def generate_html() -> None:
49
49
  exit()
50
50
 
51
51
  if args.output_texts:
52
- epstein_files.print_text_messages_section()
52
+ print_text_messages_section(epstein_files)
53
53
  timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
54
54
 
55
55
  if args.output_emails:
56
- emails_printed = print_emails(epstein_files)
57
- timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
56
+ emails_that_were_printed = print_emails_section(epstein_files)
57
+ timer.print_at_checkpoint(f"Printed {len(emails_that_were_printed):,} emails")
58
58
 
59
59
  if args.output_other:
60
60
  if args.uninteresting:
@@ -62,7 +62,7 @@ def generate_html() -> None:
62
62
  else:
63
63
  files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting()]
64
64
 
65
- epstein_files.print_other_files_section(files)
65
+ print_other_files_section(files, epstein_files)
66
66
  timer.print_at_checkpoint(f"Printed {len(files)} other files (skipped {len(epstein_files.other_files) - len(files)})")
67
67
 
68
68
  # Save output
@@ -86,7 +86,7 @@ def epstein_search():
86
86
 
87
87
  for search_term in args.positional_args:
88
88
  temp_highlighter = build_highlighter(search_term)
89
- search_results = epstein_files.docs_matching(search_term, specified_names)
89
+ search_results = epstein_files.docs_matching(search_term, args.names)
90
90
  console.line(2)
91
91
  print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
92
92
 
@@ -160,8 +160,8 @@ class Document:
160
160
  def file_size(self) -> int:
161
161
  return file_size(self.file_path)
162
162
 
163
- def file_size_str(self) -> str:
164
- return file_size_str(self.file_path)
163
+ def file_size_str(self, decimal_places: int | None = None) -> str:
164
+ return file_size_str(self.file_path, decimal_places)
165
165
 
166
166
  def info(self) -> list[Text]:
167
167
  """0 to 2 sentences containing the info_txt() as well as any configured description."""
@@ -171,14 +171,14 @@ class Document:
171
171
  ])
172
172
 
173
173
  def info_txt(self) -> Text | None:
174
- """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
174
+ """Secondary info about this file (description recipients, etc). Overload in subclasses."""
175
175
  return None
176
176
 
177
177
  def is_duplicate(self) -> bool:
178
178
  return bool(self.config and self.config.duplicate_of_id)
179
179
 
180
180
  def is_local_extract_file(self) -> bool:
181
- """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
181
+ """True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
182
182
  return is_local_extract_file(self.filename)
183
183
 
184
184
  def length(self) -> int:
@@ -234,6 +234,7 @@ class Document:
234
234
  return text
235
235
 
236
236
  def sort_key(self) -> tuple[datetime, str, int]:
237
+ """Sort by timestamp, file_id, then whether or not it's a duplicate file."""
237
238
  if self.is_duplicate():
238
239
  sort_id = self.config.duplicate_of_id
239
240
  dupe_idx = 1
@@ -253,7 +254,7 @@ class Document:
253
254
  txt.append(' (', style=SYMBOL_STYLE)
254
255
  txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
255
256
 
256
- txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
257
+ txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(0), style='aquamarine1')))
257
258
  txt.append(", ").append(key_value_txt('lines', self.num_lines()))
258
259
 
259
260
  if self.config and self.config.duplicate_of_id:
@@ -271,6 +272,7 @@ class Document:
271
272
  return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
272
273
 
273
274
  def top_lines(self, n: int = 10) -> str:
275
+ """First n lines."""
274
276
  return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
275
277
 
276
278
  def warn(self, msg: str) -> None:
@@ -17,7 +17,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, INFO_INDENT
17
17
  from epstein_files.documents.emails.email_header import (BAD_EMAILER_REGEX, EMAIL_SIMPLE_HEADER_REGEX,
18
18
  EMAIL_SIMPLE_HEADER_LINE_BREAK_REGEX, FIELD_NAMES, TIME_REGEX, EmailHeader)
19
19
  from epstein_files.util.constant.names import *
20
- from epstein_files.util.constant.strings import REDACTED, URL_SIGNIFIERS
20
+ from epstein_files.util.constant.strings import REDACTED
21
21
  from epstein_files.util.constants import *
22
22
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
23
23
  flatten, remove_timezone, uniquify)
@@ -41,6 +41,7 @@ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
41
41
 
42
42
  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
43
43
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
44
+ URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
44
45
  APPEARS_IN = 'Appears in'
45
46
  MAX_CHARS_TO_PRINT = 4000
46
47
  MAX_NUM_HEADER_LINES = 14
@@ -121,23 +121,22 @@ class MessengerLog(Communication):
121
121
  return sender_counts
122
122
 
123
123
  @classmethod
124
- def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
125
- authors = listify(author)
126
- return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
127
-
128
- @classmethod
129
- def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
124
+ def summary_table(cls, log_files: list['MessengerLog']) -> Table:
130
125
  """Build a table summarizing the text messages in 'imessage_logs'."""
131
- counts_table = build_table("Text Message Counts By Author")
132
- counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
126
+ author_counts = cls.count_authors(log_files)
127
+ msg_count = sum([len(log.messages) for log in log_files])
128
+
129
+ footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
130
+ counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
131
+ counts_table.add_column(AUTHOR.title(), justify='left', width=30)
133
132
  counts_table.add_column('Files', justify='right', style='white')
134
133
  counts_table.add_column("Msgs", justify='right')
135
134
  counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
136
135
  counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
137
136
  counts_table.add_column('Days', justify='right', style='dim')
138
137
 
139
- for name, count in sort_dict(cls.count_authors(imessage_logs)):
140
- logs = cls.logs_for(name, imessage_logs)
138
+ for name, count in sort_dict(author_counts):
139
+ logs = log_files if name == JEFFREY_EPSTEIN else [log for log in log_files if log.author == name]
141
140
  first_at = logs[0].first_message_at(name)
142
141
  last_at = logs[-1].first_message_at(name)
143
142
 
@@ -17,14 +17,15 @@ from rich.text import Text
17
17
  from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
18
18
  from epstein_files.util.constant.strings import *
19
19
  from epstein_files.util.constants import *
20
- from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
20
+ from epstein_files.util.doc_cfg import DocCfg, Metadata
21
21
  from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
22
22
  from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
23
23
  from epstein_files.util.env import args
24
24
  from epstein_files.util.highlighted_group import styled_category
25
- from epstein_files.util.rich import QUESTION_MARK_TXT, add_cols_to_table, build_table, highlighter
25
+ from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
26
26
  from epstein_files.util.logging import logger
27
27
 
28
+ FIRST_FEW_LINES = 'First Few Lines'
28
29
  MAX_DAYS_SPANNED_TO_BE_VALID = 10
29
30
  MAX_EXTRACTED_TIMESTAMPS = 100
30
31
  MIN_TIMESTAMP = datetime(2000, 1, 1)
@@ -208,6 +209,36 @@ class OtherFile(Document):
208
209
  if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
209
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
210
211
 
212
+ @staticmethod
213
+ def count_by_category_table(files: Sequence['OtherFile']) -> Table:
214
+ counts = defaultdict(int)
215
+ category_bytes = defaultdict(int)
216
+
217
+ for file in files:
218
+ if file.category() is None:
219
+ logger.warning(f"file {file.file_id} has no category")
220
+
221
+ counts[file.category()] += 1
222
+ category_bytes[file.category()] += file.file_size()
223
+
224
+ table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
225
+ table.columns[0].min_width = 14
226
+ table.columns[-1].style = 'dim'
227
+
228
+ for (category, count) in sort_dict(counts):
229
+ category_files = [f for f in files if f.category() == category]
230
+ known_author_count = Document.known_author_count(category_files)
231
+
232
+ table.add_row(
233
+ styled_category(category or UNKNOWN),
234
+ str(count),
235
+ str(known_author_count),
236
+ str(count - known_author_count),
237
+ file_size_to_str(category_bytes[category]),
238
+ )
239
+
240
+ return table
241
+
211
242
  @staticmethod
212
243
  def files_preview_table(files: Sequence['OtherFile']) -> Table:
213
244
  """Build a table of OtherFile documents."""
@@ -240,33 +271,3 @@ class OtherFile(Document):
240
271
  )
241
272
 
242
273
  return table
243
-
244
- @staticmethod
245
- def count_by_category_table(files: Sequence['OtherFile']) -> Table:
246
- counts = defaultdict(int)
247
- category_bytes = defaultdict(int)
248
-
249
- for file in files:
250
- if file.category() is None:
251
- logger.warning(f"file {file.file_id} has no category")
252
-
253
- counts[file.category()] += 1
254
- category_bytes[file.category()] += file.file_size()
255
-
256
- table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
257
- table.columns[0].min_width = 14
258
- table.columns[-1].style = 'dim'
259
-
260
- for (category, count) in sort_dict(counts):
261
- category_files = [f for f in files if f.category() == category]
262
- known_author_count = Document.known_author_count(category_files)
263
-
264
- table.add_row(
265
- styled_category(category or UNKNOWN),
266
- str(count),
267
- str(known_author_count),
268
- str(count - known_author_count),
269
- file_size_to_str(category_bytes[category]),
270
- )
271
-
272
- return table
@@ -25,7 +25,7 @@ from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL,
25
25
  from epstein_files.util.constants import *
26
26
  from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
27
27
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
28
- from epstein_files.util.env import DOCS_DIR, args, logger, specified_names
28
+ from epstein_files.util.env import DOCS_DIR, args, logger
29
29
  from epstein_files.util.file_helper import file_size_str
30
30
  from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
31
31
  from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
@@ -201,9 +201,6 @@ class EpsteinFiles:
201
201
 
202
202
  return docs
203
203
 
204
- def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
205
- return MessengerLog.logs_for(author, self.imessage_logs)
206
-
207
204
  def json_metadata(self) -> str:
208
205
  """Create a JSON string containing metadata for all the files."""
209
206
  metadata = {
@@ -290,44 +287,6 @@ class EpsteinFiles:
290
287
  console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
291
288
  console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
292
289
 
293
- def print_other_files_section(self, files: list[OtherFile]) -> None:
294
- """Returns the OtherFile objects that were interesting enough to print."""
295
- category_table = OtherFile.count_by_category_table(files)
296
- other_files_preview_table = OtherFile.files_preview_table(files)
297
- header_pfx = '' if args.all_other_files else 'Selected '
298
- print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
299
-
300
- if args.all_other_files:
301
- console.line(1)
302
- else:
303
- print_all_files_page_link(self)
304
- console.line(2)
305
-
306
- for table in [category_table, other_files_preview_table]:
307
- table.title = f"{header_pfx}{table.title}"
308
-
309
- print_centered(category_table)
310
- console.line(2)
311
- console.print(other_files_preview_table)
312
-
313
- def print_text_messages_section(self) -> None:
314
- """Print summary table and stats for text messages."""
315
- print_section_header('All of His Text Messages')
316
- print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
317
- authors: list[str | None] = specified_names if specified_names else [JEFFREY_EPSTEIN]
318
- log_files = self.imessage_logs_for(authors)
319
-
320
- for log_file in log_files:
321
- console.print(Padding(log_file))
322
- console.line(2)
323
-
324
- print_centered(MessengerLog.summary_table(self.imessage_logs))
325
- text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
326
- text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
327
- console.print(text_summary_msg)
328
- imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
329
- console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
330
-
331
290
  def table_of_emailers(self) -> Table:
332
291
  attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
333
292
  footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
@@ -187,9 +187,11 @@ VIRGINIA_GIUFFRE = 'Virginia Giuffre'
187
187
 
188
188
  # Organizations
189
189
  BOFA = 'BofA'
190
+ BOFA_MERRILL = f'{BOFA} / Merrill Lynch'
190
191
  CNN = 'CNN'
191
192
  DEUTSCHE_BANK = 'Deutsche Bank'
192
193
  ELECTRON_CAPITAL_PARTNERS = 'Electron Capital Partners'
194
+ EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
193
195
  GOLDMAN_SACHS = 'Goldman Sachs'
194
196
  GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
195
197
  HARVARD = 'Harvard'
@@ -57,12 +57,10 @@ TIMESTAMP_DIM = f"turquoise4 dim"
57
57
  AUTHOR = 'author'
58
58
  DEFAULT = 'default'
59
59
  EVERYONE = 'everyone'
60
- FIRST_FEW_LINES = 'First Few Lines'
61
60
  HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
62
61
  JSON = 'json'
63
62
  NA = 'n/a'
64
63
  REDACTED = '<REDACTED>'
65
- URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
66
64
  QUESTION_MARKS = '(???)'
67
65
 
68
66
  # Regexes
@@ -49,7 +49,7 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
49
49
  }
50
50
 
51
51
 
52
- epsteinify_api_url = lambda file_id: f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"
52
+ epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
53
53
  epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
54
54
  epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(external_doc_link_markup(filename_or_id, style))
55
55
  epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
@@ -66,8 +66,6 @@ epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.q
66
66
 
67
67
  rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
68
68
 
69
- search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
70
- search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
71
69
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
72
70
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
73
71
 
@@ -103,8 +101,4 @@ def link_text_obj(url: str, link_text: str | None = None, style: str = ARCHIVE_L
103
101
  return Text.from_markup(link_markup(url, link_text, style))
104
102
 
105
103
 
106
- def search_coffeezilla_link(text: str, link_txt: str, style: str = ARCHIVE_LINK_COLOR) -> Text:
107
- return link_text_obj(search_coffeezilla_url(text), link_txt or text, style)
108
-
109
-
110
104
  CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
@@ -208,73 +208,6 @@ for emailer in EMAILERS:
208
208
  EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
209
209
 
210
210
 
211
- ##########################
212
- # OtherFile config stuff #
213
- ##########################
214
-
215
- # strings
216
- FBI = 'FBI'
217
- MEME = 'meme of'
218
- PRESS_RELEASE = 'press release'
219
- RESUME_OF = 'professional resumé'
220
- SCREENSHOT = 'screenshot of'
221
- TRANSLATION = 'translation of'
222
- TWEET = 'tweet'
223
-
224
- # Legal cases
225
- BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
226
- EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
227
- EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
228
- GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
229
- GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
230
- GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
231
- JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
232
- JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
233
- NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
234
-
235
- # Descriptions of non-email, non-text message files
236
- ARTICLE_DRAFT = 'draft of an article about'
237
- BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
238
- BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
239
- BROCKMAN_INC = 'Brockman, Inc.'
240
- CVRA = "Crime Victims' Rights Act [CVRA]"
241
- DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
242
- DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
243
- DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
244
- DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
245
- EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
246
- FBI_REPORT = f"report on Epstein investigation (redacted)"
247
- FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
248
- FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
249
- FIRE_AND_FURY = f"Fire And Fury"
250
- HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
251
- HBS_APPLICATION = f"{HARVARD} Business School application letter"
252
- JASTA = 'JASTA'
253
- JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
254
- JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
255
- LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
256
- LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
257
- KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
258
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
259
- NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
260
- NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
261
- OBAMA_JOKE = 'joke about Obama'
262
- PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
263
- PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
264
- PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
265
- PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
266
- PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
267
- REAL_DEAL_ARTICLE = 'article by Keith Larsen'
268
- SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
269
- SINGLE_PAGE = 'single page of'
270
- STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
271
- SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
272
- TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
273
- UBS_CIO_REPORT = 'CIO Monthly Extended report'
274
- UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
275
- WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
276
- ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
277
-
278
211
  # Atribution reasons
279
212
  BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
280
213
  KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
@@ -369,6 +302,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
369
302
  ################################################ EMAILS ################################################
370
303
  ########################################################################################################
371
304
 
305
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
+
372
307
  # Some emails have a lot of uninteresting CCs
373
308
  IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
374
309
  FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
@@ -866,6 +801,65 @@ EMAILS_CONFIG = [
866
801
  ####################################### OTHER FILES ############################################
867
802
  ################################################################################################
868
803
 
804
+ # strings
805
+ FBI = 'FBI'
806
+ MEME = 'meme of'
807
+ PRESS_RELEASE = 'press release'
808
+ RESUME_OF = 'professional resumé'
809
+ SCREENSHOT = 'screenshot of'
810
+ TRANSLATION = 'translation of'
811
+ TWEET = 'tweet'
812
+
813
+ # Legal cases
814
+ BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
815
+ EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
816
+ EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
817
+ GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
818
+ GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
819
+ GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
820
+ JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
821
+ JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
822
+ NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
823
+
824
+ # Descriptions of non-email, non-text message files
825
+ ARTICLE_DRAFT = 'draft of an article about'
826
+ BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
827
+ BROCKMAN_INC = 'Brockman, Inc.'
828
+ CVRA = "Crime Victims' Rights Act [CVRA]"
829
+ DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
830
+ DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
831
+ DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
832
+ DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
833
+ FBI_REPORT = f"report on Epstein investigation (redacted)"
834
+ FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
835
+ FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
836
+ FIRE_AND_FURY = f"Fire And Fury"
837
+ HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
838
+ HBS_APPLICATION = f"{HARVARD} Business School application letter"
839
+ JASTA = 'JASTA'
840
+ JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
841
+ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
842
+ LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
843
+ KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
844
+ LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
845
+ NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
846
+ OBAMA_JOKE = 'joke about Obama'
847
+ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
848
+ PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
849
+ PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
850
+ PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
851
+ PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
852
+ REAL_DEAL_ARTICLE = 'article by Keith Larsen'
853
+ SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
854
+ STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
855
+ SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
856
+ TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
857
+ UBS_CIO_REPORT = 'CIO Monthly Extended report'
858
+ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
859
+ WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
860
+ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
861
+
862
+
869
863
  OTHER_FILES_BOOKS = [
870
864
  DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
871
865
  DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -873,7 +867,7 @@ OTHER_FILES_BOOKS = [
873
867
  DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
874
868
  DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
875
869
  DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
876
- DocCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK, date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
870
+ DocCfg(id='011472', author=EHUD_BARAK, description=f'"Night Flight" (draft)', date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
877
871
  DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
878
872
  DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
879
873
  DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
@@ -1002,7 +996,7 @@ OTHER_FILES_ARTICLES = [
1002
996
  DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
1003
997
  DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
1004
998
  DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
1005
- DocCfg(id='021092', author='Tatler', description=f'{SINGLE_PAGE} of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
999
+ DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
1006
1000
  DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
1007
1001
  DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
1008
1002
  DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1030,7 +1024,7 @@ OTHER_FILES_ARTICLES = [
1030
1024
  DocCfg(id='033480', description=f"John Bolton press clipping", date='2018-04-06', duplicate_ids=['033481']),
1031
1025
  DocCfg(id='013403', description=f"{LEXIS_NEXIS} result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
1032
1026
  DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
1033
- DocCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
1027
+ DocCfg(id='031191', description=f"single page of unknown article about Epstein and Trump's relationship in 1997"),
1034
1028
  DocCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
1035
1029
  DocCfg(
1036
1030
  id='031736',
@@ -1186,16 +1180,16 @@ OTHER_FILES_LEGAL = [
1186
1180
  ]
1187
1181
 
1188
1182
  OTHER_FILES_CONFERENCES = [
1189
- DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
1183
+ DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference', attached_to_email_id='014312'),
1190
1184
  DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1191
1185
  DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
1192
1186
  DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
1193
- DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f"Earth Environment Convention about ESG investing"),
1187
+ DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f'"Earth Environment Convention" about ESG investing'),
1194
1188
  DocCfg(id='031354', author=NOBEL_CHARITABLE_TRUST, description=f'"Thinking About the Environment and Technology" report 2011'),
1195
1189
  DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
1196
1190
  DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1197
1191
  DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1198
- DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program emailed to epstein BY {BARBRO_C_EHNBOM} in 031226", date='2012-08-18'),
1192
+ DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
1199
1193
  DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
1200
1194
  DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
1201
1195
  DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
@@ -1411,7 +1405,7 @@ OTHER_FILES_POLITICS = [
1411
1405
  description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
1412
1406
  date='2017-06-02',
1413
1407
  ),
1414
- DocCfg(id='026856', author='Kevin Rudd', description=f"speech 'Xi Jinping, China And The Global Order'", date='2018-06-26'),
1408
+ DocCfg(id='026856', author='Kevin Rudd', description=f'speech "Xi Jinping, China And The Global Order"', date='2018-06-26'),
1415
1409
  DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
1416
1410
  DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
1417
1411
  DocCfg(
@@ -36,7 +36,7 @@ FIELD_SORT_KEY = {
36
36
  }
37
37
 
38
38
  FINANCIAL_REPORTS_AUTHORS = [
39
- BOFA,
39
+ BOFA_MERRILL,
40
40
  DEUTSCHE_BANK,
41
41
  ELECTRON_CAPITAL_PARTNERS,
42
42
  GOLDMAN_INVESTMENT_MGMT,
@@ -73,6 +73,7 @@ class DocCfg:
73
73
  is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
74
74
  """
75
75
  id: str
76
+ attached_to_email_id: str | None = None
76
77
  author: str | None = None
77
78
  category: str | None = None
78
79
  date: str | None = None
@@ -102,10 +103,10 @@ class DocCfg:
102
103
  return f"{msg} {self.description}" if self.description else msg
103
104
  elif self.author and self.description:
104
105
  if self.category in [ACADEMIA, BOOK]:
105
- title = self.description if '"' in self.description else f"'{self.description}'"
106
+ title = self.description if '"' in self.description else f'"{self.description}"'
106
107
  return f"{title} by {self.author}"
107
108
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
108
- return f"{self.author} report: '{self.description}'"
109
+ return f'{self.author} report: "{self.description}"'
109
110
  elif self.category == LEGAL and 'v.' in self.author:
110
111
  return f"{self.author}: {self.description}"
111
112
  elif self.category and self.author is None and self.description is None:
@@ -8,10 +8,8 @@ from rich_argparse_plus import RichHelpFormatterPlus
8
8
 
9
9
  from epstein_files.util.logging import env_log_level, logger
10
10
 
11
- COUNT_WORDS_SCRIPT = 'epstein_word_count'
12
11
  DEFAULT_WIDTH = 145
13
- HTML_SCRIPTS = ['epstein_generate', COUNT_WORDS_SCRIPT]
14
- EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
12
+ HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
15
13
 
16
14
 
17
15
  RichHelpFormatterPlus.choose_theme('morning_glory')
@@ -50,6 +48,7 @@ args = parser.parse_args()
50
48
 
51
49
 
52
50
  # Verify Epstein docs can be found
51
+ EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
53
52
  DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
54
53
  DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
55
54
 
@@ -65,13 +64,13 @@ is_env_var_set = lambda s: len(environ.get(s) or '') > 0
65
64
  is_html_script = current_script in HTML_SCRIPTS
66
65
 
67
66
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
67
+ args.names = [None if n == 'None' else n for n in (args.names or [])]
68
68
  args.output_emails = args.output_emails or args.all_emails
69
69
  args.output_other = args.output_other or args.all_other_files or args.uninteresting
70
70
  args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
71
71
  args.width = args.width if is_html_script else None
72
- is_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
73
- is_output_selected = is_output_selected or args.json_metadata or args.colors_only
74
- specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
72
+ is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
73
+ is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
75
74
 
76
75
  # Log level args
77
76
  if args.deep_debug:
@@ -86,9 +85,12 @@ elif not env_log_level:
86
85
  logger.info(f'Log level set to {logger.level}...')
87
86
 
88
87
  # Massage args that depend on other args to the appropriate state
89
- if current_script == 'epstein_generate' and not (is_output_selected or args.make_clean):
88
+ if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
90
89
  logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
91
90
  args.output_texts = args.output_emails = args.output_other = True
92
91
 
93
92
  if args.debug:
94
- logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_output_selected}\nspecified_names={specified_names},\nargs={args}")
93
+ logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
94
+
95
+ if args.names:
96
+ logger.warning(f"Output restricted to {args.names}")
@@ -12,7 +12,6 @@ KB = 1024
12
12
  MB = KB * KB
13
13
 
14
14
  file_size = lambda file_path: Path(file_path).stat().st_size
15
- file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
16
15
 
17
16
  # Coerce methods handle both string and int arguments.
18
17
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
@@ -46,8 +45,12 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
46
45
  return file_match.group(1)
47
46
 
48
47
 
49
- def file_size_to_str(size: int) -> str:
50
- digits = 2
48
+ def file_size_str(file_path, digits: int | None = None):
49
+ return file_size_to_str(file_size(file_path), digits)
50
+
51
+
52
+ def file_size_to_str(size: int, digits: int | None = None) -> str:
53
+ _digits = 2
51
54
 
52
55
  if size > MB:
53
56
  size_num = float(size) / MB
@@ -55,10 +58,11 @@ def file_size_to_str(size: int) -> str:
55
58
  elif size > KB:
56
59
  size_num = float(size) / KB
57
60
  size_str = 'kb'
58
- digits = 1
61
+ _digits = 1
59
62
  else:
60
63
  return f"{size} b"
61
64
 
65
+ digits = _digits if digits is None else digits
62
66
  return f"{size_num:,.{digits}f} {size_str}"
63
67
 
64
68
 
@@ -302,7 +302,7 @@ HIGHLIGHTED_NAMES = [
302
302
  HighlightedNames(
303
303
  label=FINANCE,
304
304
  style='green',
305
- pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
305
+ pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche?\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
306
306
  emailers={
307
307
  AMANDA_ENS: 'Citigroup',
308
308
  BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
@@ -396,7 +396,7 @@ HIGHLIGHTED_NAMES = [
396
396
  HighlightedNames(
397
397
  label='law enforcement',
398
398
  style='color(24) bold',
399
- pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|Southern\s*District\s*of\s*(Florida|New\s*York)|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
399
+ pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|Southern\s*District\s*of\s*(Florida|New\s*York)|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
400
400
  emailers = {
401
401
  ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
402
402
  DANNY_FROST: 'Director of Communications at Manhattan DA',
@@ -4,13 +4,14 @@ from rich.padding import Padding
4
4
 
5
5
  from epstein_files.documents.email import Email
6
6
  from epstein_files.documents.messenger_log import MessengerLog
7
+ from epstein_files.documents.other_file import FIRST_FEW_LINES, OtherFile
7
8
  from epstein_files.epstein_files import EpsteinFiles, count_by_month
8
9
  from epstein_files.util.constant import output_files
9
10
  from epstein_files.util.constant.html import *
10
11
  from epstein_files.util.constant.names import *
11
12
  from epstein_files.util.constant.output_files import JSON_FILES_JSON_PATH, JSON_METADATA_PATH
12
13
  from epstein_files.util.data import dict_sets_to_lists
13
- from epstein_files.util.env import args, specified_names
14
+ from epstein_files.util.env import args
14
15
  from epstein_files.util.file_helper import log_file_write
15
16
  from epstein_files.util.logging import logger
16
17
  from epstein_files.util.rich import *
@@ -20,17 +21,17 @@ PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
20
21
  # Order matters. Default names to print emails for.
21
22
  DEFAULT_EMAILERS = [
22
23
  JEREMY_RUBIN,
23
- AL_SECKEL,
24
24
  JOI_ITO,
25
25
  JABOR_Y,
26
26
  STEVEN_SINOFSKY,
27
+ AL_SECKEL,
27
28
  DANIEL_SIAD,
28
29
  JEAN_LUC_BRUNEL,
29
30
  STEVEN_HOFFENBERG,
31
+ RENATA_BOLOTOVA,
32
+ MASHA_DROKOVA,
30
33
  EHUD_BARAK,
31
34
  MARTIN_NOWAK,
32
- MASHA_DROKOVA,
33
- RENATA_BOLOTOVA,
34
35
  STEVE_BANNON,
35
36
  PRINCE_ANDREW,
36
37
  JIDE_ZEITLIN,
@@ -39,6 +40,7 @@ DEFAULT_EMAILERS = [
39
40
  JENNIFER_JACQUET,
40
41
  TYLER_SHEARS,
41
42
  CHRISTINA_GALBRAITH,
43
+ ZUBAIR_KHAN,
42
44
  None,
43
45
  ]
44
46
 
@@ -55,8 +57,8 @@ if len(set(DEFAULT_EMAILERS).intersection(set(DEFAULT_EMAILER_TABLES))) > 0:
55
57
  raise RuntimeError(f"Some names appear in both DEFAULT_EMAILERS and DEFAULT_EMAILER_TABLES")
56
58
 
57
59
 
58
- def print_emails(epstein_files: EpsteinFiles) -> int:
59
- """Returns number of emails printed."""
60
+ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
61
+ """Returns emails that were printed (may contain dupes if printed for both author and recipient)."""
60
62
  print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
61
63
  print_all_files_page_link(epstein_files)
62
64
  emailers_to_print: list[str | None]
@@ -64,8 +66,8 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
64
66
  already_printed_emails: list[Email] = []
65
67
  num_emails_printed_since_last_color_key = 0
66
68
 
67
- if specified_names:
68
- emailers_to_print = specified_names
69
+ if args.names:
70
+ emailers_to_print = args.names
69
71
  else:
70
72
  print_centered(Padding(epstein_files.table_of_emailers(), (2, 0)))
71
73
 
@@ -97,7 +99,7 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
97
99
  for name in DEFAULT_EMAILER_TABLES:
98
100
  epstein_files.print_emails_table_for(name)
99
101
 
100
- if not specified_names:
102
+ if not args.names:
101
103
  epstein_files.print_email_device_info()
102
104
 
103
105
  if args.all_emails:
@@ -106,7 +108,7 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
106
108
  fwded_articles = [e for e in already_printed_emails if e.config and e.is_fwded_article()]
107
109
  log_msg = f"Rewrote {len(Email.rewritten_header_ids)} of {len(already_printed_emails)} email headers"
108
110
  logger.warning(f"{log_msg}, {len(fwded_articles)} of the emails were forwarded articles.")
109
- return len(already_printed_emails)
111
+ return already_printed_emails
110
112
 
111
113
 
112
114
  def print_json_files(epstein_files: EpsteinFiles):
@@ -136,6 +138,39 @@ def print_json_stats(epstein_files: EpsteinFiles) -> None:
136
138
  print_json("count_by_month", count_by_month(epstein_files.all_documents()))
137
139
 
138
140
 
141
+ def print_other_files_section(files: list[OtherFile], epstein_files: EpsteinFiles) -> None:
142
+ """Print the section header, category count table, and preview table for the given OtherFile objects."""
143
+ category_table = OtherFile.count_by_category_table(files)
144
+ other_files_preview_table = OtherFile.files_preview_table(files)
145
+ header_pfx = '' if args.all_other_files else 'Selected '
146
+ print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
147
+
148
+ if args.all_other_files:
149
+ console.line(1)
150
+ else:
151
+ print_all_files_page_link(epstein_files)
152
+ console.line(2)
153
+
154
+ for table in [category_table, other_files_preview_table]:
155
+ table.title = f"{header_pfx}{table.title}"
156
+
157
+ print_centered(category_table)
158
+ console.line(2)
159
+ console.print(other_files_preview_table)
160
+
161
+
162
+ def print_text_messages_section(epstein_files: EpsteinFiles) -> None:
163
+ """Print summary table and stats for text messages."""
164
+ print_section_header('All of His Text Messages')
165
+ print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
166
+
167
+ for log_file in epstein_files.imessage_logs:
168
+ console.print(Padding(log_file))
169
+ console.line(2)
170
+
171
+ print_centered(MessengerLog.summary_table(epstein_files.imessage_logs))
172
+
173
+
139
174
  def write_json_metadata(epstein_files: EpsteinFiles) -> None:
140
175
  json_str = epstein_files.json_metadata()
141
176
 
@@ -14,7 +14,7 @@ from epstein_files.util.constant.common_words import COMMON_WORDS_LIST, COMMON_W
14
14
  from epstein_files.util.constant.names import OTHER_NAMES
15
15
  from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
16
16
  from epstein_files.util.data import ALL_NAMES, flatten, sort_dict
17
- from epstein_files.util.env import args, specified_names
17
+ from epstein_files.util.env import args
18
18
  from epstein_files.util.logging import logger
19
19
  from epstein_files.util.rich import (console, highlighter, print_centered, print_color_key, print_page_title,
20
20
  print_panel, print_starred_header, write_html)
@@ -201,7 +201,7 @@ def write_word_counts_html() -> None:
201
201
  emails = [e for e in epstein_files.non_duplicate_emails() if not (e.is_junk_mail() or e.is_fwded_article())]
202
202
 
203
203
  for email in emails:
204
- if specified_names and email.author not in specified_names:
204
+ if args.names and email.author not in args.names:
205
205
  continue
206
206
 
207
207
  logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
@@ -218,14 +218,12 @@ def write_word_counts_html() -> None:
218
218
  for word in line.split():
219
219
  word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
220
220
 
221
- # Add in iMessage conversation words
222
- imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
223
-
224
- for imessage_log in imessage_logs:
221
+ # Add in iMessage conversations
222
+ for imessage_log in epstein_files.imessage_logs:
225
223
  logger.info(f"Counting words in {imessage_log}")
226
224
 
227
225
  for i, msg in enumerate(imessage_log.messages):
228
- if specified_names and msg.author not in specified_names:
226
+ if args.names and msg.author not in args.names:
229
227
  continue
230
228
  elif HTML_REGEX.search(line):
231
229
  continue
@@ -234,7 +232,7 @@ def write_word_counts_html() -> None:
234
232
  word_count.tally_word(word, SearchResult(imessage_log, [MatchedLine(msg.text, i)]))
235
233
 
236
234
  print_page_title(expand=False)
237
- print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
235
+ print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(epstein_files.imessage_logs)} iMessage Logs")
238
236
  print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
239
237
  console.line()
240
238
  print_color_key()
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "epstein-files"
3
- version = "1.0.14"
3
+ version = "1.0.15"
4
4
  description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
5
5
  authors = ["Michel de Cryptadamus"]
6
6
  readme = "README.md"
File without changes
File without changes