epstein-files 1.0.11__tar.gz → 1.0.13__tar.gz

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (32)
  1. {epstein_files-1.0.11 → epstein_files-1.0.13}/PKG-INFO +1 -1
  2. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/__init__.py +3 -3
  3. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/communication.py +2 -2
  4. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/document.py +43 -69
  5. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/email.py +48 -6
  6. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/imessage/text_message.py +1 -1
  7. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/json_file.py +1 -1
  8. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/messenger_log.py +3 -3
  9. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/other_file.py +2 -2
  10. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/epstein_files.py +27 -12
  11. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/names.py +12 -9
  12. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/strings.py +2 -1
  13. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/urls.py +13 -8
  14. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constants.py +21 -15
  15. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/data.py +1 -1
  16. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/doc_cfg.py +20 -42
  17. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/file_helper.py +3 -9
  18. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/highlighted_group.py +32 -21
  19. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/logging.py +1 -1
  20. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/output.py +1 -1
  21. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/rich.py +11 -2
  22. {epstein_files-1.0.11 → epstein_files-1.0.13}/pyproject.toml +1 -1
  23. {epstein_files-1.0.11 → epstein_files-1.0.13}/LICENSE +0 -0
  24. {epstein_files-1.0.11 → epstein_files-1.0.13}/README.md +0 -0
  25. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/documents/emails/email_header.py +0 -0
  26. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/common_words.py +0 -0
  27. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/html.py +0 -0
  28. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/constant/output_files.py +0 -0
  29. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/env.py +0 -0
  30. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/search_result.py +0 -0
  31. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/timer.py +0 -0
  32. {epstein_files-1.0.11 → epstein_files-1.0.13}/epstein_files/util/word_count.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.11
3
+ Version: 1.0.13
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
20
20
  from epstein_files.util.env import args, specified_names
21
21
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
22
22
  from epstein_files.util.logging import logger
23
- from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
24
- print_text_messages, write_urls)
23
+ from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
24
+ print_text_messages, write_json_metadata, write_urls)
25
25
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
26
26
  from epstein_files.util.timer import Timer
27
27
  from epstein_files.util.word_count import write_word_counts_html
@@ -37,7 +37,7 @@ def generate_html() -> None:
37
37
  epstein_files = EpsteinFiles.get_files(timer)
38
38
 
39
39
  if args.json_metadata:
40
- print_json_metadata(epstein_files)
40
+ write_json_metadata(epstein_files)
41
41
  exit()
42
42
  elif args.json_files:
43
43
  print_json_files(epstein_files)
@@ -34,9 +34,9 @@ class Communication(Document):
34
34
  def is_attribution_uncertain(self) -> bool:
35
35
  return bool(self.config and self.config.is_attribution_uncertain)
36
36
 
37
- def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
37
+ def external_links(self, _style: str = '', include_alt_links: bool = True) -> Text:
38
38
  """Overrides super() method to apply self.author_style."""
39
- return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
39
+ return super().external_links(self.author_style, include_alt_links=include_alt_links)
40
40
 
41
41
  def summary(self) -> Text:
42
42
  return self._summary().append(CLOSE_PROPERTIES_CHAR)
@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
5
5
  from datetime import datetime
6
6
  from pathlib import Path
7
7
  from subprocess import run
8
- from typing import ClassVar, Sequence, TypeVar
8
+ from typing import Callable, ClassVar, Sequence, TypeVar
9
9
 
10
10
  from rich.console import Console, ConsoleOptions, Group, RenderResult
11
11
  from rich.padding import Padding
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
16
16
  from epstein_files.util.constant.strings import *
17
17
  from epstein_files.util.constant.urls import *
18
18
  from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
19
- from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
20
- from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
19
+ from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
20
+ from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
21
21
  from epstein_files.util.env import DOCS_DIR, args
22
22
  from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
23
23
  file_size_str, is_local_extract_file)
@@ -31,10 +31,8 @@ INFO_INDENT = 2
31
31
  INFO_PADDING = (0, 0, 0, INFO_INDENT)
32
32
  MAX_TOP_LINES_LEN = 4000 # Only for logging
33
33
  MIN_DOCUMENT_ID = 10477
34
- LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
35
34
  WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
36
35
 
37
- EXTRACTED_FROM = 'Extracted from'
38
36
  MIN_TIMESTAMP = datetime(1991, 1, 1)
39
37
  MID_TIMESTAMP = datetime(2007, 1, 1)
40
38
  MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -96,15 +94,9 @@ class Document:
96
94
  def __post_init__(self):
97
95
  self.filename = self.file_path.name
98
96
  self.file_id = extract_file_id(self.filename)
99
- self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
97
+ self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
100
98
 
101
- if self.is_local_extract_file():
102
- self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
103
- extracted_from_doc_id = self.url_slug.split('_')[-1]
104
-
105
- if extracted_from_doc_id in ALL_FILE_CONFIGS:
106
- self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
107
- else:
99
+ if 'url_slug' not in vars(self):
108
100
  self.url_slug = self.file_path.stem
109
101
 
110
102
  self._set_computed_fields(text=self.text or self._load_file())
@@ -122,28 +114,51 @@ class Document:
122
114
 
123
115
  def duplicate_file_txt(self) -> Text:
124
116
  """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
125
- if not self.config or not self.config.dupe_of_id:
117
+ if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
126
118
  raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
127
119
 
128
120
  txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
129
- txt.append(f" because it's {self.config.duplicate_reason()} ")
121
+ txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
130
122
  return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
131
123
 
132
124
  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
133
- """Create a Text obj link to this document on epsteinify.com."""
134
- return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
125
+ return self.external_url(epsteinify_doc_url, style, link_txt)
135
126
 
136
127
  def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
137
- """Create a Text obj link to this document on epstein.media."""
138
- return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
128
+ return self.external_url(epstein_media_doc_url, style, link_txt)
139
129
 
140
130
  def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
141
- """Create a Text obj link to this document on EpsteinWeb."""
142
- return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
131
+ return self.external_url(epstein_web_doc_url, style, link_txt)
132
+
133
+ def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
134
+ return self.external_url(rollcall_doc_url, style, link_txt)
135
+
136
+ def external_url(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
137
+ return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)
138
+
139
+ def external_links(self, style: str = '', include_alt_links: bool = False) -> Text:
140
+ """Returns colored links to epstein.media and and epsteinweb in a Text object."""
141
+ txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
142
+
143
+ if args.use_epstein_web:
144
+ txt.append(self.epstein_web_link(style=style))
145
+ alt_link = self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)
146
+ else:
147
+ txt.append(self.epstein_media_link(style=style))
148
+ alt_link = self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)
149
+
150
+ if include_alt_links:
151
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
152
+ txt.append(' (').append(alt_link).append(')')
153
+
154
+ if self._class_name() == 'Email':
155
+ txt.append(' (').append(self.rollcall_link(style='white dim', link_txt=ROLLCALL)).append(')')
156
+
157
+ return txt
143
158
 
144
159
  def file_info_panel(self) -> Group:
145
160
  """Panel with filename linking to raw file plus any additional info about the file."""
146
- panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
161
+ panel = Panel(self.external_links(include_alt_links=True), border_style=self._border_style(), expand=False)
147
162
  padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
148
163
  return Group(*([panel] + padded_info))
149
164
 
@@ -155,12 +170,10 @@ class Document:
155
170
 
156
171
  def info(self) -> list[Text]:
157
172
  """0 to 2 sentences containing the info_txt() as well as any configured description."""
158
- sentences = [
173
+ return without_falsey([
159
174
  self.info_txt(),
160
175
  highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
161
- ]
162
-
163
- return without_falsey(sentences)
176
+ ])
164
177
 
165
178
  def info_txt(self) -> Text | None:
166
179
  """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
@@ -197,9 +210,9 @@ class Document:
197
210
 
198
211
  if self.is_local_extract_file():
199
212
  metadata['extracted_file'] = {
200
- 'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
201
- 'extracted_from_file': self.url_slug + '.txt',
202
- 'extracted_file_url': extracted_file_url(self.filename),
213
+ 'explanation': 'Manually extracted from one of the court filings.',
214
+ 'extracted_from': self.url_slug + '.txt',
215
+ 'url': extracted_file_url(self.filename),
203
216
  }
204
217
 
205
218
  return metadata
@@ -208,25 +221,6 @@ class Document:
208
221
  with open(self.file_path) as f:
209
222
  return f.read()
210
223
 
211
- def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
212
- """Returns colored links to epstein.media and and epsteinweb in a Text object."""
213
- txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
214
-
215
- if args.use_epstein_web:
216
- txt.append(self.epstein_web_link(style=style))
217
-
218
- if include_alt_link:
219
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
220
- txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
221
- else:
222
- txt.append(self.epstein_media_link(style=style))
223
-
224
- if include_alt_link:
225
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
226
- txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
227
-
228
- return txt
229
-
230
224
  def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
231
225
  """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
232
226
  for k, v in repairs.items():
@@ -253,7 +247,7 @@ class Document:
253
247
  txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
254
248
 
255
249
  if self.timestamp:
256
- timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
250
+ timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
257
251
  txt.append(' (', style=SYMBOL_STYLE)
258
252
  txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
259
253
 
@@ -327,26 +321,6 @@ class Document:
327
321
  self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
328
322
  self.num_lines = len(self.lines)
329
323
 
330
- def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
331
- """Copy info from original config for file this document was extracted from."""
332
- if self.config:
333
- self.warn(f"Merging existing config with config for file this document was extracted from")
334
- else:
335
- self.config = EmailCfg(id=self.file_id)
336
-
337
- extracted_from_description = doc_cfg.complete_description()
338
-
339
- if extracted_from_description:
340
- extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
341
-
342
- if self.config.description:
343
- self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
344
-
345
- self.config.description = extracted_description
346
-
347
- self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
348
- self.warn(f"Constructed local config\n{self.config}")
349
-
350
324
  def _write_clean_text(self, output_path: Path) -> None:
351
325
  """Write self.text to 'output_path'. Used only for diffing files."""
352
326
  if output_path.exists():
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import re
3
+ from copy import deepcopy
3
4
  from dataclasses import asdict, dataclass, field
4
5
  from datetime import datetime
5
6
  from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
21
22
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
22
23
  flatten, remove_timezone, uniquify)
23
24
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
25
+ from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
24
26
  from epstein_files.util.highlighted_group import get_style_for_name
25
27
  from epstein_files.util.logging import logger
26
28
  from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
35
37
  BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
36
38
  DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
37
39
  TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
40
+ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
38
41
 
39
42
  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
40
43
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
44
+ APPEARS_IN = 'Appears in'
41
45
  MAX_CHARS_TO_PRINT = 4000
42
46
  MAX_NUM_HEADER_LINES = 14
43
47
  MAX_QUOTED_REPLIES = 2
@@ -128,7 +132,6 @@ JUNK_EMAILERS = [
128
132
  'How To Academy',
129
133
  'Jokeland',
130
134
  JP_MORGAN_USGIO,
131
- 'Saved by Internet Explorer 11',
132
135
  ]
133
136
 
134
137
  MAILING_LISTS = [
@@ -248,6 +251,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
248
251
 
249
252
  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
250
253
  USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
254
+ 'Alan Dlugash', # CCed with Richard Kahn
251
255
  'Alan Rogers', # Random CC
252
256
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
253
257
  'BS Stern', # A random fwd of email we have
@@ -264,14 +268,14 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
264
268
  'Lyn Fontanilla', # Random CC
265
269
  'Mark Albert', # Random CC
266
270
  'Matthew Schafer', # Random CC
271
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
272
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
267
273
  'Michael Simmons', # Random CC
268
274
  'Nancy Portland', # Lawrence Krauss CC
269
275
  'Oliver Goodenough', # Robert Trivers CC
270
- 'Owen Blicksilver', # Landon Thomas CC
271
276
  'Peter Aldhous', # Lawrence Krauss CC
272
277
  'Sam Harris', # Lawrence Krauss CC
273
278
  SAMUEL_LEFF, # Random CC
274
- "Saved by Internet Explorer 11",
275
279
  'Sean T Lehane', # Random CC
276
280
  'Stephen Rubin', # Random CC
277
281
  'Tim Kane', # Random CC
@@ -318,6 +322,17 @@ class Email(Communication):
318
322
  rewritten_header_ids: ClassVar[set[str]] = set([])
319
323
 
320
324
  def __post_init__(self):
325
+ self.filename = self.file_path.name
326
+ self.file_id = extract_file_id(self.filename)
327
+
328
+ # Special handling for copying properties out of the config for the document this one was extracted from
329
+ if self.is_local_extract_file():
330
+ self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
331
+ extracted_from_doc_id = self.url_slug.split('_')[-1]
332
+
333
+ if extracted_from_doc_id in ALL_FILE_CONFIGS:
334
+ self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
335
+
321
336
  super().__post_init__()
322
337
 
323
338
  try:
@@ -340,8 +355,12 @@ class Email(Communication):
340
355
  self.actual_text = self._actual_text()
341
356
  self.sent_from_device = self._sent_from_device()
342
357
 
358
+ def attachments(self) -> list[str]:
359
+ return (self.header.attachments or '').split(';')
360
+
343
361
  def info_txt(self) -> Text:
344
- txt = Text("OCR text of email from ", style='grey46').append(self.author_txt).append(' to ')
362
+ email_type = 'fwded article' if self.is_fwded_article() else 'email'
363
+ txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
345
364
  return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))
346
365
 
347
366
  def is_fwded_article(self) -> bool:
@@ -566,11 +585,11 @@ class Email(Communication):
566
585
  self._merge_lines(2, 5)
567
586
  elif self.file_id in ['029498', '031428']:
568
587
  self._merge_lines(2, 4)
569
- elif self.file_id in ['029976', '023067']:
588
+ elif self.file_id in ['029976', '023067', '033576']:
570
589
  self._merge_lines(3) # Merge 4th and 5th rows
571
590
  elif self.file_id in '026609 029402 032405 022695'.split():
572
591
  self._merge_lines(4) # Merge 5th and 6th rows
573
- elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
592
+ elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
574
593
  self._merge_lines(2, 4)
575
594
  elif self.file_id in ['029154', '029163']:
576
595
  self._merge_lines(2, 5)
@@ -591,6 +610,8 @@ class Email(Communication):
591
610
  self._merge_lines(7, 9)
592
611
  elif self.file_id == '030299':
593
612
  self._merge_lines(7, 10)
613
+ elif self.file_id in ['022673', '022684']:
614
+ self._merge_lines(9)
594
615
  elif self.file_id == '014860':
595
616
  self._merge_lines(3)
596
617
  self._merge_lines(4)
@@ -649,6 +670,27 @@ class Email(Communication):
649
670
  sent_from = sent_from_match.group(0)
650
671
  return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
651
672
 
673
+ def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
674
+ """Copy info from original config for file this document was extracted from."""
675
+ if self.file_id in ALL_FILE_CONFIGS:
676
+ self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
677
+ self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
678
+ else:
679
+ self.config = EmailCfg(id=self.file_id)
680
+
681
+ extracted_from_description = extracted_from_doc_cfg.complete_description()
682
+
683
+ if extracted_from_description:
684
+ extracted_description = f"{APPEARS_IN} {extracted_from_description}"
685
+
686
+ if self.config.description:
687
+ self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
688
+
689
+ self.config.description = extracted_description
690
+
691
+ self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
692
+ self.warn(f"Constructed synthetic config: {self.config}")
693
+
652
694
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
653
695
  logger.debug(f"Printing '{self.filename}'...")
654
696
  yield self.file_info_panel()
@@ -45,7 +45,7 @@ class TextMessage:
45
45
  self.author_str = self.author_str or self.author
46
46
 
47
47
  if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
48
- self.author_str = self.author + ' (?)'
48
+ self.author_str += ' (?)'
49
49
 
50
50
  def timestamp(self) -> datetime:
51
51
  return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
39
39
  return JSON
40
40
 
41
41
  def info_txt(self) -> Text | None:
42
- return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
42
+ return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
43
43
 
44
44
  def is_interesting(self):
45
45
  return False
@@ -16,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
16
16
  from epstein_files.util.doc_cfg import Metadata, TextCfg
17
17
  from epstein_files.util.highlighted_group import get_style_for_name
18
18
  from epstein_files.util.logging import logger
19
- from epstein_files.util.rich import build_table, highlighter
19
+ from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
20
20
 
21
21
  CONFIRMED_MSG = 'Found confirmed counterparty'
22
22
  GUESSED_MSG = 'This is probably a conversation with'
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
76
76
  is_phone_number = author_str.startswith('+')
77
77
 
78
78
  if is_phone_number:
79
- logger.warning(f"{self.summary()} Found phone number: {author_str}")
79
+ logger.info(f"{self.summary()} Found phone number: {author_str}")
80
80
  self.phone_number = author_str
81
81
 
82
82
  # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
@@ -130,7 +130,7 @@ class MessengerLog(Communication):
130
130
  counts_table.add_column('Files', justify='right', style='white')
131
131
  counts_table.add_column("Msgs", justify='right')
132
132
  counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
133
- counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
133
+ counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
134
134
  counts_table.add_column('Days', justify='right', style='dim')
135
135
 
136
136
  for name, count in sort_dict(cls.count_authors(imessage_logs)):
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
107
107
  TEXT_OF_US_LAW,
108
108
  TRANSLATION,
109
109
  TWEET,
110
- THE_REAL_DEAL_ARTICLE,
110
+ REAL_DEAL_ARTICLE,
111
111
  TRUMP_DISCLOSURES,
112
112
  UBS_CIO_REPORT,
113
113
  UN_GENERAL_ASSEMBLY,
@@ -240,7 +240,7 @@ class OtherFile(Document):
240
240
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
241
241
 
242
242
  for file in files:
243
- link_and_info = [file.raw_document_link_txt()]
243
+ link_and_info = [file.external_links()]
244
244
  date_str = file.date_str()
245
245
 
246
246
  if file.is_duplicate():
@@ -23,12 +23,12 @@ from epstein_files.util.constant.strings import *
23
23
  from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
24
24
  epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
25
25
  from epstein_files.util.constants import *
26
- from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
26
+ from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
27
27
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
28
28
  from epstein_files.util.env import DOCS_DIR, args, logger
29
29
  from epstein_files.util.file_helper import file_size_str
30
30
  from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
31
- from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
31
+ from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
32
32
  build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
33
33
  print_other_site_link, print_panel, print_section_header, vertically_pad)
34
34
  from epstein_files.util.search_result import SearchResult
@@ -278,25 +278,40 @@ class EpsteinFiles:
278
278
  def print_emailer_counts_table(self) -> None:
279
279
  footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
280
280
  counts_table = build_table("Email Counts", caption=footer)
281
- add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
281
+
282
+ add_cols_to_table(counts_table, [
283
+ 'Name',
284
+ 'Num',
285
+ 'Sent',
286
+ "Recv",
287
+ {'name': 'First', 'highlight': True},
288
+ {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
289
+ JMAIL,
290
+ 'eMedia',
291
+ 'eWeb',
292
+ 'Twitter',
293
+ ])
282
294
 
283
295
  emailer_counts = {
284
296
  emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
285
297
  for emailer in self.all_emailers(True)
286
298
  }
287
299
 
288
- for p, count in sort_dict(emailer_counts):
289
- style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)
300
+ for name, count in sort_dict(emailer_counts):
301
+ style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
302
+ emails = self.emails_for(name)
290
303
 
291
304
  counts_table.add_row(
292
- Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
305
+ Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
293
306
  str(count),
294
- str(self.email_author_counts[p]),
295
- str(self.email_recipient_counts[p]),
296
- '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
297
- '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
298
- '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
299
- '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
307
+ str(self.email_author_counts[name]),
308
+ str(self.email_recipient_counts[name]),
309
+ emails[0].timestamp_without_seconds(),
310
+ emails[-1].timestamp_without_seconds(),
311
+ '' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
312
+ '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
313
+ '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
314
+ '' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
300
315
  )
301
316
 
302
317
  console.print(vertically_pad(counts_table, 2))
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
42
42
  CECILIA_STEEN = 'Cecilia Steen'
43
43
  CELINA_DUBIN = 'Celina Dubin'
44
44
  CHRISTINA_GALBRAITH = 'Christina Galbraith' # Works with Tyler Shears on reputation stuff
45
+ DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
45
46
  DANIEL_SABBA = 'Daniel Sabba'
46
47
  DANIEL_SIAD = 'Daniel Siad'
47
48
  DANNY_FROST = 'Danny Frost'
@@ -143,7 +144,7 @@ REID_HOFFMAN = 'Reid Hoffman'
143
144
  REID_WEINGARTEN = 'Reid Weingarten'
144
145
  RENATA_BOLOTOVA = 'Renata Bolotova'
145
146
  RICHARD_KAHN = 'Richard Kahn'
146
- ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
147
+ ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
147
148
  ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
148
149
  ROBERT_TRIVERS = 'Robert Trivers'
149
150
  ROGER_SCHANK = 'Roger Schank'
@@ -178,6 +179,7 @@ JARED_KUSHNER = 'Jared Kushner'
178
179
  JULIE_K_BROWN = 'Julie K. Brown'
179
180
  KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
180
181
  MICHAEL_J_BOCCIO = 'Michael J. Boccio'
182
+ NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
181
183
  PAUL_G_CASSELL = 'Paul G. Cassell'
182
184
  RUDY_GIULIANI = 'Rudy Giuliani'
183
185
  TULSI_GABBARD = 'Tulsi Gabbard'
@@ -226,22 +228,23 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
226
228
  # Names to color white in the word counts
227
229
  OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
228
230
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
229
- baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
231
+ baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
230
232
  chapman charles charlie christopher clint cohen colin collins conway
231
- davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
233
+ danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
232
234
  edmond elizabeth emily entwistle erik evelyn
233
- ferguson flachsbart francis franco frank
235
+ ferguson flachsbart francis franco frank frost
234
236
  gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
235
- hancock harold harrison harry helen hirsch hofstadter horowitz hussein
237
+ hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
236
238
  ian isaac isaacson
237
- jamie jane janet jason jen jim joe johnson jones josh julie justin
239
+ james jamie jane janet jason jen jim joe johnson jones josh julie justin
238
240
  karl kate kathy kelly kim kruger kyle
239
- leo leonard lenny leslie lieberman louis lynch lynn
241
+ laurie leo leonard lenny leslie lieberman louis lynch lynn
240
242
  marcus marianne matt matthew melissa michele michelle moore moscowitz
241
- nicole nussbaum
243
+ nancy nicole nussbaum
244
+ owen
242
245
  paulson philippe
243
246
  rafael ray richard richardson rob robin ron rubin rudolph ryan
244
- sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
247
+ sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
245
248
  ted theresa thompson tiffany timothy tony
246
249
  valeria
247
250
  walter warren weinstein weiss william
@@ -20,7 +20,7 @@ POLITICS = 'politics'
20
20
  PROPERTY = 'property'
21
21
  PUBLICIST = 'publicist'
22
22
  REPUTATION = 'reputation'
23
- SKYPE_LOG= 'skype log'
23
+ SKYPE_LOG = 'Skype log'
24
24
  SOCIAL = 'social'
25
25
  SPEECH = 'speech'
26
26
 
@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
39
39
  NYT = "New York Times"
40
40
  PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
41
41
  PALM_BEACH_POST = f'{PALM_BEACH} Post'
42
+ SHIMON_POST = 'The Shimon Post'
42
43
  THE_REAL_DEAL = 'The Real Deal'
43
44
  WAPO = 'WaPo'
44
45
  VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
@@ -13,11 +13,12 @@ ARCHIVE_LINK_COLOR = 'slate_blue3'
13
13
  TEXT_LINK = 'text_link'
14
14
 
15
15
  # External site names
16
- ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
16
+ ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb', 'RollCall']
17
17
  EPSTEIN_MEDIA = 'epstein.media'
18
18
  EPSTEIN_WEB = 'EpsteinWeb'
19
19
  EPSTEINIFY = 'epsteinify'
20
20
  JMAIL = 'Jmail'
21
+ ROLLCALL = 'RollCall'
21
22
 
22
23
  GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
23
24
  GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
@@ -41,9 +42,10 @@ EPSTEIN_WEB_URL = 'https://epsteinweb.org'
41
42
  JMAIL_URL = 'https://jmail.world'
42
43
 
43
44
  DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
44
- EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
45
- EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
46
- EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
45
+ EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files/",
46
+ EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images/',
47
+ EPSTEINIFY: f"{EPSTEINIFY_URL}/document/",
48
+ ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
47
49
  }
48
50
 
49
51
 
@@ -53,7 +55,7 @@ epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_ma
53
55
  epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
54
56
  epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(name)}"
55
57
 
56
- epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, True)
58
+ epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, 'lower')
57
59
  epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
58
60
  epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
59
61
  epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
@@ -62,16 +64,19 @@ epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{fil
62
64
  epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
63
65
  epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.quote(s)}&ewmfilepp=20"
64
66
 
67
+ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
68
+
65
69
  search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
66
70
  search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
67
71
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
68
72
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
69
73
 
70
74
 
71
- def build_doc_url(base_url: str, filename_or_id: int | str, lowercase: bool = False) -> str:
75
+ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
72
76
  file_stem = coerce_file_stem(filename_or_id)
73
- file_stem = file_stem.lower() if lowercase else file_stem
74
- return f"{base_url}/{file_stem}"
77
+ file_stem = file_stem.lower() if case == 'lower' else file_stem
78
+ file_stem = file_stem.title() if case == 'title' else file_stem
79
+ return f"{base_url}{file_stem}"
75
80
 
76
81
 
77
82
  def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
@@ -65,8 +65,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
65
65
  BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
66
66
  BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
67
67
  BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
68
- 'Dangene and Jennie Enterprise': re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
69
- DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
68
+ DANGENE_AND_JENNIE_ENTERPRISE: re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
69
+ DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
70
70
  DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
71
71
  DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
72
72
  DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
128
128
  PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
129
129
  REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
130
130
  RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
131
- ROBERT_D_CRITTON: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
131
+ ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
132
132
  ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
133
133
  ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
134
134
  ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
@@ -163,6 +163,7 @@ EMAILERS = [
163
163
  DEEPAK_CHOPRA,
164
164
  GLENN_DUBIN,
165
165
  GORDON_GETTY,
166
+ 'Kevin Bright',
166
167
  'Jack Lang',
167
168
  JACK_SCAROLA,
168
169
  JAY_LEFKOWITZ,
@@ -257,7 +258,6 @@ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
257
258
  LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
258
259
  KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
259
260
  MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
260
- NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
261
261
  NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
262
262
  NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
263
263
  OBAMA_JOKE = 'joke about Obama'
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
265
265
  PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
266
266
  PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
267
267
  PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
268
- SHIMON_POST = 'The Shimon Post'
268
+ REAL_DEAL_ARTICLE = 'article by Keith Larsen'
269
269
  SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
270
270
  SINGLE_PAGE = 'single page of'
271
271
  STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
272
272
  SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
273
- THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
274
273
  TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
275
274
  UBS_CIO_REPORT = 'CIO Monthly Extended report'
276
275
  UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
500
499
  EmailCfg(
501
500
  id='029977',
502
501
  author=LAWRANCE_VISOSKI,
503
- recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
502
+ recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
504
503
  attribution_reason=LARRY_REASON,
505
504
  duplicate_ids=['031129'],
506
505
  ),
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
508
507
  EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
509
508
  EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
510
509
  EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
511
- EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
510
+ EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
512
511
  EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
513
512
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
514
513
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
573
572
  attribution_reason='ends with "Respectfully, terry"',
574
573
  author=TERRY_KAFKA,
575
574
  fwded_text_after='From: Mike Cohen',
576
- recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
575
+ recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
577
576
  duplicate_ids=['028482'],
578
577
  ),
579
578
  EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
@@ -665,6 +664,10 @@ EMAILS_CONFIG = [
665
664
  EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
666
665
  EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
667
666
  EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
667
+ EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
668
+ EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
669
+ EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
670
+ EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
668
671
  EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
669
672
  EmailCfg(id='026755', is_fwded_article=True), # HuffPo
670
673
  EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
@@ -710,6 +713,8 @@ EMAILS_CONFIG = [
710
713
  EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
711
714
  EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
712
715
  EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
716
+ EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
717
+ EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
713
718
  EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
714
719
  EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
715
720
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
@@ -852,9 +857,9 @@ EMAILS_CONFIG = [
852
857
  EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
853
858
  EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
854
859
  EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
855
- EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
856
860
  EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
857
861
  EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
862
+ EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
858
863
  ]
859
864
 
860
865
 
@@ -925,6 +930,7 @@ OTHER_FILES_ARTICLES = [
925
930
  DocCfg(id='029865', author=LA_TIMES, description=f"front page article about {DEEPAK_CHOPRA} and young Iranians", date='2016-11-05'),
926
931
  DocCfg(id='026598', author=LA_TIMES, description=f"op-ed about why America needs a Ministry of Culture"),
927
932
  DocCfg(id='027024', author=LA_TIMES, description=f"Scientists Create Human Embryos to Make Stem Cells", date='2013-05-15'),
933
+ DocCfg(id='022811', author='Law.com', description='Sarah Ransome Identifies Herself in Epstein Sex Trafficking Case', date='2018-01-09'),
928
934
  DocCfg(id='031776', author='Law360', description=f"article about Michael Avenatti by Andrew Strickler"),
929
935
  DocCfg(id='023102', author=f'Litigation Daily', description=f"article about {REID_WEINGARTEN}", date='2015-09-04'),
930
936
  DocCfg(id='029340', author=f'MarketWatch', description=f'article about estate taxes, particularly Epstein\'s favoured GRATs'),
@@ -1186,7 +1192,7 @@ OTHER_FILES_LEGAL = [
1186
1192
  ]
1187
1193
 
1188
1194
  OTHER_FILES_CONFERENCES = [
1189
- DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference'),
1195
+ DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
1190
1196
  DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1191
1197
  DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
1192
1198
  DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
@@ -1335,8 +1341,8 @@ OTHER_FILES_PROPERTY = [
1335
1341
  DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
1336
1342
  DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
1337
1343
  DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
1338
- DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
1339
- DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
1344
+ DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
1345
+ DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
1340
1346
  DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
1341
1347
  DocCfg(id='018743', description=f"Las Vegas property listing"),
1342
1348
  DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
@@ -1497,13 +1503,13 @@ OTHER_FILES_MISC = [
1497
1503
  DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1498
1504
  DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1499
1505
  DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1506
+ DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
1500
1507
  DocCfg(
1501
1508
  id='018224',
1502
1509
  category=SKYPE_LOG,
1503
- description=f'Skype conversations with linkspirit (French?) and {LAWRENCE_KRAUSS}',
1510
+ author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
1504
1511
  is_interesting=True, # we don't know who linkspirit is yet
1505
1512
  ),
1506
- DocCfg(id='032210', category=SKYPE_LOG, description=f'Skype conversation with linkspirit', is_interesting=True),
1507
1513
  DocCfg(
1508
1514
  id='025147',
1509
1515
  author=BROCKMAN_INC,
@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
21
21
  PACIFIC_TZ = tz.gettz("America/Los_Angeles")
22
22
  TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
23
23
 
24
-
25
24
  collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
26
25
  date_str = lambda dt: dt.isoformat()[0:10] if dt else None
27
26
  escape_double_quotes = lambda text: text.replace('"', r'\"')
28
27
  escape_single_quotes = lambda text: text.replace("'", r"\'")
29
28
  iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
29
+ remove_zero_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
30
30
  uniquify = lambda _list: list(set(_list))
31
31
  without_falsey = lambda _list: [e for e in _list if e]
32
32
 
@@ -8,7 +8,7 @@ from dateutil.parser import parse
8
8
 
9
9
  from epstein_files.util.constant.names import *
10
10
  from epstein_files.util.constant.strings import *
11
- from epstein_files.util.data import without_falsey
11
+ from epstein_files.util.data import remove_zero_time_from_timestamp_str, without_falsey
12
12
 
13
13
  DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
14
14
  Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
@@ -47,12 +47,11 @@ FINANCIAL_REPORTS_AUTHORS = [
47
47
  ]
48
48
 
49
49
  # Fields like timestamp and author are better added from the Document object
50
- INVALID_FOR_METADATA = [
50
+ NON_METADATA_FIELDS = [
51
51
  'actual_text',
52
52
  'date',
53
53
  'id',
54
- 'timestamp',
55
- 'was_generated',
54
+ 'is_synthetic',
56
55
  ]
57
56
 
58
57
 
@@ -68,10 +67,10 @@ class DocCfg:
68
67
  date (str | None): If passed will be immediated parsed into the 'timestamp' field
69
68
  dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
70
69
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
71
- duplicate_ids (list[str]): Inverse of 'dupe_of_id' - this file will NOT be suppressed but 'duplicate_ids' will be
70
+ duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
72
71
  is_interesting (bool): Override other considerations and always consider this file interesting
73
72
  timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
74
- was_generated (bool): True if this object was generated by the duplicate_cfgs() method
73
+ is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
75
74
  """
76
75
  id: str
77
76
  author: str | None = None
@@ -82,8 +81,8 @@ class DocCfg:
82
81
  dupe_type: DuplicateType | None = None
83
82
  duplicate_ids: list[str] = field(default_factory=list)
84
83
  is_interesting: bool = False
84
+ is_synthetic: bool = False
85
85
  timestamp: datetime | None = None
86
- was_generated: bool = False
87
86
 
88
87
  def __post_init__(self):
89
88
  if self.date:
@@ -94,13 +93,17 @@ class DocCfg:
94
93
 
95
94
  def complete_description(self) -> str | None:
96
95
  """String that summarizes what is known about this document."""
97
- if self.category and not self.description:
96
+ if self.category and not self.description and not self.author:
98
97
  return self.category
99
98
  elif self.category == REPUTATION:
100
99
  return f"{REPUTATION_MGMT}: {self.description}"
100
+ elif self.category == SKYPE_LOG:
101
+ msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
102
+ return f"{msg} {self.description}" if self.description else msg
101
103
  elif self.author and self.description:
102
104
  if self.category in [ACADEMIA, BOOK]:
103
- return self.title_by_author()
105
+ title = self.description if '"' in self.description else f"'{self.description}'"
106
+ return f"{title} by {self.author}"
104
107
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
105
108
  return f"{self.author} report: '{self.description}'"
106
109
  elif self.category == LEGAL and 'v.' in self.author:
@@ -111,10 +114,6 @@ class DocCfg:
111
114
  pieces = without_falsey([self.author, self.description])
112
115
  return ' '.join(pieces) if pieces else None
113
116
 
114
- def duplicate_reason(self) -> str | None:
115
- if self.dupe_type is not None:
116
- return DUPE_TYPE_STRS[self.dupe_type]
117
-
118
117
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
119
118
  """Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
120
119
  for id in self.duplicate_ids:
@@ -123,35 +122,17 @@ class DocCfg:
123
122
  dupe_cfg.dupe_of_id = self.id
124
123
  dupe_cfg.duplicate_ids = []
125
124
  dupe_cfg.dupe_type = self.dupe_type
126
- dupe_cfg.was_generated = True
125
+ dupe_cfg.is_synthetic = True
127
126
  yield dupe_cfg
128
127
 
129
128
  def metadata(self) -> Metadata:
130
- non_null_fields = {k: v for k, v in asdict(self).items() if v and k not in INVALID_FOR_METADATA}
131
-
132
- if self.category in [EMAIL, TEXT_MESSAGE]:
133
- del non_null_fields['category']
134
-
135
- return non_null_fields
136
-
137
- def non_null_field_names(self) -> list[str]:
138
- return [f.name for f in self.sorted_fields() if getattr(self, f.name)]
139
-
140
- def sorted_fields(self) -> list[Field]:
141
- return sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name))
142
-
143
- def title_by_author(self) -> str:
144
- if not (self.author and self.description):
145
- raise RuntimeError(f"Can't call title_by_author() without author and description!")
146
-
147
- title = self.description if '"' in self.description else f"'{self.description}'"
148
- return f"{title} by {self.author}"
129
+ return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
149
130
 
150
131
  def _props_strs(self) -> list[str]:
151
132
  props = []
152
133
  add_prop = lambda f, value: props.append(f"{f.name}={value}")
153
134
 
154
- for _field in self.sorted_fields():
135
+ for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
155
136
  value = getattr(self, _field.name)
156
137
 
157
138
  if value is None or value is False or (isinstance(value, list) and len(value) == 0):
@@ -160,13 +141,13 @@ class DocCfg:
  add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
  elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
  continue
- elif _field.name == 'recipients' and isinstance(value, list):
+ elif _field.name == 'recipients' and value:
  recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
  add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
  elif _field.name == 'timestamp' and self.date is not None:
  continue # Don't print both timestamp and date
  elif isinstance(value, datetime):
- value_str = re.sub(' 00:00:00', '', str(value))
+ value_str = remove_zero_time_from_timestamp_str(value)
  add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
  elif isinstance(value, str):
  if "'" in value:
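
The datetime branch above now delegates to remove_zero_time_from_timestamp_str() instead of the inline re.sub. Judging only from the line it replaces, the helper presumably strips a midnight time component so date-only timestamps print as bare dates; a self-contained sketch of that behavior:

    import re
    from datetime import datetime

    def remove_zero_time_from_timestamp_str(value: datetime) -> str:
        """Drop a ' 00:00:00' suffix so date-only timestamps render as plain dates."""
        return re.sub(' 00:00:00', '', str(value))

    print(remove_zero_time_from_timestamp_str(datetime(2011, 6, 1)))         # 2011-06-01
    print(remove_zero_time_from_timestamp_str(datetime(2011, 6, 1, 9, 30)))  # 2011-06-01 09:30:00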
@@ -221,18 +202,15 @@ class EmailCfg(CommunicationCfg):
  """
  Attributes:
  actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
+ fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
  is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
  recipients (list[str | None]): Who received the email
  """
- actual_text: str | None = None # Override for the Email._actual_text() method for particularly broken emails
- fwded_text_after: str | None = None # If set, any text after this is a fwd of an article or similar
+ actual_text: str | None = None
+ fwded_text_after: str | None = None
  is_fwded_article: bool = False
  recipients: list[str | None] = field(default_factory=list)

- @classmethod
- def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
- return cls(**asdict(cfg))
-
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
  def __repr__(self) -> str:
  return super().__repr__()
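
One EmailCfg detail worth noting: recipients is declared with field(default_factory=list) rather than a bare [] default, because dataclasses reject mutable defaults that would otherwise be shared by every instance. A small illustration using a hypothetical stand-in class:

    from dataclasses import dataclass, field

    @dataclass
    class TinyEmailCfg:
        actual_text: str | None = None
        is_fwded_article: bool = False
        # default_factory gives each instance its own list; `recipients: list = []` would raise ValueError.
        recipients: list[str | None] = field(default_factory=list)

    a = TinyEmailCfg()
    b = TinyEmailCfg()
    a.recipients.append('someone@example.com')
    print(b.recipients)  # [] -- the two instances do not share a list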
@@ -11,8 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
  KB = 1024
  MB = KB * KB

+ file_size = lambda file_path: Path(file_path).stat().st_size
+ file_size_str = lambda file_path: file_size_to_str(file_size(file_path))

- # Coerce methods hands both string and int arguments.
+ # Coerce methods handle both string and int arguments.
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
  coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
  id_str = lambda id: f"{int(id):06d}"
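
The coerce_* lambdas above normalize either a bare document ID or an existing filename into the on-disk .txt path. A rough sketch of how the pieces fit together; HOUSE_OVERSIGHT_PREFIX, DOCS_DIR and coerce_file_stem are reduced to assumed stand-ins here and the real definitions may differ:

    from pathlib import Path

    HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'  # assumed value
    DOCS_DIR = Path('docs')                      # assumed location of the .txt files

    id_str = lambda id: f"{int(id):06d}"         # zero-pad IDs to 6 digits (FILENAME_LENGTH = prefix + 6)

    def coerce_file_stem(filename_or_id) -> str:
        """Accept 32652, '32652', or 'HOUSE_OVERSIGHT_032652.txt' and return the bare stem."""
        stem = Path(str(filename_or_id)).stem
        return stem if stem.startswith(HOUSE_OVERSIGHT_PREFIX) else HOUSE_OVERSIGHT_PREFIX + id_str(stem)

    coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
    coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))

    print(coerce_file_path(32652))                         # docs/HOUSE_OVERSIGHT_032652.txt
    print(coerce_file_path('HOUSE_OVERSIGHT_032652.txt'))  # docs/HOUSE_OVERSIGHT_032652.txt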
@@ -44,14 +46,6 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
  return file_match.group(1)


- def file_size(file_path: str | Path) -> int:
- return Path(file_path).stat().st_size
-
-
- def file_size_str(file_path: str | Path) -> str:
- return file_size_to_str(file_size(file_path))
-
-
  def file_size_to_str(size: int) -> str:
  digits = 2
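
file_size_to_str() is cut off here after digits = 2, so the rest is a guess, but the KB/MB constants above imply a conventional bytes-to-KB/MB formatter rounded to two decimal places. A hedged sketch of an equivalent; the real thresholds and labels may differ:

    KB = 1024
    MB = KB * KB

    def file_size_to_str(size: int, digits: int = 2) -> str:
        # Illustrative only: mirrors what a KB/MB formatter typically does.
        if size >= MB:
            return f"{round(size / MB, digits)} MB"
        elif size >= KB:
            return f"{round(size / KB, digits)} KB"
        return f"{size} bytes"

    print(file_size_to_str(512))     # 512 bytes
    print(file_size_to_str(2048))    # 2.0 KB
    print(file_size_to_str(5 * MB))  # 5.0 MB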
@@ -223,6 +223,7 @@ HIGHLIGHTED_NAMES = [
  'Linda Pinto': 'interior design at Alberto Pinto Cabinet',
  MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
  NADIA_MARCINKO: 'pilot',
+ 'Sean J. Lancaster': 'airplane reseller',
  }
  ),
  HighlightedNames(
@@ -260,6 +261,8 @@ HIGHLIGHTED_NAMES = [
  MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
  MICHAEL_MILLER: 'Steptoe LLP partner',
  REID_WEINGARTEN: 'Steptoe LLP partner',
+ ROBERT_D_CRITTON_JR: 'criminal defense attorney',
+ 'Robert Gold': None,
  'Roy Black': CRIMINAL_DEFENSE_2008,
  SCOTT_J_LINK: None,
  TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
@@ -297,28 +300,17 @@ HIGHLIGHTED_NAMES = [
  }
  ),
  HighlightedNames(
- label='friend',
- style='tan',
- pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
- emailers = {
- DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
- JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
- 'linkspirit': "Skype username of someone Epstein communicated with",
- 'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
- STEPHEN_HANSON: None,
- TOM_BARRACK: 'long time friend of Trump',
- }
- ),
- HighlightedNames(
- label='finance',
+ label=FINANCE,
  style='green',
- pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
+ pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|(Richard\s*)?LeFrak|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
  emailers={
  AMANDA_ENS: 'Citigroup',
+ BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
  DANIEL_SABBA: 'UBS Investment Bank',
  DAVID_FISZEL: 'CIO Honeycomb Asset Management',
  JES_STALEY: 'former CEO of Barclays',
  JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
+ 'Laurie Cameron': 'currency trading',
  LEON_BLACK: 'Apollo CEO',
  MARC_LEON: 'Luxury Properties Sari Morrocco',
  MELANIE_SPINELLA: f'representative of {LEON_BLACK}',
@@ -327,9 +319,23 @@ HIGHLIGHTED_NAMES = [
  PAUL_MORRIS: 'Deutsche Bank',
  }
  ),
+ HighlightedNames(
+ label='friend',
+ style='tan',
+ pattern=r"Andrew Farkas|Thomas\s*(J\.?\s*)?Barrack(\s*Jr)?",
+ emailers = {
+ DANGENE_AND_JENNIE_ENTERPRISE: 'founders of the members-only CORE club',
+ DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
+ JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
+ 'linkspirit': "Skype username of someone Epstein communicated with",
+ 'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
+ STEPHEN_HANSON: None,
+ TOM_BARRACK: 'long time friend of Trump',
+ },
+ ),
  HighlightedNames(
  label=HARVARD.lower(),
- style='deep_pink2',
+ style='light_goldenrod3',
  pattern=r'Cambridge|(Derek\s*)?Bok|Elisa(\s*New)?|Harvard(\s*(Business|Law|University)(\s*School)?)?|(Jonathan\s*)?Zittrain|(Stephen\s*)?Kosslyn',
  emailers = {
  "Donald Rubin": f"Professor of Statistics",
@@ -378,7 +384,7 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label=JOURNALIST,
  style='bright_yellow',
- pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
+ pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?(Hill|Patterson)|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
  emailers = {
  EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
  'James Hill': 'ABC News',
@@ -398,7 +404,7 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label='law enforcement',
  style='color(24) bold',
- pattern=r'ag|(Alicia\s*)?Valle|AML|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
+ pattern=r'ag|(Alicia\s*)?Valle|AML|(Andrew\s*)?McCabe|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC?|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Secret\s*Service|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
  emailers = {
  ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
  DANNY_FROST: 'Director of Communications at Manhattan DA',
@@ -457,7 +463,9 @@ HIGHLIGHTED_NAMES = [
  CHRISTINA_GALBRAITH: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {TYLER_SHEARS}",
  IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
  MICHAEL_SITRICK: 'crisis PR',
+ 'Owen Blicksilver': 'OBPR, Inc.',
  PEGGY_SIEGAL: 'socialite',
+ 'R. Couri Hay': None,
  ROSS_GOW: 'Acuity Reputation Management',
  TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
  }
@@ -465,7 +473,7 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label='republicans',
  style='bold dark_red',
- pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|McCain|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?(Manafort|Volcker)|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(Rex\s*)?Tillerson|(?<!Cynthia )(Richard\s*)?Nixon|Sasse',
+ pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|McCain|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?(Manafort|Volcker)|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(Rex\s*)?Tillerson|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|Tea\s*Party',
  # There's no emails from these people, they're just here to automate the regex creation for both first + last names
  emailers = {
  RUDY_GIULIANI: 'disbarred formed mayor of New York City',
@@ -485,6 +493,7 @@ HIGHLIGHTED_NAMES = [
  style='red bold',
  pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
  emailers = {
+ 'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
  MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
  RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
  SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
@@ -493,14 +502,16 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label=ACADEMIA,
  style='light_goldenrod2',
- pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
+ pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Norman\s*Finkelstein|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
  emailers = {
  DAVID_HAIG: None,
  JOSCHA_BACH: 'cognitive science / AI research',
  'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
+ 'Ed Boyden': 'Associate Professor, MIT Media Lab neurobiology',
  LAWRENCE_KRAUSS: 'theoretical physicist',
  LINDA_STONE: 'ex-Microsoft, MIT Media Lab',
  MARK_TRAMO: 'professor of neurology at UCLA',
+ 'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
  NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
  PETER_ATTIA: 'longevity medicine',
  ROBERT_TRIVERS: 'evolutionary biology',
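
Each HighlightedNames entry in this list pairs a label and Rich style with a regex pattern and a dict of known emailers. The real class lives in util/highlighted_group.py; the stand-in below only illustrates how such a group could be turned into one case-insensitive matcher covering both the hand-written pattern and the emailer names:

    import re
    from dataclasses import dataclass, field

    @dataclass
    class TinyHighlightedNames:
        label: str
        style: str
        pattern: str
        emailers: dict[str, str | None] = field(default_factory=dict)

        def regex(self) -> re.Pattern:
            # Append the emailer names (escaped) to the hand-written alternation.
            names = '|'.join(re.escape(name) for name in self.emailers)
            combined = f"{self.pattern}|{names}" if names else self.pattern
            return re.compile(combined, re.IGNORECASE)

    group = TinyHighlightedNames(
        label='finance',
        style='green',
        pattern=r'Deutsche\s*(Asset|Bank)|Goldman(\s*Sachs)?',
        emailers={'Jes Staley': 'former CEO of Barclays'},
    )

    print(bool(group.regex().search('an email from Jes Staley at Deutsche Bank')))  # True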
@@ -661,7 +672,7 @@ def get_style_for_category(category: str) -> str | None:
  elif category in [CONFERENCE, SPEECH]:
  return f"{get_style_for_category(ACADEMIA)} dim"
  elif category == SOCIAL:
- return f"{get_style_for_category(PUBLICIST)}"
+ return get_style_for_category(PUBLICIST)

  category = CATEGORY_STYLE_MAPPING.get(category, category)
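
get_style_for_category() resolves a few categories by reusing (and sometimes dimming) another category's style before falling back to the CATEGORY_STYLE_MAPPING lookup. A compressed sketch of that dispatch; the constants and both dicts below are hypothetical values, not the package's real tables:

    ACADEMIA, CONFERENCE, SPEECH, SOCIAL, PUBLICIST, LEGAL = (
        'academia', 'conference', 'speech', 'social', 'publicist', 'legal'
    )
    CATEGORY_STYLE_MAPPING = {LEGAL: ACADEMIA}  # categories that borrow another category's style
    CATEGORY_STYLES = {ACADEMIA: 'light_goldenrod2', PUBLICIST: 'orchid'}

    def get_style_for_category(category: str) -> str | None:
        if category in [CONFERENCE, SPEECH]:
            return f"{get_style_for_category(ACADEMIA)} dim"  # related categories get a dimmed variant
        elif category == SOCIAL:
            return get_style_for_category(PUBLICIST)

        category = CATEGORY_STYLE_MAPPING.get(category, category)
        return CATEGORY_STYLES.get(category)

    print(get_style_for_category(SPEECH))  # light_goldenrod2 dim
    print(get_style_for_category(SOCIAL))  # orchid
    print(get_style_for_category(LEGAL))   # light_goldenrod2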
@@ -32,7 +32,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
  # Augment the standard log highlighter with 'epstein_filename' matcher
  class LogHighlighter(ReprHighlighter):
  highlights = ReprHighlighter.highlights + [
- *[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
+ *[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
  "(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
  ]
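
The LogHighlighter tweak widens each document-type capture group so the matching *Cfg class names get the same style. In Rich, a RegexHighlighter styles each named group with its base_style plus the group name, so every group needs a corresponding theme entry (or one inherited from the defaults). A self-contained sketch of that mechanism; the group name, style and sample text here are invented:

    from rich.console import Console
    from rich.highlighter import ReprHighlighter
    from rich.theme import Theme

    class DemoLogHighlighter(ReprHighlighter):
        # Extra named group on top of Rich's defaults; styled via the "repr.demo_filename" theme entry.
        highlights = ReprHighlighter.highlights + [r"(?P<demo_filename>HOUSE_OVERSIGHT_\d{6}\.txt)"]

    console = Console(
        theme=Theme({"repr.demo_filename": "bold magenta"}),
        highlighter=DemoLogHighlighter(),
    )

    console.print("Parsed HOUSE_OVERSIGHT_032652.txt into an EmailCfg")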
@@ -125,7 +125,7 @@ def print_json_files(epstein_files: EpsteinFiles):
  console.print_json(json_file.json_str(), indent=4, sort_keys=False)


- def print_json_metadata(epstein_files: EpsteinFiles) -> None:
+ def write_json_metadata(epstein_files: EpsteinFiles) -> None:
  json_str = epstein_files.json_metadata()

  if args.build:
@@ -33,6 +33,7 @@ GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
  DEFAULT_NAME_STYLE = 'gray46'
  INFO_STYLE = 'white dim italic'
  KEY_STYLE='honeydew2 bold'
+ LAST_TIMESTAMP_STYLE='wheat4'
  SECTION_HEADER_STYLE = 'bold white on blue3'
  SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
  SUBSTACK_POST_LINK_STYLE = 'bright_cyan'
@@ -79,10 +80,18 @@ console = Console(**CONSOLE_ARGS)
  highlighter = CONSOLE_ARGS['highlighter']


- def add_cols_to_table(table: Table, col_names: list[str]) -> None:
+ def add_cols_to_table(table: Table, col_names: list[str | dict]) -> None:
  """Left most col will be left justified, rest are center justified."""
  for i, col in enumerate(col_names):
- table.add_column(col, justify='left' if i == 0 else 'center')
+ if isinstance(col, dict):
+ col_name = col['name']
+ kwargs = col
+ del kwargs['name']
+ else:
+ col_name = col
+ kwargs = {}
+
+ table.add_column(col_name, justify='left' if i == 0 else 'center', **kwargs)


  def build_highlighter(pattern: str) -> EpsteinHighlighter:
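
The new add_cols_to_table() signature lets callers mix plain column names with dicts of extra Table.add_column() keyword arguments keyed by 'name'. A hedged usage sketch (the column names and row are invented); unlike the diffed version it copies the incoming dict before popping 'name', so the caller's column spec isn't mutated in place:

    from rich.console import Console
    from rich.table import Table

    def add_cols_to_table(table: Table, col_names: list[str | dict]) -> None:
        """Leftmost col is left justified, the rest are centered; dicts carry extra add_column() kwargs."""
        for i, col in enumerate(col_names):
            if isinstance(col, dict):
                kwargs = dict(col)          # copy so the caller's dict is left untouched
                col_name = kwargs.pop('name')
            else:
                col_name, kwargs = col, {}

            table.add_column(col_name, justify='left' if i == 0 else 'center', **kwargs)

    table = Table(title='Documents')
    add_cols_to_table(table, ['File', 'Category', {'name': 'Size', 'style': 'cyan', 'no_wrap': True}])
    table.add_row('HOUSE_OVERSIGHT_032652.txt', 'email', '12.5 KB')
    Console().print(table)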
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "epstein-files"
- version = "1.0.11"
+ version = "1.0.13"
  description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
  authors = ["Michel de Cryptadamus"]
  readme = "README.md"