epstein-files 1.0.11__tar.gz → 1.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {epstein_files-1.0.11 → epstein_files-1.0.12}/PKG-INFO +1 -1
  2. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/__init__.py +3 -3
  3. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/communication.py +2 -2
  4. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/document.py +32 -62
  5. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/email.py +40 -1
  6. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/imessage/text_message.py +1 -1
  7. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/json_file.py +1 -1
  8. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/messenger_log.py +1 -1
  9. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/other_file.py +2 -2
  10. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/names.py +9 -8
  11. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/strings.py +2 -1
  12. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constants.py +17 -13
  13. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/data.py +1 -1
  14. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/doc_cfg.py +20 -42
  15. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/file_helper.py +3 -9
  16. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/highlighted_group.py +13 -4
  17. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/logging.py +1 -1
  18. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/output.py +1 -1
  19. {epstein_files-1.0.11 → epstein_files-1.0.12}/pyproject.toml +1 -1
  20. {epstein_files-1.0.11 → epstein_files-1.0.12}/LICENSE +0 -0
  21. {epstein_files-1.0.11 → epstein_files-1.0.12}/README.md +0 -0
  22. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/documents/emails/email_header.py +0 -0
  23. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/epstein_files.py +0 -0
  24. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/common_words.py +0 -0
  25. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/html.py +0 -0
  26. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/output_files.py +0 -0
  27. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/constant/urls.py +0 -0
  28. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/env.py +0 -0
  29. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/rich.py +0 -0
  30. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/search_result.py +0 -0
  31. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/timer.py +0 -0
  32. {epstein_files-1.0.11 → epstein_files-1.0.12}/epstein_files/util/word_count.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: epstein-files
- Version: 1.0.11
+ Version: 1.0.12
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
  License: GPL-3.0-or-later

epstein_files/__init__.py
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
  from epstein_files.util.env import args, specified_names
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
  from epstein_files.util.logging import logger
- from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
- print_text_messages, write_urls)
+ from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
+ print_text_messages, write_json_metadata, write_urls)
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
  from epstein_files.util.timer import Timer
  from epstein_files.util.word_count import write_word_counts_html
@@ -37,7 +37,7 @@ def generate_html() -> None:
  epstein_files = EpsteinFiles.get_files(timer)

  if args.json_metadata:
- print_json_metadata(epstein_files)
+ write_json_metadata(epstein_files)
  exit()
  elif args.json_files:
  print_json_files(epstein_files)

epstein_files/documents/communication.py
@@ -34,9 +34,9 @@ class Communication(Document):
  def is_attribution_uncertain(self) -> bool:
  return bool(self.config and self.config.is_attribution_uncertain)

- def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
+ def external_links(self, _style: str = '', include_alt_link: bool = True) -> Text:
  """Overrides super() method to apply self.author_style."""
- return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
+ return super().external_links(self.author_style, include_alt_link=include_alt_link)

  def summary(self) -> Text:
  return self._summary().append(CLOSE_PROPERTIES_CHAR)

epstein_files/documents/document.py
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
  from epstein_files.util.constant.strings import *
  from epstein_files.util.constant.urls import *
  from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
- from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
- from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
+ from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_time_from_timestamp_str, without_falsey
+ from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
  from epstein_files.util.env import DOCS_DIR, args
  from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
  file_size_str, is_local_extract_file)
@@ -31,10 +31,8 @@ INFO_INDENT = 2
  INFO_PADDING = (0, 0, 0, INFO_INDENT)
  MAX_TOP_LINES_LEN = 4000 # Only for logging
  MIN_DOCUMENT_ID = 10477
- LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
  WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)

- EXTRACTED_FROM = 'Extracted from'
  MIN_TIMESTAMP = datetime(1991, 1, 1)
  MID_TIMESTAMP = datetime(2007, 1, 1)
  MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -96,15 +94,9 @@ class Document:
  def __post_init__(self):
  self.filename = self.file_path.name
  self.file_id = extract_file_id(self.filename)
- self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
+ self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))

- if self.is_local_extract_file():
- self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
- extracted_from_doc_id = self.url_slug.split('_')[-1]
-
- if extracted_from_doc_id in ALL_FILE_CONFIGS:
- self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
- else:
+ if 'url_slug' not in vars(self):
  self.url_slug = self.file_path.stem

  self._set_computed_fields(text=self.text or self._load_file())
@@ -122,11 +114,11 @@ class Document:

  def duplicate_file_txt(self) -> Text:
  """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
- if not self.config or not self.config.dupe_of_id:
+ if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
  raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")

  txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
- txt.append(f" because it's {self.config.duplicate_reason()} ")
+ txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
  return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))

  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
@@ -141,9 +133,28 @@ class Document:
  """Create a Text obj link to this document on EpsteinWeb."""
  return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)

+ def external_links(self, style: str = '', include_alt_link: bool = False) -> Text:
+ """Returns colored links to epstein.media and and epsteinweb in a Text object."""
+ txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
+
+ if args.use_epstein_web:
+ txt.append(self.epstein_web_link(style=style))
+
+ if include_alt_link:
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+ txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
+ else:
+ txt.append(self.epstein_media_link(style=style))
+
+ if include_alt_link:
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+ txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
+
+ return txt
+
  def file_info_panel(self) -> Group:
  """Panel with filename linking to raw file plus any additional info about the file."""
- panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
+ panel = Panel(self.external_links(include_alt_link=True), border_style=self._border_style(), expand=False)
  padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
  return Group(*([panel] + padded_info))
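
A minimal sketch (not package code) of the link-ordering rule the new external_links() method implements, with plain strings standing in for rich Text objects and a boolean standing in for args.use_epstein_web:

    def ordered_links(use_epstein_web: bool, include_alt_link: bool) -> list[str]:
        # Primary archive first; the other two archives follow in parentheses when alt links are requested.
        primary, alt = ('EpsteinWeb', 'epstein.media') if use_epstein_web else ('epstein.media', 'EpsteinWeb')
        return [primary] + (['(epsteinify)', f'({alt})'] if include_alt_link else [])

    ordered_links(False, True)   # ['epstein.media', '(epsteinify)', '(EpsteinWeb)']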

@@ -155,12 +166,10 @@ class Document:

  def info(self) -> list[Text]:
  """0 to 2 sentences containing the info_txt() as well as any configured description."""
- sentences = [
+ return without_falsey([
  self.info_txt(),
  highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
- ]
-
- return without_falsey(sentences)
+ ])

  def info_txt(self) -> Text | None:
  """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
@@ -197,9 +206,9 @@ class Document:

  if self.is_local_extract_file():
  metadata['extracted_file'] = {
- 'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
- 'extracted_from_file': self.url_slug + '.txt',
- 'extracted_file_url': extracted_file_url(self.filename),
+ 'explanation': 'Manually extracted from one of the court filings.',
+ 'extracted_from': self.url_slug + '.txt',
+ 'url': extracted_file_url(self.filename),
  }

  return metadata
@@ -208,25 +217,6 @@ class Document:
  with open(self.file_path) as f:
  return f.read()

- def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
- """Returns colored links to epstein.media and and epsteinweb in a Text object."""
- txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
-
- if args.use_epstein_web:
- txt.append(self.epstein_web_link(style=style))
-
- if include_alt_link:
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
- txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
- else:
- txt.append(self.epstein_media_link(style=style))
-
- if include_alt_link:
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
- txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
-
- return txt
-
  def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
  """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
  for k, v in repairs.items():
@@ -253,7 +243,7 @@ class Document:
  txt.append(f" {self.url_slug}", style=FILENAME_STYLE)

  if self.timestamp:
- timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
+ timestamp_str = remove_time_from_timestamp_str(self.timestamp)
  txt.append(' (', style=SYMBOL_STYLE)
  txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)

@@ -327,26 +317,6 @@ class Document:
  self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
  self.num_lines = len(self.lines)

- def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
- """Copy info from original config for file this document was extracted from."""
- if self.config:
- self.warn(f"Merging existing config with config for file this document was extracted from")
- else:
- self.config = EmailCfg(id=self.file_id)
-
- extracted_from_description = doc_cfg.complete_description()
-
- if extracted_from_description:
- extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
-
- if self.config.description:
- self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
-
- self.config.description = extracted_description
-
- self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
- self.warn(f"Constructed local config\n{self.config}")
-
  def _write_clean_text(self, output_path: Path) -> None:
  """Write self.text to 'output_path'. Used only for diffing files."""
  if output_path.exists():

epstein_files/documents/email.py
@@ -1,5 +1,6 @@
  import logging
  import re
+ from copy import deepcopy
  from dataclasses import asdict, dataclass, field
  from datetime import datetime
  from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
  flatten, remove_timezone, uniquify)
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
+ from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
  from epstein_files.util.highlighted_group import get_style_for_name
  from epstein_files.util.logging import logger
  from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
  BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
  DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
  TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
+ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")

  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+ APPEARS_IN = 'Appears in'
  MAX_CHARS_TO_PRINT = 4000
  MAX_NUM_HEADER_LINES = 14
  MAX_QUOTED_REPLIES = 2
@@ -248,6 +252,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i

  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
  USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
+ 'Alan Dlugash', # CCed with Richard Kahn
  'Alan Rogers', # Random CC
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
  'BS Stern', # A random fwd of email we have
@@ -264,6 +269,8 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
  'Lyn Fontanilla', # Random CC
  'Mark Albert', # Random CC
  'Matthew Schafer', # Random CC
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
  'Michael Simmons', # Random CC
  'Nancy Portland', # Lawrence Krauss CC
  'Oliver Goodenough', # Robert Trivers CC
@@ -318,6 +325,17 @@ class Email(Communication):
  rewritten_header_ids: ClassVar[set[str]] = set([])

  def __post_init__(self):
+ self.filename = self.file_path.name
+ self.file_id = extract_file_id(self.filename)
+
+ # Special handling for copying properties out of the config for the document this one was extracted from
+ if self.is_local_extract_file():
+ self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
+ extracted_from_doc_id = self.url_slug.split('_')[-1]
+
+ if extracted_from_doc_id in ALL_FILE_CONFIGS:
+ self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
+
  super().__post_init__()

  try:
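
The local-extract handling that used to live in Document.__post_init__ now runs here before super().__post_init__(). A minimal sketch (not package code; the stem below is hypothetical) of how LOCAL_EXTRACT_REGEX turns an extract file's stem into its url_slug and the id of the document it was extracted from:

    import re

    LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")         # same pattern as above

    stem = 'HOUSE_OVERSIGHT_012197_4'                 # hypothetical output of file_stem_for_id()
    url_slug = LOCAL_EXTRACT_REGEX.sub('', stem)      # 'HOUSE_OVERSIGHT_012197'
    extracted_from_doc_id = url_slug.split('_')[-1]   # '012197', then looked up in ALL_FILE_CONFIGS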

@@ -570,7 +588,7 @@ class Email(Communication):
  self._merge_lines(3) # Merge 4th and 5th rows
  elif self.file_id in '026609 029402 032405 022695'.split():
  self._merge_lines(4) # Merge 5th and 6th rows
- elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
+ elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
  self._merge_lines(2, 4)
  elif self.file_id in ['029154', '029163']:
  self._merge_lines(2, 5)
@@ -649,6 +667,27 @@ class Email(Communication):
  sent_from = sent_from_match.group(0)
  return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from

+ def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
+ """Copy info from original config for file this document was extracted from."""
+ if self.file_id in ALL_FILE_CONFIGS:
+ self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
+ self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+ else:
+ self.config = EmailCfg(id=self.file_id)
+
+ extracted_from_description = extracted_from_doc_cfg.complete_description()
+
+ if extracted_from_description:
+ extracted_description = f"{APPEARS_IN} {extracted_from_description}"
+
+ if self.config.description:
+ self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+
+ self.config.description = extracted_description
+
+ self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
+ self.warn(f"Constructed synthetic config: {self.config}")
+
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
  logger.debug(f"Printing '{self.filename}'...")
  yield self.file_info_panel()

epstein_files/documents/imessage/text_message.py
@@ -45,7 +45,7 @@ class TextMessage:
  self.author_str = self.author_str or self.author

  if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
- self.author_str = self.author + ' (?)'
+ self.author_str += ' (?)'

  def timestamp(self) -> datetime:
  return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)

epstein_files/documents/json_file.py
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
  return JSON

  def info_txt(self) -> Text | None:
- return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
+ return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)

  def is_interesting(self):
  return False

epstein_files/documents/messenger_log.py
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
  is_phone_number = author_str.startswith('+')

  if is_phone_number:
- logger.warning(f"{self.summary()} Found phone number: {author_str}")
+ logger.info(f"{self.summary()} Found phone number: {author_str}")
  self.phone_number = author_str

  # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author

epstein_files/documents/other_file.py
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
  TEXT_OF_US_LAW,
  TRANSLATION,
  TWEET,
- THE_REAL_DEAL_ARTICLE,
+ REAL_DEAL_ARTICLE,
  TRUMP_DISCLOSURES,
  UBS_CIO_REPORT,
  UN_GENERAL_ASSEMBLY,
@@ -240,7 +240,7 @@ class OtherFile(Document):
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')

  for file in files:
- link_and_info = [file.raw_document_link_txt()]
+ link_and_info = [file.external_links()]
  date_str = file.date_str()

  if file.is_duplicate():

epstein_files/util/constant/names.py
@@ -143,7 +143,7 @@ REID_HOFFMAN = 'Reid Hoffman'
  REID_WEINGARTEN = 'Reid Weingarten'
  RENATA_BOLOTOVA = 'Renata Bolotova'
  RICHARD_KAHN = 'Richard Kahn'
- ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
+ ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
  ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
  ROBERT_TRIVERS = 'Robert Trivers'
  ROGER_SCHANK = 'Roger Schank'
@@ -178,6 +178,7 @@ JARED_KUSHNER = 'Jared Kushner'
  JULIE_K_BROWN = 'Julie K. Brown'
  KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
  MICHAEL_J_BOCCIO = 'Michael J. Boccio'
+ NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
  PAUL_G_CASSELL = 'Paul G. Cassell'
  RUDY_GIULIANI = 'Rudy Giuliani'
  TULSI_GABBARD = 'Tulsi Gabbard'
@@ -226,22 +227,22 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
  # Names to color white in the word counts
  OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
- baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
+ baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
  chapman charles charlie christopher clint cohen colin collins conway
- davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+ danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
  edmond elizabeth emily entwistle erik evelyn
- ferguson flachsbart francis franco frank
+ ferguson flachsbart francis franco frank frost
  gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
- hancock harold harrison harry helen hirsch hofstadter horowitz hussein
+ hancock harold harrison harry hay helen hirsch hofstadter horowitz hussein
  ian isaac isaacson
  jamie jane janet jason jen jim joe johnson jones josh julie justin
  karl kate kathy kelly kim kruger kyle
- leo leonard lenny leslie lieberman louis lynch lynn
+ laurie leo leonard lenny leslie lieberman louis lynch lynn
  marcus marianne matt matthew melissa michele michelle moore moscowitz
- nicole nussbaum
+ nancy nicole nussbaum
  paulson philippe
  rafael ray richard richardson rob robin ron rubin rudolph ryan
- sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
+ sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
  ted theresa thompson tiffany timothy tony
  valeria
  walter warren weinstein weiss william

epstein_files/util/constant/strings.py
@@ -20,7 +20,7 @@ POLITICS = 'politics'
  PROPERTY = 'property'
  PUBLICIST = 'publicist'
  REPUTATION = 'reputation'
- SKYPE_LOG= 'skype log'
+ SKYPE_LOG = 'Skype log'
  SOCIAL = 'social'
  SPEECH = 'speech'

@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
  NYT = "New York Times"
  PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
  PALM_BEACH_POST = f'{PALM_BEACH} Post'
+ SHIMON_POST = 'The Shimon Post'
  THE_REAL_DEAL = 'The Real Deal'
  WAPO = 'WaPo'
  VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'

epstein_files/util/constants.py
@@ -66,7 +66,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
  BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
  BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
  'Dangene and Jennie Enterprise': re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
- DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
+ DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
  DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
  DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
  DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
  PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
  REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
  RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
- ROBERT_D_CRITTON: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
+ ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
  ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
  ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
  ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
@@ -163,6 +163,7 @@ EMAILERS = [
  DEEPAK_CHOPRA,
  GLENN_DUBIN,
  GORDON_GETTY,
+ 'Kevin Bright',
  'Jack Lang',
  JACK_SCAROLA,
  JAY_LEFKOWITZ,
@@ -257,7 +258,6 @@ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
  LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
  KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
  MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
- NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
  NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
  NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
  OBAMA_JOKE = 'joke about Obama'
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
  PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
  PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
  PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
- SHIMON_POST = 'The Shimon Post'
+ REAL_DEAL_ARTICLE = 'article by Keith Larsen'
  SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
  SINGLE_PAGE = 'single page of'
  STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
  SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
- THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
  TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
  UBS_CIO_REPORT = 'CIO Monthly Extended report'
  UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
  EmailCfg(
  id='029977',
  author=LAWRANCE_VISOSKI,
- recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
+ recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
  attribution_reason=LARRY_REASON,
  duplicate_ids=['031129'],
  ),
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
  EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
  EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
  EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
- EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
+ EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
  EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
  attribution_reason='ends with "Respectfully, terry"',
  author=TERRY_KAFKA,
  fwded_text_after='From: Mike Cohen',
- recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
+ recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
  duplicate_ids=['028482'],
  ),
  EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
@@ -665,6 +664,10 @@ EMAILS_CONFIG = [
  EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
  EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
  EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
+ EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
+ EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
+ EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
+ EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
  EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
  EmailCfg(id='026755', is_fwded_article=True), # HuffPo
  EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
@@ -710,6 +713,7 @@ EMAILS_CONFIG = [
  EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
  EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
  EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
+ EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
  EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
  EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
@@ -852,9 +856,9 @@ EMAILS_CONFIG = [
  EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
  EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
  EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
- EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
  EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
  EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
+ EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
  ]


@@ -1335,8 +1339,8 @@ OTHER_FILES_PROPERTY = [
  DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
  DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
  DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
- DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
- DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
+ DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
+ DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
  DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
  DocCfg(id='018743', description=f"Las Vegas property listing"),
  DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
@@ -1497,13 +1501,13 @@ OTHER_FILES_MISC = [
  DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
  DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
  DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
+ DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
  DocCfg(
  id='018224',
  category=SKYPE_LOG,
- description=f'Skype conversations with linkspirit (French?) and {LAWRENCE_KRAUSS}',
+ author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
  is_interesting=True, # we don't know who linkspirit is yet
  ),
- DocCfg(id='032210', category=SKYPE_LOG, description=f'Skype conversation with linkspirit', is_interesting=True),
  DocCfg(
  id='025147',
  author=BROCKMAN_INC,

epstein_files/util/data.py
@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
  PACIFIC_TZ = tz.gettz("America/Los_Angeles")
  TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls

-
  collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
  date_str = lambda dt: dt.isoformat()[0:10] if dt else None
  escape_double_quotes = lambda text: text.replace('"', r'\"')
  escape_single_quotes = lambda text: text.replace("'", r"\'")
  iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
+ remove_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
  uniquify = lambda _list: list(set(_list))
  without_falsey = lambda _list: [e for e in _list if e]
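
A quick sketch (not from the package) of what the new remove_time_from_timestamp_str lambda returns; unlike the old iso_timestamp(...).removesuffix(' 00:00:00') call in document.py, it keeps isoformat()'s 'T' separator when a time component is present:

    from datetime import datetime

    iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
    remove_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')

    remove_time_from_timestamp_str(datetime(2019, 6, 17))          # '2019-06-17'
    remove_time_from_timestamp_str(datetime(2017, 2, 15, 13, 31))  # '2017-02-15T13:31:00'
    iso_timestamp(datetime(2017, 2, 15, 13, 31))                   # '2017-02-15 13:31:00'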
32
32
 
@@ -8,7 +8,7 @@ from dateutil.parser import parse
8
8
 
9
9
  from epstein_files.util.constant.names import *
10
10
  from epstein_files.util.constant.strings import *
11
- from epstein_files.util.data import without_falsey
11
+ from epstein_files.util.data import remove_time_from_timestamp_str, without_falsey
12
12
 
13
13
  DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
14
14
  Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
@@ -47,12 +47,11 @@ FINANCIAL_REPORTS_AUTHORS = [
47
47
  ]
48
48
 
49
49
  # Fields like timestamp and author are better added from the Document object
50
- INVALID_FOR_METADATA = [
50
+ NON_METADATA_FIELDS = [
51
51
  'actual_text',
52
52
  'date',
53
53
  'id',
54
- 'timestamp',
55
- 'was_generated',
54
+ 'is_synthetic',
56
55
  ]
57
56
 
58
57
 
@@ -68,10 +67,10 @@ class DocCfg:
68
67
  date (str | None): If passed will be immediated parsed into the 'timestamp' field
69
68
  dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
70
69
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
71
- duplicate_ids (list[str]): Inverse of 'dupe_of_id' - this file will NOT be suppressed but 'duplicate_ids' will be
70
+ duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
72
71
  is_interesting (bool): Override other considerations and always consider this file interesting
73
72
  timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
74
- was_generated (bool): True if this object was generated by the duplicate_cfgs() method
73
+ is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
75
74
  """
76
75
  id: str
77
76
  author: str | None = None
@@ -82,8 +81,8 @@ class DocCfg:
82
81
  dupe_type: DuplicateType | None = None
83
82
  duplicate_ids: list[str] = field(default_factory=list)
84
83
  is_interesting: bool = False
84
+ is_synthetic: bool = False
85
85
  timestamp: datetime | None = None
86
- was_generated: bool = False
87
86
 
88
87
  def __post_init__(self):
89
88
  if self.date:
@@ -94,13 +93,17 @@ class DocCfg:
94
93
 
95
94
  def complete_description(self) -> str | None:
96
95
  """String that summarizes what is known about this document."""
97
- if self.category and not self.description:
96
+ if self.category and not self.description and not self.author:
98
97
  return self.category
99
98
  elif self.category == REPUTATION:
100
99
  return f"{REPUTATION_MGMT}: {self.description}"
100
+ elif self.category == SKYPE_LOG:
101
+ msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
102
+ return f"{msg} {self.description}" if self.description else msg
101
103
  elif self.author and self.description:
102
104
  if self.category in [ACADEMIA, BOOK]:
103
- return self.title_by_author()
105
+ title = self.description if '"' in self.description else f"'{self.description}'"
106
+ return f"{title} by {self.author}"
104
107
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
105
108
  return f"{self.author} report: '{self.description}'"
106
109
  elif self.category == LEGAL and 'v.' in self.author:
@@ -111,10 +114,6 @@ class DocCfg:
111
114
  pieces = without_falsey([self.author, self.description])
112
115
  return ' '.join(pieces) if pieces else None
113
116
 
114
- def duplicate_reason(self) -> str | None:
115
- if self.dupe_type is not None:
116
- return DUPE_TYPE_STRS[self.dupe_type]
117
-
118
117
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
119
118
  """Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
120
119
  for id in self.duplicate_ids:
@@ -123,35 +122,17 @@ class DocCfg:
123
122
  dupe_cfg.dupe_of_id = self.id
124
123
  dupe_cfg.duplicate_ids = []
125
124
  dupe_cfg.dupe_type = self.dupe_type
126
- dupe_cfg.was_generated = True
125
+ dupe_cfg.is_synthetic = True
127
126
  yield dupe_cfg
128
127
 
129
128
  def metadata(self) -> Metadata:
130
- non_null_fields = {k: v for k, v in asdict(self).items() if v and k not in INVALID_FOR_METADATA}
131
-
132
- if self.category in [EMAIL, TEXT_MESSAGE]:
133
- del non_null_fields['category']
134
-
135
- return non_null_fields
136
-
137
- def non_null_field_names(self) -> list[str]:
138
- return [f.name for f in self.sorted_fields() if getattr(self, f.name)]
139
-
140
- def sorted_fields(self) -> list[Field]:
141
- return sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name))
142
-
143
- def title_by_author(self) -> str:
144
- if not (self.author and self.description):
145
- raise RuntimeError(f"Can't call title_by_author() without author and description!")
146
-
147
- title = self.description if '"' in self.description else f"'{self.description}'"
148
- return f"{title} by {self.author}"
129
+ return {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
149
130
 
150
131
  def _props_strs(self) -> list[str]:
151
132
  props = []
152
133
  add_prop = lambda f, value: props.append(f"{f.name}={value}")
153
134
 
154
- for _field in self.sorted_fields():
135
+ for _field in sorted(fields(self), key=lambda f: FIELD_SORT_KEY.get(f.name, f.name)):
155
136
  value = getattr(self, _field.name)
156
137
 
157
138
  if value is None or value is False or (isinstance(value, list) and len(value) == 0):
@@ -160,13 +141,13 @@
  add_prop(_field, constantize_name(str(value)) if CONSTANTIZE_NAMES else f"'{value}'")
  elif _field.name == 'category' and value in [EMAIL, TEXT_MESSAGE]:
  continue
- elif _field.name == 'recipients' and isinstance(value, list):
+ elif _field.name == 'recipients' and value:
  recipients_str = str([constantize_name(r) if (CONSTANTIZE_NAMES and r) else r for r in value])
  add_prop(_field, recipients_str.replace("'", '') if CONSTANTIZE_NAMES else recipients_str)
  elif _field.name == 'timestamp' and self.date is not None:
  continue # Don't print both timestamp and date
  elif isinstance(value, datetime):
- value_str = re.sub(' 00:00:00', '', str(value))
+ value_str = remove_time_from_timestamp_str(value)
  add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
  elif isinstance(value, str):
  if "'" in value:
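
DocCfg.metadata() (rewritten above) now collapses to a single dict comprehension: keep truthy fields whose names aren't in NON_METADATA_FIELDS. A standalone sketch with a plain dict standing in for asdict(self), reusing an id and author that appear elsewhere in this diff:

    NON_METADATA_FIELDS = ['actual_text', 'date', 'id', 'is_synthetic']

    cfg_fields = {'id': '027068', 'author': 'The Real Deal', 'date': None,
                  'is_synthetic': True, 'duplicate_ids': []}
    metadata = {k: v for k, v in cfg_fields.items() if k not in NON_METADATA_FIELDS and v}
    # {'author': 'The Real Deal'} -- excluded keys, None values and empty lists are all dropped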

@@ -221,18 +202,15 @@ class EmailCfg(CommunicationCfg):
  """
  Attributes:
  actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
+ fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
  is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
  recipients (list[str | None]): Who received the email
  """
- actual_text: str | None = None # Override for the Email._actual_text() method for particularly broken emails
- fwded_text_after: str | None = None # If set, any text after this is a fwd of an article or similar
+ actual_text: str | None = None
+ fwded_text_after: str | None = None
  is_fwded_article: bool = False
  recipients: list[str | None] = field(default_factory=list)

- @classmethod
- def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
- return cls(**asdict(cfg))
-
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
  def __repr__(self) -> str:
  return super().__repr__()

epstein_files/util/file_helper.py
@@ -11,8 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
  KB = 1024
  MB = KB * KB

+ file_size = lambda file_path: Path(file_path).stat().st_size
+ file_size_str = lambda file_path: file_size_to_str(file_size(file_path))

- # Coerce methods hands both string and int arguments.
+ # Coerce methods handle both string and int arguments.
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
  coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
  id_str = lambda id: f"{int(id):06d}"
@@ -44,14 +46,6 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
  return file_match.group(1)


- def file_size(file_path: str | Path) -> int:
- return Path(file_path).stat().st_size
-
-
- def file_size_str(file_path: str | Path) -> str:
- return file_size_to_str(file_size(file_path))
-
-
  def file_size_to_str(size: int) -> str:
  digits = 2
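
The two deleted def's survive as one-line lambdas near the top of the module with the same behavior; a minimal usage sketch (the filename below is hypothetical):

    from pathlib import Path

    file_size = lambda file_path: Path(file_path).stat().st_size   # size in bytes for any existing file
    # file_size_str() additionally formats that byte count via file_size_to_str(), defined further down.

    n_bytes = file_size('HOUSE_OVERSIGHT_012197.txt')               # hypothetical document path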

epstein_files/util/highlighted_group.py
@@ -223,6 +223,7 @@ HIGHLIGHTED_NAMES = [
  'Linda Pinto': 'interior design at Alberto Pinto Cabinet',
  MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
  NADIA_MARCINKO: 'pilot',
+ 'Sean J. Lancaster': 'airplane reseller',
  }
  ),
  HighlightedNames(
@@ -260,6 +261,8 @@ HIGHLIGHTED_NAMES = [
  MARTIN_WEINBERG: CRIMINAL_DEFENSE_ATTORNEY,
  MICHAEL_MILLER: 'Steptoe LLP partner',
  REID_WEINGARTEN: 'Steptoe LLP partner',
+ ROBERT_D_CRITTON_JR: 'criminal defense attorney',
+ 'Robert Gold': None,
  'Roy Black': CRIMINAL_DEFENSE_2008,
  SCOTT_J_LINK: None,
  TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, maybe daughter of Fred Haddad?',
@@ -310,15 +313,17 @@ HIGHLIGHTED_NAMES = [
  }
  ),
  HighlightedNames(
- label='finance',
+ label=FINANCE,
  style='green',
  pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|((anti.?)?money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
  emailers={
  AMANDA_ENS: 'Citigroup',
+ BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
  DANIEL_SABBA: 'UBS Investment Bank',
  DAVID_FISZEL: 'CIO Honeycomb Asset Management',
  JES_STALEY: 'former CEO of Barclays',
  JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
+ 'Laurie Cameron': 'currency trading',
  LEON_BLACK: 'Apollo CEO',
  MARC_LEON: 'Luxury Properties Sari Morrocco',
  MELANIE_SPINELLA: f'representative of {LEON_BLACK}',
@@ -378,7 +383,7 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label=JOURNALIST,
  style='bright_yellow',
- pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?Patterson|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
+ pattern=r'Palm\s*Beach\s*(Daily\s*News|Post)|ABC(\s*News)?|Alex\s*Yablon|(Andrew\s*)?Marra|Arianna(\s*Huffington)?|(Arthur\s*)?Kretchmer|BBC|Bloomberg|Breitbart|Charlie\s*Rose|China\s*Daily|CNBC|CNN(politics?)?|Con[cs]hita|Sarnoff|(?<!Virgin[-\s]Islands[-\s])Daily\s*(Beast|Mail|News|Telegraph)|(David\s*)?Pecker|David\s*Brooks|Ed\s*Krassenstein|(Emily\s*)?Michot|Ezra\s*Klein|(George\s*)?Stephanopoulus|Globe\s*and\s*Mail|Good\s*Morning\s*America|Graydon(\s*Carter)?|Huffington(\s*Post)?|Ingram, David|(James\s*)?(Hill|Patterson)|Jonathan\s*Karl|Julie\s*(K.?\s*)?Brown|(Katie\s*)?Couric|Keith\s*Larsen|L\.?A\.?\s*Times|Miami\s*Herald|(Michele\s*)?Dargan|(National\s*)?Enquirer|(The\s*)?N(ew\s*)?Y(ork\s*)?(P(ost)?|T(imes)?)|(The\s*)?New\s*Yorker|NYer|PERVERSION\s*OF\s*JUSTICE|Politico|Pro\s*Publica|(Sean\s*)?Hannity|Sulzberger|SunSentinel|Susan Edelman|(Uma\s*)?Sanghvi|(The\s*)?Wa(shington\s*)?Po(st)?|Viceland|Vick[iy]\s*Ward|Vox|WGBH|(The\s*)?Wall\s*Street\s*Journal|WSJ|[-\w.]+@(bbc|independent|mailonline|mirror|thetimes)\.co\.uk',
  emailers = {
  EDWARD_JAY_EPSTEIN: 'reporter who wrote about the kinds of crimes Epstein was involved in, no relation to Jeffrey',
  'James Hill': 'ABC News',
@@ -458,6 +463,7 @@ HIGHLIGHTED_NAMES = [
  IAN_OSBORNE: f"{OSBORNE_LLP} reputation repairer possibly hired by Epstein ca. 2011-06",
  MICHAEL_SITRICK: 'crisis PR',
  PEGGY_SIEGAL: 'socialite',
+ 'R. Couri Hay': None,
  ROSS_GOW: 'Acuity Reputation Management',
  TYLER_SHEARS: f"{REPUTATION_MGMT}, worked on Epstein's Google search results with {CHRISTINA_GALBRAITH}",
  }
@@ -485,6 +491,7 @@ HIGHLIGHTED_NAMES = [
  style='red bold',
  pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
  emailers = {
+ 'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
  MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
  RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
  SVETLANA_POZHIDAEVA: f'Epstein\'s Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and {DAVID_BLAINE}',
@@ -493,14 +500,16 @@ HIGHLIGHTED_NAMES = [
  HighlightedNames(
  label=ACADEMIA,
  style='light_goldenrod2',
- pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
+ pattern=r'Alain Forget|Brotherton|Carl\s*Sagan|Columbia|David Grosof|J(ames|im)\s*Watson|(Lord\s*)?Martin\s*Rees|Massachusetts\s*Institute\s*of\s*Technology|MIT(\s*Media\s*Lab)?|Media\s*Lab|Minsky|((Noam|Valeria)\s*)?Chomsky|Norman\s*Finkelstein|Praluent|Regeneron|(Richard\s*)?Dawkins|Sanofi|Stanford|(Stephen\s*)?Hawking|(Steven?\s*)?Pinker|UCLA',
  emailers = {
  DAVID_HAIG: None,
  JOSCHA_BACH: 'cognitive science / AI research',
  'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
+ 'Ed Boyden': 'Associate Professor, MIT Media Lab neurobiology',
  LAWRENCE_KRAUSS: 'theoretical physicist',
  LINDA_STONE: 'ex-Microsoft, MIT Media Lab',
  MARK_TRAMO: 'professor of neurology at UCLA',
+ 'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
  NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
  PETER_ATTIA: 'longevity medicine',
  ROBERT_TRIVERS: 'evolutionary biology',
@@ -661,7 +670,7 @@ def get_style_for_category(category: str) -> str | None:
  elif category in [CONFERENCE, SPEECH]:
  return f"{get_style_for_category(ACADEMIA)} dim"
  elif category == SOCIAL:
- return f"{get_style_for_category(PUBLICIST)}"
+ return get_style_for_category(PUBLICIST)

  category = CATEGORY_STYLE_MAPPING.get(category, category)

epstein_files/util/logging.py
@@ -32,7 +32,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
  # Augment the standard log highlighter with 'epstein_filename' matcher
  class LogHighlighter(ReprHighlighter):
  highlights = ReprHighlighter.highlights + [
- *[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
+ *[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
  "(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
  ]

epstein_files/util/output.py
@@ -125,7 +125,7 @@ def print_json_files(epstein_files: EpsteinFiles):
  console.print_json(json_file.json_str(), indent=4, sort_keys=False)


- def print_json_metadata(epstein_files: EpsteinFiles) -> None:
+ def write_json_metadata(epstein_files: EpsteinFiles) -> None:
  json_str = epstein_files.json_metadata()

  if args.build:

pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "epstein-files"
- version = "1.0.11"
+ version = "1.0.12"
  description = "Tools for working with the Jeffrey Epstein documents released in November 2025."
  authors = ["Michel de Cryptadamus"]
  readme = "README.md"