epstein-files 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import logging
2
2
  import re
3
3
  from collections import defaultdict
4
4
  from dataclasses import dataclass, field
5
- from datetime import datetime
5
+ from datetime import datetime, timedelta
6
6
 
7
7
  from rich.console import Console, ConsoleOptions, RenderResult
8
8
  from rich.table import Table
@@ -36,10 +36,10 @@ class MessengerLog(Communication):
36
36
  self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
37
37
 
38
38
  def first_message_at(self, name: str | None) -> datetime:
39
- return self.messages_by(name)[0].timestamp()
39
+ return self.messages_by(name)[0].parse_timestamp()
40
40
 
41
41
  def info_txt(self) -> Text | None:
42
- num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
42
+ num_days_str = days_between_str(self.timestamp, self.messages[-1].parse_timestamp())
43
43
  txt = Text(f"(Covers {num_days_str} starting ", style='dim')
44
44
  txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
45
45
 
@@ -47,7 +47,7 @@ class MessengerLog(Communication):
47
47
  txt.append('with unknown counterparty')
48
48
  else:
49
49
  txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
50
- txt.append(Text(self.author, style=self.author_style + ' bold'))
50
+ txt.append(Text(self.author, style=self.author_style() + ' bold'))
51
51
 
52
52
  if self.phone_number:
53
53
  txt.append(highlighter(f" using the phone number {self.phone_number}"))
@@ -55,7 +55,7 @@ class MessengerLog(Communication):
55
55
  return txt.append(')')
56
56
 
57
57
  def last_message_at(self, name: str | None) -> datetime:
58
- return self.messages_by(name)[-1].timestamp()
58
+ return self.messages_by(name)[-1].parse_timestamp()
59
59
 
60
60
  def messages_by(self, name: str | None) -> list[TextMessage]:
61
61
  """Return all messages by 'name'."""
@@ -71,7 +71,7 @@ class MessengerLog(Communication):
71
71
  return metadata
72
72
 
73
73
  def _border_style(self) -> str:
74
- return self.author_style
74
+ return self.author_style()
75
75
 
76
76
  def _build_message(self, match: re.Match) -> TextMessage:
77
77
  """Turn a regex match into a TextMessage."""
@@ -86,7 +86,7 @@ class MessengerLog(Communication):
86
86
  return TextMessage(
87
87
  author=self.author if (is_phone_number or not author_str) else author_str,
88
88
  author_str=author_str if is_phone_number else '', # Preserve phone numbers
89
- id_confirmed=not self.is_attribution_uncertain(),
89
+ is_id_confirmed=not self.is_attribution_uncertain(),
90
90
  text=match.group(4).strip(),
91
91
  timestamp_str=match.group(2).strip(),
92
92
  )
@@ -96,12 +96,31 @@ class MessengerLog(Communication):
96
96
  message = self._build_message(match)
97
97
 
98
98
  try:
99
- return message.timestamp()
99
+ return message.parse_timestamp()
100
100
  except ValueError as e:
101
101
  logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
102
102
 
103
103
  raise RuntimeError(f"{self}: No timestamp found!")
104
104
 
105
+ def _set_message_timestamps(self) -> None:
106
+ raise NotImplementedError(f"TextMessage.timestamp no longer exists")
107
+ last_message: TextMessage | None = None
108
+
109
+ for i, message in enumerate(self.messages):
110
+ try:
111
+ message.timestamp = message.parse_timestamp()
112
+ except Exception as e:
113
+ msg = f"Failed to parse timestamp for TextMessage {i + 1}, {message}: {e}"
114
+
115
+ if i == 0:
116
+ message.timestamp = self.timestamp
117
+ self.warn(f"{msg}\nit's the first message so using the MessengerLog timestamp property {self.timestamp}")
118
+ else:
119
+ message.timestamp = last_message.timestamp + timedelta(milliseconds=1)
120
+ self.warn(f"{msg}\nadding 1 millisecond to last timestamp {last_message.timestamp}")
121
+
122
+ last_message = message
123
+
105
124
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
106
125
  yield self.file_info_panel()
107
126
  yield Text('')
@@ -126,13 +145,13 @@ class MessengerLog(Communication):
126
145
  author_counts = cls.count_authors(log_files)
127
146
  msg_count = sum([len(log.messages) for log in log_files])
128
147
 
129
- footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
130
- counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
148
+ footer = f"deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
149
+ counts_table = build_table("Text Message Counts By Author", caption=f"({footer} {len(log_files)} files)")
131
150
  counts_table.add_column(AUTHOR.title(), justify='left', width=30)
132
151
  counts_table.add_column('Files', justify='right', style='white')
133
152
  counts_table.add_column("Msgs", justify='right')
134
- counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
135
- counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
153
+ counts_table.add_column('First Sent At', justify='center', highlight=True)
154
+ counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE)
136
155
  counts_table.add_column('Days', justify='right', style='dim')
137
156
 
138
157
  for name, count in sort_dict(author_counts):
@@ -21,8 +21,8 @@ from epstein_files.util.doc_cfg import DocCfg, Metadata
21
21
  from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
22
22
  from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
23
23
  from epstein_files.util.env import args
24
- from epstein_files.util.highlighted_group import styled_category
25
- from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
24
+ from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
25
+ from epstein_files.util.rich import build_table, highlighter
26
26
  from epstein_files.util.logging import logger
27
27
 
28
28
  FIRST_FEW_LINES = 'First Few Lines'
@@ -105,7 +105,7 @@ class OtherFile(Document):
105
105
  return self.config and self.config.category
106
106
 
107
107
  def category_txt(self) -> Text | None:
108
- return styled_category(self.category() or UNKNOWN)
108
+ return styled_category(self.category())
109
109
 
110
110
  def config_description(self) -> str | None:
111
111
  """Overloads superclass method."""
@@ -184,7 +184,7 @@ class OtherFile(Document):
184
184
  if len(timestamps) >= MAX_EXTRACTED_TIMESTAMPS:
185
185
  break
186
186
  except ValueError as e:
187
- self.log(f"Error while iterating through datefinder.find_dates(): {e}", logging.WARNING)
187
+ self.warn(f"Error while iterating through datefinder.find_dates(): {e}")
188
188
 
189
189
  if len(timestamps) == 0:
190
190
  if not (self.is_duplicate() or VAST_HOUSE in self.text):
@@ -210,7 +210,7 @@ class OtherFile(Document):
210
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
211
211
 
212
212
  @staticmethod
213
- def count_by_category_table(files: Sequence['OtherFile']) -> Table:
213
+ def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
214
214
  counts = defaultdict(int)
215
215
  category_bytes = defaultdict(int)
216
216
 
@@ -221,7 +221,8 @@ class OtherFile(Document):
221
221
  counts[file.category()] += 1
222
222
  category_bytes[file.category()] += file.file_size()
223
223
 
224
- table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
224
+ table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
225
+ table.columns[-1].justify = 'right'
225
226
  table.columns[0].min_width = 14
226
227
  table.columns[-1].style = 'dim'
227
228
 
@@ -230,7 +231,7 @@ class OtherFile(Document):
230
231
  known_author_count = Document.known_author_count(category_files)
231
232
 
232
233
  table.add_row(
233
- styled_category(category or UNKNOWN),
234
+ styled_category(category),
234
235
  str(count),
235
236
  str(known_author_count),
236
237
  str(count - known_author_count),
@@ -240,13 +241,13 @@ class OtherFile(Document):
240
241
  return table
241
242
 
242
243
  @staticmethod
243
- def files_preview_table(files: Sequence['OtherFile']) -> Table:
244
+ def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
244
245
  """Build a table of OtherFile documents."""
245
- table = build_table('Other Files Details', show_lines=True)
246
+ table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
246
247
  table.add_column('File', justify='center', width=FILENAME_LENGTH)
247
248
  table.add_column('Date', justify='center')
248
- table.add_column('Size', justify='center')
249
- table.add_column('Type', justify='center')
249
+ table.add_column('Size', justify='right', style='dim')
250
+ table.add_column('Category', justify='center')
250
251
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
251
252
 
252
253
  for file in files:
@@ -263,7 +264,7 @@ class OtherFile(Document):
263
264
 
264
265
  table.add_row(
265
266
  Group(*link_and_info),
266
- Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
267
+ Text(date_str, style=TIMESTAMP_STYLE) if date_str else QUESTION_MARKS_TXT,
267
268
  file.file_size_str(),
268
269
  file.category_txt(),
269
270
  preview_text,
@@ -8,45 +8,35 @@ from datetime import datetime
8
8
  from pathlib import Path
9
9
  from typing import Sequence, Type
10
10
 
11
- from rich.align import Align
12
11
  from rich.padding import Padding
13
12
  from rich.table import Table
14
13
  from rich.text import Text
15
14
 
16
15
  from epstein_files.documents.document import Document
17
- from epstein_files.documents.email import DETECT_EMAIL_REGEX, JUNK_EMAILERS, KRASSNER_RECIPIENTS, USELESS_EMAILERS, Email
16
+ from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
18
17
  from epstein_files.documents.emails.email_header import AUTHOR
19
18
  from epstein_files.documents.json_file import JsonFile
20
19
  from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
21
20
  from epstein_files.documents.other_file import OtherFile
22
21
  from epstein_files.util.constant.strings import *
23
- from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
24
- epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
25
22
  from epstein_files.util.constants import *
26
- from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
23
+ from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
27
24
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
28
25
  from epstein_files.util.env import DOCS_DIR, args, logger
29
26
  from epstein_files.util.file_helper import file_size_str
30
27
  from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
31
- from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
32
- print_other_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_panel, print_centered,
33
- print_panel, print_section_header, vertically_pad)
28
+ from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
29
+ print_author_panel, print_centered, print_subtitle_panel)
34
30
  from epstein_files.util.search_result import SearchResult
35
31
  from epstein_files.util.timer import Timer
36
32
 
37
- EXCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
38
- PICKLED_PATH = Path("the_epstein_files.pkl.gz")
33
+ EXCLUDED_EMAILERS = USELESS_EMAILERS + [JEFFREY_EPSTEIN]
34
+ DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
39
35
  DEVICE_SIGNATURE = 'Device Signature'
40
36
  DEVICE_SIGNATURE_PADDING = (1, 0)
37
+ PICKLED_PATH = Path("the_epstein_files.pkl.gz")
41
38
  SLOW_FILE_SECONDS = 1.0
42
39
 
43
- INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
44
- 'ACT for America',
45
- 'BS Stern',
46
- INTELLIGENCE_SQUARED,
47
- UNKNOWN,
48
- ]
49
-
50
40
 
51
41
  @dataclass
52
42
  class EpsteinFiles:
@@ -128,7 +118,7 @@ class EpsteinFiles:
128
118
  def all_emailers(self, include_useless: bool = False) -> list[str | None]:
129
119
  """Returns all emailers except Epstein and EXCLUDED_EMAILERS, sorted from least frequent to most."""
130
120
  names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
131
- names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
121
+ names = names if include_useless else [e for e in names if e not in EXCLUDED_EMAILERS]
132
122
  return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
133
123
 
134
124
  def docs_matching(
@@ -177,7 +167,7 @@ class EpsteinFiles:
177
167
 
178
168
  def emails_for(self, author: str | None) -> list[Email]:
179
169
  """Returns emails to or from a given 'author' sorted chronologically."""
180
- emails = self.emails if author == EVERYONE else (self.emails_by(author) + self.emails_to(author))
170
+ emails = self.emails_by(author) + self.emails_to(author)
181
171
 
182
172
  if len(emails) == 0:
183
173
  raise RuntimeError(f"No emails found for '{author}'")
@@ -230,6 +220,7 @@ class EpsteinFiles:
230
220
  def print_files_summary(self) -> None:
231
221
  table = build_table('Summary of Document Types')
232
222
  add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
223
+ table.columns[1].justify = 'right'
233
224
 
234
225
  def add_row(label: str, docs: list):
235
226
  known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
@@ -246,18 +237,19 @@ class EpsteinFiles:
246
237
  add_row('iMessage Logs', self.imessage_logs)
247
238
  add_row('JSON Data', self.json_files)
248
239
  add_row('Other', self.non_json_other_files())
249
- console.print(Align.center(table))
240
+ print_centered(table)
250
241
  console.line()
251
242
 
252
243
  def print_emails_for(self, _author: str | None) -> list[Email]:
253
244
  """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
254
- conversation_length = self.email_conversation_length_in_days(_author)
255
245
  emails = self.emails_for(_author)
246
+ num_days = self.email_conversation_length_in_days(_author)
256
247
  unique_emails = [email for email in emails if not email.is_duplicate()]
248
+ start_date = emails[0].timestamp.date()
257
249
  author = _author or UNKNOWN
258
250
 
259
251
  print_author_panel(
260
- f"Found {len(unique_emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
252
+ f"Found {len(unique_emails)} emails to/from {author} starting {start_date} covering {num_days:,} days",
261
253
  get_style_for_name(author),
262
254
  get_info_for_name(author)
263
255
  )
@@ -280,54 +272,13 @@ class EpsteinFiles:
280
272
 
281
273
  def print_emails_table_for(self, author: str | None) -> None:
282
274
  emails = [email for email in self.emails_for(author) if not email.is_duplicate()] # Remove dupes
283
- console.print(Align.center(Email.build_table(emails, author)), '\n')
275
+ print_centered(Email.build_emails_table(emails, author))
276
+ console.line()
284
277
 
285
278
  def print_email_device_info(self) -> None:
286
- print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
287
- console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
279
+ print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE, padding=(2, 0, 0, 0), centered=True)
288
280
  console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
289
-
290
- def table_of_emailers(self) -> Table:
291
- attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
292
- footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
293
- counts_table = build_table("Email Counts", caption=footer)
294
-
295
- add_cols_to_table(counts_table, [
296
- 'Name',
297
- 'Num',
298
- 'Sent',
299
- "Recv",
300
- {'name': 'First', 'highlight': True},
301
- {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
302
- JMAIL,
303
- 'eMedia',
304
- 'eWeb',
305
- 'Twitter',
306
- ])
307
-
308
- emailer_counts = {
309
- emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
310
- for emailer in self.all_emailers(True)
311
- }
312
-
313
- for name, count in sort_dict(emailer_counts):
314
- style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
315
- emails = self.emails_for(name)
316
-
317
- counts_table.add_row(
318
- Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
319
- str(count),
320
- str(self.email_author_counts[name]),
321
- str(self.email_recipient_counts[name]),
322
- emails[0].timestamp_without_seconds(),
323
- emails[-1].timestamp_without_seconds(),
324
- link_text_obj(search_jmail_url(name), JMAIL) if name else '',
325
- link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
326
- link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
327
- link_text_obj(search_twitter_url(name), 'search X') if name else '',
328
- )
329
-
330
- return counts_table
281
+ console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
331
282
 
332
283
  def _tally_email_data(self) -> None:
333
284
  """Tally up summary info about Email objects."""
@@ -373,18 +324,6 @@ def document_cls(doc: Document) -> Type[Document]:
373
324
  return OtherFile
374
325
 
375
326
 
376
- def is_ok_for_epstein_web(name: str | None) -> bool:
377
- """Return True if it's likely that EpsteinWeb has a page for this name."""
378
- if name is None or ' ' not in name:
379
- return False
380
- elif '@' in name or '/' in name or '??' in name:
381
- return False
382
- elif name in INVALID_FOR_EPSTEIN_WEB:
383
- return False
384
-
385
- return True
386
-
387
-
388
327
  def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
389
328
  title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
390
329
  table = build_table(title, header_style="bold reverse", show_lines=True)
@@ -89,6 +89,6 @@ UNSINGULARIZABLE_WORDS = """
89
89
  """.strip().split()
90
90
 
91
91
 
92
- if args.deep_debug:
93
- word_str = '\n'.join(COMMON_WORDS_LIST)
94
- print(f"common words:\n\n{word_str}")
92
+ # if args.deep_debug:
93
+ # word_str = '\n'.join(COMMON_WORDS_LIST)
94
+ # print(f"common words:\n\n{word_str}")
@@ -13,7 +13,8 @@ else:
13
13
  page_type = 'Text Messages'
14
14
 
15
15
 
16
- CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
16
+ CONSOLE_HTML_FORMAT = """
17
+ <!DOCTYPE html>
17
18
  <html>
18
19
  <head>
19
20
  <meta charset="UTF-8">
@@ -22,17 +23,15 @@ CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
22
23
  <style>
23
24
  {stylesheet}
24
25
  body {{
25
- color: {foreground};
26
26
  background-color: {background};
27
+ color: {foreground};
27
28
  }}
28
29
  </style>
29
30
  """ + f"<title>Epstein {page_type}</title>" + """
30
31
  </head>
31
32
  <body>
32
33
  <pre style="font-family: Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace; white-space: pre-wrap; overflow-wrap: break-word;">
33
- <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">
34
- {code}
35
- </code>
34
+ <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">{code}</code>
36
35
  </pre>
37
36
  </body>
38
37
  </html>
@@ -166,6 +166,7 @@ TOM_BARRACK = 'Tom Barrack'
166
166
  TOM_PRITZKER = 'Tom Pritzker'
167
167
  TONJA_HADDAD_COLEMAN = 'Tonja Haddad Coleman'
168
168
  TYLER_SHEARS = 'Tyler Shears' # Reputation manager, like Al Seckel
169
+ VINCENZO_IOZZO = 'Vincenzo Iozzo'
169
170
  VINIT_SAHNI = 'Vinit Sahni'
170
171
  ZUBAIR_KHAN = 'Zubair Khan'
171
172
 
@@ -197,9 +198,11 @@ GOLDMAN_SACHS = 'Goldman Sachs'
197
198
  GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
198
199
  HARVARD = 'Harvard'
199
200
  INSIGHTS_POD = f"InsightsPod" # Zubair bots
201
+ MIT_MEDIA_LAB = 'MIT Media Lab'
200
202
  NEXT_MANAGEMENT = 'Next Management LLC'
201
203
  JP_MORGAN = 'JP Morgan'
202
204
  OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
205
+ ROTHSTEIN_ROSENFELDT_ADLER = 'Rothstein Rosenfeldt Adler (Rothstein was a crook & partner of Roger Stone)'
203
206
  TRUMP_ORG = 'Trump Organization'
204
207
  UBS = 'UBS'
205
208
 
@@ -231,12 +234,12 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
231
234
  # Names to color white in the word counts
232
235
  OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
233
236
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
234
- baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
237
+ baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
235
238
  chapman charles charlie christopher clint cohen colin collins conway
236
239
  danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
237
- edmond elizabeth emily entwistle erik evelyn
240
+ edmond elizabeth emily enterprises entwistle erik evelyn
238
241
  ferguson flachsbart francis franco frank frost
239
- gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
242
+ gardner gary geoff geoffrey gerald gilbert gloria gold goldberg gonzalez gould graham greene guarino gwyneth
240
243
  hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
241
244
  ian isaac isaacson
242
245
  james jamie jane janet jason jeffrey jen jim joe johnson jones josh julie justin
@@ -245,12 +248,12 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
245
248
  marcus marianne matt matthew melissa michele michelle moore moscowitz
246
249
  nancy nicole nussbaum
247
250
  owen
248
- paulson philippe
249
- rafael ray richard richardson rob robin ron rubin rudolph ryan
251
+ paulson peter philippe
252
+ rafael ray richard richardson rob robert robin ron rubin rudolph ryan
250
253
  sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
251
254
  ted theresa thompson tiffany timothy tony
252
255
  valeria
253
- walter warren weinstein weiss william
256
+ walter warren waters weinstein weiss william
254
257
  zach zack
255
258
  """.strip().split()
256
259
 
@@ -11,7 +11,7 @@ BUSINESS = 'business'
11
11
  CONFERENCE = 'conference'
12
12
  ENTERTAINER = 'entertainer'
13
13
  FINANCE = 'finance'
14
- FLIGHT_LOGS = 'flight logs'
14
+ FLIGHT_LOG = 'flight log'
15
15
  JOURNALIST = 'journalist'
16
16
  JUNK = 'junk'
17
17
  LEGAL = 'legal'
@@ -56,7 +56,6 @@ TIMESTAMP_DIM = f"turquoise4 dim"
56
56
  # Misc
57
57
  AUTHOR = 'author'
58
58
  DEFAULT = 'default'
59
- EVERYONE = 'everyone'
60
59
  HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
61
60
  JSON = 'json'
62
61
  NA = 'n/a'
@@ -77,3 +76,8 @@ OTHER_FILE_CLASS = 'OtherFile'
77
76
 
78
77
 
79
78
  remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name)
79
+
80
+
81
+ def indented(s: str, spaces: int = 4) -> str:
82
+ indent = ' ' * spaces
83
+ return indent + f"\n{indent}".join(s.split('\n'))
@@ -73,7 +73,7 @@ search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(tx
73
73
 
74
74
  def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
75
75
  file_stem = coerce_file_stem(filename_or_id)
76
- file_stem = file_stem.lower() if case == 'lower' else file_stem
76
+ file_stem = file_stem.lower() if case == 'lower' or EPSTEIN_MEDIA in base_url else file_stem
77
77
  file_stem = file_stem.title() if case == 'title' else file_stem
78
78
  return f"{base_url}{file_stem}"
79
79
 
@@ -25,8 +25,8 @@ HEADER_ABBREVIATIONS = {
25
25
  'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
26
26
  'JEGE': "Epstein's airplane holding company",
27
27
  'Jeffrey Wernick': 'right wing crypto bro, former COO of Parler',
28
- 'Joi': 'Joi Ito (MIT Media Lab, MIT Digital Currency Initiative)',
29
- "Hoffenberg": "Steven Hoffenberg (Epstein's ponzi scheme partner)",
28
+ 'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
29
+ "Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
30
30
  'KSA': "Kingdom of Saudi Arabia",
31
31
  'Kurz': 'Sebastian Kurz (former Austrian Chancellor)',
32
32
  'Kwok': "Chinese criminal Miles Kwok AKA Miles Guo AKA Guo Wengui",
@@ -91,17 +91,17 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
91
91
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
92
92
  JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
93
93
  KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
94
- KEN_STARR: re.compile(r'starr, ken|Ken(neth W.)?\s+starr?|starr', re.IGNORECASE),
94
+ KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
95
95
  LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
96
96
  LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|Ihsofficel', re.IGNORECASE),
97
97
  LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
98
- LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|[jl]awkrauss|kruase', re.IGNORECASE),
99
- LEON_BLACK: re.compile(r'Leon Black?', re.IGNORECASE),
98
+ LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
99
+ LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
100
+ LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
101
+ LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
100
102
  MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
101
103
  MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
102
104
  MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
103
- LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
104
- LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
105
105
  MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
106
106
  MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
107
107
  MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -128,10 +128,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
128
128
  PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
129
129
  REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
130
130
  RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
131
- ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
131
+ ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
132
132
  ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
133
133
  ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
134
- ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
134
+ ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
135
135
  SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
136
136
  SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
137
137
  SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
@@ -145,7 +145,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
145
145
  TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
146
146
  THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
147
147
  THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
148
- TONJA_HADDAD_COLEMAN: re.compile(fr"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE)
148
+ TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
149
+ VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
149
150
  }
150
151
 
151
152
  # If found as substring consider them the author
@@ -194,7 +195,6 @@ EMAILERS = [
194
195
  'Steven Victor MD',
195
196
  'Susan Edelman',
196
197
  TOM_BARRACK,
197
- 'Vincenzo Lozzo',
198
198
  'Vladimir Yudashkin',
199
199
  ]
200
200
 
@@ -387,6 +387,7 @@ EMAILS_CONFIG = [
387
387
  EmailCfg(
388
388
  id='023208',
389
389
  author=JEFFREY_EPSTEIN,
390
+ description=f"very long email chain about Leon Black's finances and things like Gratitude America",
390
391
  fwded_text_after='Date: Tue, Oct 27',
391
392
  recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
392
393
  duplicate_ids=['023291'],
@@ -499,7 +500,7 @@ EMAILS_CONFIG = [
499
500
  author=STEVEN_HOFFENBERG,
500
501
  recipients=["Players2"],
501
502
  timestamp=parse('2016-08-11 09:36:01'),
502
- attribution_reason='Actually a fwd by Charles Michael but Hoffenberg email more interesting',
503
+ attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
503
504
  ),
504
505
  EmailCfg(
505
506
  id='026620',
@@ -859,7 +860,6 @@ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
859
860
  WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
860
861
  ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
861
862
 
862
-
863
863
  OTHER_FILES_BOOKS = [
864
864
  DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
865
865
  DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -1139,11 +1139,7 @@ OTHER_FILES_LEGAL = [
1139
1139
  DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
1140
1140
  DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
1141
1141
  DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
1142
- DocCfg(
1143
- id='031447',
1144
- author=MARTIN_WEINBERG,
1145
- description=f"letter from to Melanie Ann Pustay and Sean O'Neill re: an Epstein FOIA request"
1146
- ),
1142
+ DocCfg(id='031447', author=MARTIN_WEINBERG, description=f"letter from to Melanie Ann Pustay & Sean O'Neill re: Epstein FOIA request"),
1147
1143
  DocCfg(
1148
1144
  id='028965',
1149
1145
  author=MARTIN_WEINBERG,
@@ -1223,7 +1219,7 @@ OTHER_FILES_CONFERENCES = [
1223
1219
  OTHER_FILES_FINANCE = [
1224
1220
  DocCfg(id='024631', author='Ackrell Capital', description=f"Cannabis Investment Report 2018", is_interesting=True),
1225
1221
  DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
1226
- DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
1222
+ DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump's effect on MXN", date='2016-09-22'),
1227
1223
  DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
1228
1224
  DocCfg(id='014404', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
1229
1225
  DocCfg(id='014410', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
@@ -1515,8 +1511,8 @@ OTHER_FILES_ARTS = [
1515
1511
  ]
1516
1512
 
1517
1513
  OTHER_FILES_MISC = [
1518
- DocCfg(id='022780', category=FLIGHT_LOGS),
1519
- DocCfg(id='022816', category=FLIGHT_LOGS),
1514
+ DocCfg(id='022780', category=FLIGHT_LOG),
1515
+ DocCfg(id='022816', category=FLIGHT_LOG),
1520
1516
  DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1521
1517
  DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1522
1518
  DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
@@ -1541,7 +1537,7 @@ OTHER_FILES_MISC = [
1541
1537
  DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1542
1538
  DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1543
1539
  DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
1544
- DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
1540
+ DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
1545
1541
  DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
1546
1542
  DocCfg(
1547
1543
  id='030142',