epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  import re
2
- from dataclasses import dataclass
2
+ from dataclasses import dataclass, field, fields
3
3
  from datetime import datetime
4
4
 
5
5
  from rich.text import Text
6
6
 
7
7
  from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
8
8
  from epstein_files.util.constant.strings import TIMESTAMP_DIM
9
- from epstein_files.util.data import extract_last_name
9
+ from epstein_files.util.data import extract_last_name, iso_timestamp
10
10
  from epstein_files.util.highlighted_group import get_style_for_name
11
11
  from epstein_files.util.logging import logger
12
12
  from epstein_files.util.rich import TEXT_LINK, highlighter
@@ -30,7 +30,7 @@ class TextMessage:
30
30
  """Class representing a single iMessage text message."""
31
31
  author: str | None
32
32
  author_str: str = ''
33
- id_confirmed: bool = False
33
+ is_id_confirmed: bool = False
34
34
  text: str
35
35
  timestamp_str: str
36
36
 
@@ -44,38 +44,55 @@ class TextMessage:
44
44
  else:
45
45
  self.author_str = self.author_str or self.author
46
46
 
47
- if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
47
+ if not self.is_id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
48
48
  self.author_str += ' (?)'
49
49
 
50
- def timestamp(self) -> datetime:
51
- return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
50
+ if self.is_link():
51
+ self.text = self.text.replace('\n', '').replace(' ', '_')
52
+ else:
53
+ self.text = self.text.replace('\n', ' ')
52
54
 
53
- def _message(self) -> Text:
54
- lines = self.text.split('\n')
55
def is_link(self) -> bool:
    """Return True when the message text begins with 'http' (i.e. it looks like a URL)."""
    prefix = 'http'
    return self.text[:len(prefix)] == prefix
55
57
 
56
- # Fix multiline links
57
- if self.text.startswith('http'):
58
- text = self.text
58
def parse_timestamp(self) -> datetime:
    """Parse this message's `timestamp_str` into a datetime using MSG_DATE_FORMAT.

    Raises:
        ValueError: if `timestamp_str` does not match MSG_DATE_FORMAT (raised by strptime).
    """
    raw_timestamp = self.timestamp_str
    return datetime.strptime(raw_timestamp, MSG_DATE_FORMAT)
59
60
 
60
- if len(lines) > 1 and not lines[0].endswith('html'):
61
- if len(lines) > 2 and lines[1].endswith('-'):
62
- text = text.replace('\n', '', 2)
63
- else:
64
- text = text.replace('\n', '', 1)
61
+ def timestamp_txt(self) -> Text:
62
+ timestamp_str = self.timestamp_str
65
63
 
66
- lines = text.split('\n')
67
- link_text = lines.pop()
68
- msg_txt = Text('').append(Text.from_markup(f"[link={link_text}]{link_text}[/link]", style=TEXT_LINK))
64
+ try:
65
+ timestamp_str = iso_timestamp(self.parse_timestamp())
66
+ except Exception as e:
67
+ logger.warning(f"Failed to parse timestamp for {self}")
69
68
 
70
- if len(lines) > 0:
71
- msg_txt.append('\n' + ' '.join(lines))
72
- else:
73
- msg_txt = highlighter(' '.join(lines)) # remove newlines
69
+ return Text(f"[{timestamp_str}]", style=TIMESTAMP_DIM)
74
70
 
75
- return msg_txt
71
def _message(self) -> Text:
    """Build the rich Text for the message body.

    Link messages are rendered as a clickable link styled with TEXT_LINK; everything
    else is passed through the shared highlighter.
    """
    if not self.is_link():
        return highlighter(self.text)

    link_markup = f"[link={self.text}]{self.text}[/link]"
    return Text.from_markup(link_markup, style=TEXT_LINK)
76
76
 
77
77
  def __rich__(self) -> Text:
78
- timestamp_txt = Text(f"[{self.timestamp_str}]", style=TIMESTAMP_DIM).append(' ')
78
+ timestamp_txt = self.timestamp_txt().append(' ')
79
79
  author_style = get_style_for_name(self.author_str if self.author_str.startswith('+') else self.author)
80
80
  author_txt = Text(self.author_str, style=author_style)
81
81
  return Text('').append(timestamp_txt).append(author_txt).append(': ', style='dim').append(self._message())
82
+
83
+ def __repr__(self) -> str:
84
+ props = []
85
+ add_prop = lambda k, v: props.append(f"{k}={v}")
86
+
87
+ for _field in sorted(fields(self), key=lambda f: f.name):
88
+ key = _field.name
89
+ value = getattr(self, key)
90
+
91
+ if key == 'author_str' and self.author and self.author_str.startswith(value):
92
+ continue
93
+ elif isinstance(value, str):
94
+ add_prop(key, f'"{value}"')
95
+ else:
96
+ add_prop(key, value)
97
+
98
+ return f"{type(self).__name__}(" + ', '.join(props) + f')'
@@ -2,7 +2,7 @@ import logging
2
2
  import re
3
3
  from collections import defaultdict
4
4
  from dataclasses import dataclass, field
5
- from datetime import datetime
5
+ from datetime import datetime, timedelta
6
6
 
7
7
  from rich.console import Console, ConsoleOptions, RenderResult
8
8
  from rich.table import Table
@@ -36,10 +36,10 @@ class MessengerLog(Communication):
36
36
  self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
37
37
 
38
38
  def first_message_at(self, name: str | None) -> datetime:
39
- return self.messages_by(name)[0].timestamp()
39
+ return self.messages_by(name)[0].parse_timestamp()
40
40
 
41
41
  def info_txt(self) -> Text | None:
42
- num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
42
+ num_days_str = days_between_str(self.timestamp, self.messages[-1].parse_timestamp())
43
43
  txt = Text(f"(Covers {num_days_str} starting ", style='dim')
44
44
  txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
45
45
 
@@ -47,7 +47,7 @@ class MessengerLog(Communication):
47
47
  txt.append('with unknown counterparty')
48
48
  else:
49
49
  txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
50
- txt.append(Text(self.author, style=self.author_style + ' bold'))
50
+ txt.append(Text(self.author, style=self.author_style() + ' bold'))
51
51
 
52
52
  if self.phone_number:
53
53
  txt.append(highlighter(f" using the phone number {self.phone_number}"))
@@ -55,7 +55,7 @@ class MessengerLog(Communication):
55
55
  return txt.append(')')
56
56
 
57
57
  def last_message_at(self, name: str | None) -> datetime:
58
- return self.messages_by(name)[-1].timestamp()
58
+ return self.messages_by(name)[-1].parse_timestamp()
59
59
 
60
60
  def messages_by(self, name: str | None) -> list[TextMessage]:
61
61
  """Return all messages by 'name'."""
@@ -71,7 +71,7 @@ class MessengerLog(Communication):
71
71
  return metadata
72
72
 
73
73
  def _border_style(self) -> str:
74
- return self.author_style
74
+ return self.author_style()
75
75
 
76
76
  def _build_message(self, match: re.Match) -> TextMessage:
77
77
  """Turn a regex match into a TextMessage."""
@@ -86,7 +86,7 @@ class MessengerLog(Communication):
86
86
  return TextMessage(
87
87
  author=self.author if (is_phone_number or not author_str) else author_str,
88
88
  author_str=author_str if is_phone_number else '', # Preserve phone numbers
89
- id_confirmed=not self.is_attribution_uncertain(),
89
+ is_id_confirmed=not self.is_attribution_uncertain(),
90
90
  text=match.group(4).strip(),
91
91
  timestamp_str=match.group(2).strip(),
92
92
  )
@@ -96,12 +96,31 @@ class MessengerLog(Communication):
96
96
  message = self._build_message(match)
97
97
 
98
98
  try:
99
- return message.timestamp()
99
+ return message.parse_timestamp()
100
100
  except ValueError as e:
101
101
  logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
102
102
 
103
103
  raise RuntimeError(f"{self}: No timestamp found!")
104
104
 
105
+ def _set_message_timestamps(self) -> None:
106
+ raise NotImplementedError(f"TextMessage.timestamp no longer exists")
107
+ last_message: TextMessage | None = None
108
+
109
+ for i, message in enumerate(self.messages):
110
+ try:
111
+ message.timestamp = message.parse_timestamp()
112
+ except Exception as e:
113
+ msg = f"Failed to parse timestamp for TextMessage {i + 1}, {message}: {e}"
114
+
115
+ if i == 0:
116
+ message.timestamp = self.timestamp
117
+ self.warn(f"{msg}\nit's the first message so using the MessengerLog timestamp property {self.timestamp}")
118
+ else:
119
+ message.timestamp = last_message.timestamp + timedelta(milliseconds=1)
120
+ self.warn(f"{msg}\nadding 1 millisecond to last timestamp {last_message.timestamp}")
121
+
122
+ last_message = message
123
+
105
124
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
106
125
  yield self.file_info_panel()
107
126
  yield Text('')
@@ -126,13 +145,13 @@ class MessengerLog(Communication):
126
145
  author_counts = cls.count_authors(log_files)
127
146
  msg_count = sum([len(log.messages) for log in log_files])
128
147
 
129
- footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
130
- counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
148
+ footer = f"deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
149
+ counts_table = build_table("Text Message Counts By Author", caption=f"({footer} {len(log_files)} files)")
131
150
  counts_table.add_column(AUTHOR.title(), justify='left', width=30)
132
151
  counts_table.add_column('Files', justify='right', style='white')
133
152
  counts_table.add_column("Msgs", justify='right')
134
- counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
135
- counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
153
+ counts_table.add_column('First Sent At', justify='center', highlight=True)
154
+ counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE)
136
155
  counts_table.add_column('Days', justify='right', style='dim')
137
156
 
138
157
  for name, count in sort_dict(author_counts):
@@ -21,8 +21,8 @@ from epstein_files.util.doc_cfg import DocCfg, Metadata
21
21
  from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
22
22
  from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
23
23
  from epstein_files.util.env import args
24
- from epstein_files.util.highlighted_group import styled_category
25
- from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
24
+ from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
25
+ from epstein_files.util.rich import build_table, highlighter
26
26
  from epstein_files.util.logging import logger
27
27
 
28
28
  FIRST_FEW_LINES = 'First Few Lines'
@@ -105,7 +105,7 @@ class OtherFile(Document):
105
105
  return self.config and self.config.category
106
106
 
107
107
  def category_txt(self) -> Text | None:
108
- return styled_category(self.category() or UNKNOWN)
108
+ return styled_category(self.category())
109
109
 
110
110
  def config_description(self) -> str | None:
111
111
  """Overloads superclass method."""
@@ -184,7 +184,7 @@ class OtherFile(Document):
184
184
  if len(timestamps) >= MAX_EXTRACTED_TIMESTAMPS:
185
185
  break
186
186
  except ValueError as e:
187
- self.log(f"Error while iterating through datefinder.find_dates(): {e}", logging.WARNING)
187
+ self.warn(f"Error while iterating through datefinder.find_dates(): {e}")
188
188
 
189
189
  if len(timestamps) == 0:
190
190
  if not (self.is_duplicate() or VAST_HOUSE in self.text):
@@ -210,7 +210,7 @@ class OtherFile(Document):
210
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
211
211
 
212
212
  @staticmethod
213
- def count_by_category_table(files: Sequence['OtherFile']) -> Table:
213
+ def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
214
214
  counts = defaultdict(int)
215
215
  category_bytes = defaultdict(int)
216
216
 
@@ -221,7 +221,8 @@ class OtherFile(Document):
221
221
  counts[file.category()] += 1
222
222
  category_bytes[file.category()] += file.file_size()
223
223
 
224
- table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
224
+ table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
225
+ table.columns[-1].justify = 'right'
225
226
  table.columns[0].min_width = 14
226
227
  table.columns[-1].style = 'dim'
227
228
 
@@ -230,7 +231,7 @@ class OtherFile(Document):
230
231
  known_author_count = Document.known_author_count(category_files)
231
232
 
232
233
  table.add_row(
233
- styled_category(category or UNKNOWN),
234
+ styled_category(category),
234
235
  str(count),
235
236
  str(known_author_count),
236
237
  str(count - known_author_count),
@@ -240,13 +241,13 @@ class OtherFile(Document):
240
241
  return table
241
242
 
242
243
  @staticmethod
243
- def files_preview_table(files: Sequence['OtherFile']) -> Table:
244
+ def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
244
245
  """Build a table of OtherFile documents."""
245
- table = build_table('Other Files Details', show_lines=True)
246
+ table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
246
247
  table.add_column('File', justify='center', width=FILENAME_LENGTH)
247
248
  table.add_column('Date', justify='center')
248
- table.add_column('Size', justify='center')
249
- table.add_column('Type', justify='center')
249
+ table.add_column('Size', justify='right', style='dim')
250
+ table.add_column('Category', justify='center')
250
251
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
251
252
 
252
253
  for file in files:
@@ -263,7 +264,7 @@ class OtherFile(Document):
263
264
 
264
265
  table.add_row(
265
266
  Group(*link_and_info),
266
- Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
267
+ Text(date_str, style=TIMESTAMP_STYLE) if date_str else QUESTION_MARKS_TXT,
267
268
  file.file_size_str(),
268
269
  file.category_txt(),
269
270
  preview_text,
@@ -8,45 +8,35 @@ from datetime import datetime
8
8
  from pathlib import Path
9
9
  from typing import Sequence, Type
10
10
 
11
- from rich.align import Align
12
11
  from rich.padding import Padding
13
12
  from rich.table import Table
14
13
  from rich.text import Text
15
14
 
16
15
  from epstein_files.documents.document import Document
17
- from epstein_files.documents.email import DETECT_EMAIL_REGEX, JUNK_EMAILERS, KRASSNER_RECIPIENTS, USELESS_EMAILERS, Email
16
+ from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
18
17
  from epstein_files.documents.emails.email_header import AUTHOR
19
18
  from epstein_files.documents.json_file import JsonFile
20
19
  from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
21
20
  from epstein_files.documents.other_file import OtherFile
22
21
  from epstein_files.util.constant.strings import *
23
- from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
24
- epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
25
22
  from epstein_files.util.constants import *
26
- from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
23
+ from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
27
24
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
28
25
  from epstein_files.util.env import DOCS_DIR, args, logger
29
26
  from epstein_files.util.file_helper import file_size_str
30
27
  from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
31
- from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
32
- print_other_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
33
- print_panel, print_section_header, vertically_pad)
28
+ from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
29
+ print_author_panel, print_centered, print_subtitle_panel)
34
30
  from epstein_files.util.search_result import SearchResult
35
31
  from epstein_files.util.timer import Timer
36
32
 
37
- EXCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
38
- PICKLED_PATH = Path("the_epstein_files.pkl.gz")
33
+ EXCLUDED_EMAILERS = USELESS_EMAILERS + [JEFFREY_EPSTEIN]
34
+ DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
39
35
  DEVICE_SIGNATURE = 'Device Signature'
40
36
  DEVICE_SIGNATURE_PADDING = (1, 0)
37
+ PICKLED_PATH = Path("the_epstein_files.pkl.gz")
41
38
  SLOW_FILE_SECONDS = 1.0
42
39
 
43
- INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
44
- 'ACT for America',
45
- 'BS Stern',
46
- INTELLIGENCE_SQUARED,
47
- UNKNOWN,
48
- ]
49
-
50
40
 
51
41
  @dataclass
52
42
  class EpsteinFiles:
@@ -128,7 +118,7 @@ class EpsteinFiles:
128
118
  def all_emailers(self, include_useless: bool = False) -> list[str | None]:
129
119
  """Returns all emailers except Epstein and EXCLUDED_EMAILERS, sorted from least frequent to most."""
130
120
  names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
131
- names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
121
+ names = names if include_useless else [e for e in names if e not in EXCLUDED_EMAILERS]
132
122
  return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
133
123
 
134
124
  def docs_matching(
@@ -177,7 +167,7 @@ class EpsteinFiles:
177
167
 
178
168
  def emails_for(self, author: str | None) -> list[Email]:
179
169
  """Returns emails to or from a given 'author' sorted chronologically."""
180
- emails = self.emails if author == EVERYONE else (self.emails_by(author) + self.emails_to(author))
170
+ emails = self.emails_by(author) + self.emails_to(author)
181
171
 
182
172
  if len(emails) == 0:
183
173
  raise RuntimeError(f"No emails found for '{author}'")
@@ -230,6 +220,7 @@ class EpsteinFiles:
230
220
  def print_files_summary(self) -> None:
231
221
  table = build_table('Summary of Document Types')
232
222
  add_cols_to_table(table, ['File Type', 'Files', 'Author Known', 'Author Unknown', 'Duplicates'])
223
+ table.columns[1].justify = 'right'
233
224
 
234
225
  def add_row(label: str, docs: list):
235
226
  known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
@@ -246,18 +237,19 @@ class EpsteinFiles:
246
237
  add_row('iMessage Logs', self.imessage_logs)
247
238
  add_row('JSON Data', self.json_files)
248
239
  add_row('Other', self.non_json_other_files())
249
- console.print(Align.center(table))
240
+ print_centered(table)
250
241
  console.line()
251
242
 
252
243
  def print_emails_for(self, _author: str | None) -> list[Email]:
253
244
  """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
254
- conversation_length = self.email_conversation_length_in_days(_author)
255
245
  emails = self.emails_for(_author)
246
+ num_days = self.email_conversation_length_in_days(_author)
256
247
  unique_emails = [email for email in emails if not email.is_duplicate()]
248
+ start_date = emails[0].timestamp.date()
257
249
  author = _author or UNKNOWN
258
250
 
259
- print_author_header(
260
- f"Found {len(unique_emails)} {author} emails starting {emails[0].timestamp.date()} over {conversation_length:,} days",
251
+ print_author_panel(
252
+ f"Found {len(unique_emails)} emails to/from {author} starting {start_date} covering {num_days:,} days",
261
253
  get_style_for_name(author),
262
254
  get_info_for_name(author)
263
255
  )
@@ -280,54 +272,13 @@ class EpsteinFiles:
280
272
 
281
273
  def print_emails_table_for(self, author: str | None) -> None:
282
274
  emails = [email for email in self.emails_for(author) if not email.is_duplicate()] # Remove dupes
283
- console.print(Align.center(Email.build_table(emails, author)), '\n')
275
+ print_centered(Email.build_emails_table(emails, author))
276
+ console.line()
284
277
 
285
278
  def print_email_device_info(self) -> None:
286
- print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
287
- console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
279
+ print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE, padding=(2, 0, 0, 0), centered=True)
288
280
  console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
289
-
290
- def table_of_emailers(self) -> Table:
291
- attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
292
- footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
293
- counts_table = build_table("Email Counts", caption=footer)
294
-
295
- add_cols_to_table(counts_table, [
296
- 'Name',
297
- 'Num',
298
- 'Sent',
299
- "Recv",
300
- {'name': 'First', 'highlight': True},
301
- {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
302
- JMAIL,
303
- 'eMedia',
304
- 'eWeb',
305
- 'Twitter',
306
- ])
307
-
308
- emailer_counts = {
309
- emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
310
- for emailer in self.all_emailers(True)
311
- }
312
-
313
- for name, count in sort_dict(emailer_counts):
314
- style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
315
- emails = self.emails_for(name)
316
-
317
- counts_table.add_row(
318
- Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
319
- str(count),
320
- str(self.email_author_counts[name]),
321
- str(self.email_recipient_counts[name]),
322
- emails[0].timestamp_without_seconds(),
323
- emails[-1].timestamp_without_seconds(),
324
- link_text_obj(search_jmail_url(name), JMAIL) if name else '',
325
- link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
326
- link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
327
- link_text_obj(search_twitter_url(name), 'search X') if name else '',
328
- )
329
-
330
- return counts_table
281
+ console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
331
282
 
332
283
  def _tally_email_data(self) -> None:
333
284
  """Tally up summary info about Email objects."""
@@ -373,18 +324,6 @@ def document_cls(doc: Document) -> Type[Document]:
373
324
  return OtherFile
374
325
 
375
326
 
376
- def is_ok_for_epstein_web(name: str | None) -> bool:
377
- """Return True if it's likely that EpsteinWeb has a page for this name."""
378
- if name is None or ' ' not in name:
379
- return False
380
- elif '@' in name or '/' in name or '??' in name:
381
- return False
382
- elif name in INVALID_FOR_EPSTEIN_WEB:
383
- return False
384
-
385
- return True
386
-
387
-
388
327
  def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
389
328
  title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
390
329
  table = build_table(title, header_style="bold reverse", show_lines=True)
@@ -89,6 +89,6 @@ UNSINGULARIZABLE_WORDS = """
89
89
  """.strip().split()
90
90
 
91
91
 
92
- if args.deep_debug:
93
- word_str = '\n'.join(COMMON_WORDS_LIST)
94
- print(f"common words:\n\n{word_str}")
92
+ # if args.deep_debug:
93
+ # word_str = '\n'.join(COMMON_WORDS_LIST)
94
+ # print(f"common words:\n\n{word_str}")
@@ -5,7 +5,16 @@ from epstein_files.util.env import args
5
5
 
6
6
  PAGE_TITLE = ' ∞ Michel de Cryptadamus ∞ '
7
7
 
8
- CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
8
+ if args.all_emails:
9
+ page_type = 'Emails'
10
+ elif args.email_timeline:
11
+ page_type = 'Chronological Emails'
12
+ else:
13
+ page_type = 'Text Messages'
14
+
15
+
16
+ CONSOLE_HTML_FORMAT = """
17
+ <!DOCTYPE html>
9
18
  <html>
10
19
  <head>
11
20
  <meta charset="UTF-8">
@@ -14,17 +23,15 @@ CONSOLE_HTML_FORMAT = """<!DOCTYPE html>
14
23
  <style>
15
24
  {stylesheet}
16
25
  body {{
17
- color: {foreground};
18
26
  background-color: {background};
27
+ color: {foreground};
19
28
  }}
20
29
  </style>
21
- """ + f"<title>Epstein {'Emails' if args.all_emails else 'Text Messages'}</title>" + """
30
+ """ + f"<title>Epstein {page_type}</title>" + """
22
31
  </head>
23
32
  <body>
24
33
  <pre style="font-family: Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace; white-space: pre-wrap; overflow-wrap: break-word;">
25
- <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">
26
- {code}
27
- </code>
34
+ <code style="font-family: inherit; white-space: pre-wrap; overflow-wrap: break-word;">{code}</code>
28
35
  </pre>
29
36
  </body>
30
37
  </html>
@@ -166,6 +166,7 @@ TOM_BARRACK = 'Tom Barrack'
166
166
  TOM_PRITZKER = 'Tom Pritzker'
167
167
  TONJA_HADDAD_COLEMAN = 'Tonja Haddad Coleman'
168
168
  TYLER_SHEARS = 'Tyler Shears' # Reputation manager, like Al Seckel
169
+ VINCENZO_IOZZO = 'Vincenzo Iozzo'
169
170
  VINIT_SAHNI = 'Vinit Sahni'
170
171
  ZUBAIR_KHAN = 'Zubair Khan'
171
172
 
@@ -197,9 +198,11 @@ GOLDMAN_SACHS = 'Goldman Sachs'
197
198
  GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
198
199
  HARVARD = 'Harvard'
199
200
  INSIGHTS_POD = f"InsightsPod" # Zubair bots
201
+ MIT_MEDIA_LAB = 'MIT Media Lab'
200
202
  NEXT_MANAGEMENT = 'Next Management LLC'
201
203
  JP_MORGAN = 'JP Morgan'
202
204
  OSBORNE_LLP = f"{IAN_OSBORNE} & Partners LLP" # Ian Osborne's PR firm
205
+ ROTHSTEIN_ROSENFELDT_ADLER = 'Rothstein Rosenfeldt Adler (Rothstein was a crook & partner of Roger Stone)'
203
206
  TRUMP_ORG = 'Trump Organization'
204
207
  UBS = 'UBS'
205
208
 
@@ -231,26 +234,26 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
231
234
  # Names to color white in the word counts
232
235
  OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
233
236
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
234
- baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
237
+ baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
235
238
  chapman charles charlie christopher clint cohen colin collins conway
236
239
  danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
237
- edmond elizabeth emily entwistle erik evelyn
240
+ edmond elizabeth emily enterprises entwistle erik evelyn
238
241
  ferguson flachsbart francis franco frank frost
239
- gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
242
+ gardner gary geoff geoffrey gerald gilbert gloria gold goldberg gonzalez gould graham greene guarino gwyneth
240
243
  hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
241
244
  ian isaac isaacson
242
- james jamie jane janet jason jen jim joe johnson jones josh julie justin
245
+ james jamie jane janet jason jeffrey jen jim joe johnson jones josh julie justin
243
246
  karl kate kathy kelly kim kruger kyle
244
247
  laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
245
248
  marcus marianne matt matthew melissa michele michelle moore moscowitz
246
249
  nancy nicole nussbaum
247
250
  owen
248
- paulson philippe
249
- rafael ray richard richardson rob robin ron rubin rudolph ryan
251
+ paulson peter philippe
252
+ rafael ray richard richardson rob robert robin ron rubin rudolph ryan
250
253
  sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
251
254
  ted theresa thompson tiffany timothy tony
252
255
  valeria
253
- walter warren weinstein weiss william
256
+ walter warren waters weinstein weiss william
254
257
  zach zack
255
258
  """.strip().split()
256
259
 
@@ -6,6 +6,7 @@ from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
6
6
  HTML_DIR = Path('docs')
7
7
  EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
8
8
  ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
9
+ CHRONOLOGICAL_EMAILS_PATH = HTML_DIR.joinpath(f'chronological_emails_{EPSTEIN_FILES_NOV_2025}.html')
9
10
  JSON_FILES_JSON_PATH = HTML_DIR.joinpath(f'json_files_from_{EPSTEIN_FILES_NOV_2025}.json')
10
11
  JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
11
12
  TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
@@ -18,6 +19,7 @@ URLS_ENV = '.urls.env'
18
19
  GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
19
20
  TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
20
21
  ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
22
+ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
21
23
  JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
22
24
  JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
23
25
  WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
@@ -29,6 +31,7 @@ SITE_URLS: dict[SiteType, str] = {
29
31
 
30
32
  BUILD_ARTIFACTS = [
31
33
  ALL_EMAILS_PATH,
34
+ CHRONOLOGICAL_EMAILS_PATH,
32
35
  # EPSTEIN_WORD_COUNT_HTML_PATH,
33
36
  JSON_FILES_JSON_PATH,
34
37
  JSON_METADATA_PATH,
@@ -11,7 +11,7 @@ BUSINESS = 'business'
11
11
  CONFERENCE = 'conference'
12
12
  ENTERTAINER = 'entertainer'
13
13
  FINANCE = 'finance'
14
- FLIGHT_LOGS = 'flight logs'
14
+ FLIGHT_LOG = 'flight log'
15
15
  JOURNALIST = 'journalist'
16
16
  JUNK = 'junk'
17
17
  LEGAL = 'legal'
@@ -56,7 +56,6 @@ TIMESTAMP_DIM = f"turquoise4 dim"
56
56
  # Misc
57
57
  AUTHOR = 'author'
58
58
  DEFAULT = 'default'
59
- EVERYONE = 'everyone'
60
59
  HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
61
60
  JSON = 'json'
62
61
  NA = 'n/a'
@@ -77,3 +76,8 @@ OTHER_FILE_CLASS = 'OtherFile'
77
76
 
78
77
 
79
78
  remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name)
79
+
80
+
81
def indented(s: str, spaces: int = 4) -> str:
    """Prefix every line of `s` (empty lines included) with `spaces` space characters."""
    pad = ' ' * spaces
    return '\n'.join(pad + line for line in s.split('\n'))
@@ -73,7 +73,7 @@ search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(tx
73
73
 
74
74
  def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
75
75
  file_stem = coerce_file_stem(filename_or_id)
76
- file_stem = file_stem.lower() if case == 'lower' else file_stem
76
+ file_stem = file_stem.lower() if case == 'lower' or EPSTEIN_MEDIA in base_url else file_stem
77
77
  file_stem = file_stem.title() if case == 'title' else file_stem
78
78
  return f"{base_url}{file_stem}"
79
79