epstein-files 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epstein_files/__init__.py CHANGED
@@ -20,8 +20,8 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
  from epstein_files.util.env import args, specified_names
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
  from epstein_files.util.logging import logger
- from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
- print_text_messages, write_urls)
+ from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
+ print_text_messages, write_json_metadata, write_urls)
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
  from epstein_files.util.timer import Timer
  from epstein_files.util.word_count import write_word_counts_html
@@ -37,7 +37,7 @@ def generate_html() -> None:
  epstein_files = EpsteinFiles.get_files(timer)

  if args.json_metadata:
- print_json_metadata(epstein_files)
+ write_json_metadata(epstein_files)
  exit()
  elif args.json_files:
  print_json_files(epstein_files)
@@ -34,9 +34,9 @@ class Communication(Document):
  def is_attribution_uncertain(self) -> bool:
  return bool(self.config and self.config.is_attribution_uncertain)

- def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
+ def external_links(self, _style: str = '', include_alt_links: bool = True) -> Text:
  """Overrides super() method to apply self.author_style."""
- return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
+ return super().external_links(self.author_style, include_alt_links=include_alt_links)

  def summary(self) -> Text:
  return self._summary().append(CLOSE_PROPERTIES_CHAR)
@@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass, field
  from datetime import datetime
  from pathlib import Path
  from subprocess import run
- from typing import ClassVar, Sequence, TypeVar
+ from typing import Callable, ClassVar, Sequence, TypeVar

  from rich.console import Console, ConsoleOptions, Group, RenderResult
  from rich.padding import Padding
@@ -16,8 +16,8 @@ from epstein_files.util.constant.names import *
  from epstein_files.util.constant.strings import *
  from epstein_files.util.constant.urls import *
  from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
- from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
- from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
+ from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_zero_time_from_timestamp_str, without_falsey
+ from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
  from epstein_files.util.env import DOCS_DIR, args
  from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
  file_size_str, is_local_extract_file)
@@ -31,10 +31,8 @@ INFO_INDENT = 2
  INFO_PADDING = (0, 0, 0, INFO_INDENT)
  MAX_TOP_LINES_LEN = 4000 # Only for logging
  MIN_DOCUMENT_ID = 10477
- LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
  WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)

- EXTRACTED_FROM = 'Extracted from'
  MIN_TIMESTAMP = datetime(1991, 1, 1)
  MID_TIMESTAMP = datetime(2007, 1, 1)
  MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -96,15 +94,9 @@ class Document:
  def __post_init__(self):
  self.filename = self.file_path.name
  self.file_id = extract_file_id(self.filename)
- self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
+ self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))

- if self.is_local_extract_file():
- self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
- extracted_from_doc_id = self.url_slug.split('_')[-1]
-
- if extracted_from_doc_id in ALL_FILE_CONFIGS:
- self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
- else:
+ if 'url_slug' not in vars(self):
  self.url_slug = self.file_path.stem

  self._set_computed_fields(text=self.text or self._load_file())
@@ -122,28 +114,51 @@ class Document:

  def duplicate_file_txt(self) -> Text:
  """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
- if not self.config or not self.config.dupe_of_id:
+ if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
  raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")

  txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
- txt.append(f" because it's {self.config.duplicate_reason()} ")
+ txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
  return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))

  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
- """Create a Text obj link to this document on epsteinify.com."""
- return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+ return self.external_url(epsteinify_doc_url, style, link_txt)

  def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
- """Create a Text obj link to this document on epstein.media."""
- return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+ return self.external_url(epstein_media_doc_url, style, link_txt)

  def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
- """Create a Text obj link to this document on EpsteinWeb."""
- return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
+ return self.external_url(epstein_web_doc_url, style, link_txt)
+
+ def rollcall_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+ return self.external_url(rollcall_doc_url, style, link_txt)
+
+ def external_url(self, fxn: Callable[[str], str], style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
+ return link_text_obj(fxn(self.url_slug), link_txt or self.file_path.stem, style)
+
+ def external_links(self, style: str = '', include_alt_links: bool = False) -> Text:
+ """Returns colored links to epstein.media and and epsteinweb in a Text object."""
+ txt = Text('', style='white' if include_alt_links else ARCHIVE_LINK_COLOR)
+
+ if args.use_epstein_web:
+ txt.append(self.epstein_web_link(style=style))
+ alt_link = self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)
+ else:
+ txt.append(self.epstein_media_link(style=style))
+ alt_link = self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)
+
+ if include_alt_links:
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
+ txt.append(' (').append(alt_link).append(')')
+
+ if self._class_name() == 'Email':
+ txt.append(' (').append(self.rollcall_link(style='white dim', link_txt=ROLLCALL)).append(')')
+
+ return txt

  def file_info_panel(self) -> Group:
  """Panel with filename linking to raw file plus any additional info about the file."""
- panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
+ panel = Panel(self.external_links(include_alt_links=True), border_style=self._border_style(), expand=False)
  padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
  return Group(*([panel] + padded_info))
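Note: the hunk above collapses three near-identical per-site link builders into one external_url() helper that takes the site's URL-builder function as an argument, adds a RollCall link for emails, and renames raw_document_link_txt() to external_links(). A minimal standalone sketch of the same delegation pattern, for illustration only (the simplified builders and the slug format below are assumptions, not the package's code):

from typing import Callable

# Hypothetical stand-ins for epstein_media_doc_url / rollcall_doc_url.
media_url = lambda stem: f"https://epstein.media/files/{stem.lower()}"
rollcall_url = lambda stem: f"https://rollcall.com/factbase/epstein/file?id={stem.title()}"

class Doc:
    def __init__(self, url_slug: str):
        self.url_slug = url_slug

    def external_url(self, fxn: Callable[[str], str]) -> str:
        # One generic helper; each site-specific method just passes in its URL builder.
        return fxn(self.url_slug)

    def epstein_media_link(self) -> str:
        return self.external_url(media_url)

    def rollcall_link(self) -> str:
        return self.external_url(rollcall_url)

doc = Doc("doj-ogr-00012345")  # assumed slug format
print(doc.epstein_media_link())
print(doc.rollcall_link())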

@@ -155,12 +170,10 @@ class Document:

  def info(self) -> list[Text]:
  """0 to 2 sentences containing the info_txt() as well as any configured description."""
- sentences = [
+ return without_falsey([
  self.info_txt(),
  highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
- ]
-
- return without_falsey(sentences)
+ ])

  def info_txt(self) -> Text | None:
  """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
@@ -197,9 +210,9 @@ class Document:

  if self.is_local_extract_file():
  metadata['extracted_file'] = {
- 'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
- 'extracted_from_file': self.url_slug + '.txt',
- 'extracted_file_url': extracted_file_url(self.filename),
+ 'explanation': 'Manually extracted from one of the court filings.',
+ 'extracted_from': self.url_slug + '.txt',
+ 'url': extracted_file_url(self.filename),
  }

  return metadata
@@ -208,25 +221,6 @@ class Document:
  with open(self.file_path) as f:
  return f.read()

- def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
- """Returns colored links to epstein.media and and epsteinweb in a Text object."""
- txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
-
- if args.use_epstein_web:
- txt.append(self.epstein_web_link(style=style))
-
- if include_alt_link:
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
- txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
- else:
- txt.append(self.epstein_media_link(style=style))
-
- if include_alt_link:
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
- txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
-
- return txt
-
  def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
  """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
  for k, v in repairs.items():
@@ -253,7 +247,7 @@ class Document:
  txt.append(f" {self.url_slug}", style=FILENAME_STYLE)

  if self.timestamp:
- timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
+ timestamp_str = remove_zero_time_from_timestamp_str(self.timestamp).replace('T', ' ')
  txt.append(' (', style=SYMBOL_STYLE)
  txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)

@@ -327,26 +321,6 @@ class Document:
  self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
  self.num_lines = len(self.lines)

- def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
- """Copy info from original config for file this document was extracted from."""
- if self.config:
- self.warn(f"Merging existing config with config for file this document was extracted from")
- else:
- self.config = EmailCfg(id=self.file_id)
-
- extracted_from_description = doc_cfg.complete_description()
-
- if extracted_from_description:
- extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
-
- if self.config.description:
- self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
-
- self.config.description = extracted_description
-
- self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
- self.warn(f"Constructed local config\n{self.config}")
-
  def _write_clean_text(self, output_path: Path) -> None:
  """Write self.text to 'output_path'. Used only for diffing files."""
  if output_path.exists():
@@ -1,5 +1,6 @@
  import logging
  import re
+ from copy import deepcopy
  from dataclasses import asdict, dataclass, field
  from datetime import datetime
  from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
  flatten, remove_timezone, uniquify)
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
+ from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
  from epstein_files.util.highlighted_group import get_style_for_name
  from epstein_files.util.logging import logger
  from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
  BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
  DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
  TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
+ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")

  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
+ APPEARS_IN = 'Appears in'
  MAX_CHARS_TO_PRINT = 4000
  MAX_NUM_HEADER_LINES = 14
  MAX_QUOTED_REPLIES = 2
@@ -128,7 +132,6 @@ JUNK_EMAILERS = [
  'How To Academy',
  'Jokeland',
  JP_MORGAN_USGIO,
- 'Saved by Internet Explorer 11',
  ]

  MAILING_LISTS = [
@@ -248,6 +251,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i

  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
  USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
+ 'Alan Dlugash', # CCed with Richard Kahn
  'Alan Rogers', # Random CC
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
  'BS Stern', # A random fwd of email we have
@@ -264,14 +268,14 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
  'Lyn Fontanilla', # Random CC
  'Mark Albert', # Random CC
  'Matthew Schafer', # Random CC
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
  'Michael Simmons', # Random CC
  'Nancy Portland', # Lawrence Krauss CC
  'Oliver Goodenough', # Robert Trivers CC
- 'Owen Blicksilver', # Landon Thomas CC
  'Peter Aldhous', # Lawrence Krauss CC
  'Sam Harris', # Lawrence Krauss CC
  SAMUEL_LEFF, # Random CC
- "Saved by Internet Explorer 11",
  'Sean T Lehane', # Random CC
  'Stephen Rubin', # Random CC
  'Tim Kane', # Random CC
@@ -318,6 +322,17 @@ class Email(Communication):
  rewritten_header_ids: ClassVar[set[str]] = set([])

  def __post_init__(self):
+ self.filename = self.file_path.name
+ self.file_id = extract_file_id(self.filename)
+
+ # Special handling for copying properties out of the config for the document this one was extracted from
+ if self.is_local_extract_file():
+ self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
+ extracted_from_doc_id = self.url_slug.split('_')[-1]
+
+ if extracted_from_doc_id in ALL_FILE_CONFIGS:
+ self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
+
  super().__post_init__()

  try:
@@ -340,8 +355,12 @@ class Email(Communication):
  self.actual_text = self._actual_text()
  self.sent_from_device = self._sent_from_device()

+ def attachments(self) -> list[str]:
+ return (self.header.attachments or '').split(';')
+
  def info_txt(self) -> Text:
- txt = Text("OCR text of email from ", style='grey46').append(self.author_txt).append(' to ')
+ email_type = 'fwded article' if self.is_fwded_article() else 'email'
+ txt = Text(f"OCR text of {email_type} from ", style='grey46').append(self.author_txt).append(' to ')
  return txt.append(self._recipients_txt()).append(highlighter(f" probably sent at {self.timestamp}"))

  def is_fwded_article(self) -> bool:
@@ -566,11 +585,11 @@ class Email(Communication):
  self._merge_lines(2, 5)
  elif self.file_id in ['029498', '031428']:
  self._merge_lines(2, 4)
- elif self.file_id in ['029976', '023067']:
+ elif self.file_id in ['029976', '023067', '033576']:
  self._merge_lines(3) # Merge 4th and 5th rows
  elif self.file_id in '026609 029402 032405 022695'.split():
  self._merge_lines(4) # Merge 5th and 6th rows
- elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
+ elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
  self._merge_lines(2, 4)
  elif self.file_id in ['029154', '029163']:
  self._merge_lines(2, 5)
@@ -591,6 +610,8 @@ class Email(Communication):
  self._merge_lines(7, 9)
  elif self.file_id == '030299':
  self._merge_lines(7, 10)
+ elif self.file_id in ['022673', '022684']:
+ self._merge_lines(9)
  elif self.file_id == '014860':
  self._merge_lines(3)
  self._merge_lines(4)
@@ -649,6 +670,27 @@ class Email(Communication):
  sent_from = sent_from_match.group(0)
  return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from

+ def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
+ """Copy info from original config for file this document was extracted from."""
+ if self.file_id in ALL_FILE_CONFIGS:
+ self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
+ self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
+ else:
+ self.config = EmailCfg(id=self.file_id)
+
+ extracted_from_description = extracted_from_doc_cfg.complete_description()
+
+ if extracted_from_description:
+ extracted_description = f"{APPEARS_IN} {extracted_from_description}"
+
+ if self.config.description:
+ self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
+
+ self.config.description = extracted_description
+
+ self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
+ self.warn(f"Constructed synthetic config: {self.config}")
+
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
  logger.debug(f"Printing '{self.filename}'...")
  yield self.file_info_panel()
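Note: the extract handling moved into Email.__post_init__ above derives everything from the filename: a trailing "_<digit>" marks a locally extracted file, stripping it yields the URL slug, and the last underscore-delimited chunk of that slug is the ID of the parent document whose config gets copied. A minimal sketch of that derivation (illustrative only; the stem passed in is an assumption about what file_stem_for_id() returns):

import re

LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")  # trailing "_<n>" marks a locally extracted file

def parent_doc_id(file_stem: str) -> str:
    """Mirror of the slug/parent-ID derivation in Email.__post_init__ above."""
    url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem)  # drop the "_<n>" extract suffix
    return url_slug.split('_')[-1]                     # last chunk is the parent document ID

# Assumed stem format for a file extracted from document 031000:
print(parent_doc_id("HOUSE_OVERSIGHT_031000_1"))  # -> "031000"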
@@ -45,7 +45,7 @@ class TextMessage:
  self.author_str = self.author_str or self.author

  if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
- self.author_str = self.author + ' (?)'
+ self.author_str += ' (?)'

  def timestamp(self) -> datetime:
  return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
@@ -39,7 +39,7 @@ class JsonFile(OtherFile):
  return JSON

  def info_txt(self) -> Text | None:
- return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
+ return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)

  def is_interesting(self):
  return False
@@ -16,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
  from epstein_files.util.doc_cfg import Metadata, TextCfg
  from epstein_files.util.highlighted_group import get_style_for_name
  from epstein_files.util.logging import logger
- from epstein_files.util.rich import build_table, highlighter
+ from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter

  CONFIRMED_MSG = 'Found confirmed counterparty'
  GUESSED_MSG = 'This is probably a conversation with'
@@ -76,7 +76,7 @@ class MessengerLog(Communication):
  is_phone_number = author_str.startswith('+')

  if is_phone_number:
- logger.warning(f"{self.summary()} Found phone number: {author_str}")
+ logger.info(f"{self.summary()} Found phone number: {author_str}")
  self.phone_number = author_str

  # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
@@ -130,7 +130,7 @@ class MessengerLog(Communication):
  counts_table.add_column('Files', justify='right', style='white')
  counts_table.add_column("Msgs", justify='right')
  counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
- counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
+ counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
  counts_table.add_column('Days', justify='right', style='dim')

  for name, count in sort_dict(cls.count_authors(imessage_logs)):
@@ -107,7 +107,7 @@ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
  TEXT_OF_US_LAW,
  TRANSLATION,
  TWEET,
- THE_REAL_DEAL_ARTICLE,
+ REAL_DEAL_ARTICLE,
  TRUMP_DISCLOSURES,
  UBS_CIO_REPORT,
  UN_GENERAL_ASSEMBLY,
@@ -240,7 +240,7 @@ class OtherFile(Document):
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')

  for file in files:
- link_and_info = [file.raw_document_link_txt()]
+ link_and_info = [file.external_links()]
  date_str = file.date_str()

  if file.is_duplicate():
@@ -23,12 +23,12 @@ from epstein_files.util.constant.strings import *
  from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
  epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
  from epstein_files.util.constants import *
- from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
+ from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
  from epstein_files.util.env import DOCS_DIR, args, logger
  from epstein_files.util.file_helper import file_size_str
  from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
- from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
+ from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
  build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
  print_other_site_link, print_panel, print_section_header, vertically_pad)
  from epstein_files.util.search_result import SearchResult
@@ -278,25 +278,40 @@ class EpsteinFiles:
  def print_emailer_counts_table(self) -> None:
  footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
  counts_table = build_table("Email Counts", caption=footer)
- add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
+
+ add_cols_to_table(counts_table, [
+ 'Name',
+ 'Num',
+ 'Sent',
+ "Recv",
+ {'name': 'First', 'highlight': True},
+ {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
+ JMAIL,
+ 'eMedia',
+ 'eWeb',
+ 'Twitter',
+ ])

  emailer_counts = {
  emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
  for emailer in self.all_emailers(True)
  }

- for p, count in sort_dict(emailer_counts):
- style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)
+ for name, count in sort_dict(emailer_counts):
+ style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
+ emails = self.emails_for(name)

  counts_table.add_row(
- Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
+ Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
  str(count),
- str(self.email_author_counts[p]),
- str(self.email_recipient_counts[p]),
- '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
- '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
- '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
- '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
+ str(self.email_author_counts[name]),
+ str(self.email_recipient_counts[name]),
+ emails[0].timestamp_without_seconds(),
+ emails[-1].timestamp_without_seconds(),
+ '' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
+ '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
+ '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
+ '' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
  )

  console.print(vertically_pad(counts_table, 2))
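Note: in the email counts table above, add_cols_to_table() now receives a mix of plain strings and dicts (for the new 'First'/'Last' columns). A hypothetical reading of that helper, assuming dicts simply forward extra keyword arguments to rich.table.Table.add_column() (this is a guess at the helper's behavior, not its actual implementation):

from rich.table import Table

def add_cols_to_table(table: Table, cols) -> None:
    for col in cols:
        if isinstance(col, dict):
            spec = dict(col)
            # 'name' becomes the header; remaining keys (style, highlight, ...) pass through.
            table.add_column(spec.pop('name'), **spec)
        else:
            table.add_column(col)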
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
  CECILIA_STEEN = 'Cecilia Steen'
  CELINA_DUBIN = 'Celina Dubin'
  CHRISTINA_GALBRAITH = 'Christina Galbraith' # Works with Tyler Shears on reputation stuff
+ DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
  DANIEL_SABBA = 'Daniel Sabba'
  DANIEL_SIAD = 'Daniel Siad'
  DANNY_FROST = 'Danny Frost'
@@ -143,7 +144,7 @@ REID_HOFFMAN = 'Reid Hoffman'
  REID_WEINGARTEN = 'Reid Weingarten'
  RENATA_BOLOTOVA = 'Renata Bolotova'
  RICHARD_KAHN = 'Richard Kahn'
- ROBERT_D_CRITTON = 'Robert D. Critton Jr.'
+ ROBERT_D_CRITTON_JR = 'Robert D. Critton Jr.'
  ROBERT_LAWRENCE_KUHN = 'Robert Lawrence Kuhn'
  ROBERT_TRIVERS = 'Robert Trivers'
  ROGER_SCHANK = 'Roger Schank'
@@ -178,6 +179,7 @@ JARED_KUSHNER = 'Jared Kushner'
  JULIE_K_BROWN = 'Julie K. Brown'
  KARIM_SADJADPOUR = 'KARIM SADJADPOUR'.title()
  MICHAEL_J_BOCCIO = 'Michael J. Boccio'
+ NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
  PAUL_G_CASSELL = 'Paul G. Cassell'
  RUDY_GIULIANI = 'Rudy Giuliani'
  TULSI_GABBARD = 'Tulsi Gabbard'
@@ -226,22 +228,23 @@ NAMES_TO_NOT_HIGHLIGHT: list[str] = [name.lower() for name in [
  # Names to color white in the word counts
  OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
- baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bruno bryant burton
+ baldwin barack ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
  chapman charles charlie christopher clint cohen colin collins conway
- davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+ danny davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
  edmond elizabeth emily entwistle erik evelyn
- ferguson flachsbart francis franco frank
+ ferguson flachsbart francis franco frank frost
  gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
- hancock harold harrison harry helen hirsch hofstadter horowitz hussein
+ hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
  ian isaac isaacson
- jamie jane janet jason jen jim joe johnson jones josh julie justin
+ james jamie jane janet jason jen jim joe johnson jones josh julie justin
  karl kate kathy kelly kim kruger kyle
- leo leonard lenny leslie lieberman louis lynch lynn
+ laurie leo leonard lenny leslie lieberman louis lynch lynn
  marcus marianne matt matthew melissa michele michelle moore moscowitz
- nicole nussbaum
+ nancy nicole nussbaum
+ owen
  paulson philippe
  rafael ray richard richardson rob robin ron rubin rudolph ryan
- sara sarah seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
+ sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
  ted theresa thompson tiffany timothy tony
  valeria
  walter warren weinstein weiss william
@@ -20,7 +20,7 @@ POLITICS = 'politics'
  PROPERTY = 'property'
  PUBLICIST = 'publicist'
  REPUTATION = 'reputation'
- SKYPE_LOG= 'skype log'
+ SKYPE_LOG = 'Skype log'
  SOCIAL = 'social'
  SPEECH = 'speech'

@@ -39,6 +39,7 @@ MIAMI_HERALD = 'Miami Herald'
  NYT = "New York Times"
  PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
  PALM_BEACH_POST = f'{PALM_BEACH} Post'
+ SHIMON_POST = 'The Shimon Post'
  THE_REAL_DEAL = 'The Real Deal'
  WAPO = 'WaPo'
  VI_DAILY_NEWS = f'{VIRGIN_ISLANDS} Daily News'
@@ -13,11 +13,12 @@ ARCHIVE_LINK_COLOR = 'slate_blue3'
  TEXT_LINK = 'text_link'

  # External site names
- ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb']
+ ExternalSite = Literal['epstein.media', 'epsteinify', 'EpsteinWeb', 'RollCall']
  EPSTEIN_MEDIA = 'epstein.media'
  EPSTEIN_WEB = 'EpsteinWeb'
  EPSTEINIFY = 'epsteinify'
  JMAIL = 'Jmail'
+ ROLLCALL = 'RollCall'

  GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
  GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
@@ -41,9 +42,10 @@ EPSTEIN_WEB_URL = 'https://epsteinweb.org'
  JMAIL_URL = 'https://jmail.world'

  DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
- EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files",
- EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images',
- EPSTEINIFY: f"{EPSTEINIFY_URL}/document",
+ EPSTEIN_MEDIA: f"{EPSTEIN_MEDIA_URL}/files/",
+ EPSTEIN_WEB: f'{EPSTEIN_WEB_URL}/wp-content/uploads/epstein_evidence/images/',
+ EPSTEINIFY: f"{EPSTEINIFY_URL}/document/",
+ ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
  }


@@ -53,7 +55,7 @@ epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_ma
  epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
  epsteinify_name_url = lambda name: f"{EPSTEINIFY_URL}/?name={urllib.parse.quote(name)}"

- epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, True)
+ epstein_media_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEIN_MEDIA], file_stem, 'lower')
  epstein_media_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEIN_MEDIA, filename_or_id, style)
  epstein_media_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(epstein_media_doc_link_markup(filename_or_id, style))
  epstein_media_person_url = lambda person: f"{EPSTEIN_MEDIA_URL}/people/{parameterize(person)}"
@@ -62,16 +64,19 @@ epstein_web_doc_url = lambda file_stem: f"{DOC_LINK_BASE_URLS[EPSTEIN_WEB]}/{fil
  epstein_web_person_url = lambda person: f"{EPSTEIN_WEB_URL}/{parameterize(person)}"
  epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.quote(s)}&ewmfilepp=20"

+ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
+
  search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
  search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"


- def build_doc_url(base_url: str, filename_or_id: int | str, lowercase: bool = False) -> str:
+ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower', 'title'] | None = None) -> str:
  file_stem = coerce_file_stem(filename_or_id)
- file_stem = file_stem.lower() if lowercase else file_stem
- return f"{base_url}/{file_stem}"
+ file_stem = file_stem.lower() if case == 'lower' else file_stem
+ file_stem = file_stem.title() if case == 'title' else file_stem
+ return f"{base_url}{file_stem}"


  def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
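
Note: the last hunk reworks build_doc_url(): the boolean lowercase flag becomes a case parameter ('lower' or 'title'), and each base URL in DOC_LINK_BASE_URLS now ends with its own separator ('/' or '?id='), which is what lets the query-string RollCall URL share the same helper. A minimal sketch of the combined behavior (illustrative only; coerce_file_stem() is skipped and the stem format is an assumption):

from typing import Literal

def build_doc_url(base_url: str, file_stem: str, case: Literal['lower', 'title'] | None = None) -> str:
    # Base URLs carry their own trailing separator, so the stem is appended directly.
    if case == 'lower':
        file_stem = file_stem.lower()
    elif case == 'title':
        file_stem = file_stem.title()
    return f"{base_url}{file_stem}"

print(build_doc_url('https://epstein.media/files/', 'DOJ-OGR-00012345', 'lower'))
# -> https://epstein.media/files/doj-ogr-00012345
print(build_doc_url('https://rollcall.com/factbase/epstein/file?id=', 'DOJ-OGR-00012345', 'title'))
# -> https://rollcall.com/factbase/epstein/file?id=Doj-Ogr-00012345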