epstein-files 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,17 +9,17 @@ from rich.table import Table
9
9
  from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.communication import Communication
12
- from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
12
+ from epstein_files.documents.imessage.text_message import TextMessage
13
13
  from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
14
- from epstein_files.util.constant.strings import AUTHOR
15
- from epstein_files.util.data import iso_timestamp, listify, sort_dict
14
+ from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
15
+ from epstein_files.util.data import days_between, days_between_str, iso_timestamp, listify, sort_dict
16
16
  from epstein_files.util.doc_cfg import Metadata, TextCfg
17
17
  from epstein_files.util.highlighted_group import get_style_for_name
18
18
  from epstein_files.util.logging import logger
19
19
  from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
20
20
 
21
- CONFIRMED_MSG = 'Found confirmed counterparty'
22
- GUESSED_MSG = 'This is probably a conversation with'
21
+ CONFIRMED_MSG = 'with confirmed counterparty'
22
+ GUESSED_MSG = 'and is probably with'
23
23
  MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)
24
24
  REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
25
25
 
@@ -39,17 +39,20 @@ class MessengerLog(Communication):
39
39
  return self.messages_by(name)[0].timestamp()
40
40
 
41
41
  def info_txt(self) -> Text | None:
42
- if self.author is None:
43
- return None
42
+ num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
43
+ txt = Text(f"(Covers {num_days_str} starting ", style='dim')
44
+ txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
44
45
 
45
- info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
46
- author_txt = Text(self.author, style=self.author_style + ' bold')
47
- txt = Text(f"({info_msg} ", style='dim').append(author_txt)
46
+ if not self.author:
47
+ txt.append('with unknown counterparty')
48
+ else:
49
+ txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
50
+ txt.append(Text(self.author, style=self.author_style + ' bold'))
48
51
 
49
52
  if self.phone_number:
50
- txt.append(f" using the phone number {self.phone_number}")
53
+ txt.append(highlighter(f" using the phone number {self.phone_number}"))
51
54
 
52
- return highlighter(txt.append(')'))
55
+ return txt.append(')')
53
56
 
54
57
  def last_message_at(self, name: str | None) -> datetime:
55
58
  return self.messages_by(name)[-1].timestamp()
@@ -82,7 +85,7 @@ class MessengerLog(Communication):
82
85
  # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
83
86
  return TextMessage(
84
87
  author=self.author if (is_phone_number or not author_str) else author_str,
85
- author_str=author_str if is_phone_number else None, # Preserve phone numbers
88
+ author_str=author_str if is_phone_number else '', # Preserve phone numbers
86
89
  id_confirmed=not self.is_attribution_uncertain(),
87
90
  text=match.group(4).strip(),
88
91
  timestamp_str=match.group(2).strip(),
@@ -90,12 +93,12 @@ class MessengerLog(Communication):
90
93
 
91
94
  def _extract_timestamp(self) -> datetime:
92
95
  for match in MSG_REGEX.finditer(self.text):
93
- timestamp_str = match.group(2).strip()
96
+ message = self._build_message(match)
94
97
 
95
98
  try:
96
- return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
99
+ return message.timestamp()
97
100
  except ValueError as e:
98
- logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
101
+ logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
99
102
 
100
103
  raise RuntimeError(f"{self}: No timestamp found!")
101
104
 
@@ -118,23 +121,22 @@ class MessengerLog(Communication):
118
121
  return sender_counts
119
122
 
120
123
  @classmethod
121
- def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
122
- authors = listify(author)
123
- return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
124
-
125
- @classmethod
126
- def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
124
+ def summary_table(cls, log_files: list['MessengerLog']) -> Table:
127
125
  """Build a table summarizing the text messages in 'imessage_logs'."""
128
- counts_table = build_table("Text Message Counts By Author")
129
- counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
126
+ author_counts = cls.count_authors(log_files)
127
+ msg_count = sum([len(log.messages) for log in log_files])
128
+
129
+ footer = f"Deanonymized {msg_count - author_counts[None]:,} of {msg_count:,} text messages in"
130
+ counts_table = build_table("Text Message Counts By Author", caption=f"{footer} {len(log_files)} files")
131
+ counts_table.add_column(AUTHOR.title(), justify='left', width=30)
130
132
  counts_table.add_column('Files', justify='right', style='white')
131
133
  counts_table.add_column("Msgs", justify='right')
132
134
  counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
133
135
  counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
134
136
  counts_table.add_column('Days', justify='right', style='dim')
135
137
 
136
- for name, count in sort_dict(cls.count_authors(imessage_logs)):
137
- logs = cls.logs_for(name, imessage_logs)
138
+ for name, count in sort_dict(author_counts):
139
+ logs = log_files if name == JEFFREY_EPSTEIN else [log for log in log_files if log.author == name]
138
140
  first_at = logs[0].first_message_at(name)
139
141
  last_at = logs[-1].first_message_at(name)
140
142
 
@@ -144,7 +146,7 @@ class MessengerLog(Communication):
144
146
  f"{count:,}",
145
147
  iso_timestamp(first_at),
146
148
  iso_timestamp(last_at),
147
- str((last_at - first_at).days + 1),
149
+ str(days_between(first_at, last_at)),
148
150
  )
149
151
 
150
152
  return counts_table
@@ -17,14 +17,15 @@ from rich.text import Text
17
17
  from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
18
18
  from epstein_files.util.constant.strings import *
19
19
  from epstein_files.util.constants import *
20
- from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
21
- from epstein_files.util.data import escape_single_quotes, remove_timezone, sort_dict, uniquify
20
+ from epstein_files.util.doc_cfg import DocCfg, Metadata
21
+ from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
22
22
  from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
23
23
  from epstein_files.util.env import args
24
24
  from epstein_files.util.highlighted_group import styled_category
25
- from epstein_files.util.rich import QUESTION_MARK_TXT, add_cols_to_table, build_table, highlighter
25
+ from epstein_files.util.rich import QUESTION_MARK_TXT, build_table, highlighter
26
26
  from epstein_files.util.logging import logger
27
27
 
28
+ FIRST_FEW_LINES = 'First Few Lines'
28
29
  MAX_DAYS_SPANNED_TO_BE_VALID = 10
29
30
  MAX_EXTRACTED_TIMESTAMPS = 100
30
31
  MIN_TIMESTAMP = datetime(2000, 1, 1)
@@ -36,94 +37,62 @@ TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
36
37
  VAST_HOUSE = 'vast house' # Michael Wolff article draft about Epstein indicator
37
38
  VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
38
39
 
39
- UNINTERESTING_CATEGORES = [
40
+ SKIP_TIMESTAMP_EXTRACT = [
41
+ PALM_BEACH_TSV,
42
+ PALM_BEACH_PROPERTY_INFO,
43
+ ]
44
+
45
+ UNINTERESTING_CATEGORIES = [
46
+ ACADEMIA,
47
+ ARTICLE,
40
48
  ARTS,
41
49
  BOOK,
50
+ CONFERENCE,
42
51
  JUNK,
52
+ POLITICS,
43
53
  SKYPE_LOG,
44
- SPEECH,
45
54
  ]
46
55
 
47
56
  # OtherFiles whose descriptions/info match these prefixes are not displayed unless --all-other-files is used
48
- UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
57
+ UNINTERESTING_PREFIXES = [
49
58
  'article about',
50
- ARTICLE_DRAFT,
51
- 'Aviation International',
52
- BBC,
53
- BLOOMBERG,
54
- 'Boston Globe',
55
59
  BROCKMAN_INC,
56
- CHINA_DAILY,
57
- CNN,
58
- 'completely redacted',
59
60
  CVRA,
60
- DAILY_MAIL,
61
- DAILY_TELEGRAPH,
62
- CVRA_LEXIS_SEARCH[0:-12], # Because date at end :(
63
61
  DERSH_GIUFFRE_TWEET,
64
- 'Financial Times',
65
- 'Forbes',
66
- 'Frontlines',
67
- 'Future Science',
68
- 'Globe and Mail',
69
62
  GORDON_GETTY,
70
63
  f"{HARVARD} Econ",
71
64
  HARVARD_POETRY,
72
- 'Inference',
73
65
  JASTA,
74
- 'JetGala',
75
- JOHN_BOLTON_PRESS_CLIPPING,
76
- 'Journal of Criminal',
77
- LA_TIMES,
78
- 'Litigation Daily',
79
- LAWRENCE_KRAUSS,
80
- LAWRENCE_KRAUSS_ASU_ORIGINS,
81
- 'MarketWatch',
82
- MARTIN_NOWAK,
83
- 'Morning News',
66
+ LEXIS_NEXIS,
84
67
  NOBEL_CHARITABLE_TRUST,
85
- 'Nautilus',
86
- 'New Yorker',
87
- NYT,
88
68
  PALM_BEACH_CODE_ENFORCEMENT,
89
- PALM_BEACH_DAILY_NEWS,
90
- PALM_BEACH_POST,
91
69
  PALM_BEACH_TSV,
92
70
  PALM_BEACH_WATER_COMMITTEE,
93
- PAUL_KRASSNER,
94
- PEGGY_SIEGAL,
95
- 'Politifact',
96
- 'Rafanelli',
97
- ROBERT_LAWRENCE_KUHN,
98
- ROBERT_TRIVERS,
99
- 'SCMP',
100
- 'SciencExpress',
101
- 'Scowcroft',
102
- SHIMON_POST_ARTICLE,
103
- SINGLE_PAGE,
104
- STACEY_PLASKETT,
105
- 'Tatler',
106
- TERJE_ROD_LARSEN,
107
- TEXT_OF_US_LAW,
108
- TRANSLATION,
109
71
  TWEET,
110
- REAL_DEAL_ARTICLE,
111
- TRUMP_DISCLOSURES,
112
- UBS_CIO_REPORT,
113
72
  UN_GENERAL_ASSEMBLY,
114
- 'U.S. News',
115
73
  'US Office',
116
- 'Vanity Fair',
117
- VI_DAILY_NEWS,
118
- WAPO,
74
+ ]
75
+
76
+ INTERESTING_AUTHORS = [
77
+ EDWARD_JAY_EPSTEIN,
78
+ EHUD_BARAK,
79
+ JOI_ITO,
80
+ NOAM_CHOMSKY,
81
+ MICHAEL_WOLFF,
82
+ SVETLANA_POZHIDAEVA,
119
83
  ]
120
84
 
121
85
 
122
86
  @dataclass
123
87
  class OtherFile(Document):
124
- """File that is not an email, an iMessage log, or JSON data."""
88
+ """
89
+ File that is not an email, an iMessage log, or JSON data.
125
90
 
126
- include_description_in_summary_panel: ClassVar[bool] = True
91
+ Attributes:
92
+ was_timestamp_extracted (bool): True if the timestamp was programmatically extracted (and could be wrong)
93
+ """
94
+ was_timestamp_extracted: bool = False
95
+ include_description_in_summary_panel: ClassVar[bool] = True # Class var for logging output
127
96
 
128
97
  def __post_init__(self):
129
98
  super().__post_init__()
@@ -162,11 +131,13 @@ class OtherFile(Document):
162
131
  elif len(info_sentences) == 0:
163
132
  return True
164
133
  elif self.config:
165
- if self.config.is_interesting:
134
+ if self.config.is_interesting is not None:
135
+ return self.config.is_interesting
136
+ elif self.config.author in INTERESTING_AUTHORS:
166
137
  return True
167
138
  elif self.category() == FINANCE and self.author is not None:
168
139
  return False
169
- elif self.category() in UNINTERESTING_CATEGORES:
140
+ elif self.category() in UNINTERESTING_CATEGORIES:
170
141
  return False
171
142
 
172
143
  for prefix in UNINTERESTING_PREFIXES:
@@ -178,6 +149,10 @@ class OtherFile(Document):
178
149
  def metadata(self) -> Metadata:
179
150
  metadata = super().metadata()
180
151
  metadata['is_interesting'] = self.is_interesting()
152
+
153
+ if self.was_timestamp_extracted:
154
+ metadata['was_timestamp_extracted'] = self.was_timestamp_extracted
155
+
181
156
  return metadata
182
157
 
183
158
  def preview_text(self) -> str:
@@ -191,6 +166,8 @@ class OtherFile(Document):
191
166
  """Return configured timestamp or value extracted by scanning text with datefinder."""
192
167
  if self.config and self.config.timestamp:
193
168
  return self.config.timestamp
169
+ elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
170
+ return None
194
171
 
195
172
  timestamps: list[datetime] = []
196
173
 
@@ -214,7 +191,10 @@ class OtherFile(Document):
214
191
  self.log_top_lines(15, msg=f"No timestamps found")
215
192
 
216
193
  return None
217
- elif len(timestamps) == 1:
194
+
195
+ self.was_timestamp_extracted = True
196
+
197
+ if len(timestamps) == 1:
218
198
  return timestamps[0]
219
199
  else:
220
200
  timestamps = sorted(uniquify(timestamps), reverse=True)
@@ -222,7 +202,7 @@ class OtherFile(Document):
222
202
  return timestamps[0] # Most recent timestamp appearing in text is usually the closest
223
203
 
224
204
  def _log_extracted_timestamps_info(self, timestamps: list[datetime]) -> None:
225
- num_days_spanned = (timestamps[0] - timestamps[-1]).days
205
+ num_days_spanned = days_between(timestamps[-1], timestamps[0])
226
206
  timestamps_log_msg = f"Extracted {len(timestamps)} timestamps spanning {num_days_spanned} days{TIMESTAMP_LOG_INDENT}"
227
207
  timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
228
208
 
@@ -230,9 +210,39 @@ class OtherFile(Document):
230
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
231
211
 
232
212
  @staticmethod
233
- def build_table(files: Sequence['OtherFile']) -> Table:
213
+ def count_by_category_table(files: Sequence['OtherFile']) -> Table:
214
+ counts = defaultdict(int)
215
+ category_bytes = defaultdict(int)
216
+
217
+ for file in files:
218
+ if file.category() is None:
219
+ logger.warning(f"file {file.file_id} has no category")
220
+
221
+ counts[file.category()] += 1
222
+ category_bytes[file.category()] += file.file_size()
223
+
224
+ table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
225
+ table.columns[0].min_width = 14
226
+ table.columns[-1].style = 'dim'
227
+
228
+ for (category, count) in sort_dict(counts):
229
+ category_files = [f for f in files if f.category() == category]
230
+ known_author_count = Document.known_author_count(category_files)
231
+
232
+ table.add_row(
233
+ styled_category(category or UNKNOWN),
234
+ str(count),
235
+ str(known_author_count),
236
+ str(count - known_author_count),
237
+ file_size_to_str(category_bytes[category]),
238
+ )
239
+
240
+ return table
241
+
242
+ @staticmethod
243
+ def files_preview_table(files: Sequence['OtherFile']) -> Table:
234
244
  """Build a table of OtherFile documents."""
235
- table = build_table(None, show_lines=True)
245
+ table = build_table('Other Files Details', show_lines=True)
236
246
  table.add_column('File', justify='center', width=FILENAME_LENGTH)
237
247
  table.add_column('Date', justify='center')
238
248
  table.add_column('Size', justify='center')
@@ -240,7 +250,7 @@ class OtherFile(Document):
240
250
  table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
241
251
 
242
252
  for file in files:
243
- link_and_info = [file.external_links()]
253
+ link_and_info = [file.external_links_txt()]
244
254
  date_str = file.date_str()
245
255
 
246
256
  if file.is_duplicate():
@@ -261,33 +271,3 @@ class OtherFile(Document):
261
271
  )
262
272
 
263
273
  return table
264
-
265
- @staticmethod
266
- def count_by_category_table(files: Sequence['OtherFile']) -> Table:
267
- counts = defaultdict(int)
268
- category_bytes = defaultdict(int)
269
-
270
- for file in files:
271
- if file.category() is None:
272
- logger.warning(f"file {file.file_id} has no category")
273
-
274
- counts[file.category()] += 1
275
- category_bytes[file.category()] += file.length
276
-
277
- table = build_table('Other Files Summary')
278
- add_cols_to_table(table, ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
279
- table.columns[-1].style = 'dim'
280
-
281
- for (category, count) in sort_dict(counts):
282
- category_files = [f for f in files if f.category() == category]
283
- known_author_count = Document.known_author_count(category_files)
284
-
285
- table.add_row(
286
- styled_category(category or UNKNOWN),
287
- str(count),
288
- str(known_author_count),
289
- str(count - known_author_count),
290
- file_size_to_str(category_bytes[category]),
291
- )
292
-
293
- return table
@@ -23,14 +23,14 @@ from epstein_files.util.constant.strings import *
23
23
  from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
24
24
  epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
25
25
  from epstein_files.util.constants import *
26
- from epstein_files.util.data import dict_sets_to_lists, iso_timestamp, json_safe, listify, sort_dict
26
+ from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
27
27
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
28
28
  from epstein_files.util.env import DOCS_DIR, args, logger
29
29
  from epstein_files.util.file_helper import file_size_str
30
- from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
30
+ from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
31
31
  from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
32
- build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
33
- print_other_site_link, print_panel, print_section_header, vertically_pad)
32
+ print_all_files_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
33
+ print_panel, print_section_header, vertically_pad)
34
34
  from epstein_files.util.search_result import SearchResult
35
35
  from epstein_files.util.timer import Timer
36
36
 
@@ -72,18 +72,18 @@ class EpsteinFiles:
72
72
 
73
73
  # Read through and classify all the files
74
74
  for file_arg in self.all_files:
75
- doc_timer = Timer(decimals=4)
75
+ doc_timer = Timer(decimals=2)
76
76
  document = Document(file_arg)
77
77
  cls = document_cls(document)
78
78
 
79
- if document.length == 0:
79
+ if document.length() == 0:
80
80
  logger.warning(f"Skipping empty file: {document}]")
81
81
  continue
82
82
  elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
83
- logger.warning(f"Skipping {document.filename}...")
83
+ document.log(f"Skipping OtherFile...")
84
84
  continue
85
85
 
86
- documents.append(cls(file_arg, text=document.text))
86
+ documents.append(cls(file_arg, lines=document.lines, text=document.text))
87
87
  logger.info(str(documents[-1]))
88
88
  file_type_count[cls.__name__] += 1
89
89
 
@@ -104,16 +104,20 @@ class EpsteinFiles:
104
104
  if PICKLED_PATH.exists() and not args.overwrite_pickle:
105
105
  with gzip.open(PICKLED_PATH, 'rb') as file:
106
106
  epstein_files = pickle.load(file)
107
- timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
108
107
  epstein_files.timer = timer
108
+ timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
109
+ epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
109
110
  return epstein_files
110
111
 
111
112
  logger.warning(f"Building new cache file, this will take a few minutes...")
112
113
  epstein_files = EpsteinFiles(timer=timer)
113
114
 
114
- with gzip.open(PICKLED_PATH, 'wb') as file:
115
- pickle.dump(epstein_files, file)
116
- logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
115
+ if args.skip_other_files:
116
+ logger.warning(f"Not writing pickled data because --skip-other-files")
117
+ else:
118
+ with gzip.open(PICKLED_PATH, 'wb') as file:
119
+ pickle.dump(epstein_files, file)
120
+ logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
117
121
 
118
122
  timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
119
123
  return epstein_files
@@ -127,9 +131,6 @@ class EpsteinFiles:
127
131
  names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
128
132
  return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
129
133
 
130
- def attributed_email_count(self) -> int:
131
- return sum([i for author, i in self.email_author_counts.items() if author != UNKNOWN])
132
-
133
134
  def docs_matching(
134
135
  self,
135
136
  pattern: re.Pattern | str,
@@ -156,7 +157,7 @@ class EpsteinFiles:
156
157
  return self.emails_for(author)[-1].timestamp
157
158
 
158
159
  def email_conversation_length_in_days(self, author: str | None) -> int:
159
- return (self.last_email_at(author) - self.earliest_email_at(author)).days + 1
160
+ return days_between(self.earliest_email_at(author), self.last_email_at(author))
160
161
 
161
162
  def email_signature_substitution_counts(self) -> dict[str, int]:
162
163
  """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -172,7 +173,7 @@ class EpsteinFiles:
172
173
  return sorted(list(self.unknown_recipient_email_ids))
173
174
 
174
175
  def emails_by(self, author: str | None) -> list[Email]:
175
- return [e for e in self.emails if e.author == author]
176
+ return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
176
177
 
177
178
  def emails_for(self, author: str | None) -> list[Email]:
178
179
  """Returns emails to or from a given 'author' sorted chronologically."""
@@ -185,9 +186,11 @@ class EpsteinFiles:
185
186
 
186
187
  def emails_to(self, author: str | None) -> list[Email]:
187
188
  if author is None:
188
- return [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
+ emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
190
  else:
190
- return [e for e in self.emails if author in e.recipients]
191
+ emails = [e for e in self.emails if author in e.recipients]
192
+
193
+ return Document.sort_by_timestamp(emails)
191
194
 
192
195
  def get_documents_by_id(self, file_ids: str | list[str]) -> list[Document]:
193
196
  file_ids = listify(file_ids)
@@ -198,20 +201,29 @@ class EpsteinFiles:
198
201
 
199
202
  return docs
200
203
 
201
- def imessage_logs_for(self, author: str | None | list[str | None]) -> Sequence[MessengerLog]:
202
- return MessengerLog.logs_for(author, self.imessage_logs)
203
-
204
204
  def json_metadata(self) -> str:
205
205
  """Create a JSON string containing metadata for all the files."""
206
206
  metadata = {
207
- Email.__name__: _sorted_metadata(self.emails),
208
- JsonFile.__name__: _sorted_metadata(self.json_files),
209
- MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
210
- OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
207
+ 'files': {
208
+ Email.__name__: _sorted_metadata(self.emails),
209
+ JsonFile.__name__: _sorted_metadata(self.json_files),
210
+ MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
211
+ OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
212
+ },
213
+ 'people': {
214
+ name: highlighted_group.get_info(name)
215
+ for highlighted_group in HIGHLIGHTED_NAMES
216
+ if isinstance(highlighted_group, HighlightedNames)
217
+ for name, description in highlighted_group.emailers.items()
218
+ if description
219
+ }
211
220
  }
212
221
 
213
222
  return json.dumps(metadata, indent=4, sort_keys=True)
214
223
 
224
+ def non_duplicate_emails(self) -> list[Email]:
225
+ return [email for email in self.emails if not email.is_duplicate()]
226
+
215
227
  def non_json_other_files(self) -> list[OtherFile]:
216
228
  return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
217
229
 
@@ -230,8 +242,8 @@ class EpsteinFiles:
230
242
  f"{len([d for d in docs if d.is_duplicate()])}",
231
243
  )
232
244
 
233
- add_row('iMessage Logs', self.imessage_logs)
234
245
  add_row('Emails', self.emails)
246
+ add_row('iMessage Logs', self.imessage_logs)
235
247
  add_row('JSON Data', self.json_files)
236
248
  add_row('Other', self.non_json_other_files())
237
249
  console.print(Align.center(table))
@@ -271,12 +283,13 @@ class EpsteinFiles:
271
283
  console.print(Align.center(Email.build_table(emails, author)), '\n')
272
284
 
273
285
  def print_email_device_info(self) -> None:
274
- print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
286
+ print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
275
287
  console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
276
288
  console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
277
289
 
278
- def print_emailer_counts_table(self) -> None:
279
- footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
290
+ def table_of_emailers(self) -> Table:
291
+ attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
292
+ footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
280
293
  counts_table = build_table("Email Counts", caption=footer)
281
294
 
282
295
  add_cols_to_table(counts_table, [
@@ -308,49 +321,17 @@ class EpsteinFiles:
308
321
  str(self.email_recipient_counts[name]),
309
322
  emails[0].timestamp_without_seconds(),
310
323
  emails[-1].timestamp_without_seconds(),
311
- '' if name is None else link_text_obj(search_jmail_url(name), JMAIL),
312
- '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_media_person_url(name), 'eMedia'),
313
- '' if not is_ok_for_epstein_web(name) else link_text_obj(epstein_web_person_url(name), 'eWeb'),
314
- '' if name is None else link_text_obj(search_twitter_url(name), 'search X'),
324
+ link_text_obj(search_jmail_url(name), JMAIL) if name else '',
325
+ link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
326
+ link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
327
+ link_text_obj(search_twitter_url(name), 'search X') if name else '',
315
328
  )
316
329
 
317
- console.print(vertically_pad(counts_table, 2))
318
-
319
- def print_imessage_summary(self) -> None:
320
- """Print summary table and stats for text messages."""
321
- console.print(MessengerLog.summary_table(self.imessage_logs))
322
- text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
323
- text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
324
- console.print(text_summary_msg)
325
- imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
326
- console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
327
-
328
- def print_other_files_table(self) -> list[OtherFile]:
329
- """Returns the OtherFile objects that were interesting enough to print."""
330
- interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
331
- header_pfx = '' if args.all_other_files else 'Selected '
332
- print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
333
-
334
- if not args.all_other_files:
335
- print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
336
- print_other_site_link(False)
337
- console.line(2)
338
-
339
- console.print(OtherFile.build_table(interesting_files))
340
- console.print(Padding(OtherFile.count_by_category_table(interesting_files), (2, 0, 2, 2)))
341
- skipped_file_count = len(self.other_files) - len(interesting_files)
342
-
343
- if skipped_file_count > 0:
344
- logger.warning(f"Skipped {skipped_file_count} uninteresting other files...")
345
-
346
- return interesting_files
330
+ return counts_table
347
331
 
348
332
  def _tally_email_data(self) -> None:
349
333
  """Tally up summary info about Email objects."""
350
- for email in self.emails:
351
- if email.is_duplicate():
352
- continue
353
-
334
+ for email in self.non_duplicate_emails():
354
335
  self.email_author_counts[email.author] += 1
355
336
 
356
337
  if len(email.recipients) == 0:
@@ -380,7 +361,7 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
380
361
  def document_cls(doc: Document) -> Type[Document]:
381
362
  search_area = doc.text[0:5000] # Limit search area to avoid pointless scans of huge files
382
363
 
383
- if doc.length == 0:
364
+ if doc.length() == 0:
384
365
  return Document
385
366
  if doc.text[0] == '{':
386
367
  return JsonFile
@@ -187,9 +187,11 @@ VIRGINIA_GIUFFRE = 'Virginia Giuffre'
187
187
 
188
188
  # Organizations
189
189
  BOFA = 'BofA'
190
+ BOFA_MERRILL = f'{BOFA} / Merrill Lynch'
190
191
  CNN = 'CNN'
191
192
  DEUTSCHE_BANK = 'Deutsche Bank'
192
193
  ELECTRON_CAPITAL_PARTNERS = 'Electron Capital Partners'
194
+ EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
193
195
  GOLDMAN_SACHS = 'Goldman Sachs'
194
196
  GOLDMAN_INVESTMENT_MGMT = f'{GOLDMAN_SACHS} Investment Management Division'
195
197
  HARVARD = 'Harvard'
@@ -238,7 +240,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
238
240
  ian isaac isaacson
239
241
  james jamie jane janet jason jen jim joe johnson jones josh julie justin
240
242
  karl kate kathy kelly kim kruger kyle
241
- laurie leo leonard lenny leslie lieberman louis lynch lynn
243
+ laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
242
244
  marcus marianne matt matthew melissa michele michelle moore moscowitz
243
245
  nancy nicole nussbaum
244
246
  owen