epstein-files 1.0.12__py3-none-any.whl → 1.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,17 +9,17 @@ from rich.table import Table
 from rich.text import Text
 
 from epstein_files.documents.communication import Communication
-from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
+from epstein_files.documents.imessage.text_message import TextMessage
 from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
-from epstein_files.util.constant.strings import AUTHOR
-from epstein_files.util.data import iso_timestamp, listify, sort_dict
+from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
+from epstein_files.util.data import days_between, days_between_str, iso_timestamp, listify, sort_dict
 from epstein_files.util.doc_cfg import Metadata, TextCfg
 from epstein_files.util.highlighted_group import get_style_for_name
 from epstein_files.util.logging import logger
-from epstein_files.util.rich import build_table, highlighter
+from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
 
-CONFIRMED_MSG = 'Found confirmed counterparty'
-GUESSED_MSG = 'This is probably a conversation with'
+CONFIRMED_MSG = 'with confirmed counterparty'
+GUESSED_MSG = 'and is probably with'
 MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)
 REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
 
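Note: MSG_REGEX is unchanged in this release, but the _extract_timestamp() rewrite below now routes every regex match through _build_message(). A quick standalone check of what the groups capture (the sample log text is invented for illustration):

    import re

    MSG_REGEX = re.compile(r'Sender:(.*?)\nTime:(.*? (AM|PM)).*?Message:(.*?)\s*?((?=(\nSender)|\Z))', re.DOTALL)

    sample = (
        "Sender: +1 (555) 010-0000\n"
        "Time: 01/03/2019 9:41:07 AM\n"
        "Message: Running late, be there soon\n"
        "Sender: J\n"
        "Time: 01/03/2019 9:45:12 AM\n"
        "Message: ok"
    )

    for match in MSG_REGEX.finditer(sample):
        # group(1) = sender, group(2) = timestamp string, group(4) = message body
        print(match.group(1).strip(), '|', match.group(2).strip(), '|', match.group(4).strip())
    # +1 (555) 010-0000 | 01/03/2019 9:41:07 AM | Running late, be there soon
    # J | 01/03/2019 9:45:12 AM | ok
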
@@ -39,17 +39,20 @@ class MessengerLog(Communication):
         return self.messages_by(name)[0].timestamp()
 
     def info_txt(self) -> Text | None:
-        if self.author is None:
-            return None
+        num_days_str = days_between_str(self.timestamp, self.messages[-1].timestamp())
+        txt = Text(f"(Covers {num_days_str} starting ", style='dim')
+        txt.append(self.date_str(), style=TIMESTAMP_STYLE).append(' ')
 
-        info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
-        author_txt = Text(self.author, style=self.author_style + ' bold')
-        txt = Text(f"({info_msg} ", style='dim').append(author_txt)
+        if not self.author:
+            txt.append('with unknown counterparty')
+        else:
+            txt.append(GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG).append(' ')
+            txt.append(Text(self.author, style=self.author_style + ' bold'))
 
         if self.phone_number:
-            txt.append(f" using the phone number {self.phone_number}")
+            txt.append(highlighter(f" using the phone number {self.phone_number}"))
 
-        return highlighter(txt.append(')'))
+        return txt.append(')')
 
     def last_message_at(self, name: str | None) -> datetime:
         return self.messages_by(name)[-1].timestamp()
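Note: the rewritten info_txt() builds one rich.text.Text and chains .append() calls, which works because rich's Text.append() returns the Text itself. A minimal sketch with invented values standing in for self.date_str(), self.author, and the styles:

    from rich.text import Text

    txt = Text("(Covers 3 days starting ", style='dim')
    txt.append("2019-01-03", style='bold').append(' ')  # append() returns the Text, so calls chain
    txt.append("and is probably with").append(' ')
    txt.append(Text("Jane Doe", style='cyan bold'))     # appending a styled Text keeps its own style
    print(txt.append(')').plain)
    # (Covers 3 days starting 2019-01-03 and is probably with Jane Doe)
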
@@ -82,7 +85,7 @@ class MessengerLog(Communication):
         # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
         return TextMessage(
             author=self.author if (is_phone_number or not author_str) else author_str,
-            author_str=author_str if is_phone_number else None,  # Preserve phone numbers
+            author_str=author_str if is_phone_number else '',  # Preserve phone numbers
             id_confirmed=not self.is_attribution_uncertain(),
             text=match.group(4).strip(),
             timestamp_str=match.group(2).strip(),
@@ -90,12 +93,12 @@
 
     def _extract_timestamp(self) -> datetime:
         for match in MSG_REGEX.finditer(self.text):
-            timestamp_str = match.group(2).strip()
+            message = self._build_message(match)
 
             try:
-                return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
+                return message.timestamp()
             except ValueError as e:
-                logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
+                logger.info(f"Failed to parse '{message.timestamp_str}' to datetime! Using next match. Error: {e}'")
 
         raise RuntimeError(f"{self}: No timestamp found!")
 
@@ -130,7 +133,7 @@ class MessengerLog(Communication):
         counts_table.add_column('Files', justify='right', style='white')
         counts_table.add_column("Msgs", justify='right')
         counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
-        counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
+        counts_table.add_column('Last Sent At', justify='center', style=LAST_TIMESTAMP_STYLE, width=21)
         counts_table.add_column('Days', justify='right', style='dim')
 
         for name, count in sort_dict(cls.count_authors(imessage_logs)):
@@ -144,7 +147,7 @@ class MessengerLog(Communication):
                 f"{count:,}",
                 iso_timestamp(first_at),
                 iso_timestamp(last_at),
-                str((last_at - first_at).days + 1),
+                str(days_between(first_at, last_at)),
             )
 
         return counts_table
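Note: several hunks in this release swap inline date arithmetic for days_between()/days_between_str() from epstein_files.util.data. The helpers themselves aren't shown in the diff; judging by the '(last_at - first_at).days + 1' expression replaced above, a plausible reconstruction is an inclusive day count:

    from datetime import datetime

    def days_between(first_at: datetime, last_at: datetime) -> int:
        """Hypothetical reconstruction: inclusive count, per the replaced '.days + 1' expression."""
        return (last_at - first_at).days + 1

    def days_between_str(first_at: datetime, last_at: datetime) -> str:
        """Hypothetical: human-readable form used by the new MessengerLog.info_txt()."""
        num_days = days_between(first_at, last_at)
        return '1 day' if num_days == 1 else f'{num_days} days'

    print(days_between(datetime(2019, 1, 3), datetime(2019, 1, 5)))      # 3
    print(days_between_str(datetime(2019, 1, 3), datetime(2019, 1, 3)))  # 1 day

One caveat: in _log_extracted_timestamps_info below, the replaced expression was '(timestamps[0] - timestamps[-1]).days' without the '+ 1', so if days_between is inclusive, the logged span grows by one day.
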
@@ -18,7 +18,7 @@ from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_R
 from epstein_files.util.constant.strings import *
 from epstein_files.util.constants import *
 from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg, Metadata
-from epstein_files.util.data import escape_single_quotes, remove_timezone, sort_dict, uniquify
+from epstein_files.util.data import days_between, escape_single_quotes, remove_timezone, sort_dict, uniquify
 from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
 from epstein_files.util.env import args
 from epstein_files.util.highlighted_group import styled_category
@@ -36,94 +36,62 @@ TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
 VAST_HOUSE = 'vast house'  # Michael Wolff article draft about Epstein indicator
 VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
 
-UNINTERESTING_CATEGORES = [
+SKIP_TIMESTAMP_EXTRACT = [
+    PALM_BEACH_TSV,
+    PALM_BEACH_PROPERTY_INFO,
+]
+
+UNINTERESTING_CATEGORIES = [
+    ACADEMIA,
+    ARTICLE,
     ARTS,
     BOOK,
+    CONFERENCE,
     JUNK,
+    POLITICS,
     SKYPE_LOG,
-    SPEECH,
 ]
 
 # OtherFiles whose descriptions/info match these prefixes are not displayed unless --all-other-files is used
-UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
+UNINTERESTING_PREFIXES = [
     'article about',
-    ARTICLE_DRAFT,
-    'Aviation International',
-    BBC,
-    BLOOMBERG,
-    'Boston Globe',
     BROCKMAN_INC,
-    CHINA_DAILY,
-    CNN,
-    'completely redacted',
     CVRA,
-    DAILY_MAIL,
-    DAILY_TELEGRAPH,
-    CVRA_LEXIS_SEARCH[0:-12],  # Because date at end :(
     DERSH_GIUFFRE_TWEET,
-    'Financial Times',
-    'Forbes',
-    'Frontlines',
-    'Future Science',
-    'Globe and Mail',
     GORDON_GETTY,
     f"{HARVARD} Econ",
     HARVARD_POETRY,
-    'Inference',
     JASTA,
-    'JetGala',
-    JOHN_BOLTON_PRESS_CLIPPING,
-    'Journal of Criminal',
-    LA_TIMES,
-    'Litigation Daily',
-    LAWRENCE_KRAUSS,
-    LAWRENCE_KRAUSS_ASU_ORIGINS,
-    'MarketWatch',
-    MARTIN_NOWAK,
-    'Morning News',
+    LEXIS_NEXIS,
     NOBEL_CHARITABLE_TRUST,
-    'Nautilus',
-    'New Yorker',
-    NYT,
     PALM_BEACH_CODE_ENFORCEMENT,
-    PALM_BEACH_DAILY_NEWS,
-    PALM_BEACH_POST,
     PALM_BEACH_TSV,
     PALM_BEACH_WATER_COMMITTEE,
-    PAUL_KRASSNER,
-    PEGGY_SIEGAL,
-    'Politifact',
-    'Rafanelli',
-    ROBERT_LAWRENCE_KUHN,
-    ROBERT_TRIVERS,
-    'SCMP',
-    'SciencExpress',
-    'Scowcroft',
-    SHIMON_POST_ARTICLE,
-    SINGLE_PAGE,
-    STACEY_PLASKETT,
-    'Tatler',
-    TERJE_ROD_LARSEN,
-    TEXT_OF_US_LAW,
-    TRANSLATION,
     TWEET,
-    REAL_DEAL_ARTICLE,
-    TRUMP_DISCLOSURES,
-    UBS_CIO_REPORT,
     UN_GENERAL_ASSEMBLY,
-    'U.S. News',
     'US Office',
-    'Vanity Fair',
-    VI_DAILY_NEWS,
-    WAPO,
+]
+
+INTERESTING_AUTHORS = [
+    EDWARD_JAY_EPSTEIN,
+    EHUD_BARAK,
+    JOI_ITO,
+    NOAM_CHOMSKY,
+    MICHAEL_WOLFF,
+    SVETLANA_POZHIDAEVA,
 ]
 
 
 @dataclass
 class OtherFile(Document):
-    """File that is not an email, an iMessage log, or JSON data."""
+    """
+    File that is not an email, an iMessage log, or JSON data.
 
-    include_description_in_summary_panel: ClassVar[bool] = True
+    Attributes:
+        was_timestamp_extracted (bool): True if the timestamp was programmatically extracted (and could be wrong)
+    """
+    was_timestamp_extracted: bool = False
+    include_description_in_summary_panel: ClassVar[bool] = True  # Class var for logging output
 
     def __post_init__(self):
         super().__post_init__()
@@ -162,11 +130,13 @@ class OtherFile(Document):
         elif len(info_sentences) == 0:
             return True
         elif self.config:
-            if self.config.is_interesting:
+            if self.config.is_interesting is not None:
+                return self.config.is_interesting
+            elif self.config.author in INTERESTING_AUTHORS:
                 return True
             elif self.category() == FINANCE and self.author is not None:
                 return False
-            elif self.category() in UNINTERESTING_CATEGORES:
+            elif self.category() in UNINTERESTING_CATEGORIES:
                 return False
 
         for prefix in UNINTERESTING_PREFIXES:
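Note: the is_interesting change above is a truthiness fix worth calling out. The old 'if self.config.is_interesting:' treated an explicitly configured False the same as unset, so a file could never be force-hidden; testing 'is not None' turns the config field into a tri-state override. A minimal sketch of the pattern (names invented):

    from typing import Optional

    def effective_interest(configured: Optional[bool], heuristic: bool) -> bool:
        # Tri-state override: explicit True/False wins; None defers to the heuristic.
        if configured is not None:
            return configured
        return heuristic

    assert effective_interest(False, True) is False  # the old truthy check would have fallen through to True
    assert effective_interest(None, True) is True
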
@@ -178,6 +148,10 @@ class OtherFile(Document):
     def metadata(self) -> Metadata:
         metadata = super().metadata()
         metadata['is_interesting'] = self.is_interesting()
+
+        if self.was_timestamp_extracted:
+            metadata['was_timestamp_extracted'] = self.was_timestamp_extracted
+
         return metadata
 
     def preview_text(self) -> str:
@@ -191,6 +165,8 @@ class OtherFile(Document):
         """Return configured timestamp or value extracted by scanning text with datefinder."""
         if self.config and self.config.timestamp:
             return self.config.timestamp
+        elif self.config and any([s in (self.config_description() or '') for s in SKIP_TIMESTAMP_EXTRACT]):
+            return None
 
         timestamps: list[datetime] = []
 
@@ -214,7 +190,10 @@ class OtherFile(Document):
             self.log_top_lines(15, msg=f"No timestamps found")
 
             return None
-        elif len(timestamps) == 1:
+
+        self.was_timestamp_extracted = True
+
+        if len(timestamps) == 1:
             return timestamps[0]
         else:
             timestamps = sorted(uniquify(timestamps), reverse=True)
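Note: the docstring above says extraction scans the file's text with datefinder; this hunk just flags that path with was_timestamp_extracted so downstream metadata can mark the date as a guess. A minimal sketch of the overall shape, using datefinder's documented find_dates() entry point and invented sample text:

    import datefinder  # pip install datefinder

    text = "Deposition scheduled for March 3, 2010; exhibits filed 2/14/2010."
    timestamps = sorted(set(datefinder.find_dates(text)), reverse=True)

    if timestamps:
        # Mirrors 'sorted(uniquify(timestamps), reverse=True)' above: most recent date wins
        print(timestamps[0])  # 2010-03-03 00:00:00
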
@@ -222,7 +201,7 @@ class OtherFile(Document):
             return timestamps[0]  # Most recent timestamp appearing in text is usually the closest
 
     def _log_extracted_timestamps_info(self, timestamps: list[datetime]) -> None:
-        num_days_spanned = (timestamps[0] - timestamps[-1]).days
+        num_days_spanned = days_between(timestamps[-1], timestamps[0])
         timestamps_log_msg = f"Extracted {len(timestamps)} timestamps spanning {num_days_spanned} days{TIMESTAMP_LOG_INDENT}"
         timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
 
@@ -230,9 +209,9 @@ class OtherFile(Document):
         self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
 
     @staticmethod
-    def build_table(files: Sequence['OtherFile']) -> Table:
+    def files_preview_table(files: Sequence['OtherFile']) -> Table:
         """Build a table of OtherFile documents."""
-        table = build_table(None, show_lines=True)
+        table = build_table('Other Files Details', show_lines=True)
         table.add_column('File', justify='center', width=FILENAME_LENGTH)
         table.add_column('Date', justify='center')
         table.add_column('Size', justify='center')
@@ -240,7 +219,7 @@ class OtherFile(Document):
         table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
 
         for file in files:
-            link_and_info = [file.external_links()]
+            link_and_info = [file.external_links_txt()]
             date_str = file.date_str()
 
             if file.is_duplicate():
@@ -272,10 +251,10 @@ class OtherFile(Document):
                 logger.warning(f"file {file.file_id} has no category")
 
             counts[file.category()] += 1
-            category_bytes[file.category()] += file.length
+            category_bytes[file.category()] += file.file_size()
 
-        table = build_table('Other Files Summary')
-        add_cols_to_table(table, ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table = build_table('Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
+        table.columns[0].min_width = 14
         table.columns[-1].style = 'dim'
 
         for (category, count) in sort_dict(counts):
@@ -23,14 +23,14 @@ from epstein_files.util.constant.strings import *
 from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
     epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
 from epstein_files.util.constants import *
-from epstein_files.util.data import dict_sets_to_lists, json_safe, listify, sort_dict
+from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify, sort_dict
 from epstein_files.util.doc_cfg import EmailCfg, Metadata
-from epstein_files.util.env import DOCS_DIR, args, logger
+from epstein_files.util.env import DOCS_DIR, args, logger, specified_names
 from epstein_files.util.file_helper import file_size_str
-from epstein_files.util.highlighted_group import get_info_for_name, get_style_for_name
-from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_table,
-    build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
-    print_other_site_link, print_panel, print_section_header, vertically_pad)
+from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
+from epstein_files.util.rich import (DEFAULT_NAME_STYLE, LAST_TIMESTAMP_STYLE, NA_TXT, add_cols_to_table,
+    print_all_files_page_link, build_table, console, highlighter, link_text_obj, link_markup, print_author_header, print_centered,
+    print_panel, print_section_header, vertically_pad)
 from epstein_files.util.search_result import SearchResult
 from epstein_files.util.timer import Timer
 
72
72
 
73
73
  # Read through and classify all the files
74
74
  for file_arg in self.all_files:
75
- doc_timer = Timer(decimals=4)
75
+ doc_timer = Timer(decimals=2)
76
76
  document = Document(file_arg)
77
77
  cls = document_cls(document)
78
78
 
79
- if document.length == 0:
79
+ if document.length() == 0:
80
80
  logger.warning(f"Skipping empty file: {document}]")
81
81
  continue
82
82
  elif args.skip_other_files and cls == OtherFile and file_type_count[cls.__name__] > 1:
83
- logger.warning(f"Skipping {document.filename}...")
83
+ document.log(f"Skipping OtherFile...")
84
84
  continue
85
85
 
86
- documents.append(cls(file_arg, text=document.text))
86
+ documents.append(cls(file_arg, lines=document.lines, text=document.text))
87
87
  logger.info(str(documents[-1]))
88
88
  file_type_count[cls.__name__] += 1
89
89
 
@@ -104,16 +104,20 @@ class EpsteinFiles:
104
104
  if PICKLED_PATH.exists() and not args.overwrite_pickle:
105
105
  with gzip.open(PICKLED_PATH, 'rb') as file:
106
106
  epstein_files = pickle.load(file)
107
- timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
108
107
  epstein_files.timer = timer
108
+ timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
109
+ epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
109
110
  return epstein_files
110
111
 
111
112
  logger.warning(f"Building new cache file, this will take a few minutes...")
112
113
  epstein_files = EpsteinFiles(timer=timer)
113
114
 
114
- with gzip.open(PICKLED_PATH, 'wb') as file:
115
- pickle.dump(epstein_files, file)
116
- logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
115
+ if args.skip_other_files:
116
+ logger.warning(f"Not writing pickled data because --skip-other-files")
117
+ else:
118
+ with gzip.open(PICKLED_PATH, 'wb') as file:
119
+ pickle.dump(epstein_files, file)
120
+ logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
117
121
 
118
122
  timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
119
123
  return epstein_files
@@ -127,9 +131,6 @@ class EpsteinFiles:
127
131
  names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
128
132
  return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
129
133
 
130
- def attributed_email_count(self) -> int:
131
- return sum([i for author, i in self.email_author_counts.items() if author != UNKNOWN])
132
-
133
134
  def docs_matching(
134
135
  self,
135
136
  pattern: re.Pattern | str,
@@ -156,7 +157,7 @@ class EpsteinFiles:
156
157
  return self.emails_for(author)[-1].timestamp
157
158
 
158
159
  def email_conversation_length_in_days(self, author: str | None) -> int:
159
- return (self.last_email_at(author) - self.earliest_email_at(author)).days + 1
160
+ return days_between(self.earliest_email_at(author), self.last_email_at(author))
160
161
 
161
162
  def email_signature_substitution_counts(self) -> dict[str, int]:
162
163
  """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -172,7 +173,7 @@ class EpsteinFiles:
172
173
  return sorted(list(self.unknown_recipient_email_ids))
173
174
 
174
175
  def emails_by(self, author: str | None) -> list[Email]:
175
- return [e for e in self.emails if e.author == author]
176
+ return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
176
177
 
177
178
  def emails_for(self, author: str | None) -> list[Email]:
178
179
  """Returns emails to or from a given 'author' sorted chronologically."""
@@ -185,9 +186,11 @@ class EpsteinFiles:
185
186
 
186
187
  def emails_to(self, author: str | None) -> list[Email]:
187
188
  if author is None:
188
- return [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
+ emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
189
190
  else:
190
- return [e for e in self.emails if author in e.recipients]
191
+ emails = [e for e in self.emails if author in e.recipients]
192
+
193
+ return Document.sort_by_timestamp(emails)
191
194
 
192
195
  def get_documents_by_id(self, file_ids: str | list[str]) -> list[Document]:
193
196
  file_ids = listify(file_ids)
@@ -204,14 +207,26 @@ class EpsteinFiles:
204
207
  def json_metadata(self) -> str:
205
208
  """Create a JSON string containing metadata for all the files."""
206
209
  metadata = {
207
- Email.__name__: _sorted_metadata(self.emails),
208
- JsonFile.__name__: _sorted_metadata(self.json_files),
209
- MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
210
- OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
210
+ 'files': {
211
+ Email.__name__: _sorted_metadata(self.emails),
212
+ JsonFile.__name__: _sorted_metadata(self.json_files),
213
+ MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
214
+ OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
215
+ },
216
+ 'people': {
217
+ name: highlighted_group.get_info(name)
218
+ for highlighted_group in HIGHLIGHTED_NAMES
219
+ if isinstance(highlighted_group, HighlightedNames)
220
+ for name, description in highlighted_group.emailers.items()
221
+ if description
222
+ }
211
223
  }
212
224
 
213
225
  return json.dumps(metadata, indent=4, sort_keys=True)
214
226
 
227
+ def non_duplicate_emails(self) -> list[Email]:
228
+ return [email for email in self.emails if not email.is_duplicate()]
229
+
215
230
  def non_json_other_files(self) -> list[OtherFile]:
216
231
  return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
217
232
 
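Note: json_metadata() output is restructured, nesting the per-class file metadata under "files" and adding a "people" map drawn from the HighlightedNames groups. Illustrative shape only; real entries come from _sorted_metadata() and HighlightedNames.get_info():

    import json

    metadata = {
        'files': {
            'Email': ['...'],
            'JsonFile': ['...'],
            'MessengerLog': ['...'],
            'OtherFile': ['...'],
        },
        'people': {
            'Some Name': 'description from the highlighted group',
        },
    }

    print(json.dumps(metadata, indent=4, sort_keys=True))
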
@@ -230,8 +245,8 @@ class EpsteinFiles:
             f"{len([d for d in docs if d.is_duplicate()])}",
         )
 
-        add_row('iMessage Logs', self.imessage_logs)
         add_row('Emails', self.emails)
+        add_row('iMessage Logs', self.imessage_logs)
         add_row('JSON Data', self.json_files)
         add_row('Other', self.non_json_other_files())
         console.print(Align.center(table))
@@ -271,71 +286,93 @@ class EpsteinFiles:
         console.print(Align.center(Email.build_table(emails, author)), '\n')
 
     def print_email_device_info(self) -> None:
-        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(4, 0, 0, 0), centered=True)
+        print_panel(f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown", padding=(2, 0, 0, 0), centered=True)
         console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
         console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
 
-    def print_emailer_counts_table(self) -> None:
-        footer = f"Identified authors of {self.attributed_email_count():,} out of {len(self.emails):,} emails ."
-        counts_table = build_table("Email Counts", caption=footer)
-        add_cols_to_table(counts_table, ['Name', 'Count', 'Sent', "Recv'd", JMAIL, EPSTEIN_MEDIA, EPSTEIN_WEB, 'Twitter'])
-
-        emailer_counts = {
-            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
-            for emailer in self.all_emailers(True)
-        }
+    def print_other_files_section(self, files: list[OtherFile]) -> None:
+        """Returns the OtherFile objects that were interesting enough to print."""
+        category_table = OtherFile.count_by_category_table(files)
+        other_files_preview_table = OtherFile.files_preview_table(files)
+        header_pfx = '' if args.all_other_files else 'Selected '
+        print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {header_pfx}Files That Are Neither Emails Nor Text Messages")
 
-        for p, count in sort_dict(emailer_counts):
-            style = get_style_for_name(p, default_style=DEFAULT_NAME_STYLE)
+        if args.all_other_files:
+            console.line(1)
+        else:
+            print_all_files_page_link(self)
+            console.line(2)
 
-            counts_table.add_row(
-                Text.from_markup(link_markup(epsteinify_name_url(p or UNKNOWN), p or UNKNOWN, style)),
-                str(count),
-                str(self.email_author_counts[p]),
-                str(self.email_recipient_counts[p]),
-                '' if p is None else link_text_obj(search_jmail_url(p), JMAIL),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_media_person_url(p), EPSTEIN_MEDIA),
-                '' if not is_ok_for_epstein_web(p) else link_text_obj(epstein_web_person_url(p), EPSTEIN_WEB),
-                '' if p is None else link_text_obj(search_twitter_url(p), 'search X'),
-            )
+        for table in [category_table, other_files_preview_table]:
+            table.title = f"{header_pfx}{table.title}"
 
-        console.print(vertically_pad(counts_table, 2))
+        print_centered(category_table)
+        console.line(2)
+        console.print(other_files_preview_table)
 
-    def print_imessage_summary(self) -> None:
+    def print_text_messages_section(self) -> None:
         """Print summary table and stats for text messages."""
-        console.print(MessengerLog.summary_table(self.imessage_logs))
+        print_section_header('All of His Text Messages')
+        print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
+        authors: list[str | None] = specified_names if specified_names else [JEFFREY_EPSTEIN]
+        log_files = self.imessage_logs_for(authors)
+
+        for log_file in log_files:
+            console.print(Padding(log_file))
+            console.line(2)
+
+        print_centered(MessengerLog.summary_table(self.imessage_logs))
         text_summary_msg = f"\nDeanonymized {Document.known_author_count(self.imessage_logs)} of "
         text_summary_msg += f"{len(self.imessage_logs)} {TEXT_MESSAGE} logs found in {len(self.all_files):,} files."
         console.print(text_summary_msg)
         imessage_msg_count = sum([len(log.messages) for log in self.imessage_logs])
         console.print(f"Found {imessage_msg_count} text messages in {len(self.imessage_logs)} iMessage log files.")
 
-    def print_other_files_table(self) -> list[OtherFile]:
-        """Returns the OtherFile objects that were interesting enough to print."""
-        interesting_files = [doc for doc in self.other_files if args.all_other_files or doc.is_interesting()]
-        header_pfx = '' if args.all_other_files else 'Selected '
-        print_section_header(f"{FIRST_FEW_LINES} of {len(interesting_files)} {header_pfx}Files That Are Neither Emails Nor Text Msgs")
+    def table_of_emailers(self) -> Table:
+        attributed_emails = [e for e in self.non_duplicate_emails() if e.author]
+        footer = f"Identified authors of {len(attributed_emails):,} out of {len(self.non_duplicate_emails()):,} emails."
+        counts_table = build_table("Email Counts", caption=footer)
 
-        if not args.all_other_files:
-            print_centered(f"(the other site is uncurated and has all {len(self.other_files)} unclassifiable files and {len(self.emails):,} emails)", style='dim')
-            print_other_site_link(False)
-            console.line(2)
+        add_cols_to_table(counts_table, [
+            'Name',
+            'Num',
+            'Sent',
+            "Recv",
+            {'name': 'First', 'highlight': True},
+            {'name': 'Last', 'style': LAST_TIMESTAMP_STYLE},
+            JMAIL,
+            'eMedia',
+            'eWeb',
+            'Twitter',
+        ])
 
-        console.print(OtherFile.build_table(interesting_files))
-        console.print(Padding(OtherFile.count_by_category_table(interesting_files), (2, 0, 2, 2)))
-        skipped_file_count = len(self.other_files) - len(interesting_files)
+        emailer_counts = {
+            emailer: self.email_author_counts[emailer] + self.email_recipient_counts[emailer]
+            for emailer in self.all_emailers(True)
+        }
+
+        for name, count in sort_dict(emailer_counts):
+            style = get_style_for_name(name, default_style=DEFAULT_NAME_STYLE)
+            emails = self.emails_for(name)
 
-        if skipped_file_count > 0:
-            logger.warning(f"Skipped {skipped_file_count} uninteresting other files...")
+            counts_table.add_row(
+                Text.from_markup(link_markup(epsteinify_name_url(name or UNKNOWN), name or UNKNOWN, style)),
+                str(count),
+                str(self.email_author_counts[name]),
+                str(self.email_recipient_counts[name]),
+                emails[0].timestamp_without_seconds(),
+                emails[-1].timestamp_without_seconds(),
+                link_text_obj(search_jmail_url(name), JMAIL) if name else '',
+                link_text_obj(epstein_media_person_url(name), 'eMedia') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(epstein_web_person_url(name), 'eWeb') if is_ok_for_epstein_web(name) else '',
+                link_text_obj(search_twitter_url(name), 'search X') if name else '',
+            )
 
-        return interesting_files
+        return counts_table
 
     def _tally_email_data(self) -> None:
         """Tally up summary info about Email objects."""
-        for email in self.emails:
-            if email.is_duplicate():
-                continue
-
+        for email in self.non_duplicate_emails():
             self.email_author_counts[email.author] += 1
 
             if len(email.recipients) == 0:
@@ -365,7 +402,7 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
 def document_cls(doc: Document) -> Type[Document]:
     search_area = doc.text[0:5000]  # Limit search area to avoid pointless scans of huge files
 
-    if doc.length == 0:
+    if doc.length() == 0:
         return Document
     if doc.text[0] == '{':
         return JsonFile
@@ -42,6 +42,7 @@ CECILE_DE_JONGH = 'Cecile de Jongh'
 CECILIA_STEEN = 'Cecilia Steen'
 CELINA_DUBIN = 'Celina Dubin'
 CHRISTINA_GALBRAITH = 'Christina Galbraith'  # Works with Tyler Shears on reputation stuff
+DANGENE_AND_JENNIE_ENTERPRISE = 'Dangene and Jennie Enterprise'
 DANIEL_SABBA = 'Daniel Sabba'
 DANIEL_SIAD = 'Daniel Siad'
 DANNY_FROST = 'Danny Frost'
@@ -233,13 +234,14 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     edmond elizabeth emily entwistle erik evelyn
     ferguson flachsbart francis franco frank frost
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
-    hancock harold harrison harry hay helen hirsch hofstadter horowitz hussein
+    hancock harold harrison harry hay helen hill hirsch hofstadter horowitz hussein
     ian isaac isaacson
-    jamie jane janet jason jen jim joe johnson jones josh julie justin
+    james jamie jane janet jason jen jim joe johnson jones josh julie justin
     karl kate kathy kelly kim kruger kyle
-    laurie leo leonard lenny leslie lieberman louis lynch lynn
+    laurie lawrence leo leonard lenny leslie lieberman louis lynch lynn
     marcus marianne matt matthew melissa michele michelle moore moscowitz
     nancy nicole nussbaum
+    owen
     paulson philippe
     rafael ray richard richardson rob robin ron rubin rudolph ryan
     sara sarah sean seligman serge sergey silverman sloman smith snowden sorkin steele stevie stewart
@@ -22,7 +22,6 @@ PUBLICIST = 'publicist'
 REPUTATION = 'reputation'
 SKYPE_LOG = 'Skype log'
 SOCIAL = 'social'
-SPEECH = 'speech'
 
 # Locations
 PALM_BEACH = 'Palm Beach'
@@ -35,6 +34,7 @@ CHINA_DAILY = "China Daily"
 DAILY_MAIL = 'Daily Mail'
 DAILY_TELEGRAPH = "Daily Telegraph"
 LA_TIMES = 'LA Times'
+LEXIS_NEXIS = 'Lexis Nexis'
 MIAMI_HERALD = 'Miami Herald'
 NYT = "New York Times"
 PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'