epstein-files 1.0.10__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {epstein_files-1.0.10 → epstein_files-1.0.11}/PKG-INFO +1 -1
  2. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/__init__.py +4 -6
  3. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/document.py +92 -49
  4. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/email.py +7 -4
  5. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/imessage/text_message.py +3 -12
  6. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/json_file.py +13 -1
  7. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/messenger_log.py +32 -19
  8. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/other_file.py +66 -43
  9. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/epstein_files.py +22 -15
  10. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/names.py +2 -2
  11. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constants.py +84 -78
  12. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/doc_cfg.py +17 -25
  13. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/env.py +29 -17
  14. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/file_helper.py +13 -24
  15. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/highlighted_group.py +22 -14
  16. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/logging.py +0 -6
  17. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/output.py +12 -7
  18. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/rich.py +15 -10
  19. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/word_count.py +65 -5
  20. {epstein_files-1.0.10 → epstein_files-1.0.11}/pyproject.toml +1 -1
  21. epstein_files-1.0.10/epstein_files/count_words.py +0 -72
  22. {epstein_files-1.0.10 → epstein_files-1.0.11}/LICENSE +0 -0
  23. {epstein_files-1.0.10 → epstein_files-1.0.11}/README.md +0 -0
  24. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/communication.py +0 -0
  25. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/emails/email_header.py +0 -0
  26. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/common_words.py +0 -0
  27. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/html.py +0 -0
  28. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/output_files.py +0 -0
  29. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/strings.py +0 -0
  30. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/urls.py +0 -0
  31. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/data.py +0 -0
  32. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/search_result.py +0 -0
  33. {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/timer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -1,9 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  """
3
3
  Reformat Epstein text message files for readability and count email senders.
4
- For use with iMessage log files from https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_
5
4
 
6
- Install: 'poetry install'
7
5
  Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
8
6
  """
9
7
  from sys import exit
@@ -15,7 +13,6 @@ from rich.padding import Padding
15
13
  from rich.panel import Panel
16
14
  from rich.text import Text
17
15
 
18
- from epstein_files.count_words import write_word_counts_html
19
16
  from epstein_files.epstein_files import EpsteinFiles, document_cls
20
17
  from epstein_files.documents.document import INFO_PADDING, Document
21
18
  from epstein_files.documents.email import Email
@@ -27,6 +24,7 @@ from epstein_files.util.output import (print_emails, print_json_files, print_jso
27
24
  print_text_messages, write_urls)
28
25
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
29
26
  from epstein_files.util.timer import Timer
27
+ from epstein_files.util.word_count import write_word_counts_html
30
28
 
31
29
 
32
30
  def generate_html() -> None:
@@ -41,7 +39,7 @@ def generate_html() -> None:
41
39
  if args.json_metadata:
42
40
  print_json_metadata(epstein_files)
43
41
  exit()
44
- elif args.output_json_files:
42
+ elif args.json_files:
45
43
  print_json_files(epstein_files)
46
44
  exit()
47
45
 
@@ -58,7 +56,7 @@ def generate_html() -> None:
58
56
  emails_printed = print_emails(epstein_files)
59
57
  timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
60
58
 
61
- if args.output_other_files:
59
+ if args.output_other:
62
60
  files_printed = epstein_files.print_other_files_table()
63
61
  timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
64
62
 
@@ -96,7 +94,7 @@ def epstein_search():
96
94
 
97
95
  console.print(search_result.document)
98
96
  else:
99
- console.print(search_result.document.description_panel())
97
+ console.print(search_result.document.summary_panel())
100
98
 
101
99
  for matching_line in search_result.lines:
102
100
  line_txt = matching_line.__rich__()
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import re
3
+ from copy import deepcopy
3
4
  from dataclasses import asdict, dataclass, field
4
5
  from datetime import datetime
5
6
  from pathlib import Path
@@ -15,13 +16,13 @@ from epstein_files.util.constant.names import *
15
16
  from epstein_files.util.constant.strings import *
16
17
  from epstein_files.util.constant.urls import *
17
18
  from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
18
- from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, listify, patternize, without_falsey
19
+ from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
19
20
  from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
20
- from epstein_files.util.env import args
21
- from epstein_files.util.file_helper import (DOCS_DIR, file_stem_for_id, extract_file_id, file_size,
21
+ from epstein_files.util.env import DOCS_DIR, args
22
+ from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
22
23
  file_size_str, is_local_extract_file)
23
24
  from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
24
- from epstein_files.util.rich import SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
25
+ from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
25
26
  from epstein_files.util.search_result import MatchedLine
26
27
 
27
28
  CLOSE_PROPERTIES_CHAR = ']'
@@ -33,6 +34,7 @@ MIN_DOCUMENT_ID = 10477
33
34
  LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
34
35
  WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
35
36
 
37
+ EXTRACTED_FROM = 'Extracted from'
36
38
  MIN_TIMESTAMP = datetime(1991, 1, 1)
37
39
  MID_TIMESTAMP = datetime(2007, 1, 1)
38
40
  MAX_TIMESTAMP = datetime(2020, 1, 1)
@@ -59,14 +61,27 @@ OCR_REPAIRS = {
59
61
 
60
62
  @dataclass
61
63
  class Document:
62
- """Base class for all Epstein Files documents."""
64
+ """
65
+ Base class for all Epstein Files documents.
66
+
67
+ Attributes:
68
+ file_path (Path): Local path to file
69
+ author (str | None): Who is responsible for the text in the file
70
+ config (DocCfg): Information about this file
71
+ file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
72
+ filename (str): File's basename
73
+ length (int): Number of characters in the file after all the cleanup
74
+ lines (list[str]): Lines of the file after all the cleanup
75
+ text (str): Contents of the file
76
+ timestamp (datetime | None): When the file was originally created
77
+ url_slug (str): Version of the filename that works in links to epsteinify etc.
78
+ """
63
79
  file_path: Path
64
80
  # Optional fields
65
81
  author: str | None = None
66
82
  config: EmailCfg | DocCfg | TextCfg | None = None
67
83
  file_id: str = field(init=False)
68
84
  filename: str = field(init=False)
69
- is_duplicate: bool = False
70
85
  length: int = field(init=False)
71
86
  lines: list[str] = field(init=False)
72
87
  num_lines: int = field(init=False)
@@ -74,21 +89,21 @@ class Document:
74
89
  timestamp: datetime | None = None
75
90
  url_slug: str = field(init=False) # e.g. 'HOUSE_OVERSIGHT_123456
76
91
 
77
- # Class variable overridden in JsonFile
78
- strip_whitespace: ClassVar[bool] = True
92
+ # Class variables
93
+ include_description_in_summary_panel: ClassVar[bool] = False
94
+ strip_whitespace: ClassVar[bool] = True # Overridden in JsonFile
79
95
 
80
96
  def __post_init__(self):
81
97
  self.filename = self.file_path.name
82
98
  self.file_id = extract_file_id(self.filename)
83
- self.config = ALL_FILE_CONFIGS.get(self.file_id)
84
- self.is_duplicate = bool(self.config.dupe_of_id) if self.config else False
99
+ self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
85
100
 
86
101
  if self.is_local_extract_file():
87
102
  self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
103
+ extracted_from_doc_id = self.url_slug.split('_')[-1]
88
104
 
89
- # Coerce FileConfig for court docs etc. to MessageCfg for email files extracted from that document
90
- if self.class_name() == EMAIL_CLASS and self.config and not isinstance(self.config, EmailCfg):
91
- self.config = EmailCfg.from_doc_cfg(self.config)
105
+ if extracted_from_doc_id in ALL_FILE_CONFIGS:
106
+ self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
92
107
  else:
93
108
  self.url_slug = self.file_path.stem
94
109
 
@@ -97,11 +112,7 @@ class Document:
97
112
  self._extract_author()
98
113
  self.timestamp = self._extract_timestamp()
99
114
 
100
- def class_name(self) -> str:
101
- """Annoying workaround for circular import issues and isinstance()."""
102
- return str(type(self).__name__)
103
-
104
- def configured_description(self) -> str | None:
115
+ def config_description(self) -> str | None:
105
116
  """Overloaded in OtherFile."""
106
117
  if self.config and self.config.description:
107
118
  return f"({self.config.description})"
@@ -109,40 +120,32 @@ class Document:
109
120
  def date_str(self) -> str | None:
110
121
  return date_str(self.timestamp)
111
122
 
112
- def description_panel(self, include_hints: bool = False) -> Panel:
113
- """Panelized description() with info_txt(), used in search results."""
114
- hints = [Text('', style='italic').append(h) for h in (self.hints() if include_hints else [])]
115
- return Panel(Group(*([self.summary()] + hints)), border_style=self.document_type_style(), expand=False)
116
-
117
- def document_type_style(self) -> str:
118
- return DOC_TYPE_STYLES[self.class_name()]
119
-
120
123
  def duplicate_file_txt(self) -> Text:
121
124
  """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
122
125
  if not self.config or not self.config.dupe_of_id:
123
126
  raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
124
127
 
125
- txt = Text(f"Not showing ", style='white dim italic').append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
128
+ txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
126
129
  txt.append(f" because it's {self.config.duplicate_reason()} ")
127
130
  return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
128
131
 
129
132
  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
130
133
  """Create a Text obj link to this document on epsteinify.com."""
131
- return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.url_slug, style)
134
+ return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
132
135
 
133
136
  def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
134
137
  """Create a Text obj link to this document on epstein.media."""
135
- return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.url_slug, style)
138
+ return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
136
139
 
137
140
  def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
138
141
  """Create a Text obj link to this document on EpsteinWeb."""
139
- return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.url_slug, style)
142
+ return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
140
143
 
141
144
  def file_info_panel(self) -> Group:
142
- """Panel with filename linking to raw file plus any hints/info about the file."""
145
+ """Panel with filename linking to raw file plus any additional info about the file."""
143
146
  panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
144
- hints = [Padding(hint, INFO_PADDING) for hint in self.hints()]
145
- return Group(*([panel] + hints))
147
+ padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
148
+ return Group(*([panel] + padded_info))
146
149
 
147
150
  def file_size(self) -> int:
148
151
  return file_size(self.file_path)
@@ -150,34 +153,35 @@ class Document:
150
153
  def file_size_str(self) -> str:
151
154
  return file_size_str(self.file_path)
152
155
 
153
- def hints(self) -> list[Text]:
154
- """Additional info about the Document (author, description, and so on) to be desplayed in doc header."""
155
- hints = listify(self.info_txt())
156
- hint_msg = self.configured_description()
157
-
158
- if hint_msg:
159
- hints.append(highlighter(Text(hint_msg, style='white dim italic')))
156
+ def info(self) -> list[Text]:
157
+ """0 to 2 sentences containing the info_txt() as well as any configured description."""
158
+ sentences = [
159
+ self.info_txt(),
160
+ highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
161
+ ]
160
162
 
161
- return without_falsey(hints)
163
+ return without_falsey(sentences)
162
164
 
163
165
  def info_txt(self) -> Text | None:
164
166
  """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
165
167
  return None
166
168
 
169
+ def is_duplicate(self) -> bool:
170
+ return bool(self.config and self.config.dupe_of_id)
171
+
167
172
  def is_local_extract_file(self) -> bool:
168
173
  """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
169
174
  return is_local_extract_file(self.filename)
170
175
 
171
- def log(self, msg: str, level: int = logging.WARNING):
176
+ def log(self, msg: str, level: int = logging.INFO):
172
177
  """Log with filename as a prefix."""
173
- logger.log(level, f"{self.url_slug} {msg}")
178
+ logger.log(level, f"{self.file_path.stem} {msg}")
174
179
 
175
180
  def log_top_lines(self, n: int = 10, msg: str = '', level: int = logging.INFO) -> None:
176
181
  """Log first 'n' lines of self.text at 'level'. 'msg' can be optionally provided."""
177
182
  separator = '\n\n' if '\n' in msg else '. '
178
183
  msg = (msg + separator) if msg else ''
179
- msg = f"{self.filename}: {msg}First {n} lines:"
180
- logger.log(level, f"{msg}\n\n{self.top_lines(n)}\n")
184
+ self.log(f"{msg}First {n} lines:\n\n{self.top_lines(n)}\n", level)
181
185
 
182
186
  def matching_lines(self, _pattern: re.Pattern | str) -> list[MatchedLine]:
183
187
  """Return lines matching a regex as colored list[Text]."""
@@ -189,7 +193,7 @@ class Document:
189
193
  metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
190
194
  metadata['bytes'] = self.file_size()
191
195
  metadata['filename'] = f"{self.url_slug}.txt"
192
- metadata['type'] = self.class_name()
196
+ metadata['type'] = self._class_name()
193
197
 
194
198
  if self.is_local_extract_file():
195
199
  metadata['extracted_file'] = {
@@ -208,7 +212,7 @@ class Document:
208
212
  """Returns colored links to epstein.media and epsteinweb in a Text object."""
209
213
  txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
210
214
 
211
- if args.use_epstein_web_links:
215
+ if args.use_epstein_web:
212
216
  txt.append(self.epstein_web_link(style=style))
213
217
 
214
218
  if include_alt_link:
@@ -234,7 +238,7 @@ class Document:
234
238
  return text
235
239
 
236
240
  def sort_key(self) -> tuple[datetime, str, int]:
237
- if self.config and self.config.dupe_of_id:
241
+ if self.is_duplicate():
238
242
  sort_id = self.config.dupe_of_id
239
243
  dupe_idx = 1
240
244
  else:
@@ -245,7 +249,7 @@ class Document:
245
249
 
246
250
  def summary(self) -> Text:
247
251
  """Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
248
- txt = Text('').append(self.class_name(), style=self.document_type_style())
252
+ txt = Text('').append(self._class_name(), style=self._class_style())
249
253
  txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
250
254
 
251
255
  if self.timestamp:
@@ -261,13 +265,32 @@ class Document:
261
265
 
262
266
  return txt
263
267
 
268
+ def summary_panel(self) -> Panel:
269
+ """Panelized summary(), optionally followed by the info() sentences; used in search results."""
270
+ sentences = [self.summary()]
271
+
272
+ if self.include_description_in_summary_panel:
273
+ sentences += [Text('', style='italic').append(h) for h in self.info()]
274
+
275
+ return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
276
+
264
277
  def top_lines(self, n: int = 10) -> str:
265
278
  return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
266
279
 
280
+ def warn(self, msg: str) -> None:
281
+ self.log(msg, level=logging.WARNING)
282
+
267
283
  def _border_style(self) -> str:
268
284
  """Should be overloaded in subclasses."""
269
285
  return 'white'
270
286
 
287
+ def _class_name(self) -> str:
288
+ """Annoying workaround for circular import issues and isinstance()."""
289
+ return str(type(self).__name__)
290
+
291
+ def _class_style(self) -> str:
292
+ return DOC_TYPE_STYLES[self._class_name()]
293
+
271
294
  def _extract_author(self) -> None:
272
295
  """Get author from config. Extended in Email subclass to also check headers."""
273
296
  if self.config and self.config.author:
@@ -304,6 +327,26 @@ class Document:
304
327
  self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
305
328
  self.num_lines = len(self.lines)
306
329
 
330
+ def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
331
+ """Copy info from original config for file this document was extracted from."""
332
+ if self.config:
333
+ self.warn(f"Merging existing config with config for file this document was extracted from")
334
+ else:
335
+ self.config = EmailCfg(id=self.file_id)
336
+
337
+ extracted_from_description = doc_cfg.complete_description()
338
+
339
+ if extracted_from_description:
340
+ extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
341
+
342
+ if self.config.description:
343
+ self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
344
+
345
+ self.config.description = extracted_description
346
+
347
+ self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
348
+ self.warn(f"Constructed local config\n{self.config}")
349
+
307
350
  def _write_clean_text(self, output_path: Path) -> None:
308
351
  """Write self.text to 'output_path'. Used only for diffing files."""
309
352
  if output_path.exists():
@@ -591,6 +591,10 @@ class Email(Communication):
591
591
  self._merge_lines(7, 9)
592
592
  elif self.file_id == '030299':
593
593
  self._merge_lines(7, 10)
594
+ elif self.file_id == '014860':
595
+ self._merge_lines(3)
596
+ self._merge_lines(4)
597
+ self._merge_lines(4)
594
598
  elif self.file_id == '029977':
595
599
  self._set_computed_fields(text=self.text.replace('Sent 9/28/2012 2:41:02 PM', 'Sent: 9/28/2012 2:41:02 PM'))
596
600
 
@@ -606,9 +610,8 @@ class Email(Communication):
606
610
  self._remove_line(3)
607
611
 
608
612
  if old_text != self.text:
609
- self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n', logging.INFO)
610
- self.log_top_lines(12, 'Result of modifications', logging.INFO)
611
- self.log('', logging.INFO)
613
+ self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n')
614
+ self.log_top_lines(12, 'Result of modifications')
612
615
 
613
616
  lines = self.repair_ocr_text(OCR_REPAIRS, self.text).split('\n')
614
617
  new_lines = []
@@ -697,7 +700,7 @@ class Email(Communication):
697
700
  yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
698
701
 
699
702
  if should_rewrite_header:
700
- self.log_top_lines(self.header.num_header_rows + 4, f'Original header:', logging.INFO)
703
+ self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
701
704
 
702
705
  @staticmethod
703
706
  def build_table(emails: list['Email'], _author: str | None) -> Table:
@@ -1,10 +1,10 @@
1
1
  import re
2
- from dataclasses import dataclass, field
2
+ from dataclasses import dataclass
3
3
  from datetime import datetime
4
4
 
5
5
  from rich.text import Text
6
6
 
7
- from epstein_files.util.constant.names import JEFFREY_EPSTEIN, ANTHONY_SCARAMUCCI, CELINA_DUBIN, EVA, STEVE_BANNON, UNKNOWN
7
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
8
8
  from epstein_files.util.data import extract_last_name
9
9
  from epstein_files.util.highlighted_group import get_style_for_name
10
10
  from epstein_files.util.logging import logger
@@ -19,15 +19,6 @@ DISPLAY_LAST_NAME_ONLY = [
19
19
  STEVE_BANNON,
20
20
  ]
21
21
 
22
- PHONE_NUMBER_MAPPING = {
23
- '+19174393646': ANTHONY_SCARAMUCCI,
24
- '+13109906526': STEVE_BANNON,
25
- '+16463880059': EVA,
26
- '+13108737937': CELINA_DUBIN,
27
- '+13108802851': STEVE_BANNON,
28
-
29
- }
30
-
31
22
  TEXTER_MAPPING = {
32
23
  'e:': JEFFREY_EPSTEIN,
33
24
  'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
@@ -48,7 +39,7 @@ class TextMessage:
48
39
 
49
40
  if self.author is None:
50
41
  self.author_str = UNKNOWN
51
- elif self.author in DISPLAY_LAST_NAME_ONLY:
42
+ elif self.author in DISPLAY_LAST_NAME_ONLY and not self.author_str:
52
43
  self.author_str = extract_last_name(self.author)
53
44
  else:
54
45
  self.author_str = self.author_str or self.author
@@ -8,11 +8,23 @@ from rich.text import Text
8
8
 
9
9
  from epstein_files.documents.other_file import OtherFile
10
10
  from epstein_files.util.constant.strings import JSON
11
+ from epstein_files.util.rich import INFO_STYLE
12
+
13
+ TEXT_FIELDS = [
14
+ 'caption',
15
+ 'standard',
16
+ 'subtitle',
17
+ 'text',
18
+ 'title',
19
+ 'to',
20
+ ]
11
21
 
12
22
 
13
23
  @dataclass
14
24
  class JsonFile(OtherFile):
15
25
  """File containing JSON data."""
26
+
27
+ include_description_in_summary_panel: ClassVar[bool] = False
16
28
  strip_whitespace: ClassVar[bool] = False
17
29
 
18
30
  def __post_init__(self):
@@ -27,7 +39,7 @@ class JsonFile(OtherFile):
27
39
  return JSON
28
40
 
29
41
  def info_txt(self) -> Text | None:
30
- return Text(f"JSON file, possibly iMessage or similar app metadata", style='white dim italic')
42
+ return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
31
43
 
32
44
  def is_interesting(self):
33
45
  return False
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import re
2
3
  from collections import defaultdict
3
4
  from dataclasses import dataclass, field
@@ -15,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
15
16
  from epstein_files.util.doc_cfg import Metadata, TextCfg
16
17
  from epstein_files.util.highlighted_group import get_style_for_name
17
18
  from epstein_files.util.logging import logger
18
- from epstein_files.util.rich import build_table
19
+ from epstein_files.util.rich import build_table, highlighter
19
20
 
20
21
  CONFIRMED_MSG = 'Found confirmed counterparty'
21
22
  GUESSED_MSG = 'This is probably a conversation with'
@@ -27,7 +28,12 @@ REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
27
28
  class MessengerLog(Communication):
28
29
  """Class representing one iMessage log file (one conversation between Epstein and some counterparty)."""
29
30
  config: TextCfg | None = None
30
- _messages: list[TextMessage] = field(default_factory=list)
31
+ messages: list[TextMessage] = field(default_factory=list)
32
+ phone_number: str | None = None
33
+
34
+ def __post_init__(self):
35
+ super().__post_init__()
36
+ self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
31
37
 
32
38
  def first_message_at(self, name: str | None) -> datetime:
33
39
  return self.messages_by(name)[0].timestamp()
@@ -36,27 +42,29 @@ class MessengerLog(Communication):
36
42
  if self.author is None:
37
43
  return None
38
44
 
39
- hint_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
40
- author_txt = Text(self.author_or_unknown(), style=self.author_style + ' bold')
41
- return Text(f"({hint_msg} ", style='dim').append(author_txt).append(')')
45
+ info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
46
+ author_txt = Text(self.author, style=self.author_style + ' bold')
47
+ txt = Text(f"({info_msg} ", style='dim').append(author_txt)
42
48
 
43
- def last_message_at(self, name: str | None) -> datetime:
44
- return self.messages_by(name)[-1].timestamp()
49
+ if self.phone_number:
50
+ txt.append(f" using the phone number {self.phone_number}")
45
51
 
46
- def messages(self) -> list[TextMessage]:
47
- """Lazily evaluated accessor for self._messages."""
48
- if not self._messages:
49
- self._messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
52
+ return highlighter(txt.append(')'))
50
53
 
51
- return self._messages
54
+ def last_message_at(self, name: str | None) -> datetime:
55
+ return self.messages_by(name)[-1].timestamp()
52
56
 
53
57
  def messages_by(self, name: str | None) -> list[TextMessage]:
54
58
  """Return all messages by 'name'."""
55
- return [m for m in self.messages() if m.author == name]
59
+ return [m for m in self.messages if m.author == name]
56
60
 
57
61
  def metadata(self) -> Metadata:
58
62
  metadata = super().metadata()
59
- metadata.update({'num_messages': len(self.messages())})
63
+ metadata.update({'num_messages': len(self.messages)})
64
+
65
+ if self.phone_number:
66
+ metadata['phone_number'] = self.phone_number
67
+
60
68
  return metadata
61
69
 
62
70
  def _border_style(self) -> str:
@@ -65,11 +73,16 @@ class MessengerLog(Communication):
65
73
  def _build_message(self, match: re.Match) -> TextMessage:
66
74
  """Turn a regex match into a TextMessage."""
67
75
  author_str = REDACTED_AUTHOR_REGEX.sub('', match.group(1).strip())
76
+ is_phone_number = author_str.startswith('+')
77
+
78
+ if is_phone_number:
79
+ logger.warning(f"{self.summary()} Found phone number: {author_str}")
80
+ self.phone_number = author_str
68
81
 
69
- # If the Sender: is redacted that means it's from self.author
82
+ # If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
70
83
  return TextMessage(
71
- author=self.author if (author_str.startswith('+') or not author_str) else author_str,
72
- author_str=author_str if author_str.startswith('+') else None, # Preserve phone numbers
84
+ author=self.author if (is_phone_number or not author_str) else author_str,
85
+ author_str=author_str if is_phone_number else None, # Preserve phone numbers
73
86
  id_confirmed=not self.is_attribution_uncertain(),
74
87
  text=match.group(4).strip(),
75
88
  timestamp_str=match.group(2).strip(),
@@ -90,7 +103,7 @@ class MessengerLog(Communication):
90
103
  yield self.file_info_panel()
91
104
  yield Text('')
92
105
 
93
- for message in self.messages():
106
+ for message in self.messages:
94
107
  yield message
95
108
 
96
109
  @classmethod
@@ -99,7 +112,7 @@ class MessengerLog(Communication):
99
112
  sender_counts: dict[str | None, int] = defaultdict(int)
100
113
 
101
114
  for message_log in imessage_logs:
102
- for message in message_log.messages():
115
+ for message in message_log.messages:
103
116
  sender_counts[message.author] += 1
104
117
 
105
118
  return sender_counts