epstein-files 1.0.10__tar.gz → 1.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {epstein_files-1.0.10 → epstein_files-1.0.12}/PKG-INFO +1 -1
  2. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/__init__.py +7 -9
  3. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/communication.py +2 -2
  4. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/document.py +94 -81
  5. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/email.py +47 -5
  6. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/imessage/text_message.py +4 -13
  7. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/json_file.py +13 -1
  8. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/messenger_log.py +32 -19
  9. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/other_file.py +67 -44
  10. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/epstein_files.py +22 -15
  11. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/names.py +11 -10
  12. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/strings.py +2 -1
  13. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constants.py +98 -88
  14. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/data.py +1 -1
  15. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/doc_cfg.py +32 -62
  16. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/env.py +29 -17
  17. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/file_helper.py +12 -29
  18. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/highlighted_group.py +34 -17
  19. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/logging.py +1 -7
  20. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/output.py +13 -8
  21. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/rich.py +15 -10
  22. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/word_count.py +65 -5
  23. {epstein_files-1.0.10 → epstein_files-1.0.12}/pyproject.toml +1 -1
  24. epstein_files-1.0.10/epstein_files/count_words.py +0 -72
  25. {epstein_files-1.0.10 → epstein_files-1.0.12}/LICENSE +0 -0
  26. {epstein_files-1.0.10 → epstein_files-1.0.12}/README.md +0 -0
  27. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/documents/emails/email_header.py +0 -0
  28. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/common_words.py +0 -0
  29. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/html.py +0 -0
  30. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/output_files.py +0 -0
  31. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/constant/urls.py +0 -0
  32. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/search_result.py +0 -0
  33. {epstein_files-1.0.10 → epstein_files-1.0.12}/epstein_files/util/timer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.10
3
+ Version: 1.0.12
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -1,9 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  """
3
3
  Reformat Epstein text message files for readability and count email senders.
4
- For use with iMessage log files from https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_
5
4
 
6
- Install: 'poetry install'
7
5
  Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
8
6
  """
9
7
  from sys import exit
@@ -15,7 +13,6 @@ from rich.padding import Padding
15
13
  from rich.panel import Panel
16
14
  from rich.text import Text
17
15
 
18
- from epstein_files.count_words import write_word_counts_html
19
16
  from epstein_files.epstein_files import EpsteinFiles, document_cls
20
17
  from epstein_files.documents.document import INFO_PADDING, Document
21
18
  from epstein_files.documents.email import Email
@@ -23,10 +20,11 @@ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_
23
20
  from epstein_files.util.env import args, specified_names
24
21
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
25
22
  from epstein_files.util.logging import logger
26
- from epstein_files.util.output import (print_emails, print_json_files, print_json_metadata, print_json_stats,
27
- print_text_messages, write_urls)
23
+ from epstein_files.util.output import (print_emails, print_json_files, print_json_stats,
24
+ print_text_messages, write_json_metadata, write_urls)
28
25
  from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
29
26
  from epstein_files.util.timer import Timer
27
+ from epstein_files.util.word_count import write_word_counts_html
30
28
 
31
29
 
32
30
  def generate_html() -> None:
@@ -39,9 +37,9 @@ def generate_html() -> None:
39
37
  epstein_files = EpsteinFiles.get_files(timer)
40
38
 
41
39
  if args.json_metadata:
42
- print_json_metadata(epstein_files)
40
+ write_json_metadata(epstein_files)
43
41
  exit()
44
- elif args.output_json_files:
42
+ elif args.json_files:
45
43
  print_json_files(epstein_files)
46
44
  exit()
47
45
 
@@ -58,7 +56,7 @@ def generate_html() -> None:
58
56
  emails_printed = print_emails(epstein_files)
59
57
  timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
60
58
 
61
- if args.output_other_files:
59
+ if args.output_other:
62
60
  files_printed = epstein_files.print_other_files_table()
63
61
  timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
64
62
 
@@ -96,7 +94,7 @@ def epstein_search():
96
94
 
97
95
  console.print(search_result.document)
98
96
  else:
99
- console.print(search_result.document.description_panel())
97
+ console.print(search_result.document.summary_panel())
100
98
 
101
99
  for matching_line in search_result.lines:
102
100
  line_txt = matching_line.__rich__()
@@ -34,9 +34,9 @@ class Communication(Document):
34
34
  def is_attribution_uncertain(self) -> bool:
35
35
  return bool(self.config and self.config.is_attribution_uncertain)
36
36
 
37
- def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
37
+ def external_links(self, _style: str = '', include_alt_link: bool = True) -> Text:
38
38
  """Overrides super() method to apply self.author_style."""
39
- return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
39
+ return super().external_links(self.author_style, include_alt_link=include_alt_link)
40
40
 
41
41
  def summary(self) -> Text:
42
42
  return self._summary().append(CLOSE_PROPERTIES_CHAR)
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import re
3
+ from copy import deepcopy
3
4
  from dataclasses import asdict, dataclass, field
4
5
  from datetime import datetime
5
6
  from pathlib import Path
@@ -15,13 +16,13 @@ from epstein_files.util.constant.names import *
15
16
  from epstein_files.util.constant.strings import *
16
17
  from epstein_files.util.constant.urls import *
17
18
  from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
18
- from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, listify, patternize, without_falsey
19
- from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
20
- from epstein_files.util.env import args
21
- from epstein_files.util.file_helper import (DOCS_DIR, file_stem_for_id, extract_file_id, file_size,
19
+ from epstein_files.util.data import collapse_newlines, date_str, patternize, remove_time_from_timestamp_str, without_falsey
20
+ from epstein_files.util.doc_cfg import DUPE_TYPE_STRS, EmailCfg, DocCfg, Metadata, TextCfg
21
+ from epstein_files.util.env import DOCS_DIR, args
22
+ from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
22
23
  file_size_str, is_local_extract_file)
23
24
  from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
24
- from epstein_files.util.rich import SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
25
+ from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
25
26
  from epstein_files.util.search_result import MatchedLine
26
27
 
27
28
  CLOSE_PROPERTIES_CHAR = ']'
@@ -30,7 +31,6 @@ INFO_INDENT = 2
30
31
  INFO_PADDING = (0, 0, 0, INFO_INDENT)
31
32
  MAX_TOP_LINES_LEN = 4000 # Only for logging
32
33
  MIN_DOCUMENT_ID = 10477
33
- LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
34
34
  WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
35
35
 
36
36
  MIN_TIMESTAMP = datetime(1991, 1, 1)
@@ -59,14 +59,27 @@ OCR_REPAIRS = {
59
59
 
60
60
  @dataclass
61
61
  class Document:
62
- """Base class for all Epstein Files documents."""
62
+ """
63
+ Base class for all Epstein Files documents.
64
+
65
+ Attributes:
66
+ file_path (Path): Local path to file
67
+ author (str | None): Who is responsible for the text in the file
68
+ config (DocCfg): Information about this file
69
+ file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
70
+ filename (str): File's basename
71
+ length (int): Number of characters in the file after all the cleanup
72
+ lines (list[str]): Lines of the file after all the cleanup
73
+ text (str): Contents of the file
74
+ timestamp (datetime | None): When the file was originally created
75
+ url_slug (str): Version of the filename that works in links to epsteinify etc.
76
+ """
63
77
  file_path: Path
64
78
  # Optional fields
65
79
  author: str | None = None
66
80
  config: EmailCfg | DocCfg | TextCfg | None = None
67
81
  file_id: str = field(init=False)
68
82
  filename: str = field(init=False)
69
- is_duplicate: bool = False
70
83
  length: int = field(init=False)
71
84
  lines: list[str] = field(init=False)
72
85
  num_lines: int = field(init=False)
@@ -74,22 +87,16 @@ class Document:
74
87
  timestamp: datetime | None = None
75
88
  url_slug: str = field(init=False) # e.g. 'HOUSE_OVERSIGHT_123456
76
89
 
77
- # Class variable overridden in JsonFile
78
- strip_whitespace: ClassVar[bool] = True
90
+ # Class variables
91
+ include_description_in_summary_panel: ClassVar[bool] = False
92
+ strip_whitespace: ClassVar[bool] = True # Overridden in JsonFile
79
93
 
80
94
  def __post_init__(self):
81
95
  self.filename = self.file_path.name
82
96
  self.file_id = extract_file_id(self.filename)
83
- self.config = ALL_FILE_CONFIGS.get(self.file_id)
84
- self.is_duplicate = bool(self.config.dupe_of_id) if self.config else False
97
+ self.config = self.config or deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
85
98
 
86
- if self.is_local_extract_file():
87
- self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
88
-
89
- # Coerce FileConfig for court docs etc. to MessageCfg for email files extracted from that document
90
- if self.class_name() == EMAIL_CLASS and self.config and not isinstance(self.config, EmailCfg):
91
- self.config = EmailCfg.from_doc_cfg(self.config)
92
- else:
99
+ if 'url_slug' not in vars(self):
93
100
  self.url_slug = self.file_path.stem
94
101
 
95
102
  self._set_computed_fields(text=self.text or self._load_file())
@@ -97,11 +104,7 @@ class Document:
97
104
  self._extract_author()
98
105
  self.timestamp = self._extract_timestamp()
99
106
 
100
- def class_name(self) -> str:
101
- """Annoying workaround for circular import issues and isinstance()."""
102
- return str(type(self).__name__)
103
-
104
- def configured_description(self) -> str | None:
107
+ def config_description(self) -> str | None:
105
108
  """Overloaded in OtherFile."""
106
109
  if self.config and self.config.description:
107
110
  return f"({self.config.description})"
@@ -109,40 +112,51 @@ class Document:
109
112
  def date_str(self) -> str | None:
110
113
  return date_str(self.timestamp)
111
114
 
112
- def description_panel(self, include_hints: bool = False) -> Panel:
113
- """Panelized description() with info_txt(), used in search results."""
114
- hints = [Text('', style='italic').append(h) for h in (self.hints() if include_hints else [])]
115
- return Panel(Group(*([self.summary()] + hints)), border_style=self.document_type_style(), expand=False)
116
-
117
- def document_type_style(self) -> str:
118
- return DOC_TYPE_STYLES[self.class_name()]
119
-
120
115
  def duplicate_file_txt(self) -> Text:
121
116
  """If the file is a dupe make a nice message to explain what file it's a duplicate of."""
122
- if not self.config or not self.config.dupe_of_id:
117
+ if not self.config or not self.config.dupe_of_id or self.config.dupe_type is None:
123
118
  raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
124
119
 
125
- txt = Text(f"Not showing ", style='white dim italic').append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
126
- txt.append(f" because it's {self.config.duplicate_reason()} ")
120
+ txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
121
+ txt.append(f" because it's {DUPE_TYPE_STRS[self.config.dupe_type]} ")
127
122
  return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
128
123
 
129
124
  def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
130
125
  """Create a Text obj link to this document on epsteinify.com."""
131
- return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.url_slug, style)
126
+ return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
132
127
 
133
128
  def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
134
129
  """Create a Text obj link to this document on epstein.media."""
135
- return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.url_slug, style)
130
+ return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
136
131
 
137
132
  def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
138
133
  """Create a Text obj link to this document on EpsteinWeb."""
139
- return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.url_slug, style)
134
+ return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
135
+
136
+ def external_links(self, style: str = '', include_alt_link: bool = False) -> Text:
137
+ """Returns colored links to epstein.media and epsteinweb in a Text object."""
138
+ txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
139
+
140
+ if args.use_epstein_web:
141
+ txt.append(self.epstein_web_link(style=style))
142
+
143
+ if include_alt_link:
144
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
145
+ txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
146
+ else:
147
+ txt.append(self.epstein_media_link(style=style))
148
+
149
+ if include_alt_link:
150
+ txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
151
+ txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
152
+
153
+ return txt
140
154
 
141
155
  def file_info_panel(self) -> Group:
142
- """Panel with filename linking to raw file plus any hints/info about the file."""
143
- panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
144
- hints = [Padding(hint, INFO_PADDING) for hint in self.hints()]
145
- return Group(*([panel] + hints))
156
+ """Panel with filename linking to raw file plus any additional info about the file."""
157
+ panel = Panel(self.external_links(include_alt_link=True), border_style=self._border_style(), expand=False)
158
+ padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
159
+ return Group(*([panel] + padded_info))
146
160
 
147
161
  def file_size(self) -> int:
148
162
  return file_size(self.file_path)
@@ -150,34 +164,33 @@ class Document:
150
164
  def file_size_str(self) -> str:
151
165
  return file_size_str(self.file_path)
152
166
 
153
- def hints(self) -> list[Text]:
154
- """Additional info about the Document (author, description, and so on) to be desplayed in doc header."""
155
- hints = listify(self.info_txt())
156
- hint_msg = self.configured_description()
157
-
158
- if hint_msg:
159
- hints.append(highlighter(Text(hint_msg, style='white dim italic')))
160
-
161
- return without_falsey(hints)
167
+ def info(self) -> list[Text]:
168
+ """0 to 2 sentences containing the info_txt() as well as any configured description."""
169
+ return without_falsey([
170
+ self.info_txt(),
171
+ highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
172
+ ])
162
173
 
163
174
  def info_txt(self) -> Text | None:
164
175
  """Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
165
176
  return None
166
177
 
178
+ def is_duplicate(self) -> bool:
179
+ return bool(self.config and self.config.dupe_of_id)
180
+
167
181
  def is_local_extract_file(self) -> bool:
168
182
  """True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
169
183
  return is_local_extract_file(self.filename)
170
184
 
171
- def log(self, msg: str, level: int = logging.WARNING):
185
+ def log(self, msg: str, level: int = logging.INFO):
172
186
  """Log with filename as a prefix."""
173
- logger.log(level, f"{self.url_slug} {msg}")
187
+ logger.log(level, f"{self.file_path.stem} {msg}")
174
188
 
175
189
  def log_top_lines(self, n: int = 10, msg: str = '', level: int = logging.INFO) -> None:
176
190
  """Log first 'n' lines of self.text at 'level'. 'msg' can be optionally provided."""
177
191
  separator = '\n\n' if '\n' in msg else '. '
178
192
  msg = (msg + separator) if msg else ''
179
- msg = f"{self.filename}: {msg}First {n} lines:"
180
- logger.log(level, f"{msg}\n\n{self.top_lines(n)}\n")
193
+ self.log(f"{msg}First {n} lines:\n\n{self.top_lines(n)}\n", level)
181
194
 
182
195
  def matching_lines(self, _pattern: re.Pattern | str) -> list[MatchedLine]:
183
196
  """Return lines matching a regex as colored list[Text]."""
@@ -189,13 +202,13 @@ class Document:
189
202
  metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
190
203
  metadata['bytes'] = self.file_size()
191
204
  metadata['filename'] = f"{self.url_slug}.txt"
192
- metadata['type'] = self.class_name()
205
+ metadata['type'] = self._class_name()
193
206
 
194
207
  if self.is_local_extract_file():
195
208
  metadata['extracted_file'] = {
196
- 'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
197
- 'extracted_from_file': self.url_slug + '.txt',
198
- 'extracted_file_url': extracted_file_url(self.filename),
209
+ 'explanation': 'Manually extracted from one of the court filings.',
210
+ 'extracted_from': self.url_slug + '.txt',
211
+ 'url': extracted_file_url(self.filename),
199
212
  }
200
213
 
201
214
  return metadata
@@ -204,25 +217,6 @@ class Document:
204
217
  with open(self.file_path) as f:
205
218
  return f.read()
206
219
 
207
- def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
208
- """Returns colored links to epstein.media and and epsteinweb in a Text object."""
209
- txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
210
-
211
- if args.use_epstein_web_links:
212
- txt.append(self.epstein_web_link(style=style))
213
-
214
- if include_alt_link:
215
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
216
- txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
217
- else:
218
- txt.append(self.epstein_media_link(style=style))
219
-
220
- if include_alt_link:
221
- txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
222
- txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
223
-
224
- return txt
225
-
226
220
  def repair_ocr_text(self, repairs: dict[str | re.Pattern, str], text: str) -> str:
227
221
  """Apply a dict of repairs (key is pattern or string, value is replacement string) to text."""
228
222
  for k, v in repairs.items():
@@ -234,7 +228,7 @@ class Document:
234
228
  return text
235
229
 
236
230
  def sort_key(self) -> tuple[datetime, str, int]:
237
- if self.config and self.config.dupe_of_id:
231
+ if self.is_duplicate():
238
232
  sort_id = self.config.dupe_of_id
239
233
  dupe_idx = 1
240
234
  else:
@@ -245,11 +239,11 @@ class Document:
245
239
 
246
240
  def summary(self) -> Text:
247
241
  """Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
248
- txt = Text('').append(self.class_name(), style=self.document_type_style())
242
+ txt = Text('').append(self._class_name(), style=self._class_style())
249
243
  txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
250
244
 
251
245
  if self.timestamp:
252
- timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
246
+ timestamp_str = remove_time_from_timestamp_str(self.timestamp)
253
247
  txt.append(' (', style=SYMBOL_STYLE)
254
248
  txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
255
249
 
@@ -261,13 +255,32 @@ class Document:
261
255
 
262
256
  return txt
263
257
 
258
+ def summary_panel(self) -> Panel:
259
+ """Panelized summary(), optionally followed by info(), used in search results."""
260
+ sentences = [self.summary()]
261
+
262
+ if self.include_description_in_summary_panel:
263
+ sentences += [Text('', style='italic').append(h) for h in self.info()]
264
+
265
+ return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
266
+
264
267
  def top_lines(self, n: int = 10) -> str:
265
268
  return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
266
269
 
270
+ def warn(self, msg: str) -> None:
271
+ self.log(msg, level=logging.WARNING)
272
+
267
273
  def _border_style(self) -> str:
268
274
  """Should be overloaded in subclasses."""
269
275
  return 'white'
270
276
 
277
+ def _class_name(self) -> str:
278
+ """Annoying workaround for circular import issues and isinstance()."""
279
+ return str(type(self).__name__)
280
+
281
+ def _class_style(self) -> str:
282
+ return DOC_TYPE_STYLES[self._class_name()]
283
+
271
284
  def _extract_author(self) -> None:
272
285
  """Get author from config. Extended in Email subclass to also check headers."""
273
286
  if self.config and self.config.author:
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import re
3
+ from copy import deepcopy
3
4
  from dataclasses import asdict, dataclass, field
4
5
  from datetime import datetime
5
6
  from typing import ClassVar, cast
@@ -21,6 +22,7 @@ from epstein_files.util.constants import *
21
22
  from epstein_files.util.data import (TIMEZONE_INFO, collapse_newlines, escape_single_quotes, extract_last_name,
22
23
  flatten, remove_timezone, uniquify)
23
24
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
25
+ from epstein_files.util.file_helper import extract_file_id, file_stem_for_id
24
26
  from epstein_files.util.highlighted_group import get_style_for_name
25
27
  from epstein_files.util.logging import logger
26
28
  from epstein_files.util.rich import *
@@ -35,9 +37,11 @@ REPLY_TEXT_REGEX = re.compile(rf"^(.*?){REPLY_LINE_PATTERN}", re.DOTALL | re.IGN
35
37
  BAD_TIMEZONE_REGEX = re.compile(fr'\((UTC|GMT\+\d\d:\d\d)\)|{REDACTED}')
36
38
  DATE_HEADER_REGEX = re.compile(r'(?:Date|Sent):? +(?!by|from|to|via)([^\n]{6,})\n')
37
39
  TIMESTAMP_LINE_REGEX = re.compile(r"\d+:\d+")
40
+ LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
38
41
 
39
42
  SUPPRESS_LOGS_FOR_AUTHORS = ['Undisclosed recipients:', 'undisclosed-recipients:', 'Multiple Senders Multiple Senders']
40
43
  REWRITTEN_HEADER_MSG = "(janky OCR header fields were prettified, check source if something seems off)"
44
+ APPEARS_IN = 'Appears in'
41
45
  MAX_CHARS_TO_PRINT = 4000
42
46
  MAX_NUM_HEADER_LINES = 14
43
47
  MAX_QUOTED_REPLIES = 2
@@ -248,6 +252,7 @@ KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id i
248
252
 
249
253
  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
250
254
  USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
255
+ 'Alan Dlugash', # CCed with Richard Kahn
251
256
  'Alan Rogers', # Random CC
252
257
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
253
258
  'BS Stern', # A random fwd of email we have
@@ -264,6 +269,8 @@ USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIP
264
269
  'Lyn Fontanilla', # Random CC
265
270
  'Mark Albert', # Random CC
266
271
  'Matthew Schafer', # Random CC
272
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
273
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
267
274
  'Michael Simmons', # Random CC
268
275
  'Nancy Portland', # Lawrence Krauss CC
269
276
  'Oliver Goodenough', # Robert Trivers CC
@@ -318,6 +325,17 @@ class Email(Communication):
318
325
  rewritten_header_ids: ClassVar[set[str]] = set([])
319
326
 
320
327
  def __post_init__(self):
328
+ self.filename = self.file_path.name
329
+ self.file_id = extract_file_id(self.filename)
330
+
331
+ # Special handling for copying properties out of the config for the document this one was extracted from
332
+ if self.is_local_extract_file():
333
+ self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
334
+ extracted_from_doc_id = self.url_slug.split('_')[-1]
335
+
336
+ if extracted_from_doc_id in ALL_FILE_CONFIGS:
337
+ self._set_config_for_extracted_file(ALL_FILE_CONFIGS[extracted_from_doc_id])
338
+
321
339
  super().__post_init__()
322
340
 
323
341
  try:
@@ -570,7 +588,7 @@ class Email(Communication):
570
588
  self._merge_lines(3) # Merge 4th and 5th rows
571
589
  elif self.file_id in '026609 029402 032405 022695'.split():
572
590
  self._merge_lines(4) # Merge 5th and 6th rows
573
- elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381']:
591
+ elif self.file_id in ['019407', '031980', '030384', '033144', '030999', '033575', '029835', '030381', '033357']:
574
592
  self._merge_lines(2, 4)
575
593
  elif self.file_id in ['029154', '029163']:
576
594
  self._merge_lines(2, 5)
@@ -591,6 +609,10 @@ class Email(Communication):
591
609
  self._merge_lines(7, 9)
592
610
  elif self.file_id == '030299':
593
611
  self._merge_lines(7, 10)
612
+ elif self.file_id == '014860':
613
+ self._merge_lines(3)
614
+ self._merge_lines(4)
615
+ self._merge_lines(4)
594
616
  elif self.file_id == '029977':
595
617
  self._set_computed_fields(text=self.text.replace('Sent 9/28/2012 2:41:02 PM', 'Sent: 9/28/2012 2:41:02 PM'))
596
618
 
@@ -606,9 +628,8 @@ class Email(Communication):
606
628
  self._remove_line(3)
607
629
 
608
630
  if old_text != self.text:
609
- self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n', logging.INFO)
610
- self.log_top_lines(12, 'Result of modifications', logging.INFO)
611
- self.log('', logging.INFO)
631
+ self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n')
632
+ self.log_top_lines(12, 'Result of modifications')
612
633
 
613
634
  lines = self.repair_ocr_text(OCR_REPAIRS, self.text).split('\n')
614
635
  new_lines = []
@@ -646,6 +667,27 @@ class Email(Communication):
646
667
  sent_from = sent_from_match.group(0)
647
668
  return 'S' + sent_from[1:] if sent_from.startswith('sent') else sent_from
648
669
 
670
+ def _set_config_for_extracted_file(self, extracted_from_doc_cfg: DocCfg) -> None:
671
+ """Copy info from original config for file this document was extracted from."""
672
+ if self.file_id in ALL_FILE_CONFIGS:
673
+ self.config = cast(EmailCfg, deepcopy(ALL_FILE_CONFIGS[self.file_id]))
674
+ self.warn(f"Merging existing config for {self.file_id} with config for file this document was extracted from")
675
+ else:
676
+ self.config = EmailCfg(id=self.file_id)
677
+
678
+ extracted_from_description = extracted_from_doc_cfg.complete_description()
679
+
680
+ if extracted_from_description:
681
+ extracted_description = f"{APPEARS_IN} {extracted_from_description}"
682
+
683
+ if self.config.description:
684
+ self.warn(f"Overwriting description '{self.config.description}' with extract description '{self.config.description}'")
685
+
686
+ self.config.description = extracted_description
687
+
688
+ self.config.is_interesting = self.config.is_interesting or extracted_from_doc_cfg.is_interesting
689
+ self.warn(f"Constructed synthetic config: {self.config}")
690
+
649
691
  def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
650
692
  logger.debug(f"Printing '{self.filename}'...")
651
693
  yield self.file_info_panel()
@@ -697,7 +739,7 @@ class Email(Communication):
697
739
  yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
698
740
 
699
741
  if should_rewrite_header:
700
- self.log_top_lines(self.header.num_header_rows + 4, f'Original header:', logging.INFO)
742
+ self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
701
743
 
702
744
  @staticmethod
703
745
  def build_table(emails: list['Email'], _author: str | None) -> Table:
@@ -1,10 +1,10 @@
1
1
  import re
2
- from dataclasses import dataclass, field
2
+ from dataclasses import dataclass
3
3
  from datetime import datetime
4
4
 
5
5
  from rich.text import Text
6
6
 
7
- from epstein_files.util.constant.names import JEFFREY_EPSTEIN, ANTHONY_SCARAMUCCI, CELINA_DUBIN, EVA, STEVE_BANNON, UNKNOWN
7
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
8
8
  from epstein_files.util.data import extract_last_name
9
9
  from epstein_files.util.highlighted_group import get_style_for_name
10
10
  from epstein_files.util.logging import logger
@@ -19,15 +19,6 @@ DISPLAY_LAST_NAME_ONLY = [
19
19
  STEVE_BANNON,
20
20
  ]
21
21
 
22
- PHONE_NUMBER_MAPPING = {
23
- '+19174393646': ANTHONY_SCARAMUCCI,
24
- '+13109906526': STEVE_BANNON,
25
- '+16463880059': EVA,
26
- '+13108737937': CELINA_DUBIN,
27
- '+13108802851': STEVE_BANNON,
28
-
29
- }
30
-
31
22
  TEXTER_MAPPING = {
32
23
  'e:': JEFFREY_EPSTEIN,
33
24
  'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
@@ -48,13 +39,13 @@ class TextMessage:
48
39
 
49
40
  if self.author is None:
50
41
  self.author_str = UNKNOWN
51
- elif self.author in DISPLAY_LAST_NAME_ONLY:
42
+ elif self.author in DISPLAY_LAST_NAME_ONLY and not self.author_str:
52
43
  self.author_str = extract_last_name(self.author)
53
44
  else:
54
45
  self.author_str = self.author_str or self.author
55
46
 
56
47
  if not self.id_confirmed and self.author is not None and self.author != JEFFREY_EPSTEIN:
57
- self.author_str = self.author + ' (?)'
48
+ self.author_str += ' (?)'
58
49
 
59
50
  def timestamp(self) -> datetime:
60
51
  return datetime.strptime(self.timestamp_str, MSG_DATE_FORMAT)
@@ -8,11 +8,23 @@ from rich.text import Text
8
8
 
9
9
  from epstein_files.documents.other_file import OtherFile
10
10
  from epstein_files.util.constant.strings import JSON
11
+ from epstein_files.util.rich import INFO_STYLE
12
+
13
+ TEXT_FIELDS = [
14
+ 'caption',
15
+ 'standard',
16
+ 'subtitle',
17
+ 'text',
18
+ 'title',
19
+ 'to',
20
+ ]
11
21
 
12
22
 
13
23
  @dataclass
14
24
  class JsonFile(OtherFile):
15
25
  """File containing JSON data."""
26
+
27
+ include_description_in_summary_panel: ClassVar[bool] = False
16
28
  strip_whitespace: ClassVar[bool] = False
17
29
 
18
30
  def __post_init__(self):
@@ -27,7 +39,7 @@ class JsonFile(OtherFile):
27
39
  return JSON
28
40
 
29
41
  def info_txt(self) -> Text | None:
30
- return Text(f"JSON file, possibly iMessage or similar app metadata", style='white dim italic')
42
+ return Text(f"JSON file, contains preview data for links sent a messaging app", style=INFO_STYLE)
31
43
 
32
44
  def is_interesting(self):
33
45
  return False