epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. epstein_files/__init__.py +31 -18
  2. epstein_files/documents/communication.py +9 -5
  3. epstein_files/documents/document.py +225 -136
  4. epstein_files/documents/doj_file.py +242 -0
  5. epstein_files/documents/doj_files/full_text.py +166 -0
  6. epstein_files/documents/email.py +138 -163
  7. epstein_files/documents/emails/email_header.py +21 -11
  8. epstein_files/documents/emails/emailers.py +223 -0
  9. epstein_files/documents/imessage/text_message.py +2 -3
  10. epstein_files/documents/json_file.py +18 -14
  11. epstein_files/documents/messenger_log.py +23 -39
  12. epstein_files/documents/other_file.py +48 -44
  13. epstein_files/epstein_files.py +54 -33
  14. epstein_files/person.py +142 -110
  15. epstein_files/util/constant/names.py +29 -6
  16. epstein_files/util/constant/output_files.py +2 -0
  17. epstein_files/util/constant/strings.py +12 -6
  18. epstein_files/util/constant/urls.py +17 -0
  19. epstein_files/util/constants.py +101 -174
  20. epstein_files/util/data.py +2 -0
  21. epstein_files/util/doc_cfg.py +20 -15
  22. epstein_files/util/env.py +24 -16
  23. epstein_files/util/file_helper.py +28 -6
  24. epstein_files/util/helpers/debugging_helper.py +13 -0
  25. epstein_files/util/helpers/env_helpers.py +21 -0
  26. epstein_files/util/highlighted_group.py +57 -16
  27. epstein_files/util/layout/left_bar_panel.py +26 -0
  28. epstein_files/util/logging.py +28 -13
  29. epstein_files/util/output.py +33 -10
  30. epstein_files/util/rich.py +28 -2
  31. epstein_files/util/word_count.py +7 -7
  32. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
  33. epstein_files-1.5.0.dist-info/RECORD +40 -0
  34. epstein_files-1.4.1.dist-info/RECORD +0 -34
  35. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
  36. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
  37. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py CHANGED
@@ -4,6 +4,7 @@ Reformat Epstein text message files for readability and count email senders.
4
4
 
5
5
  Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
6
6
  """
7
+ import re
7
8
  from sys import exit
8
9
 
9
10
  from dotenv import load_dotenv
@@ -15,16 +16,17 @@ from rich.text import Text
15
16
 
16
17
  from epstein_files.epstein_files import EpsteinFiles, document_cls
17
18
  from epstein_files.documents.document import INFO_PADDING, Document
19
+ from epstein_files.documents.doj_file import DojFile
18
20
  from epstein_files.documents.email import Email
19
21
  from epstein_files.documents.messenger_log import MessengerLog
20
22
  from epstein_files.documents.other_file import OtherFile
21
23
  from epstein_files.util.constant.output_files import make_clean
22
- from epstein_files.util.constant.strings import ID_REGEX
24
+ from epstein_files.util.constant.strings import HOUSE_OVERSIGHT_NOV_2025_ID_REGEX
23
25
  from epstein_files.util.data import flatten
24
26
  from epstein_files.util.env import args
25
27
  from epstein_files.util.file_helper import coerce_file_path, extract_file_id
26
28
  from epstein_files.util.logging import exit_with_error, logger
27
- from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
29
+ from epstein_files.util.output import (print_doj_files, print_emails_section, print_json_files, print_stats,
28
30
  print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info,
29
31
  print_json_metadata, write_urls)
30
32
  from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key, print_json,
@@ -62,6 +64,10 @@ def generate_html() -> None:
62
64
  if args.colors_only:
63
65
  exit()
64
66
 
67
+ if args.output_doj_files:
68
+ printed_doj_files = print_doj_files(epstein_files)
69
+ timer.log_section_complete('DojFile', epstein_files.doj_files, printed_doj_files)
70
+
65
71
  if args.output_texts:
66
72
  printed_logs = print_text_messages_section(epstein_files)
67
73
  timer.log_section_complete('MessengerLog', epstein_files.imessage_logs, printed_logs)
@@ -83,9 +89,8 @@ def generate_html() -> None:
83
89
  if args.debug:
84
90
  highlighter.print_highlight_counts(console)
85
91
 
86
- # JSON stats (mostly used for building pytest checks)
87
- if args.json_stats:
88
- print_json_stats(epstein_files)
92
+ if args.stats:
93
+ print_stats(epstein_files) # Used for building pytest checks
89
94
 
90
95
 
91
96
  def epstein_diff():
@@ -97,7 +102,7 @@ def epstein_grep():
97
102
  """Search the cleaned up text of the files."""
98
103
  epstein_files = EpsteinFiles.get_files()
99
104
 
100
- if ID_REGEX.match(args.positional_args[0]):
105
+ if HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.match(args.positional_args[0]):
101
106
  logger.warning(f"'{args.positional_args[0]}' seems to be an ID, running epstein_show instead...")
102
107
  epstein_show()
103
108
  return
@@ -113,7 +118,7 @@ def epstein_grep():
113
118
  lines = search_result.lines
114
119
 
115
120
  if (isinstance(doc, Email) and not args.output_emails) \
116
- or (isinstance(doc, OtherFile) and not args.output_other) \
121
+ or (isinstance(doc, (DojFile, OtherFile)) and not args.output_other) \
117
122
  or (isinstance(doc, MessengerLog) and not args.output_texts):
118
123
  doc.log(f"{type(doc).__name__} Skipping search result...")
119
124
  continue
@@ -124,22 +129,23 @@ def epstein_grep():
124
129
  doc.log(f"None of the matches for '{search_term}' seem to be in the body of the email")
125
130
  continue
126
131
 
127
- if doc.is_duplicate():
128
- if last_document and not last_document.is_duplicate():
132
+ if doc.is_duplicate:
133
+ if last_document and not last_document.is_duplicate:
129
134
  console.line()
130
135
 
131
136
  last_document = doc
132
- console.print(doc.duplicate_file_txt())
137
+ console.print(doc.duplicate_file_txt)
133
138
  elif args.whole_file:
134
139
  console.print(doc)
135
140
  else:
136
- console.print(doc.summary_panel())
141
+ console.print(doc.summary_panel)
137
142
 
138
143
  for matching_line in lines:
139
144
  line_txt = matching_line.__rich__()
140
145
  console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
141
146
 
142
147
  console.line()
148
+ console.print(doc.local_path_and_url + '\n', style='dim')
143
149
 
144
150
 
145
151
  def epstein_show():
@@ -153,28 +159,35 @@ def epstein_show():
153
159
  raw_docs = [doc for doc in flatten([p.emails for p in people])]
154
160
  else:
155
161
  ids = [extract_file_id(arg.strip().strip('_')) for arg in args.positional_args]
156
- raw_docs = [Document(coerce_file_path(id)) for id in ids]
162
+ logger.info(f"extracted IDs: {ids}")
163
+ raw_docs = [Document.from_file_id(id) for id in ids]
164
+ logger.info(f"raw docs: {raw_docs}")
157
165
 
166
+ # Rebuild the Document objs so we can see result of latest processing
158
167
  docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
168
+ logger.info(f"Document types: {[doc._class_name for doc in docs]}")
159
169
  except Exception as e:
170
+ console.print_exception()
160
171
  exit_with_error(str(e))
161
172
 
162
173
  for doc in docs:
163
174
  console.print('\n', doc, '\n')
164
175
 
165
176
  if args.raw:
166
- console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc._border_style()))
177
+ console.print(Panel(Text("RAW: ").append(doc.summary()), expand=False, style=doc.border_style))
167
178
  console.print(escape(doc.raw_text()), '\n')
168
179
 
169
180
  if isinstance(doc, Email):
170
- console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc._border_style()))
171
- console.print(escape(doc._actual_text()), '\n')
172
- metadata = doc.metadata()
173
- metadata['is_fwded_article'] = doc.is_fwded_article()
174
- metadata['is_word_count_worthy'] = doc.is_word_count_worthy()
181
+ console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc.border_style))
182
+ console.print(escape(doc._extract_actual_text()), '\n')
183
+ metadata = doc.metadata
184
+ metadata['is_fwded_article'] = doc.is_fwded_article
185
+ metadata['is_word_count_worthy'] = doc.is_word_count_worthy
175
186
  metadata['_is_first_for_user'] = doc._is_first_for_user
176
187
  print_json(f"{doc.file_id} Metadata", metadata)
177
188
 
189
+ console.print(doc.local_path_and_url, style='dim')
190
+
178
191
 
179
192
  def epstein_word_count() -> None:
180
193
  write_word_counts_html()
epstein_files/documents/communication.py CHANGED
@@ -21,26 +21,30 @@ class Communication(Document):
21
21
  config: CommunicationCfg | None = None
22
22
  timestamp: datetime = FALLBACK_TIMESTAMP # TODO this default sucks (though it never happens)
23
23
 
24
+ @property
24
25
  def author_or_unknown(self) -> str:
25
26
  return self.author or UNKNOWN
26
27
 
28
+ @property
27
29
  def author_style(self) -> str:
28
30
  return get_style_for_name(self.author)
29
31
 
32
+ @property
30
33
  def author_txt(self) -> Text:
31
34
  return styled_name(self.author)
32
35
 
36
+ @property
37
+ def timestamp_without_seconds(self) -> str:
38
+ return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
39
+
33
40
  def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
34
41
  """Overrides super() method to apply self.author_style."""
35
- return super().external_links_txt(self.author_style(), include_alt_links=include_alt_links)
42
+ return super().external_links_txt(self.author_style, include_alt_links=include_alt_links)
36
43
 
37
44
  def summary(self) -> Text:
38
45
  return self._summary().append(CLOSE_PROPERTIES_CHAR)
39
46
 
40
- def timestamp_without_seconds(self) -> str:
41
- return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
42
-
43
47
  def _summary(self) -> Text:
44
48
  """One line summary mostly for logging."""
45
49
  txt = super().summary().append(', ')
46
- return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style())))
50
+ return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown}'", style=self.author_style)))