epstein-files 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. epstein_files/__init__.py +75 -135
  2. epstein_files/documents/communication.py +9 -9
  3. epstein_files/documents/document.py +115 -87
  4. epstein_files/documents/email.py +154 -85
  5. epstein_files/documents/emails/email_header.py +7 -6
  6. epstein_files/documents/imessage/text_message.py +3 -2
  7. epstein_files/documents/json_file.py +17 -0
  8. epstein_files/documents/messenger_log.py +62 -3
  9. epstein_files/documents/other_file.py +165 -17
  10. epstein_files/epstein_files.py +128 -169
  11. epstein_files/util/constant/names.py +8 -1
  12. epstein_files/util/constant/output_files.py +29 -0
  13. epstein_files/util/constant/strings.py +27 -0
  14. epstein_files/util/constant/urls.py +25 -9
  15. epstein_files/util/constants.py +1018 -1045
  16. epstein_files/util/data.py +20 -55
  17. epstein_files/util/{file_cfg.py → doc_cfg.py} +121 -43
  18. epstein_files/util/env.py +19 -20
  19. epstein_files/util/file_helper.py +38 -21
  20. epstein_files/util/highlighted_group.py +229 -177
  21. epstein_files/util/logging.py +63 -0
  22. epstein_files/util/output.py +180 -0
  23. epstein_files/util/rich.py +29 -17
  24. epstein_files/util/search_result.py +14 -6
  25. epstein_files/util/timer.py +24 -0
  26. epstein_files/util/word_count.py +2 -1
  27. {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/METADATA +20 -4
  28. epstein_files-1.0.2.dist-info/RECORD +33 -0
  29. epstein_files-1.0.2.dist-info/entry_points.txt +7 -0
  30. epstein_files-1.0.0.dist-info/RECORD +0 -28
  31. {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/LICENSE +0 -0
  32. {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/WHEEL +0 -0
epstein_files/__init__.py CHANGED
@@ -10,185 +10,125 @@ from sys import exit
10
10
 
11
11
  from dotenv import load_dotenv
12
12
  load_dotenv()
13
+
14
+ from rich.markup import escape
13
15
  from rich.padding import Padding
16
+ from rich.panel import Panel
14
17
 
18
+ from epstein_files.epstein_files import EpsteinFiles, document_cls
19
+ from epstein_files.documents.document import INFO_PADDING, Document
15
20
  from epstein_files.documents.email import Email
16
- from epstein_files.epstein_files import EpsteinFiles, count_by_month
17
21
  from epstein_files.util.constant.html import *
18
22
  from epstein_files.util.constant.names import *
19
- from epstein_files.util.constant.strings import EMAIL_CLASS, MESSENGER_LOG_CLASS
20
- from epstein_files.util.data import Timer, dict_sets_to_lists, flatten
21
- from epstein_files.util.env import specified_names, args
22
- from epstein_files.util.file_helper import GH_PAGES_HTML_PATH
23
- from epstein_files.util.rich import *
24
-
25
- PRINT_COLOR_KEY_EVERY_N_EMAILS = 150
26
-
27
- # Order matters (will be order of output)
28
- PEOPLE_WHOSE_EMAILS_SHOULD_BE_PRINTED: list[str | None] = [
29
- JEREMY_RUBIN,
30
- AL_SECKEL,
31
- JOI_ITO,
32
- JABOR_Y,
33
- STEVEN_SINOFSKY,
34
- DANIEL_SIAD,
35
- JEAN_LUC_BRUNEL,
36
- STEVEN_HOFFENBERG,
37
- EHUD_BARAK,
38
- MARTIN_NOWAK,
39
- MASHA_DROKOVA,
40
- RENATA_BOLOTOVA,
41
- STEVE_BANNON,
42
- OLIVIER_COLOM,
43
- BORIS_NIKOLIC,
44
- PRINCE_ANDREW,
45
- JIDE_ZEITLIN,
46
- DAVID_STERN,
47
- MOHAMED_WAHEED_HASSAN,
48
- JENNIFER_JACQUET,
49
- None,
50
- ]
51
-
52
- # Order matters (will be order of output)
53
- PEOPLE_WHOSE_EMAILS_SHOULD_BE_TABLES: list[str | None] = [
54
- GHISLAINE_MAXWELL,
55
- LEON_BLACK,
56
- LANDON_THOMAS,
57
- KATHRYN_RUEMMLER,
58
- DARREN_INDYKE,
59
- RICHARD_KAHN,
60
- TYLER_SHEARS,
61
- SULTAN_BIN_SULAYEM,
62
- DEEPAK_CHOPRA,
63
- ARIANE_DE_ROTHSCHILD,
64
- TOM_PRITZKER,
65
- ]
23
+ from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, TEXT_MSGS_HTML_PATH, make_clean
24
+ from epstein_files.util.env import args, specified_names
25
+ from epstein_files.util.file_helper import coerce_file_path, extract_file_id
26
+ from epstein_files.util.logging import logger
27
+ from epstein_files.util.output import print_emails, print_json_metadata, print_json_stats, print_text_messages, write_urls
28
+ from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
29
+ from epstein_files.util.timer import Timer
66
30
 
67
31
 
68
32
  def generate_html() -> None:
33
+ if args.make_clean:
34
+ make_clean()
35
+ exit()
36
+
69
37
  timer = Timer()
70
38
  epstein_files = EpsteinFiles.get_files(timer)
39
+
40
+ if args.json_metadata:
41
+ print_json_metadata(epstein_files)
42
+ exit()
43
+
71
44
  print_header(epstein_files)
72
45
 
73
46
  if args.colors_only:
74
47
  exit()
75
48
 
76
- # Text messages section
77
49
  if args.output_texts:
78
50
  print_text_messages(epstein_files)
79
- timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs):,} text message logs')
51
+ timer.print_at_checkpoint(f'Printed {len(epstein_files.imessage_logs)} text message logs')
80
52
 
81
- # Emails section
82
53
  if args.output_emails:
83
54
  emails_printed = print_emails(epstein_files)
84
55
  timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
85
56
 
86
57
  if args.output_other_files:
87
- epstein_files.print_other_files_table()
88
- timer.print_at_checkpoint(f"Printed {len(epstein_files.other_files):,} other files")
89
- else:
90
- logger.warning(f"Skipping other files section...")
58
+ files_printed = epstein_files.print_other_files_table()
59
+ timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
91
60
 
92
61
  # Save output
93
- write_html(GH_PAGES_HTML_PATH)
94
- logger.warning(f"Total time: {timer.seconds_since_start()}")
62
+ write_html(ALL_EMAILS_PATH if args.all_emails else TEXT_MSGS_HTML_PATH)
63
+ logger.warning(f"Total time: {timer.seconds_since_start_str()}")
95
64
 
96
65
  # JSON stats (mostly used for building pytest checks)
97
66
  if args.json_stats:
98
- console.line(5)
99
67
  print_json_stats(epstein_files)
100
68
 
101
69
 
102
- def print_emails(epstein_files: EpsteinFiles) -> int:
103
- """Returns number of emails printed."""
104
- print_section_header(('Selections from ' if not args.all_emails else '') + 'His Emails')
105
- print_other_site_link(is_header=False)
106
-
107
- if len(specified_names) == 0:
108
- epstein_files.print_emailer_counts_table()
109
-
110
- emailers_to_print: list[str | None]
111
- emailer_tables: list[str | None] = []
112
- emails_that_were_printed: list[Email] = []
113
- num_emails_printed_since_last_color_key = 0
70
+ def epstein_diff():
71
+ """Diff the cleaned up text of two files."""
72
+ Document.diff_files(args.positional_args)
114
73
 
115
- if args.all_emails:
116
- console.print('Email conversations are sorted chronologically based on time of the first email.')
117
- emailers_to_print = sorted(epstein_files.all_emailers(), key=lambda e: epstein_files.earliest_email_at(e))
118
- print_numbered_list_of_emailers(emailers_to_print, epstein_files)
119
- else:
120
- if len(specified_names) > 0:
121
- emailers_to_print = specified_names
122
- else:
123
- emailers_to_print = PEOPLE_WHOSE_EMAILS_SHOULD_BE_PRINTED
124
-
125
- console.print('Email conversations grouped by counterparty can be found in the order listed below.')
126
- print_numbered_list_of_emailers(emailers_to_print)
127
- console.print("\nAfter that there's tables linking to (but not displaying) all known emails for each of these people:")
128
-
129
- if len(specified_names) > 0:
130
- if args.all_email_tables:
131
- emailer_tables = sorted(epstein_files.all_emailers(), key=lambda e: epstein_files.earliest_email_at(e))
132
- else:
133
- emailer_tables = PEOPLE_WHOSE_EMAILS_SHOULD_BE_TABLES
134
-
135
- print_numbered_list_of_emailers(emailer_tables)
136
74
 
137
- for author in emailers_to_print:
138
- newly_printed_emails = epstein_files.print_emails_for(author)
139
- emails_that_were_printed.extend(newly_printed_emails)
140
- num_emails_printed_since_last_color_key += len(newly_printed_emails)
75
+ def epstein_search():
76
+ """Search the cleaned up text of the files."""
77
+ _assert_positional_args()
78
+ epstein_files = EpsteinFiles.get_files(use_pickled=True)
141
79
 
142
- # Print color key every once in a while
143
- if num_emails_printed_since_last_color_key > PRINT_COLOR_KEY_EVERY_N_EMAILS:
144
- print_color_key()
145
- num_emails_printed_since_last_color_key = 0
146
-
147
- if len(emailer_tables) > 0 and len(specified_names) == 0:
148
- print_author_header(f"Email Tables for {len(emailer_tables)} Other People", 'white')
80
+ for search_term in args.positional_args:
81
+ temp_highlighter = build_highlighter(search_term)
82
+ search_results = epstein_files.docs_matching(search_term, specified_names)
83
+ console.line(2)
84
+ print_panel(f"Found {len(search_results)} documents matching '{search_term}'", padding=(0, 0, 0, 3))
149
85
 
150
- for name in emailer_tables:
151
- epstein_files.print_emails_table_for(name)
86
+ for search_result in search_results:
87
+ console.line()
152
88
 
153
- if len(specified_names) == 0:
154
- epstein_files.print_email_device_info()
89
+ if args.whole_file:
90
+ console.print(search_result.document)
91
+ else:
92
+ console.print(search_result.document.description_panel())
155
93
 
156
- logger.warning(f"Rewrote {len(Email.rewritten_header_ids)} headers of {len(epstein_files.emails)} emails")
94
+ for matching_line in search_result.lines:
95
+ line_txt = matching_line.__rich__()
96
+ console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
157
97
 
158
- if args.all_emails:
159
- email_ids_that_were_printed = set([email.file_id for email in emails_that_were_printed])
160
- logger.warning(f"Printed {len(emails_that_were_printed)} emails of {len(email_ids_that_were_printed)} unique file IDs.")
161
98
 
162
- for email in epstein_files.emails:
163
- if email.file_id not in email_ids_that_were_printed and not email.is_duplicate:
164
- logger.warning(f"Failed to print {email.description()}")
99
+ def epstein_show():
100
+ """Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
101
+ _assert_positional_args()
102
+ ids = [extract_file_id(arg) for arg in args.positional_args]
103
+ console.line()
165
104
 
166
- return len(emails_that_were_printed)
105
+ if args.pickled:
106
+ epstein_files = EpsteinFiles.get_files(use_pickled=True)
107
+ docs = epstein_files.get_documents_by_id(ids)
108
+ else:
109
+ raw_docs = [Document(coerce_file_path(id)) for id in ids]
110
+ docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
167
111
 
112
+ for doc in docs:
113
+ console.line()
114
+ console.print(doc)
168
115
 
169
- def print_text_messages(epstein_files: EpsteinFiles) -> None:
170
- print_section_header('Text Messages')
171
- print_centered("(conversations are sorted chronologically based on timestamp of first message)\n", style='gray30')
116
+ if args.raw:
117
+ console.line()
118
+ console.print(Panel(f"*** {doc.url_slug} RAW ***", expand=False, style=doc._border_style()))
119
+ console.print(escape(doc.raw_text()))
172
120
 
173
- if len(specified_names) == 0:
174
- log_files = epstein_files.imessage_logs
175
- else:
176
- log_files = flatten([epstein_files.imessage_logs_for(name) for name in specified_names])
121
+ if isinstance(doc, Email):
122
+ console.line()
123
+ console.print(Panel(f"*** {doc.url_slug} actual_text ***", expand=False, style=doc._border_style()))
124
+ console.print(escape(doc._actual_text()))
177
125
 
178
- for log_file in log_files:
179
- console.print(Padding(log_file))
180
- console.line(2)
181
126
 
182
- epstein_files.print_imessage_summary()
127
+ def epstein_dump_urls() -> None:
128
+ write_urls()
183
129
 
184
130
 
185
- def print_json_stats(epstein_files: EpsteinFiles) -> None:
186
- console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')
187
- print_json(f"{MESSENGER_LOG_CLASS} Sender Counts", epstein_files.imessage_sender_counts(), skip_falsey=True)
188
- print_json(f"{EMAIL_CLASS} Author Counts", epstein_files.email_author_counts, skip_falsey=True)
189
- print_json(f"{EMAIL_CLASS} Recipient Counts", epstein_files.email_recipient_counts, skip_falsey=True)
190
- print_json("Email signature_substitution_countss", epstein_files.email_signature_substitution_counts(), skip_falsey=True)
191
- print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures))
192
- print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors))
193
- print_json("email_unknown_recipient_file_ids", epstein_files.email_unknown_recipient_file_ids())
194
- print_json("count_by_month", count_by_month(epstein_files.all_documents()))
131
+ def _assert_positional_args():
132
+ if not args.positional_args:
133
+ console.print(f"\n ERROR: No positional args!\n", style='red1')
134
+ exit(1)
@@ -8,7 +8,7 @@ from rich.text import Text
8
8
  from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, Document
9
9
  from epstein_files.util.constant.names import UNKNOWN
10
10
  from epstein_files.util.constants import FALLBACK_TIMESTAMP
11
- from epstein_files.util.file_cfg import MessageCfg
11
+ from epstein_files.util.doc_cfg import CommunicationCfg
12
12
  from epstein_files.util.highlighted_group import get_style_for_name
13
13
  from epstein_files.util.rich import key_value_txt
14
14
 
@@ -20,7 +20,7 @@ class Communication(Document):
20
20
  """Superclass for Email and MessengerLog."""
21
21
  author_style: str = 'white'
22
22
  author_txt: Text = field(init=False)
23
- config: MessageCfg | None = None
23
+ config: CommunicationCfg | None = None
24
24
  timestamp: datetime = FALLBACK_TIMESTAMP # TODO this default sucks (though it never happens)
25
25
 
26
26
  def __post_init__(self):
@@ -31,22 +31,22 @@ class Communication(Document):
31
31
  def author_or_unknown(self) -> str:
32
32
  return self.author or UNKNOWN
33
33
 
34
- def description(self) -> Text:
35
- return self._description().append(CLOSE_PROPERTIES_CHAR)
36
-
37
- def is_attribution_uncertain(self) -> bool | None:
38
- return self.config and self.config.is_attribution_uncertain
34
+ def is_attribution_uncertain(self) -> bool:
35
+ return bool(self.config and self.config.is_attribution_uncertain)
39
36
 
40
37
  def raw_document_link_txt(self, _style: str = '', include_alt_link: bool = True) -> Text:
41
38
  """Overrides super() method to apply self.author_style."""
42
39
  return super().raw_document_link_txt(self.author_style, include_alt_link=include_alt_link)
43
40
 
41
+ def summary(self) -> Text:
42
+ return self._summary().append(CLOSE_PROPERTIES_CHAR)
43
+
44
44
  def timestamp_without_seconds(self) -> str:
45
45
  return TIMESTAMP_SECONDS_REGEX.sub('', str(self.timestamp))
46
46
 
47
- def _description(self) -> Text:
47
+ def _summary(self) -> Text:
48
48
  """One line summary mostly for logging."""
49
- txt = super().description().append(', ')
49
+ txt = super().summary().append(', ')
50
50
  return txt.append(key_value_txt('author', Text(f"'{self.author_or_unknown()}'", style=self.author_style)))
51
51
 
52
52