epstein-files 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -20,8 +20,9 @@ from epstein_files.util.constant.urls import *
20
20
  from epstein_files.util.constants import FALLBACK_TIMESTAMP, HEADER_ABBREVIATIONS
21
21
  from epstein_files.util.data import json_safe
22
22
  from epstein_files.util.env import args
23
+ from epstein_files.util.file_helper import log_file_write
23
24
  from epstein_files.util.highlighted_group import ALL_HIGHLIGHTS, HIGHLIGHTED_NAMES, EpsteinHighlighter
24
- from epstein_files.util.logging import log_file_write, logger
25
+ from epstein_files.util.logging import logger
25
26
 
26
27
  TITLE_WIDTH = 50
27
28
  NUM_COLOR_KEY_COLS = 4
@@ -30,6 +31,7 @@ QUESTION_MARK_TXT = Text(QUESTION_MARKS, style='dim')
30
31
  GREY_NUMBERS = [58, 39, 39, 35, 30, 27, 23, 23, 19, 19, 15, 15, 15]
31
32
 
32
33
  DEFAULT_NAME_STYLE = 'gray46'
34
+ INFO_STYLE = 'white dim italic'
33
35
  KEY_STYLE='honeydew2 bold'
34
36
  SECTION_HEADER_STYLE = 'bold white on blue3'
35
37
  SOCIAL_MEDIA_LINK_STYLE = 'pale_turquoise4'
@@ -239,23 +241,26 @@ def print_numbered_list_of_emailers(_list: list[str | None], epstein_files = Non
239
241
  def print_other_site_link(is_header: bool = True) -> None:
240
242
  """Print a link to the emails site if we're building text messages site and vice versa."""
241
243
  site_type: SiteType = EMAIL if args.all_emails else TEXT_MESSAGE
244
+ link_style = OTHER_SITE_LINK_STYLE if is_header else 'light_slate_grey bold'
242
245
 
243
246
  if is_header:
244
247
  print_starred_header(f"This is the Epstein {site_type.title()}s site", num_spaces=4, num_stars=14)
245
248
 
246
249
  other_site_type: SiteType = TEXT_MESSAGE if site_type == EMAIL else EMAIL
247
- other_site_msg = "another site for" + (' all of' if other_site_type == EMAIL else '')
250
+ other_site_msg = "another site with" + (' all of' if other_site_type == EMAIL else '')
248
251
  other_site_msg += f" Epstein's {other_site_type}s also generated by this code"
249
- markup_msg = link_markup(SITE_URLS[other_site_type], other_site_msg, OTHER_SITE_LINK_STYLE)
252
+ markup_msg = link_markup(SITE_URLS[other_site_type], other_site_msg, link_style)
250
253
  print_centered(parenthesize(Text.from_markup(markup_msg)), style='bold')
251
254
 
252
- if is_header:
253
- word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
254
- print_centered(parenthesize(word_count_link))
255
- metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
256
- print_centered(parenthesize(metadata_link))
257
- json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
258
- print_centered(parenthesize(json_link))
255
+ if not is_header:
256
+ return
257
+
258
+ word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE)
259
+ print_centered(parenthesize(word_count_link))
260
+ metadata_link = link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE)
261
+ print_centered(parenthesize(metadata_link))
262
+ json_link = link_text_obj(WORD_COUNT_URL, "epstein's json files", AUX_SITE_LINK_STYLE)
263
+ print_centered(parenthesize(json_link))
259
264
 
260
265
 
261
266
  def print_page_title(expand: bool = True, width: int | None = None) -> None:
@@ -9,18 +9,22 @@ from rich.padding import Padding
9
9
  from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.emails.email_header import EmailHeader
12
- from epstein_files.util.constant.common_words import COMMON_WORDS, UNSINGULARIZABLE_WORDS
12
+ from epstein_files.epstein_files import EpsteinFiles
13
+ from epstein_files.util.constant.common_words import COMMON_WORDS_LIST, COMMON_WORDS, UNSINGULARIZABLE_WORDS
13
14
  from epstein_files.util.constant.names import OTHER_NAMES
15
+ from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
14
16
  from epstein_files.util.data import ALL_NAMES, flatten, sort_dict
15
- from epstein_files.util.env import args
17
+ from epstein_files.util.env import args, specified_names
16
18
  from epstein_files.util.logging import logger
17
- from epstein_files.util.rich import highlighter
18
- from epstein_files.util.search_result import SearchResult
19
+ from epstein_files.util.rich import (console, highlighter, print_centered, print_color_key, print_page_title,
20
+ print_panel, print_starred_header, write_html)
21
+ from epstein_files.util.search_result import MatchedLine, SearchResult
22
+ from epstein_files.util.timer import Timer
19
23
 
20
24
  FIRST_AND_LAST_NAMES = flatten([n.split() for n in ALL_NAMES])
21
25
  FIRST_AND_LAST_NAMES = [n.lower() for n in FIRST_AND_LAST_NAMES] + OTHER_NAMES
22
26
 
23
- HTML_REGEX = re.compile(r"com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
27
+ HTML_REGEX = re.compile(r"^http|#yiv|com/|cae-v2w=|content-(transfe|type)|font(/|-(family|size))|http|\.html?\??|margin-bottom|padding-left|quoted-printable|region=|text-decoration|ttps|www|\.(gif|jpe?g|png);?$")
24
28
  HYPHENATED_WORD_REGEX = re.compile(r"[a-z]+-[a-z]+", re.IGNORECASE)
25
29
  OK_SYMBOL_WORDS = ['mar-a-lago', 'p/e', 's&p', ':)', ':).', ';)', ':-)', ';-)']
26
30
  ONLY_SYMBOLS_REGEX = re.compile(r"^[^a-zA-Z0-9]+$")
@@ -187,6 +191,62 @@ class WordCount:
187
191
  yield f"Showing {len(word_txts):,} words appearing at least {MIN_COUNT_CUTOFF} times (out of {len(self.count):,} words)."
188
192
 
189
193
 
194
+ def write_word_counts_html() -> None:
195
+ timer = Timer()
196
+ epstein_files = EpsteinFiles.get_files(timer)
197
+ email_subjects: set[str] = set()
198
+ word_count = WordCount()
199
+
200
+ # Remove dupes, junk mail, and fwded articles from emails
201
+ emails = [e for e in epstein_files.emails if not (e.is_duplicate() or e.is_junk_mail() or e.is_fwded_article())]
202
+
203
+ for email in emails:
204
+ if specified_names and email.author not in specified_names:
205
+ continue
206
+
207
+ logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
208
+ lines = email.actual_text.split('\n')
209
+
210
+ if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
211
+ email_subjects.add(email.subject())
212
+ lines.append(email.subject())
213
+
214
+ for i, line in enumerate(lines):
215
+ if HTML_REGEX.search(line):
216
+ continue
217
+
218
+ for word in line.split():
219
+ word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
220
+
221
+ # Add in iMessage conversation words
222
+ imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
223
+
224
+ for imessage_log in imessage_logs:
225
+ logger.info(f"Counting words in {imessage_log}")
226
+
227
+ for i, msg in enumerate(imessage_log.messages):
228
+ if specified_names and msg.author not in specified_names:
229
+ continue
230
+ elif HTML_REGEX.search(line):
231
+ continue
232
+
233
+ for word in msg.text.split():
234
+ word_count.tally_word(word, SearchResult(imessage_log, [MatchedLine(msg.text, i)]))
235
+
236
+ print_page_title(expand=False)
237
+ print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
238
+ print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
239
+ console.line()
240
+ print_color_key()
241
+ console.line()
242
+ console.print(word_count)
243
+ console.line(2)
244
+ print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
245
+ console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
246
+ write_html(WORD_COUNT_HTML_PATH)
247
+ timer.print_at_checkpoint(f"Finished counting words")
248
+
249
+
190
250
  def _word_style(word: str | None) -> str:
191
251
  word = word or ''
192
252
  return 'bright_white' if word in FIRST_AND_LAST_NAMES else 'grey53'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.10
3
+ Version: 1.0.12
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -0,0 +1,33 @@
1
+ epstein_files/__init__.py,sha256=qVFB7sS6XSlZX-ByyDwdbGSn2h06aoX2Mx8WcgRb-To,4710
2
+ epstein_files/documents/communication.py,sha256=XapJlNfcaww3TpSkZIBE5c1Skqv_pFEFlIVi06V7k3E,2046
3
+ epstein_files/documents/document.py,sha256=2FxyqWKROi7w2SmaQ493oGKekNvYAHSuv2YsDhPNQBU,16987
4
+ epstein_files/documents/email.py,sha256=y8QTq349LWlm2LLUJ8rGcdkDbaGYJCV99wJytPcEMew,40587
5
+ epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
6
+ epstein_files/documents/imessage/text_message.py,sha256=4gFvTfulj_Su10lNQl6Hq_p9ArTrSmn5pfC22YRJXjI,2794
7
+ epstein_files/documents/json_file.py,sha256=tIYTwA3FYkwVZSpXvFYyUoH9m2sGYCD1U0ttamH6r1o,1306
8
+ epstein_files/documents/messenger_log.py,sha256=yT4WQyTE_W6yelug_YGpBMRJ0YxWNtX4rKoEj8n5TMA,6260
9
+ epstein_files/documents/other_file.py,sha256=pnl_q1o7ur3eeqGPwsYL2qbM3Y8O9LX6j6LbWnoxAiE,9939
10
+ epstein_files/epstein_files.py,sha256=SaD4DJJ5tRxY97Ei4BdOgLzHQ9wrBVGrP64CSqdmk-w,18691
11
+ epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
12
+ epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
13
+ epstein_files/util/constant/names.py,sha256=CLWXrln8J-Dth6C-YF7Wdy7UoA8dybKJyqOLETrBeek,10284
14
+ epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
15
+ epstein_files/util/constant/strings.py,sha256=02DwbhAe8qBRq5HOUFx5FafXJ1P2-RJf9TCVu2b7UDQ,1932
16
+ epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
17
+ epstein_files/util/constants.py,sha256=BpPRivoDYFI0uLU35kKpOdrSI6Rr9cmcrRj9-kANVrs,111834
18
+ epstein_files/util/data.py,sha256=X3AutdW-ascIlE2bz1BtN0Bywqpe4OwYzJ-diEpfogI,2992
19
+ epstein_files/util/doc_cfg.py,sha256=_f03qtA7qVbViHwqMXC4O5nfNbh90zDSq6El9Ior6f0,8996
20
+ epstein_files/util/env.py,sha256=HnYcfHSNkwVJ_T75Woy43_OpDyxD0KHPj3GxcVx86N4,5751
21
+ epstein_files/util/file_helper.py,sha256=tacTe1GcAnckPFvjMgxRRSLnFgr2aVIYsgfDR_C9uXk,2780
22
+ epstein_files/util/highlighted_group.py,sha256=xrDLB05YUYGsU6vDvhvENMvIyjEz-9eb9xN-RjfCQbQ,36531
23
+ epstein_files/util/logging.py,sha256=fuREq06xUUI3DfCV2JE-8QM-sQKxpLDj0_AYFO6qR1M,1983
24
+ epstein_files/util/output.py,sha256=XcflgSOlzUGj6FsFaK6j4Dljld8A0h_uVV7ERcI_EYw,8120
25
+ epstein_files/util/rich.py,sha256=8-4IA5bwPBdDPqkPdymq3zVKB9hfy3nrT7fUrN_XevY,14744
26
+ epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
27
+ epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
28
+ epstein_files/util/word_count.py,sha256=8qBTuq3d0Q-3fwiuECKWi2RfL-KUiZD8TciwvfL0D_o,9353
29
+ epstein_files-1.0.12.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
30
+ epstein_files-1.0.12.dist-info/METADATA,sha256=imTDdrHjWC-bWuw58SAyjYyiziZsqHkO7ODQUntw6YQ,5480
31
+ epstein_files-1.0.12.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
32
+ epstein_files-1.0.12.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
33
+ epstein_files-1.0.12.dist-info/RECORD,,
@@ -1,72 +0,0 @@
1
- # Count word usage in emails and texts
2
- import re
3
-
4
- from epstein_files.epstein_files import EpsteinFiles
5
- from epstein_files.util.constant.common_words import COMMON_WORDS_LIST
6
- from epstein_files.util.constant.output_files import WORD_COUNT_HTML_PATH
7
- from epstein_files.util.env import args, specified_names
8
- from epstein_files.util.logging import logger
9
- from epstein_files.util.rich import (console, print_centered, print_color_key, print_page_title, print_panel,
10
- print_starred_header, write_html)
11
- from epstein_files.util.search_result import MatchedLine, SearchResult
12
- from epstein_files.util.timer import Timer
13
- from epstein_files.util.word_count import WordCount
14
-
15
- HTML_REGEX = re.compile(r"^http|#yiv")
16
-
17
-
18
- def write_word_counts_html() -> None:
19
- timer = Timer()
20
- epstein_files = EpsteinFiles.get_files(timer)
21
- email_subjects: set[str] = set()
22
- word_count = WordCount()
23
-
24
- # Remove dupes, junk mail, and fwded articles from emails
25
- emails = [
26
- e for e in epstein_files.emails
27
- if not (e.is_duplicate or e.is_junk_mail() or (e.config and e.config.is_fwded_article)) \
28
- and (len(specified_names) == 0 or e.author in specified_names)
29
- ]
30
-
31
- for email in emails:
32
- logger.info(f"Counting words in {email}\n [SUBJECT] {email.subject()}")
33
- lines = email.actual_text.split('\n')
34
-
35
- if email.subject() not in email_subjects and f'Re: {email.subject()}' not in email_subjects:
36
- email_subjects.add(email.subject())
37
- lines.append(email.subject())
38
-
39
- for i, line in enumerate(lines):
40
- if HTML_REGEX.search(line):
41
- continue
42
-
43
- for word in line.split():
44
- word_count.tally_word(word, SearchResult(email, [MatchedLine(line, i)]))
45
-
46
- # Add in iMessage conversation words
47
- imessage_logs = epstein_files.imessage_logs_for(specified_names) if specified_names else epstein_files.imessage_logs
48
-
49
- for imessage_log in imessage_logs:
50
- logger.info(f"Counting words in {imessage_log}")
51
-
52
- for msg in imessage_log.messages():
53
- if len(specified_names) > 0 and msg.author not in specified_names:
54
- continue
55
- elif HTML_REGEX.search(line):
56
- continue
57
-
58
- for word in msg.text.split():
59
- word_count.tally_word(word, SearchResult(imessage_log, [msg.text]))
60
-
61
- print_page_title(expand=False)
62
- print_starred_header(f"Most Common Words in {len(emails):,} Emails and {len(imessage_logs)} iMessage Logs")
63
- print_centered(f"(excluding {len(COMMON_WORDS_LIST)} particularly common words at bottom)", style='dim')
64
- console.line()
65
- print_color_key()
66
- console.line()
67
- console.print(word_count)
68
- console.line(2)
69
- print_panel(f"{len(COMMON_WORDS_LIST):,} Excluded Words", centered=True)
70
- console.print(', '.join(COMMON_WORDS_LIST), highlight=False)
71
- write_html(WORD_COUNT_HTML_PATH)
72
- timer.print_at_checkpoint(f"Finished counting words")
@@ -1,34 +0,0 @@
1
- epstein_files/__init__.py,sha256=SfLLu9X7rfHdmZcl8JGmiIxZ_E1RVsmCrh8sLO4jNPU,4859
2
- epstein_files/count_words.py,sha256=i1pYaQzX7b9S3pyV3RM_8asbQJ1PEk8wJgLOG6Mf0D8,2966
3
- epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
4
- epstein_files/documents/document.py,sha256=BUaioSvOmfsR-ULa6hJy3WYg-hBDC-kqafUheMJ-jFY,16665
5
- epstein_files/documents/email.py,sha256=H34b2zt_TrPUgXHwZXybjmLE9-QNAtezs9NVSCPOSGM,38462
6
- epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
7
- epstein_files/documents/imessage/text_message.py,sha256=wfWPQhwGG5Yzyhbr1NAQAY0bzRjjqVZmh8SPl48XmAM,3025
8
- epstein_files/documents/json_file.py,sha256=1Cx_3uM38Dwgrbs8fU55TUZKSrCsmd4QpHKWtfWdudw,1089
9
- epstein_files/documents/messenger_log.py,sha256=DHlQpbLbMITMpMtCYk2vcRc7-CTvYvOXql-9nDUc3tQ,5887
10
- epstein_files/documents/other_file.py,sha256=NdVlCYcyzHvOInReqF-zvHJI1hwtzMWW9ekDojHIb4U,9091
11
- epstein_files/epstein_files.py,sha256=EEx8Auwv8z0FkRrCi7wE8iuuRQd6K1rQDMc2vdbrsh4,18298
12
- epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
13
- epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
14
- epstein_files/util/constant/names.py,sha256=uYhv9xa4NO5jCk9zrGpPKFkcVVaMY2qtBC7ZaKGK3J8,10135
15
- epstein_files/util/constant/output_files.py,sha256=BkV4_gmdj46RfGy5SFYp6dgTty3FtlBth5YGmaGutls,1700
16
- epstein_files/util/constant/strings.py,sha256=FDtksfH50PSxtSBw9XhmqxtrgRgGxdIvGiAR2bbPpu4,1899
17
- epstein_files/util/constant/urls.py,sha256=0IdCVVvXib0i-4TZFkVHoS4zCbjOBZWcr6NkGxsmQWM,4981
18
- epstein_files/util/constants.py,sha256=gp5HWHt5FHd916r4UpjcJKslO5L-Wno6kjA4F3ZA4YU,110884
19
- epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
20
- epstein_files/util/doc_cfg.py,sha256=5Pb__bP00mKi9ACv33omZQA-TBzumc7D2Td_Mk4M5DY,9822
21
- epstein_files/util/env.py,sha256=PaPBi27-npU9egt9LHxr5qR65B2DPHwt7Xc9sx5VN-M,5225
22
- epstein_files/util/file_helper.py,sha256=v_bE10MHEcXti9DVJo4WqyOsG83Xrv05S3Vc70cYJkk,3082
23
- epstein_files/util/highlighted_group.py,sha256=dajLYuSbT69zMWf6XKUOZI6ZcgFy-Beq7Nsg9qlteck,35715
24
- epstein_files/util/logging.py,sha256=4hVl1Qw1qRMSVEYKXZxrvdQuSIMBgTPskzvNMNu8268,2185
25
- epstein_files/util/output.py,sha256=wLjFBGR5ffn4cLep12G3OmUR0H3WtEMXeVMOXtd-6ig,7909
26
- epstein_files/util/rich.py,sha256=rdHzn4XRB2erQSf2yYyPakRmd9ixqBUdS8-BVOUAXnE,14603
27
- epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
28
- epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
29
- epstein_files/util/word_count.py,sha256=eGzcsoAvMcutRUFOJnVuEp9_28H74to7T9jTdGUZnuI,6757
30
- epstein_files-1.0.10.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
31
- epstein_files-1.0.10.dist-info/METADATA,sha256=zi10sSw5g5BZDRovIeWlpMYEgLbqFxSl7QII9jUuKdw,5480
32
- epstein_files-1.0.10.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
33
- epstein_files-1.0.10.dist-info/entry_points.txt,sha256=5qYgwAXpxegeAicD_rzda_trDRnUC51F5UVDpcZ7j6Q,240
34
- epstein_files-1.0.10.dist-info/RECORD,,