epstein-files 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ from dateutil.parser import parse
8
8
 
9
9
  from epstein_files.util.constant.names import *
10
10
  from epstein_files.util.constant.strings import *
11
- from epstein_files.util.data import without_nones
11
+ from epstein_files.util.data import without_falsey
12
12
 
13
13
  DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
14
14
  Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
@@ -121,7 +121,7 @@ class DocCfg:
121
121
  elif self.category and self.author is None and self.description is None:
122
122
  return self.category
123
123
 
124
- pieces = without_nones([self.author, self.description])
124
+ pieces = without_falsey([self.author, self.description])
125
125
  return ' '.join(pieces) if pieces else None
126
126
 
127
127
  def metadata(self) -> Metadata:
@@ -223,6 +223,7 @@ class EmailCfg(CommunicationCfg):
223
223
  recipients (list[str | None]): Who received the email
224
224
  """
225
225
  actual_text: str | None = None # Override for the Email._actual_text() method for particularly broken emails
226
+ fwded_text_after: str | None = None # If set, any text after this is a fwd of an article or similar
226
227
  is_fwded_article: bool = False
227
228
  recipients: list[str | None] = field(default_factory=list)
228
229
 
@@ -234,7 +235,7 @@ class EmailCfg(CommunicationCfg):
234
235
  def from_doc_cfg(cls, cfg: DocCfg) -> 'EmailCfg':
235
236
  return cls(**asdict(cfg))
236
237
 
237
- # This is necessary for some dumb reason. @dataclass(repr=False) doesn't cut it
238
+ # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
238
239
  def __repr__(self) -> str:
239
240
  return super().__repr__()
240
241
 
@@ -245,6 +246,6 @@ class TextCfg(CommunicationCfg):
245
246
  super().__post_init__()
246
247
  self.category = TEXT_MESSAGE
247
248
 
248
- # This is necessary for some dumb reason. @dataclass(repr=False) doesn't cut it
249
+ # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
249
250
  def __repr__(self) -> str:
250
251
  return super().__repr__()
epstein_files/util/env.py CHANGED
@@ -6,8 +6,9 @@ from sys import argv
6
6
 
7
7
  from epstein_files.util.logging import datefinder_logger, env_log_level, logger
8
8
 
9
+ COUNT_WORDS_SCRIPT = 'count_words.py'
9
10
  DEFAULT_WIDTH = 154
10
- HTML_SCRIPTS = ['epstein_generate', 'generate_html.py', 'count_words.py']
11
+ HTML_SCRIPTS = ['epstein_generate', 'generate_html.py', COUNT_WORDS_SCRIPT]
11
12
 
12
13
 
13
14
  parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
@@ -65,7 +66,7 @@ datefinder_logger.setLevel(logger.level)
65
66
 
66
67
  # Massage args that depend on other args to the appropriate state
67
68
  if not (args.json_metadata or args.output_texts or args.output_emails or args.output_other_files):
68
- if is_html_script:
69
+ if is_html_script and current_script != COUNT_WORDS_SCRIPT and not args.make_clean:
69
70
  logger.warning(f"No output section chosen; outputting default of texts, selected emails, and other files...")
70
71
 
71
72
  args.output_texts = True
@@ -2,7 +2,6 @@ import re
2
2
  from dataclasses import dataclass, field
3
3
 
4
4
  from rich.highlighter import RegexHighlighter
5
- from rich.text import Text
6
5
 
7
6
  from epstein_files.util.constant.names import *
8
7
  from epstein_files.util.constant.strings import *
@@ -10,7 +9,7 @@ from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
10
9
  from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS, HEADER_ABBREVIATIONS,
11
10
  OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX, VIRGIN_ISLANDS)
12
11
  from epstein_files.util.doc_cfg import *
13
- from epstein_files.util.data import extract_last_name, listify
12
+ from epstein_files.util.data import extract_last_name, listify, without_falsey
14
13
 
15
14
  CIVIL_ATTORNEY = 'civil attorney'
16
15
  CRIMINAL_DEFENSE_ATTORNEY = 'criminal defense attorney'
@@ -48,7 +47,6 @@ class HighlightedText:
48
47
  label: str = ''
49
48
  pattern: str = ''
50
49
  style: str
51
- # Computed fields
52
50
  regex: re.Pattern = field(init=False)
53
51
  theme_style_name: str = field(init=False)
54
52
  _capture_group_label: str = field(init=False)
@@ -76,7 +74,7 @@ class HighlightedNames(HighlightedText):
76
74
  Attributes:
77
75
  category (str): optional string to use as an override for self.label in some contexts
78
76
  emailers (dict[str, str | None]): optional names to construct regexes for (values are descriptions)
79
- _pattern (str): complete regex pattern that combines 'pattern' with 'emailers'
77
+ _pattern (str): regex pattern combining 'pattern' with first & last names of all 'emailers'
80
78
  """
81
79
  category: str = ''
82
80
  emailers: dict[str, str | None] = field(default_factory=dict)
@@ -102,7 +100,7 @@ class HighlightedNames(HighlightedText):
102
100
  self.emailers.get(name),
103
101
  ]
104
102
 
105
- info_pieces = [p for p in info_pieces if p is not None]
103
+ info_pieces = without_falsey(info_pieces)
106
104
  return ', '.join(info_pieces) if info_pieces else None
107
105
 
108
106
  def _emailer_pattern(self, name: str) -> str:
@@ -114,10 +112,10 @@ class HighlightedNames(HighlightedText):
114
112
  if name in EMAILER_ID_REGEXES:
115
113
  pattern = EMAILER_ID_REGEXES[name].pattern
116
114
 
117
- # Include regex for last name
118
- # TODO: handle word boundary issue for names that end in symbols
119
- if SIMPLE_NAME_REGEX.match(last_name) and last_name.lower() not in NAMES_TO_NOT_HIGHLIGHT:
120
- pattern += fr"|{last_name}"
115
+ # Include regex for first and last names
116
+ for partial_name in [first_name, last_name]:
117
+ if SIMPLE_NAME_REGEX.match(partial_name) and partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT:
118
+ pattern += fr"|{partial_name}"
121
119
 
122
120
  return pattern
123
121
  elif ' ' not in name:
@@ -163,7 +161,7 @@ HIGHLIGHTED_NAMES = [
163
161
  ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
164
162
  BARBRO_C_EHNBOM: 'Swedish pharmaceuticals',
165
163
  FRED_HADDAD: "co-founder of Heck's in West Virginia",
166
- GERALD_BARTON: "Maryland property developer, fan of Trump's Irish golf course",
164
+ GERALD_BARTON: "Maryland property developer Landmark Land Company, fan of Trump's Irish golf course",
167
165
  GORDON_GETTY: 'heir of oil tycoon J. Paul Getty',
168
166
  NICHOLAS_RIBIS: 'Hilton CEO, former president of Trump Organization',
169
167
  'Philip Kafka': 'president of Prince Concepts (and son of Terry Kafka?)',
@@ -272,7 +270,7 @@ HIGHLIGHTED_NAMES = [
272
270
  HighlightedNames(
273
271
  label='europe',
274
272
  style='light_sky_blue3',
275
- pattern=r'(Angela )?Merk(el|le)|Austria|(Benjamin\s*)?Harnwell|Berlin|Borge|Brexit(eers?)?|Brit(ain|ish)|Brussels|Cannes|(Caroline|Jack)?\s*Lang(, Caroline)?|Cypr(iot|us)|Davos|ECB|EU|Europe(an)?(\s*Union)?|France|Geneva|Germany?|Gillard|Gree(ce|k)|Ital(ian|y)|Jacques|(Kevin\s*)?Rudd|Le\s*Pen|London|Macron|Melusine|Munich|(Natalia\s*)?Veselnitskaya|(Nicholas\s*)?Sarkozy|Nigel(\s*Farage)?|Oslo|Paris|Polish|(Sebastian )?Kurz|(Vi(c|k)tor\s+)?Orbah?n|Edward Rod Larsen|Strasbourg|Strauss[- ]?Kahn|Swed(en|ish)(?![-\s]+America)|Switzerland|(Tony\s)?Blair|Ukrain(e|ian)|Vienna|(Vitaly\s*)?Churkin|Zug',
273
+ pattern=r'(Angela )?Merk(el|le)|Austria|(Benjamin\s*)?Harnwell|Berlin|Borge|Boris\s*Johnson|Brexit(eers?)?|Brit(ain|ish)|Brussels|Cannes|(Caroline|Jack)?\s*Lang(, Caroline)?|Cypr(iot|us)|Davos|ECB|England|EU|Europe(an)?(\s*Union)?|Fr(ance|ench)|Geneva|Germany?|Gillard|Gree(ce|k)|Ital(ian|y)|Jacques|(Kevin\s*)?Rudd|Le\s*Pen|London|Macron|Melusine|Munich|(Natalia\s*)?Veselnitskaya|(Nicholas\s*)?Sarkozy|Nigel(\s*Farage)?|Norw(ay|egian)|Oslo|Paris|Polish|(Sebastian )?Kurz|(Vi(c|k)tor\s+)?Orbah?n|Edward Rod Larsen|Strasbourg|Strauss[- ]?Kahn|Swed(en|ish)(?![-\s]+America)|Switzerland|(Tony\s)?Blair|Ukrain(e|ian)|Vienna|(Vitaly\s*)?Churkin|Zug',
276
274
  emailers = {
277
275
  ANDRZEJ_DUDA: 'former president of Poland',
278
276
  MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
@@ -306,7 +304,7 @@ HIGHLIGHTED_NAMES = [
306
304
  HighlightedNames(
307
305
  label='finance',
308
306
  style='green',
309
- pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|BofA|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
307
+ pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|BofA|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
310
308
  emailers={
311
309
  AMANDA_ENS: 'Citigroup',
312
310
  DANIEL_SABBA: 'UBS Investment Bank',
@@ -342,7 +340,7 @@ HIGHLIGHTED_NAMES = [
342
340
  emailers = {
343
341
  ANIL_AMBANI: 'chairman of Reliance Group',
344
342
  VINIT_SAHNI: None,
345
- ZUBAIR_KHAN: 'Tranchulas CEO, InsightsPod founder',
343
+ ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, based in Islamabad and Dubai',
346
344
  }
347
345
  ),
348
346
  HighlightedNames(
@@ -391,7 +389,7 @@ HIGHLIGHTED_NAMES = [
391
389
  HighlightedNames(
392
390
  label='law enforcement',
393
391
  style='color(24) bold',
394
- pattern=r'ag|(Alicia\s*)?Valle|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
392
+ pattern=r'ag|(Alicia\s*)?Valle|attorney|((Bob|Robert)\s*)?Mueller|(Byung\s)?Pak|CFTC|CIA|CIS|CVRA|Dep(artmen)?t\.?\s*of\s*(the\s*)?(Justice|Treasury)|DHS|DOJ|FBI|FCPA|FDIC|Federal\s*Bureau\s*of\s*Investigation|FinCEN|FINRA|FOIA|FTC|IRS|(James\s*)?Comey|(Jennifer\s*Shasky\s*)?Calvery|((Judge|Mark)\s*)?(Carney|Filip)|(Kirk )?Blouin|KYC|NIH|NS(A|C)|OCC|OFAC|(Lann?a\s*)?Belohlavek|lawyer|(Michael\s*)?Reiter|OGE|Office\s*of\s*Government\s*Ethics|Police Code Enforcement|(Preet\s*)?Bharara|SCOTUS|SD(FL|NY)|Southern\s*District\s*of\s*(Florida|New\s*York)|SEC|Securities\s*and\s*Exchange\s*Commission|State\s*Dep(artmen)?t|Strzok|Supreme\s*Court|Treasury\s*(Dep(artmen)?t|Secretary)|TSA|USAID|(William\s*J\.?\s*)?Zloch',
395
393
  emailers = {
396
394
  ANN_MARIE_VILLAFANA: 'southern district of Florida U.S. Attorney',
397
395
  DANNY_FROST: 'Director of Communications at Manhattan DA',
@@ -426,7 +424,7 @@ HIGHLIGHTED_NAMES = [
426
424
  HighlightedNames(
427
425
  label='modeling',
428
426
  style='pale_violet_red1',
429
- pattern=r'\w+@mc2mm.com|(Nicole\s*)?Junkerman',
427
+ pattern=r'\w+@mc2mm.com|model(ed|ing)|(Nicole\s*)?Junkerman',
430
428
  emailers = {
431
429
  'Abi Schwinck': 'MC2 Model Management (?)',
432
430
  DANIEL_SIAD: None,
@@ -458,7 +456,8 @@ HIGHLIGHTED_NAMES = [
458
456
  HighlightedNames(
459
457
  label='republicans',
460
458
  style='bold dark_red',
461
- pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?Manafort|(Peter\s)?Navarro|Pompeo|Reagan|Republican|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|(Rex\s*)?Tillerson',
459
+ pattern=r'Alberto\sGonzale[sz]|(Alex\s*)?Acosta|(Bill\s*)?Barr|Bill\s*Shine|(Bob\s*)?Corker|(John\s*(R.?\s*)?)Bolton|Broidy|(Chris\s)?Christie|Devin\s*Nunes|(Don\s*)?McGa[hn]n|McMaster|(George\s*)?Nader|GOP|(Brett\s*)?Kavanaugh|Kissinger|Kobach|Koch\s*Brothers|Kolfage|Kudlow|Lewandowski|(Marco\s)?Rubio|(Mark\s*)Meadows|Mattis|(?<!Merwin Dela )Cruz|(Michael\s)?Hayden|((General|Mike)\s*)?(Flynn|Pence)|(Mitt\s*)?Romney|Mnuchin|Nikki|Haley|(Paul\s+)?Manafort|(Peter\s)?Navarro|Pompeo|Reagan|Reince|Priebus|Republican|(?<!Cynthia )(Richard\s*)?Nixon|Sasse|(Rex\s*)?Tillerson',
460
+ # There are no emails from these people; they're only listed here to automate the regex creation for both first + last names
462
461
  emailers = {
463
462
  RUDY_GIULIANI: 'disbarred formed mayor of New York City',
464
463
  TULSI_GABBARD: None,
@@ -475,7 +474,7 @@ HIGHLIGHTED_NAMES = [
475
474
  HighlightedNames(
476
475
  label='russia',
477
476
  style='red bold',
478
- pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
477
+ pattern=r'Alfa\s*Bank|Anya\s*Rasulova|Chernobyl|Day\s+One\s+Ventures|(Dmitry\s)?(Kiselyov|(Lana\s*)?Pozhidaeva|Medvedev|Rybolo(o?l?ev|vlev))|Dmitry|FSB|GRU|KGB|Kislyak|Kremlin|Kuznetsova|Lavrov|Lukoil|Moscow|(Oleg\s*)?Deripaska|Oleksandr Vilkul|Rosneft|RT|St.?\s*?Petersburg|Russian?|Sberbank|Soviet(\s*Union)?|USSR|Vladimir|(Vladimir\s*)?(Putin|Yudashkin)|Women\s*Empowerment|Xitrans',
479
478
  emailers = {
480
479
  MASHA_DROKOVA: 'silicon valley VC, former Putin Youth',
481
480
  RENATA_BOLOTOVA: 'former aspiring model, now fund manager at New York State Insurance Fund',
@@ -541,7 +540,7 @@ HIGHLIGHTED_NAMES = [
541
540
  HighlightedNames(
542
541
  label=VIRGIN_ISLANDS,
543
542
  style='sea_green1',
544
- pattern=r'Bahamas|Caribb?ean|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News xists
543
+ pattern=r'Antigua|Bahamas|Caribb?ean|Dominican\s*Republic|(Great|Little)\s*St.?\s*James|Haiti(an)?|(John\s*)deJongh(\s*Jr\.?)|(Kenneth E\. )?Mapp|Palm\s*Beach(?!\s*Post)|PBI|S(ain)?t.?\s*Thomas|USVI|VI|(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?', # TODO: VI Daily News should be yellow but it's hard bc Daily News exists
545
544
  emailers = {
546
545
  CECILE_DE_JONGH: f'First lady 2007-2015',
547
546
  STACEY_PLASKETT: 'non-voting member of Congress',
@@ -561,7 +560,7 @@ HIGHLIGHTED_NAMES = [
561
560
  HighlightedNames(
562
561
  label=STEVE_BANNON,
563
562
  style='color(58)',
564
- pattern=r'((Steve|Sean)\s*)?Bannon?',
563
+ pattern=r'((Steve|Sean)\s*)?Bannon?|(American\s*)?Dharma',
565
564
  ),
566
565
  HighlightedNames(
567
566
  emailers={STEVEN_HOFFENBERG: HEADER_ABBREVIATIONS['Hoffenberg']},
@@ -578,7 +577,18 @@ HIGHLIGHTED_NAMES = [
578
577
  HighlightedNames(emailers={PRINCE_ANDREW: 'British royal family'}, style='dodger_blue1'),
579
578
  HighlightedNames(emailers={SOON_YI_PREVIN: "wife of Woody Allen"}, style='hot_pink'),
580
579
  HighlightedNames(emailers={SULTAN_BIN_SULAYEM: 'CEO of DP World, chairman of ports in Dubai'}, style='green1'),
581
- HighlightedText(label='unknown', style='cyan', pattern=r'\(unknown\)'), # HighlightedText bc of word boundary issue
580
+
581
+ # HighlightedText not HighlightedNames bc of word boundary issue
582
+ HighlightedText(
583
+ label='unknown',
584
+ style='cyan',
585
+ pattern=r'\(unknown\)'
586
+ ),
587
+ HighlightedText(
588
+ label='phone_number',
589
+ style='bright_green',
590
+ pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|[\d+]{10,12}",
591
+ ),
582
592
  ]
583
593
 
584
594
  # Highlight regexes for things other than names, only used by RegexHighlighter pattern matching
@@ -593,11 +603,6 @@ HIGHLIGHTED_TEXTS = [
593
603
  style=f'{ARCHIVE_LINK_COLOR} underline',
594
604
  pattern=r"https?:[^\s]+",
595
605
  ),
596
- HighlightedText(
597
- label='phone_number',
598
- style='bright_green',
599
- pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|[\d+]{10,12}",
600
- ),
601
606
  HighlightedText(
602
607
  label='quoted_reply_line',
603
608
  style='dim',
@@ -29,6 +29,7 @@ LOG_THEME[f"{ReprHighlighter.base_style}epstein_filename"] = FILENAME_STYLE
29
29
  LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
30
30
 
31
31
 
32
+ # Augment the standard log highlighter with 'epstein_filename' matcher
32
33
  class LogHighlighter(ReprHighlighter):
33
34
  highlights = ReprHighlighter.highlights + [
34
35
  *[fr"(?P<{doc_type}>{doc_type})" for doc_type in DOC_TYPE_STYLES.keys()],
@@ -102,7 +102,7 @@ def print_emails(epstein_files: EpsteinFiles) -> int:
102
102
  epstein_files.print_email_device_info()
103
103
 
104
104
  if args.all_emails:
105
- _verify_all_emails_printed(epstein_files, already_printed_emails)
105
+ _verify_all_emails_were_printed(epstein_files, already_printed_emails)
106
106
 
107
107
  logger.warning(f"Rewrote {len(Email.rewritten_header_ids)} headers of {len(epstein_files.emails)} emails")
108
108
  return len(already_printed_emails)
@@ -146,31 +146,30 @@ def print_text_messages(epstein_files: EpsteinFiles) -> None:
146
146
 
147
147
 
148
148
  def write_urls() -> None:
149
+ """Write _URL style constant variables to a file bash scripts can load as env vars."""
149
150
  if args.output_file == 'index.html':
150
151
  logger.warning(f"Can't write env vars to '{args.output_file}', writing to '{URLS_ENV}' instead.\n")
151
152
  args.output_file = URLS_ENV
152
153
 
153
154
  url_vars = {
154
155
  k: v for k, v in vars(urls).items()
155
- if isinstance(v, str) and \
156
- k.split('_')[-1] in ['URL'] and \
157
- 'michelcrypt4d4mus' in v and \
158
- 'github.com' not in v and \
159
- 'BASE' not in v
156
+ if isinstance(v, str) and k.split('_')[-1] in ['URL'] and 'github.io' in v and 'BASE' not in k
160
157
  }
161
158
 
162
159
  with open(args.output_file, 'w') as f:
163
160
  for var_name, url in url_vars.items():
164
161
  key_value = f"{var_name}='{url}'"
165
- console.print(key_value, style='dim')
162
+
163
+ if not args.suppress_output:
164
+ console.print(key_value, style='dim')
165
+
166
166
  f.write(f"{key_value}\n")
167
167
 
168
168
  console.line()
169
169
  logger.warning(f"Wrote {len(url_vars)} URL variables to '{args.output_file}'\n")
170
170
 
171
171
 
172
-
173
- def _verify_all_emails_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
172
+ def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
174
173
  """Log warnings if some emails were never printed."""
175
174
  email_ids_that_were_printed = set([email.file_id for email in already_printed_emails])
176
175
  logger.warning(f"Printed {len(already_printed_emails)} emails of {len(email_ids_that_were_printed)} unique file IDs.")
@@ -148,7 +148,12 @@ def print_color_key() -> None:
148
148
 
149
149
 
150
150
  def print_header(epstein_files: 'EpsteinFiles') -> None:
151
- console.print(f"This site isn't optimized for mobile but if you get past the header it should be readable.", style='dim')
151
+ not_optimized_msg = f"This site isn't optimized for mobile"
152
+
153
+ if not args.all_emails:
154
+ not_optimized_msg += f" but if you get past the header it should be readable"
155
+
156
+ console.print(f"{not_optimized_msg}.\n", style='dim')
152
157
  print_page_title(width=TITLE_WIDTH)
153
158
  print_other_site_link()
154
159
  _print_external_links()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: epstein-files
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
5
5
  Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
6
6
  License: GPL-3.0-or-later
@@ -25,6 +25,7 @@ Requires-Dist: requests (>=2.32.5,<3.0.0)
25
25
  Requires-Dist: rich (>=14.2.0,<15.0.0)
26
26
  Project-URL: Emails, https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html
27
27
  Project-URL: Metadata, https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json
28
+ Project-URL: Repository, https://github.com/michelcrypt4d4mus/epstein_text_messages
28
29
  Project-URL: TextMessages, https://michelcrypt4d4mus.github.io/epstein_text_messages
29
30
  Project-URL: WordCounts, https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html
30
31
  Description-Content-Type: text/markdown
@@ -46,11 +47,20 @@ Description-Content-Type: text/markdown
46
47
  When running, you need to set the `DOCS_DIR` environment variable to the path of the folder of files you just downloaded. You can either create a `.env` file modeled on [`.env.example`](./.env.example) (which will set it permanently) or you can run with:
47
48
 
48
49
  ```bash
49
- DOCS_DIR=/path/to/epstein/ocr_txt_files ./generate.py
50
+ # Generate color highlighted texts/emails/other files
51
+ DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_generate
52
+
53
+ # Search
54
+ DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_search Bannon
55
+
56
+ # Show a color highlighted file
57
+ DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_show 030999
58
+ # This also works
59
+ DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_show HOUSE_OVERSIGHT_030999
50
60
  ```
51
61
 
52
- Run `./generate.py --help` for command line option assistance. Look in the [scripts](./scripts/) folder for various scripts.
53
- The first time you run anything it will take a few minutes to fix all the data, attribute the redacted emails, etc. Once you've run things once you can run the `./generate.py --pickled` to load the cached fixed up data and things will be quick.
62
+ Run `epstein_generate --help` for command line option assistance.
63
+ The first time you run anything it will take a few minutes to fix all the data, attribute the redacted emails, etc. After that first run you can use `epstein_generate --pickled` to load the cached, fixed-up data and things will be quick.
54
64
 
55
65
  #### As A Library
56
66
  ```python
@@ -0,0 +1,33 @@
1
+ epstein_files/__init__.py,sha256=ovNZSEnsLnGahLlDZIai9jC6ZWo7RFd79KstrOGwJak,4861
2
+ epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
3
+ epstein_files/documents/document.py,sha256=s6k3qqZ9pnAWBqkm45o1T4nTIUth1No2jxhiYTF3jpI,16732
4
+ epstein_files/documents/email.py,sha256=QRlOE3OIu_o4AU7o3XuBMzVTTV3OZCajEXpbIZUdsck,38211
5
+ epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
6
+ epstein_files/documents/imessage/text_message.py,sha256=wfWPQhwGG5Yzyhbr1NAQAY0bzRjjqVZmh8SPl48XmAM,3025
7
+ epstein_files/documents/json_file.py,sha256=Vf9iHiEhUqk-hEh6lGZX_mJNWZUUQWeT-UI3FhKW0hw,1101
8
+ epstein_files/documents/messenger_log.py,sha256=rOSy5yMerYBQ2r_o4SvuJ7Oeu-KhZVIIlh_Csb6uar0,5860
9
+ epstein_files/documents/other_file.py,sha256=aD1nFkwsQ1eKb9Li1xfqEGGbqhkQlpR-6vIt5GHMyk8,9087
10
+ epstein_files/epstein_files.py,sha256=XLyit5LmICnP44XoQWHUgMpRLS1-QgJZDaxO15qUnsI,18349
11
+ epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
12
+ epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
13
+ epstein_files/util/constant/names.py,sha256=iwnFqIceffYyLj7AaCsh3uve4Y0WbsZvHukjA1aFJWY,10104
14
+ epstein_files/util/constant/output_files.py,sha256=B2qEXfNI_gT2Vp5HGSld2xM0PfeZ27j65HNymSmyzX4,974
15
+ epstein_files/util/constant/strings.py,sha256=RBWJQnOXqBba8TwmjpvtkY8Jt54aFt4LYajoR8RanDk,1917
16
+ epstein_files/util/constant/urls.py,sha256=2AM7AUqJQcNjCLrB6bKdh2sMtBOn7u8a8mwNq6HC0Zk,5621
17
+ epstein_files/util/constants.py,sha256=GxI7RcnRgR8rDH4-dPrLlUH7q6mnhg5oK7ks56ZmpkA,110796
18
+ epstein_files/util/data.py,sha256=xwTqrbAi7ZDJM0iyFVOevnokP_oIQ2npkRjHzF1KGGY,2908
19
+ epstein_files/util/doc_cfg.py,sha256=I-n7_77hcA5DHGcqXISEjXOiTjYnTCLS7E16PUYw2kw,9736
20
+ epstein_files/util/env.py,sha256=mvYy3Kj576aULU6E7NPuN-5ZEVOn0Zb4HAwR8VOmUMo,4839
21
+ epstein_files/util/file_helper.py,sha256=v_bE10MHEcXti9DVJo4WqyOsG83Xrv05S3Vc70cYJkk,3082
22
+ epstein_files/util/highlighted_group.py,sha256=He6LDIhcT_YZyAkGHcOR-T_znACoEyNznKWOKH4lA40,35564
23
+ epstein_files/util/logging.py,sha256=b9iCTQkpAxu_3HphM5wm7VuPbWj9tocTuyUvMUTaI5A,2137
24
+ epstein_files/util/output.py,sha256=k4HrOKvP6Os0-LrxVRiNKk7Pw9M3RxrlYP2Eql7TFIA,7366
25
+ epstein_files/util/rich.py,sha256=x81QievKngC0oxsCpHRxKhjFstZ5hBVx91nFy63g_RQ,13709
26
+ epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
27
+ epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
28
+ epstein_files/util/word_count.py,sha256=XTINgLm01jFQlNgdiLCcVFCodXAIb1dNbaAvznoRb1o,6757
29
+ epstein_files-1.0.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
30
+ epstein_files-1.0.3.dist-info/METADATA,sha256=ZMzrOatPsGFecuYFiEGpvEHXBp9dh5GNzLO1NOuY_RM,4997
31
+ epstein_files-1.0.3.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
32
+ epstein_files-1.0.3.dist-info/entry_points.txt,sha256=EV9qTh_n9X_1MOiQnxG5hM6R5v0rfi8W4QE-bsZkw3o,238
33
+ epstein_files-1.0.3.dist-info/RECORD,,
@@ -1,33 +0,0 @@
1
- epstein_files/__init__.py,sha256=N4-A81KlSNWXyliBsjooi6GZr9_2qIB08qTG5RE9GzA,4725
2
- epstein_files/documents/communication.py,sha256=SunZdjMhR9v6y8LlQ6jhIu8vYjSndaBK0Su1mKnhfj0,2060
3
- epstein_files/documents/document.py,sha256=MOTS6AZFNOqnzpvYLXvoJC05ZVL9lTZnsRc-qjAjcJ4,16515
4
- epstein_files/documents/email.py,sha256=IpKeOuLTmHWowBvUUEp-tyTC8pwEmXg3XgLJkplQZWg,37717
5
- epstein_files/documents/emails/email_header.py,sha256=wkPfSLbmzkAeQwvhf0bAeFDLPbQT-EeG0v8vNNLYktM,7502
6
- epstein_files/documents/imessage/text_message.py,sha256=DMdd__L7UPad0YS49MJf_3bTVyE1BLfWafQbDQierC8,3272
7
- epstein_files/documents/json_file.py,sha256=Vf9iHiEhUqk-hEh6lGZX_mJNWZUUQWeT-UI3FhKW0hw,1101
8
- epstein_files/documents/messenger_log.py,sha256=-hSMFH_dyrjGLdph4SU2xQK4RpXOhkR3R_nqRrz3e1w,5620
9
- epstein_files/documents/other_file.py,sha256=JbKDtNrQ1Ua5vGPlZayON5Kgy0oJ-gHcdO9l9Iv9wRo,8930
10
- epstein_files/epstein_files.py,sha256=NpgQaxM3cC8CsAbzCyysakMbPdASWAt-wOhPZ879ZyQ,18018
11
- epstein_files/util/constant/common_words.py,sha256=aR0UjoWmxyR49XS-DtHECQ1CiA_bK8hNP6CQ1TS9yZA,3696
12
- epstein_files/util/constant/html.py,sha256=9U098TGzlghGg4WfxLYHyub5JGR17Dv7VP5i2MSu8Kk,1415
13
- epstein_files/util/constant/names.py,sha256=g0s9NA4zIexDGG7u5x0KDpLB9fyrYaYatsBhf_lH688,10253
14
- epstein_files/util/constant/output_files.py,sha256=B2qEXfNI_gT2Vp5HGSld2xM0PfeZ27j65HNymSmyzX4,974
15
- epstein_files/util/constant/strings.py,sha256=3JTqD0luJrC3NbGXn4q6P-gIaaNVx36P1oCmp92gAoM,1750
16
- epstein_files/util/constant/urls.py,sha256=x7Lv8yNNNLGU3GCvG4YbJ4qX3s_hiXffYuYUCjXyfbg,5526
17
- epstein_files/util/constants.py,sha256=xME94iH9a4b95N4yABs20Gn3Tu3cwmx5kNE_fPhsJEM,103420
18
- epstein_files/util/data.py,sha256=P4D_ggNNyScpTnu9wow8-67BlZtAXFKulJ5zbGtBR9A,2907
19
- epstein_files/util/doc_cfg.py,sha256=6H5EFLxG0ABG4BJHIEL7PSMBVwkcyjH1vvmitSQRa48,9615
20
- epstein_files/util/env.py,sha256=A2hEVg1HYymGd2odrLLo6k7yIvu0hh1XZniUW2u21dM,4734
21
- epstein_files/util/file_helper.py,sha256=v_bE10MHEcXti9DVJo4WqyOsG83Xrv05S3Vc70cYJkk,3082
22
- epstein_files/util/highlighted_group.py,sha256=7MfES52q10eq35L93iTIr_v5v0bamdQKCf7hlDIy7O8,35196
23
- epstein_files/util/logging.py,sha256=GjmOYiWAF1R_0Dvb5kXHAgPH5UJs-_gGcRig7LEDDL0,2066
24
- epstein_files/util/output.py,sha256=YH_-1YyNxiO29N88jvSAePbK5n25tgEIDFyP3sNNnnI,7309
25
- epstein_files/util/rich.py,sha256=gvSbWc-PQC5cAUg8zn02yZQkU57ExG1ArRLMIPrVxOc,13597
26
- epstein_files/util/search_result.py,sha256=1fxe0KPBQXBk4dLfu6m0QXIzYfZCzvaSkWqvghJGzxY,567
27
- epstein_files/util/timer.py,sha256=8hxW4Y1JcTUfnBrHh7sL2pM9xu1sL4HFQM4CmmzTarU,837
28
- epstein_files/util/word_count.py,sha256=XTINgLm01jFQlNgdiLCcVFCodXAIb1dNbaAvznoRb1o,6757
29
- epstein_files-1.0.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
30
- epstein_files-1.0.2.dist-info/METADATA,sha256=Wf4rPDzdGRxogbs0H8cZoFb7HsRTrkPvkYHdClbg74c,4653
31
- epstein_files-1.0.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
32
- epstein_files-1.0.2.dist-info/entry_points.txt,sha256=EV9qTh_n9X_1MOiQnxG5hM6R5v0rfi8W4QE-bsZkw3o,238
33
- epstein_files-1.0.2.dist-info/RECORD,,