epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epstein_files/person.py CHANGED
@@ -120,7 +120,7 @@ class Person:
120
120
  else:
121
121
  email_count = len(self.unique_emails())
122
122
  num_days = self.email_conversation_length_in_days()
123
- title_suffix = f"to/from {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
123
+ title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
124
124
 
125
125
  title = f"Found {email_count} emails {title_suffix}"
126
126
  width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -136,8 +136,12 @@ class Person:
136
136
  highlight_group = self.highlight_group()
137
137
 
138
138
  if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
139
- return highlight_group.info_for(self.name)
140
- elif self.is_uninteresting_cc:
139
+ info = highlight_group.info_for(self.name)
140
+
141
+ if info:
142
+ return info
143
+
144
+ if self.is_uninteresting_cc:
141
145
  if self.has_any_epstein_emails():
142
146
  return UNINTERESTING_CC_INFO
143
147
  else:
@@ -152,7 +156,7 @@ class Person:
152
156
  elif self.name is None:
153
157
  return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
154
158
  elif self.category() == JUNK:
155
- return Text(f"({JUNK} mail)", style='tan dim')
159
+ return Text(f"({JUNK} mail)", style='bright_black dim')
156
160
  elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
157
161
  if self.info_str() == UNINTERESTING_CC_INFO:
158
162
  return Text(f"({self.info_str()})", style='wheat4 dim')
@@ -168,7 +172,11 @@ class Person:
168
172
  else:
169
173
  return None
170
174
  else:
171
- return Text(self.info_str())
175
+ return Text(self.info_str(), style=self.style())
176
+
177
+ def internal_link(self) -> Text:
178
+ """Kind of like an anchor link to the section of the page containing these emails."""
179
+ return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
172
180
 
173
181
  def is_a_mystery(self) -> bool:
174
182
  """Return True if this is someone we theoretically could know more about."""
@@ -237,7 +245,13 @@ class Person:
237
245
  console.line()
238
246
 
239
247
  def sort_key(self) -> list[int | str]:
240
- counts = [len(self.unique_emails()), int(self.has_any_epstein_emails())]
248
+ counts = [
249
+ len(self.unique_emails()),
250
+ -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
251
+ -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
252
+ int(self.has_any_epstein_emails()),
253
+ ]
254
+
241
255
  counts = [-1 * count for count in counts]
242
256
 
243
257
  if args.sort_alphabetical:
@@ -276,6 +290,11 @@ class Person:
276
290
  highlighted = highlighted or people
277
291
  highlighted_names = [p.name for p in highlighted]
278
292
  is_selection = len(people) != len(highlighted) or args.emailers_info
293
+ all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
294
+ email_authors = [p for p in people if p.emails_by() and p.name]
295
+ attributed_emails = [email for email in all_emails if email.author]
296
+ footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
297
+ f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
279
298
 
280
299
  if is_selection:
281
300
  title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
@@ -283,7 +302,7 @@ class Person:
283
302
  else:
284
303
  title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
285
304
 
286
- table = build_table(title)
305
+ table = build_table(title, caption=footer)
287
306
  table.add_column('First')
288
307
  table.add_column('Name', max_width=24, no_wrap=True)
289
308
  table.add_column('Category', justify='left', style='dim italic')
@@ -298,6 +317,7 @@ class Person:
298
317
 
299
318
  for person in people:
300
319
  earliest_email_date = person.earliest_email_date()
320
+ is_on_page = False if show_epstein_total else person.name in highlighted_names
301
321
  year_months = (earliest_email_date.year * 12) + earliest_email_date.month
302
322
 
303
323
  # Color year rollovers more brightly
@@ -311,14 +331,14 @@ class Person:
311
331
 
312
332
  table.add_row(
313
333
  Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
314
- person.name_txt(), # TODO: make link?
334
+ person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
315
335
  person.category_txt(),
316
336
  f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
317
337
  Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
318
338
  Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
319
339
  f"{person.email_conversation_length_in_days()}",
320
340
  person.info_txt() or '',
321
- style='' if person.name in highlighted_names else 'dim',
341
+ style='' if show_epstein_total or is_on_page else 'dim',
322
342
  )
323
343
 
324
344
  return table
@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
61
61
  DONALD_TRUMP = 'Donald Trump'
62
62
  EDUARDO_ROBLES = 'Eduardo Robles'
63
63
  EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
64
+ EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
64
65
  EHUD_BARAK = 'Ehud Barak'
65
66
  ERIC_ROTH = 'Eric Roth'
66
67
  FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
129
130
  NADIA_MARCINKO = 'Nadia Marcinko'
130
131
  NEAL_KASSELL = 'Neal Kassell'
131
132
  NICHOLAS_RIBIS = 'Nicholas Ribis'
133
+ NILI_PRIELL_BARAK = 'Nili Priell Barak'
132
134
  NOAM_CHOMSKY = 'Noam Chomsky'
133
135
  NORMAN_D_RAU = 'Norman D. Rau'
134
136
  OLIVIER_COLOM = 'Olivier Colom'
@@ -215,13 +217,13 @@ UBS = 'UBS'
215
217
  # First and last names that should be made part of a highlighting regex for emailers
216
218
  NAMES_TO_NOT_HIGHLIGHT = """
217
219
  al alain alan alfredo allen alex alexander amanda andres andrew anthony
218
- bard barrett barry bennet bill black bob boris brad bruce
220
+ bard barrett barry bennet bernard bill black bob boris brad brenner bruce
219
221
  caroline carolyn chris christina cohen
220
- dan daniel danny darren dave david donald
222
+ dan daniel danny darren dave david debbie donald
221
223
  ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
222
- faith forget fred friendly frost fuller
223
- gerald george gold gordon
224
- haddad harry hay heather henry hill hoffman
224
+ faith fisher forget fred friendly frost fuller
225
+ gates gerald george gold gordon
226
+ haddad harry hay heather henry hill hoffman howard
225
227
  ian ivan
226
228
  jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
227
229
  kahn karl kate katherine kelly ken kevin krassner
@@ -230,7 +232,7 @@ NAMES_TO_NOT_HIGHLIGHT = """
230
232
  nancy neal new nicole norman
231
233
  owen
232
234
  paul paula pen peter philip prince
233
- randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
235
+ randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
234
236
  scott sean skip stanley stern stephen steve steven stone susan
235
237
  the thomas tim tom tony tyler
236
238
  victor
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
243
245
  aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
244
246
  baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
245
247
  chapman charles charlie christopher clint cohen colin collins conway
246
- davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
248
+ davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
247
249
  edmond elizabeth emily entwistle erik evelyn
248
250
  ferguson flachsbart francis franco frank
249
251
  gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth
@@ -1,6 +1,7 @@
1
1
  from pathlib import Path
2
2
 
3
3
  from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
4
+ from epstein_files.util.logging import logger
4
5
 
5
6
  # Files output by the code
6
7
  HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
16
17
  EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
17
18
 
18
19
  # Deployment URLS
19
- # NOTE: don't rename these variables without changing deploy.sh!
20
+ # NOTE: don't rename these variables without changing deploy.sh
21
+ GH_REPO_NAME = 'epstein_text_messages'
20
22
  GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
21
- TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
23
+ TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
22
24
  ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
23
25
  CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
24
26
  JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
44
46
  def make_clean() -> None:
45
47
  """Delete all build artifacts."""
46
48
  for build_file in BUILD_ARTIFACTS:
47
- if build_file.exists():
48
- print(f"Removing build file '{build_file}'...")
49
- build_file.unlink()
49
+ for file in [build_file, Path(f"{build_file}.txt")]:
50
+ if file.exists():
51
+ logger.warning(f"Removing build file '{file}'...")
52
+ file.unlink()
@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
64
64
  QUESTION_MARKS = '(???)'
65
65
 
66
66
  # Regexes
67
- FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
67
+ ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
68
+ FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
68
69
  FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
69
70
  QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
70
71
 
@@ -6,6 +6,7 @@ from inflection import parameterize
6
6
  from rich.text import Text
7
7
 
8
8
  from epstein_files.util.constant.output_files import *
9
+ from epstein_files.util.constant.strings import remove_question_marks
9
10
  from epstein_files.util.env import args
10
11
  from epstein_files.util.file_helper import coerce_file_stem
11
12
 
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
22
23
  ROLLCALL = 'RollCall'
23
24
  TWITTER = 'search X'
24
25
 
25
- GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
26
+ GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
26
27
  GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
27
28
  ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
28
29
  EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
30
+ TO_FROM = 'to/from'
29
31
 
30
32
  extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
31
33
 
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
72
74
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
73
75
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
74
76
 
75
-
76
77
  PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
77
78
  EPSTEIN_MEDIA: epstein_media_person_url,
78
79
  EPSTEIN_WEB: epstein_web_person_url,
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
98
99
  return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
99
100
 
100
101
 
102
+ def internal_link_to_emails(name: str) -> str:
103
+ """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
104
+ search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
105
+ return f"{this_site_url()}#:~:text={search_term}"
106
+
107
+
101
108
  def link_markup(
102
109
  url: str,
103
110
  link_text: str | None = None,
@@ -121,6 +128,10 @@ def other_site_url() -> str:
121
128
  return SITE_URLS[other_site_type()]
122
129
 
123
130
 
131
+ def this_site_url() -> str:
132
+ return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
133
+
134
+
124
135
  CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
125
136
  THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
126
137
  THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)
@@ -39,6 +39,7 @@ HEADER_ABBREVIATIONS = {
39
39
  'MBZ': "Mohamed bin Zayed Al Nahyan (Emirates sheikh)",
40
40
  "Miro": MIROSLAV_LAJCAK,
41
41
  "Mooch": "Anthony 'The Mooch' Scaramucci (Skybridge crypto bro)",
42
+ "NPA": 'non-prosecution agreement',
42
43
  "Terje": TERJE_ROD_LARSEN,
43
44
  "VI": f"U.S. {VIRGIN_ISLANDS}",
44
45
  "Woody": "Woody Allen",
@@ -52,14 +53,14 @@ HEADER_ABBREVIATIONS = {
52
53
 
53
54
  # Emailers
54
55
  EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
55
- ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|tz)|AlanDersh', re.IGNORECASE),
56
+ ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
56
57
  ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
57
58
  AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
58
59
  ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
59
60
  ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
60
61
  ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
61
62
  ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
62
- ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?Rothschild|Ariane', re.IGNORECASE),
63
+ ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Roths(ch|hc)?ild|Ariane', re.IGNORECASE),
63
64
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
64
65
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
65
66
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
@@ -86,7 +87,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
86
87
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
87
88
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
88
89
  JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
89
- JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
90
+ JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
90
91
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
91
92
  JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
92
93
  JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
@@ -104,7 +105,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
104
105
  LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
105
106
  MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
106
107
  MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
107
- MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
108
+ MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
108
109
  MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
109
110
  MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
110
111
  MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -113,7 +114,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
113
114
  MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
114
115
  MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
115
116
  MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
116
- MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]|i)|Wolff', re.IGNORECASE),
117
+ MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
117
118
  MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
118
119
  MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
119
120
  NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
@@ -195,6 +196,7 @@ EMAILERS = [
195
196
  'Peter Aldhous',
196
197
  'Peter Green',
197
198
  ROGER_SCHANK,
199
+ 'Roy Black',
198
200
  STEVEN_PFEIFFER,
199
201
  'Steven Victor MD',
200
202
  'Susan Edelman',
@@ -513,7 +515,7 @@ EMAILS_CONFIG = [
513
515
  recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
514
516
  duplicate_ids=['031973']
515
517
  ),
516
- EmailCfg(id='032457', author=PAUL_KRASSNER), # Bad OCR (nofix)
518
+ EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, 'Nancy Cain']), # Bad OCR (nofix)
517
519
  EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
518
520
  EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
519
521
  EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
@@ -643,7 +645,16 @@ EMAILS_CONFIG = [
643
645
  EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
644
646
  EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
645
647
  EmailCfg(id='033050', actual_text='schwartman'),
648
+ EmailCfg(id='031036', description=f"{BARBRO_C_EHNBOM} related donation and Swedish girls discussion"),
646
649
  EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
650
+ EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
651
+ EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
652
+ EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
653
+ EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
654
+ EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
655
+ EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
656
+ EmailCfg(id='031320', description=f"Epstein and {RICHARD_KAHN} appear to be discussing routing donatings through {PEGGY_SIEGAL}"),
657
+ EmailCfg(id='016693', description='signed "MM"'),
647
658
  EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
648
659
  EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
649
660
  EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
@@ -661,6 +672,9 @@ EMAILS_CONFIG = [
661
672
  EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
662
673
  EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
663
674
  EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
675
+ EmailCfg(id='026829', is_fwded_article=True), # Taxes
676
+ EmailCfg(id='020443', is_fwded_article=True), # WSJ Deplorables Bannon
677
+ EmailCfg(id='030372', is_fwded_article=True), # Bannon China Iran
664
678
  EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
665
679
  EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
666
680
  EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
@@ -711,6 +725,8 @@ EMAILS_CONFIG = [
711
725
  EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
712
726
  EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
713
727
  EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
728
+ EmailCfg(id='031990', is_fwded_article=True), # newsmax on ken starr
729
+ EmailCfg(id='029433', is_fwded_article=True), # Estate Planning After the Enactment of the Tax Cuts and Jobs Act
714
730
  EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
715
731
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
716
732
 
@@ -733,12 +749,12 @@ EMAILS_CONFIG = [
733
749
  EmailCfg(id='033512', duplicate_ids=['033361']),
734
750
  EmailCfg(id='030299', duplicate_ids=['021794']),
735
751
  EmailCfg(id='033575', duplicate_ids=['012898']),
736
- EmailCfg(id='031428', duplicate_ids=['031388']),
752
+ EmailCfg(id='031428', is_fwded_article=True, duplicate_ids=['031388']),
737
753
  EmailCfg(id='031980', duplicate_ids=['019409']),
738
754
  EmailCfg(id='033486', duplicate_ids=['033156']),
739
755
  EmailCfg(id='025790', duplicate_ids=['031994']),
740
756
  EmailCfg(id='028497', duplicate_ids=['026228']),
741
- EmailCfg(id='033528', duplicate_ids=['033517']),
757
+ EmailCfg(id='033528', is_fwded_article=True, duplicate_ids=['033517']),
742
758
  EmailCfg(id='019412', duplicate_ids=['028621']),
743
759
  EmailCfg(id='027053', duplicate_ids=['028765']),
744
760
  EmailCfg(id='027049', duplicate_ids=['028773']),
@@ -1355,7 +1371,12 @@ OTHER_FILES_FINANCE = [
1355
1371
  DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
1356
1372
 
1357
1373
  # private placement memoranda
1358
- DocCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
1374
+ DocCfg(
1375
+ id='024432',
1376
+ date='2006-09-27',
1377
+ description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
1378
+ ),
1379
+
1359
1380
  DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
1360
1381
  ]
1361
1382
 
@@ -1689,13 +1710,16 @@ for cfg in ALL_CONFIGS:
1689
1710
 
1690
1711
  # Email-related regexes (they have to live here because of circular dependencies)
1691
1712
  FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
1713
+ FRENCH_REPLY_PATTERN = r"Le .* a ecrit:"
1714
+ GERMAN_REPLY_PATTERN = r"Am \d\d\.\d\d\..*schrieb.*"
1715
+ NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
1692
1716
  REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
1693
1717
  REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
1694
1718
  REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
1695
1719
  REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1696
- REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1720
+ REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1697
1721
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1698
- SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1722
+ SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
1699
1723
 
1700
1724
 
1701
1725
  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
@@ -22,6 +22,7 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
22
22
  PACIFIC_TZ = tz.gettz("America/Los_Angeles")
23
23
  TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
24
24
 
25
+ all_elements_same = lambda _list: len(_list) == 0 or all(x == _list[0] for x in _list)
25
26
  collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
26
27
  date_str = lambda dt: dt.isoformat()[0:10] if dt else None
27
28
  escape_double_quotes = lambda text: text.replace('"', r'\"')
epstein_files/util/env.py CHANGED
@@ -38,7 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
38
38
  output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
39
39
  parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
40
40
  output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
41
- output.add_argument('--emailers-info', action='store_true', help='write a .png of the eeailers info table')
41
+ output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the eeailers info table')
42
42
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
43
43
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
44
44
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
@@ -61,31 +61,34 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
61
61
  debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
62
62
  debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
63
63
  debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
64
+ debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
65
+ debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plain text version of output')
64
66
 
65
67
 
66
68
  # Parse args
67
69
  args = parser.parse_args()
68
70
  is_html_script = parser.prog in HTML_SCRIPTS
69
71
 
70
- args.build = args.build
71
72
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
72
73
  args.names = [None if n == 'None' else n.strip() for n in (args.names or [])]
73
74
  args.output_emails = args.output_emails or args.all_emails
74
75
  args.output_other = args.output_other or args.all_other_files or args.uninteresting
75
76
  args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
76
77
  args.width = args.width if is_html_script else None
78
+ args.any_output_selected = any([is_output_arg(arg) and val for arg, val in vars(args).items()])
79
+
80
+ if not (args.any_output_selected or args.email_timeline or args.emailers_info):
81
+ logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
82
+ args.output_emails = args.output_other = args.output_texts = True
77
83
 
78
84
  if is_html_script:
79
85
  if args.positional_args:
80
86
  exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
81
87
 
82
88
  if parser.prog == EPSTEIN_GENERATE:
83
- if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
89
+ if args.any_output_selected:
84
90
  if args.email_timeline:
85
91
  exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
86
- elif not args.email_timeline and not args.emailers_info:
87
- logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
88
- args.output_texts = args.output_emails = args.output_other = True
89
92
 
90
93
  if args.build == DEFAULT_FILE:
91
94
  if args.all_emails: