epstein-files 1.1.5__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ HEADER_ABBREVIATIONS = {
19
19
  'bgC3': 'Bill Gates Ventures (renamed in 2018)',
20
20
  "Brock": 'Brock Pierce (crypto bro with a very sordid past)',
21
21
  "DB": "Deutsche Bank (maybe??)",
22
+ "GRAT": "Grantor Retained Annuity Trust (tax shelter)",
22
23
  'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
23
24
  'Jabor': '"an influential man in Qatar"',
24
25
  'Jared': "Jared Kushner",
@@ -62,6 +63,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
62
63
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
63
64
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
64
65
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
66
+ BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
65
67
  BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
66
68
  BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
67
69
  BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
@@ -83,6 +85,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
83
85
  JACKIE_PERCZEK: re.compile(r'jackie percze[kl]?', re.IGNORECASE),
84
86
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
85
87
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
88
+ JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
86
89
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
87
90
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
88
91
  JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
@@ -157,6 +160,7 @@ EMAILERS = [
157
160
  BILL_GATES,
158
161
  BILL_SIEGEL,
159
162
  BRAD_WECHSLER,
163
+ CHRISTINA_GALBRAITH,
160
164
  DANIEL_SABBA,
161
165
  'Danny Goldberg',
162
166
  DAVID_SCHOEN,
@@ -302,11 +306,10 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
302
306
  ################################################ EMAILS ################################################
303
307
  ########################################################################################################
304
308
 
305
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
-
307
309
  # Some emails have a lot of uninteresting CCs
308
- IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
309
- FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
310
+ FLIGHT_IN_2012_PEOPLE: list[Name] = ['Francis Derby', JANUSZ_BANASIAK, 'Louella Rabuyo', 'Richard Barnnet']
311
+ IRAN_DEAL_RECIPIENTS: list[Name] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
312
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
310
313
 
311
314
  EMAILS_CONFIG = [
312
315
  # 026294 and 026296 might also be Ittihadieh based on timing
@@ -409,11 +412,11 @@ EMAILS_CONFIG = [
409
412
  dupe_type='redacted'
410
413
  ),
411
414
  EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
412
- EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='Signature'),
413
- EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='Signature', duplicate_ids=['031120']),
415
+ EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
416
+ EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
414
417
  EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
415
- EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='Reply'),
416
- EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
418
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
419
+ EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
417
420
  EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
418
421
  EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
419
422
  EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
@@ -474,7 +477,7 @@ EMAILS_CONFIG = [
474
477
  EmailCfg(
475
478
  id='029977',
476
479
  author=LAWRANCE_VISOSKI,
477
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
480
+ recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
478
481
  attribution_reason=LARRY_REASON,
479
482
  duplicate_ids=['031129'],
480
483
  ),
@@ -491,14 +494,12 @@ EMAILS_CONFIG = [
491
494
  EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
492
495
  EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
493
496
  EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
494
- # 032581, 032604, 033025 may also be Masha based on timing, subject (interviews/articles), and sequential ID
495
497
  EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
496
498
  EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
497
- EmailCfg(id='033025', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
498
499
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
499
500
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
500
501
  EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
501
- EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
502
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
502
503
  EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
503
504
  EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
504
505
  EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
@@ -540,11 +541,12 @@ EMAILS_CONFIG = [
540
541
  author=SEAN_BANNON,
541
542
  attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
542
543
  ),
543
- EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
544
- EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
545
- EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
546
- EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
547
- EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
544
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
545
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
546
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
547
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
548
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
549
+ EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
548
550
  EmailCfg(
549
551
  id='019109',
550
552
  author=STEVEN_HOFFENBERG,
@@ -557,7 +559,7 @@ EMAILS_CONFIG = [
557
559
  attribution_reason='ends with "Respectfully, terry"',
558
560
  author=TERRY_KAFKA,
559
561
  fwded_text_after='From: Mike Cohen',
560
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
562
+ recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
561
563
  subject='Fw: The Iran Nuclear Deal',
562
564
  duplicate_ids=['028482'],
563
565
  ),
@@ -620,6 +622,7 @@ EMAILS_CONFIG = [
620
622
  EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
621
623
  EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
622
624
  EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
625
+ EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
623
626
  EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
624
627
  EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
625
628
  EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
@@ -711,6 +714,7 @@ EMAILS_CONFIG = [
711
714
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
712
715
 
713
716
  # Configure duplicates
717
+ EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
714
718
  EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
715
719
  EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
716
720
  EmailCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
@@ -1379,8 +1383,6 @@ OTHER_FILES_LETTERS = [
1379
1383
  description=f"letter about algorithmic trading",
1380
1384
  date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
1381
1385
  ),
1382
- DocCfg(id='029304', author=DONALD_TRUMP, description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}"),
1383
- DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the {TRUMP_ORG}", date='2011-08-07'),
1384
1386
  DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
1385
1387
  ]
1386
1388
 
@@ -1531,13 +1533,27 @@ OTHER_FILES_ACADEMIA = [
1531
1533
 
1532
1534
  # resumes and application letters
1533
1535
  OTHER_FILES_RESUMES = [
1536
+ DocCfg(
1537
+ id='029304',
1538
+ attached_to_email_id='029299',
1539
+ author=DONALD_TRUMP,
1540
+ description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}",
1541
+ ),
1534
1542
  DocCfg(id='022367', author='Jack J Grynberg', description=RESUME_OF, date='2014-07-01'),
1535
1543
  DocCfg(
1536
1544
  id='029302',
1545
+ attached_to_email_id='029299',
1537
1546
  author=MICHAEL_J_BOCCIO,
1538
1547
  description=f"{RESUME_OF} (former lawyer at the {TRUMP_ORG})",
1539
1548
  date='2011-08-07',
1540
1549
  ),
1550
+ DocCfg(
1551
+ id='029301',
1552
+ attached_to_email_id='029299',
1553
+ author=MICHAEL_J_BOCCIO,
1554
+ description=f"letter from former lawyer at the {TRUMP_ORG}",
1555
+ date='2011-08-07',
1556
+ ),
1541
1557
  DocCfg(id='029102', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1542
1558
  DocCfg(id='029104', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1543
1559
  DocCfg(id='015671', author='Robin Solomon', description=RESUME_OF, date='2015-06-02'), # She left Mount Sinai at some point in 2015,
@@ -1679,3 +1695,39 @@ REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sa
1679
1695
  REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1680
1696
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1681
1697
  SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1698
+
1699
+
1700
+ # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
1701
+ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1702
+ 'Alan Dlugash', # CCed with Richard Kahn
1703
+ 'Alan Rogers', # Random CC
1704
+ 'Andrew Friendly', # Presumably some relation of Kelly Friendly
1705
+ 'BS Stern', # A random fwd of email we have
1706
+ 'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
1707
+ 'Connie Zaguirre', # Random CC
1708
+ 'Dan Fleuette', # CC from sean bannon
1709
+ 'Danny Goldberg', # Random Paul Krassner emails
1710
+ GERALD_LEFCOURT, # Single CC
1711
+ GORDON_GETTY, # Random CC
1712
+ JEFF_FULLER, # Random Jean Luc Brunel CC
1713
+ 'Jojo Fontanilla', # Random CC
1714
+ 'Joseph Vinciguerra', # Random CC
1715
+ 'Larry Cohen', # Random Bill Gates CC
1716
+ 'Lyn Fontanilla', # Random CC
1717
+ 'Mark Albert', # Random CC
1718
+ 'Matthew Schafer', # Random CC
1719
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
1720
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
1721
+ 'Michael Simmons', # Random CC
1722
+ 'Nancy Portland', # Lawrence Krauss CC
1723
+ 'Oliver Goodenough', # Robert Trivers CC
1724
+ 'Peter Aldhous', # Lawrence Krauss CC
1725
+ 'Players2', # Hoffenberg CC
1726
+ 'Sam Harris', # Lawrence Krauss CC
1727
+ SAMUEL_LEFF, # Random CC
1728
+ 'Sean T Lehane', # Random CC
1729
+ 'Stephen Rubin', # Random CC
1730
+ 'Tim Kane', # Random CC
1731
+ 'Travis Pangburn', # Random CC
1732
+ 'Vahe Stepanian', # Random CC
1733
+ ]
@@ -38,25 +38,6 @@ def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
38
38
  return {k: sorted(list(v)) for k, v in d.items()}
39
39
 
40
40
 
41
- def extract_last_name(name: str) -> str:
42
- if ' ' not in name:
43
- return name
44
-
45
- names = name.removesuffix(QUESTION_MARKS).strip().split()
46
-
47
- if names[-1].startswith('Jr') and len(names[-1]) <= 3:
48
- return ' '.join(names[-2:])
49
- else:
50
- return names[-1]
51
-
52
-
53
- def extract_first_name(name: str) -> str:
54
- if ' ' not in name:
55
- return name
56
-
57
- return name.removesuffix(f" {extract_last_name(name)}")
58
-
59
-
60
41
  def flatten(_list: list[list[T]]) -> list[T]:
61
42
  return list(itertools.chain.from_iterable(_list))
62
43
 
@@ -62,7 +62,7 @@ class DocCfg:
62
62
 
63
63
  Attributes:
64
64
  id (str): ID of file
65
- author (str | None): Author of the document (if any)
65
+ author (Name): Author of the document (if any)
66
66
  category (str | None): Type of file
67
67
  date (str | None): If passed will be immediately parsed into the 'timestamp' field
68
68
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
@@ -74,7 +74,7 @@ class DocCfg:
74
74
  """
75
75
  id: str
76
76
  attached_to_email_id: str | None = None
77
- author: str | None = None
77
+ author: Name = None
78
78
  category: str | None = None
79
79
  date: str | None = None
80
80
  description: str | None = None
@@ -94,30 +94,40 @@ class DocCfg:
94
94
 
95
95
  def complete_description(self) -> str | None:
96
96
  """String that summarizes what is known about this document."""
97
+ description = ''
98
+
97
99
  if self.category and not self.description and not self.author:
98
100
  if self.category == JUNK:
99
101
  return None
100
102
  else:
101
- return self.category
103
+ description = self.category
102
104
  elif self.category == REPUTATION:
103
105
  author_str = f"{self.author} " if self.author else ''
104
- return f"{REPUTATION_MGMT}: {author_str}{self.description}"
106
+ description = f"{REPUTATION_MGMT}: {author_str}{self.description}"
105
107
  elif self.category == SKYPE_LOG:
106
108
  msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
107
- return f"{msg} {self.description}" if self.description else msg
109
+ description = f"{msg} {self.description}" if self.description else msg
108
110
  elif self.author and self.description:
109
111
  if self.category in [ACADEMIA, BOOK]:
110
112
  title = self.description if '"' in self.description else f'"{self.description}"'
111
- return f"{title} by {self.author}"
113
+ description = f"{title} by {self.author}"
112
114
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
113
- return f'{self.author} report: "{self.description}"'
115
+ description = f'{self.author} report: "{self.description}"'
114
116
  elif self.category == LEGAL and 'v.' in self.author:
115
- return f"{self.author}: {self.description}"
116
- elif self.category and self.author is None and self.description is None:
117
- return self.category
117
+ description = f"{self.author}: {self.description}"
118
+
119
+ if not description:
120
+ pieces = without_falsey([self.author, self.description])
121
+
122
+ if pieces:
123
+ description = ' '.join(pieces)
124
+ else:
125
+ return None
126
+
127
+ if self.attached_to_email_id:
128
+ description += f" attached to email {self.attached_to_email_id}"
118
129
 
119
- pieces = without_falsey([self.author, self.description])
120
- return ' '.join(pieces) if pieces else None
130
+ return description
121
131
 
122
132
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
123
133
  """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
@@ -209,13 +219,13 @@ class EmailCfg(CommunicationCfg):
209
219
  actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
210
220
  fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
211
221
  is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
212
- recipients (list[str | None]): Who received the email
222
+ recipients (list[Name]): Who received the email
213
223
  subject (str): Subject line
214
224
  """
215
225
  actual_text: str | None = None
216
226
  fwded_text_after: str | None = None
217
227
  is_fwded_article: bool = False
218
- recipients: list[str | None] = field(default_factory=list)
228
+ recipients: list[Name] = field(default_factory=list)
219
229
  subject: str | None = None
220
230
 
221
231
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
epstein_files/util/env.py CHANGED
@@ -8,7 +8,7 @@ from rich_argparse_plus import RichHelpFormatterPlus
8
8
  from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
9
9
  from epstein_files.util.logging import env_log_level, exit_with_error, logger
10
10
 
11
- DEFAULT_WIDTH = 145
11
+ DEFAULT_WIDTH = 155
12
12
  DEFAULT_FILE = 'default_file'
13
13
  EPSTEIN_GENERATE = 'epstein_generate'
14
14
  HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
@@ -38,6 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
38
38
  output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
39
39
  parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
40
40
  output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
41
+ output.add_argument('--emailers-info-png', action='store_true', help='write a .png of the emeailers info table')
41
42
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
42
43
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
43
44
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
@@ -66,6 +67,7 @@ debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debu
66
67
  args = parser.parse_args()
67
68
  is_html_script = parser.prog in HTML_SCRIPTS
68
69
 
70
+ args.build = args.build or args.emailers_info_png
69
71
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
70
72
  args.names = [None if n == 'None' else n for n in (args.names or [])]
71
73
  args.output_emails = args.output_emails or args.all_emails