epstein-files 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ HEADER_ABBREVIATIONS = {
19
19
  'bgC3': 'Bill Gates Ventures (renamed in 2018)',
20
20
  "Brock": 'Brock Pierce (crypto bro with a very sordid past)',
21
21
  "DB": "Deutsche Bank (maybe??)",
22
+ "GRAT": "Grantor Retained Annuity Trust (tax shelter)",
22
23
  'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
23
24
  'Jabor': '"an influential man in Qatar"',
24
25
  'Jared': "Jared Kushner",
@@ -62,6 +63,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
62
63
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
63
64
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
64
65
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
66
+ BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
65
67
  BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
66
68
  BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
67
69
  BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
@@ -83,6 +85,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
83
85
  JACKIE_PERCZEK: re.compile(r'jackie percze[kl]?', re.IGNORECASE),
84
86
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
85
87
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
88
+ JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
86
89
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
87
90
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
88
91
  JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
@@ -157,6 +160,7 @@ EMAILERS = [
157
160
  BILL_GATES,
158
161
  BILL_SIEGEL,
159
162
  BRAD_WECHSLER,
163
+ CHRISTINA_GALBRAITH,
160
164
  DANIEL_SABBA,
161
165
  'Danny Goldberg',
162
166
  DAVID_SCHOEN,
@@ -302,11 +306,10 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
302
306
  ################################################ EMAILS ################################################
303
307
  ########################################################################################################
304
308
 
305
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
-
307
309
  # Some emails have a lot of uninteresting CCs
308
- IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
309
- FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
310
+ FLIGHT_IN_2012_PEOPLE: list[Name] = ['Francis Derby', JANUSZ_BANASIAK, 'Louella Rabuyo', 'Richard Barnnet']
311
+ IRAN_DEAL_RECIPIENTS: list[Name] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
312
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
310
313
 
311
314
  EMAILS_CONFIG = [
312
315
  # 026294 and 026296 might also be Ittihadieh based on timing
@@ -409,11 +412,11 @@ EMAILS_CONFIG = [
409
412
  dupe_type='redacted'
410
413
  ),
411
414
  EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
412
- EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='Signature'),
413
- EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='Signature', duplicate_ids=['031120']),
415
+ EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
416
+ EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
414
417
  EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
415
- EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='Reply'),
416
- EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
418
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
419
+ EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
417
420
  EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
418
421
  EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
419
422
  EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
@@ -474,7 +477,7 @@ EMAILS_CONFIG = [
474
477
  EmailCfg(
475
478
  id='029977',
476
479
  author=LAWRANCE_VISOSKI,
477
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
480
+ recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
478
481
  attribution_reason=LARRY_REASON,
479
482
  duplicate_ids=['031129'],
480
483
  ),
@@ -491,14 +494,12 @@ EMAILS_CONFIG = [
491
494
  EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
492
495
  EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
493
496
  EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
494
- # 032581, 032604, 033025 may also be Masha based on timing, subject (interviews/articles), and sequential ID
495
497
  EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
496
498
  EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
497
- EmailCfg(id='033025', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
498
499
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
499
500
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
500
501
  EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
501
- EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
502
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
502
503
  EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
503
504
  EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
504
505
  EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
@@ -540,11 +541,12 @@ EMAILS_CONFIG = [
540
541
  author=SEAN_BANNON,
541
542
  attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
542
543
  ),
543
- EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
544
- EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
545
- EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
546
- EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
547
- EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
544
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
545
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
546
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
547
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
548
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
549
+ EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
548
550
  EmailCfg(
549
551
  id='019109',
550
552
  author=STEVEN_HOFFENBERG,
@@ -557,7 +559,7 @@ EMAILS_CONFIG = [
557
559
  attribution_reason='ends with "Respectfully, terry"',
558
560
  author=TERRY_KAFKA,
559
561
  fwded_text_after='From: Mike Cohen',
560
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
562
+ recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
561
563
  subject='Fw: The Iran Nuclear Deal',
562
564
  duplicate_ids=['028482'],
563
565
  ),
@@ -620,6 +622,7 @@ EMAILS_CONFIG = [
620
622
  EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
621
623
  EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
622
624
  EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
625
+ EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
623
626
  EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
624
627
  EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
625
628
  EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
@@ -658,6 +661,7 @@ EMAILS_CONFIG = [
658
661
  EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
659
662
  EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
660
663
  EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
664
+ EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
661
665
  EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
662
666
  EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
663
667
  EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
@@ -711,6 +715,7 @@ EMAILS_CONFIG = [
711
715
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
712
716
 
713
717
  # Configure duplicates
718
+ EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
714
719
  EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
715
720
  EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
716
721
  EmailCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
@@ -870,7 +875,7 @@ TWEET = 'tweet'
870
875
  # Legal cases
871
876
  BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
872
877
  EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
873
- EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
878
+ EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, & L.M."
874
879
  GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
875
880
  GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
876
881
  GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
@@ -1379,8 +1384,6 @@ OTHER_FILES_LETTERS = [
1379
1384
  description=f"letter about algorithmic trading",
1380
1385
  date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
1381
1386
  ),
1382
- DocCfg(id='029304', author=DONALD_TRUMP, description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}"),
1383
- DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the {TRUMP_ORG}", date='2011-08-07'),
1384
1387
  DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
1385
1388
  ]
1386
1389
 
@@ -1531,13 +1534,27 @@ OTHER_FILES_ACADEMIA = [
1531
1534
 
1532
1535
  # resumes and application letters
1533
1536
  OTHER_FILES_RESUMES = [
1537
+ DocCfg(
1538
+ id='029304',
1539
+ attached_to_email_id='029299',
1540
+ author=DONALD_TRUMP,
1541
+ description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}",
1542
+ ),
1534
1543
  DocCfg(id='022367', author='Jack J Grynberg', description=RESUME_OF, date='2014-07-01'),
1535
1544
  DocCfg(
1536
1545
  id='029302',
1546
+ attached_to_email_id='029299',
1537
1547
  author=MICHAEL_J_BOCCIO,
1538
1548
  description=f"{RESUME_OF} (former lawyer at the {TRUMP_ORG})",
1539
1549
  date='2011-08-07',
1540
1550
  ),
1551
+ DocCfg(
1552
+ id='029301',
1553
+ attached_to_email_id='029299',
1554
+ author=MICHAEL_J_BOCCIO,
1555
+ description=f"letter from former lawyer at the {TRUMP_ORG}",
1556
+ date='2011-08-07',
1557
+ ),
1541
1558
  DocCfg(id='029102', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1542
1559
  DocCfg(id='029104', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1543
1560
  DocCfg(id='015671', author='Robin Solomon', description=RESUME_OF, date='2015-06-02'), # She left Mount Sinai at some point in 2015,
@@ -1679,3 +1696,40 @@ REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sa
1679
1696
  REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1680
1697
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1681
1698
  SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1699
+
1700
+
1701
+ # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
1702
+ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1703
+ 'Alan Dlugash', # CCed with Richard Kahn
1704
+ 'Alan Rogers', # Random CC
1705
+ 'Andrew Friendly', # Presumably some relation of Kelly Friendly
1706
+ 'BS Stern', # A random fwd of email we have
1707
+ 'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
1708
+ 'Connie Zaguirre', # Random CC
1709
+ 'Dan Fleuette', # CC from sean bannon
1710
+ 'Danny Goldberg', # Random Paul Krassner emails
1711
+ GERALD_LEFCOURT, # Single CC
1712
+ GORDON_GETTY, # Random CC
1713
+ JEFF_FULLER, # Random Jean Luc Brunel CC
1714
+ 'Jojo Fontanilla', # Random CC
1715
+ 'Joseph Vinciguerra', # Random CC
1716
+ 'Larry Cohen', # Random Bill Gates CC
1717
+ 'Lyn Fontanilla', # Random CC
1718
+ 'Mark Albert', # Random CC
1719
+ 'Matthew Schafer', # Random CC
1720
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
1721
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
1722
+ 'Michael Simmons', # Random CC
1723
+ 'Nancy Portland', # Lawrence Krauss CC
1724
+ 'Oliver Goodenough', # Robert Trivers CC
1725
+ 'Peter Aldhous', # Lawrence Krauss CC
1726
+ 'Players2', # Hoffenberg CC
1727
+ 'Police Code Enforcement', # Kirk Blouin / John Page CC
1728
+ 'Sam Harris', # Lawrence Krauss CC
1729
+ SAMUEL_LEFF, # Random CC
1730
+ 'Sean T Lehane', # Random CC
1731
+ 'Stephen Rubin', # Random CC
1732
+ 'Tim Kane', # Random CC
1733
+ 'Travis Pangburn', # Random CC
1734
+ 'Vahe Stepanian', # Random CC
1735
+ ]
@@ -29,7 +29,7 @@ escape_single_quotes = lambda text: text.replace("'", r"\'")
29
29
  iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
30
30
  days_between = lambda dt1, dt2: (dt2 - dt1).days + 1
31
31
  days_between_str = lambda dt1, dt2: f"{days_between(dt1, dt2)} day" + ('s' if days_between(dt1, dt2) > 1 else '')
32
- remove_zero_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
32
+ remove_zero_time = lambda dt: dt.isoformat().removesuffix('T00:00:00')
33
33
  uniquify = lambda _list: list(set(_list))
34
34
  without_falsey = lambda _list: [e for e in _list if e]
35
35
 
@@ -38,25 +38,6 @@ def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
38
38
  return {k: sorted(list(v)) for k, v in d.items()}
39
39
 
40
40
 
41
- def extract_last_name(name: str) -> str:
42
- if ' ' not in name:
43
- return name
44
-
45
- names = name.removesuffix(QUESTION_MARKS).strip().split()
46
-
47
- if names[-1].startswith('Jr') and len(names[-1]) <= 3:
48
- return ' '.join(names[-2:])
49
- else:
50
- return names[-1]
51
-
52
-
53
- def extract_first_name(name: str) -> str:
54
- if ' ' not in name:
55
- return name
56
-
57
- return name.removesuffix(f" {extract_last_name(name)}")
58
-
59
-
60
41
  def flatten(_list: list[list[T]]) -> list[T]:
61
42
  return list(itertools.chain.from_iterable(_list))
62
43
 
@@ -8,7 +8,7 @@ from dateutil.parser import parse
8
8
 
9
9
  from epstein_files.util.constant.names import *
10
10
  from epstein_files.util.constant.strings import *
11
- from epstein_files.util.data import remove_zero_time_from_timestamp_str, without_falsey
11
+ from epstein_files.util.data import remove_zero_time, without_falsey
12
12
 
13
13
  DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
14
14
  Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
@@ -62,7 +62,7 @@ class DocCfg:
62
62
 
63
63
  Attributes:
64
64
  id (str): ID of file
65
- author (str | None): Author of the document (if any)
65
+ author (Name): Author of the document (if any)
66
66
  category (str | None): Type of file
67
67
  date (str | None): If passed will be immediately parsed into the 'timestamp' field
68
68
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
@@ -74,13 +74,14 @@ class DocCfg:
74
74
  """
75
75
  id: str
76
76
  attached_to_email_id: str | None = None
77
- author: str | None = None
77
+ author: Name = None
78
78
  category: str | None = None
79
79
  date: str | None = None
80
80
  description: str | None = None
81
81
  dupe_type: DuplicateType | None = None
82
82
  duplicate_ids: list[str] = field(default_factory=list)
83
83
  duplicate_of_id: str | None = None
84
+ is_attribution_uncertain: bool = False
84
85
  is_interesting: bool | None = None
85
86
  is_synthetic: bool = False
86
87
  timestamp: datetime | None = None
@@ -94,30 +95,40 @@ class DocCfg:
94
95
 
95
96
  def complete_description(self) -> str | None:
96
97
  """String that summarizes what is known about this document."""
98
+ description = ''
99
+
97
100
  if self.category and not self.description and not self.author:
98
101
  if self.category == JUNK:
99
102
  return None
100
103
  else:
101
- return self.category
104
+ description = self.category
102
105
  elif self.category == REPUTATION:
103
106
  author_str = f"{self.author} " if self.author else ''
104
- return f"{REPUTATION_MGMT}: {author_str}{self.description}"
107
+ description = f"{REPUTATION_MGMT}: {author_str}{self.description}"
105
108
  elif self.category == SKYPE_LOG:
106
109
  msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
107
- return f"{msg} {self.description}" if self.description else msg
110
+ description = f"{msg} {self.description}" if self.description else msg
108
111
  elif self.author and self.description:
109
112
  if self.category in [ACADEMIA, BOOK]:
110
113
  title = self.description if '"' in self.description else f'"{self.description}"'
111
- return f"{title} by {self.author}"
114
+ description = f"{title} by {self.author}"
112
115
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
113
- return f'{self.author} report: "{self.description}"'
116
+ description = f'{self.author} report: "{self.description}"'
114
117
  elif self.category == LEGAL and 'v.' in self.author:
115
- return f"{self.author}: {self.description}"
116
- elif self.category and self.author is None and self.description is None:
117
- return self.category
118
+ description = f"{self.author}: {self.description}"
119
+
120
+ if not description:
121
+ pieces = without_falsey([self.author, self.description])
122
+
123
+ if pieces:
124
+ description = ' '.join(pieces)
125
+ else:
126
+ return None
118
127
 
119
- pieces = without_falsey([self.author, self.description])
120
- return ' '.join(pieces) if pieces else None
128
+ if self.attached_to_email_id:
129
+ description += f" attached to email {self.attached_to_email_id}"
130
+
131
+ return description
121
132
 
122
133
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
123
134
  """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
@@ -152,7 +163,7 @@ class DocCfg:
152
163
  elif _field.name == 'timestamp' and self.date is not None:
153
164
  continue # Don't print both timestamp and date
154
165
  elif isinstance(value, datetime):
155
- value_str = remove_zero_time_from_timestamp_str(value)
166
+ value_str = remove_zero_time(value)
156
167
  add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
157
168
  elif isinstance(value, str):
158
169
  if "'" in value:
@@ -196,7 +207,6 @@ class CommunicationCfg(DocCfg):
196
207
  is_attribution_uncertain (bool): True if we have a good idea of who the author is but are not 100% certain
197
208
  """
198
209
  attribution_reason: str | None = None
199
- is_attribution_uncertain: bool = False
200
210
 
201
211
  def __repr__(self) -> str:
202
212
  return super().__repr__()
@@ -209,13 +219,13 @@ class EmailCfg(CommunicationCfg):
209
219
  actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
210
220
  fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
211
221
  is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
212
- recipients (list[str | None]): Who received the email
222
+ recipients (list[Name]): Who received the email
213
223
  subject (str): Subject line
214
224
  """
215
225
  actual_text: str | None = None
216
226
  fwded_text_after: str | None = None
217
227
  is_fwded_article: bool = False
218
- recipients: list[str | None] = field(default_factory=list)
228
+ recipients: list[Name] = field(default_factory=list)
219
229
  subject: str | None = None
220
230
 
221
231
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
epstein_files/util/env.py CHANGED
@@ -8,7 +8,7 @@ from rich_argparse_plus import RichHelpFormatterPlus
8
8
  from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
9
9
  from epstein_files.util.logging import env_log_level, exit_with_error, logger
10
10
 
11
- DEFAULT_WIDTH = 145
11
+ DEFAULT_WIDTH = 155
12
12
  DEFAULT_FILE = 'default_file'
13
13
  EPSTEIN_GENERATE = 'epstein_generate'
14
14
  HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
@@ -38,6 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
38
38
  output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
39
39
  parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
40
40
  output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
41
+ output.add_argument('--emailers-info', action='store_true', help='write a .png of the eeailers info table')
41
42
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
42
43
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
43
44
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
@@ -66,8 +67,9 @@ debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debu
66
67
  args = parser.parse_args()
67
68
  is_html_script = parser.prog in HTML_SCRIPTS
68
69
 
70
+ args.build = args.build
69
71
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
70
- args.names = [None if n == 'None' else n for n in (args.names or [])]
72
+ args.names = [None if n == 'None' else n.strip() for n in (args.names or [])]
71
73
  args.output_emails = args.output_emails or args.all_emails
72
74
  args.output_other = args.output_other or args.all_other_files or args.uninteresting
73
75
  args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
@@ -81,7 +83,7 @@ if is_html_script:
81
83
  if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
82
84
  if args.email_timeline:
83
85
  exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
84
- elif not args.email_timeline:
86
+ elif not args.email_timeline and not args.emailers_info:
85
87
  logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
86
88
  args.output_texts = args.output_emails = args.output_other = True
87
89