epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,12 +19,13 @@ HEADER_ABBREVIATIONS = {
19
19
  'bgC3': 'Bill Gates Ventures (renamed in 2018)',
20
20
  "Brock": 'Brock Pierce (crypto bro with a very sordid past)',
21
21
  "DB": "Deutsche Bank (maybe??)",
22
+ "GRAT": "Grantor Retained Annuity Trust (tax shelter)",
22
23
  'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
23
24
  'Jabor': '"an influential man in Qatar"',
24
25
  'Jared': "Jared Kushner",
25
26
  'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
26
27
  'JEGE': "Epstein's airplane holding company",
27
- 'Jeffrey Wernick': 'right wing crypto bro, former COO of Parler',
28
+ JEFFREY_WERNICK: 'right wing crypto bro, former COO of Parler',
28
29
  'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
29
30
  "Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
30
31
  'KSA': "Kingdom of Saudi Arabia",
@@ -62,6 +63,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
62
63
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
63
64
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
64
65
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
66
+ BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
65
67
  BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
66
68
  BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
67
69
  BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
@@ -83,13 +85,14 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
83
85
  JACKIE_PERCZEK: re.compile(r'jackie percze[kl]?', re.IGNORECASE),
84
86
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
85
87
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
88
+ JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
86
89
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
87
90
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
88
91
  JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
89
92
  JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
90
93
  JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
91
94
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
92
- JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
95
+ JONATHAN_FARKAS: re.compile(r'Jonathan Fark(a|u)(s|il)', re.IGNORECASE),
93
96
  KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
94
97
  KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
95
98
  LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
@@ -157,6 +160,7 @@ EMAILERS = [
157
160
  BILL_GATES,
158
161
  BILL_SIEGEL,
159
162
  BRAD_WECHSLER,
163
+ CHRISTINA_GALBRAITH,
160
164
  DANIEL_SABBA,
161
165
  'Danny Goldberg',
162
166
  DAVID_SCHOEN,
@@ -302,15 +306,53 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
302
306
  ################################################ EMAILS ################################################
303
307
  ########################################################################################################
304
308
 
305
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
-
307
309
  # Some emails have a lot of uninteresting CCs
308
- IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
309
- FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
310
+ FLIGHT_IN_2012_PEOPLE: list[Name] = ['Francis Derby', JANUSZ_BANASIAK, 'Louella Rabuyo', 'Richard Barnnet']
311
+ IRAN_DEAL_RECIPIENTS: list[Name] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
312
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
310
313
 
311
314
  EMAILS_CONFIG = [
315
+ # 026294 and 026296 might also be Ittihadieh based on timing
312
316
  EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
317
+ # 032542 026078 026080 026083 026086 026090 might also be Anas based on discussion of Dubai and Kuwait
313
318
  EmailCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
319
+ EmailCfg(id='026167', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
320
+ EmailCfg(id='032571', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
321
+ EmailCfg(id='032573', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
322
+ EmailCfg(id='032575', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
323
+ EmailCfg(id='032577', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
324
+ EmailCfg(id='032579', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
325
+ EmailCfg(id='032582', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
326
+ EmailCfg(id='032585', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
327
+ EmailCfg(id='032588', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
328
+ EmailCfg(id='032591', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
329
+ EmailCfg(id='032595', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
330
+ EmailCfg(id='032599', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
331
+ EmailCfg(id='032611', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
332
+ EmailCfg(id='023661', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
333
+ EmailCfg(id='032616', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
334
+ EmailCfg(id='032622', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
335
+ EmailCfg(id='032628', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
336
+ EmailCfg(id='032629', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
337
+ EmailCfg(id='032631', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
338
+ EmailCfg(id='026168', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
339
+ EmailCfg(id='026170', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
340
+ EmailCfg(id='026173', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
341
+ EmailCfg(id='026176', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
342
+ EmailCfg(id='026180', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
343
+ EmailCfg(id='026184', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
344
+ EmailCfg(id='026188', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
345
+ EmailCfg(id='026193', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
346
+ EmailCfg(id='026198', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
347
+ EmailCfg(id='026210', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
348
+ EmailCfg(id='026204', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
349
+ EmailCfg(id='032660', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
350
+ EmailCfg(id='032663', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
351
+ EmailCfg(id='032667', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
352
+ EmailCfg(id='032672', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
353
+ EmailCfg(id='032676', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
354
+ EmailCfg(id='026237', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
355
+ EmailCfg(id='032682', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
314
356
  EmailCfg(id='026064', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
315
357
  EmailCfg(id='026069', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
316
358
  EmailCfg(id='030741', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
@@ -351,6 +393,7 @@ EMAILS_CONFIG = [
351
393
  actual_text='',
352
394
  author=DARREN_INDYKE,
353
395
  description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book",
396
+ recipients=['Charles Michael'],
354
397
  timestamp=parse('2016-08-17 11:26:00'),
355
398
  attribution_reason='Quoted replies are in 019109',
356
399
  ),
@@ -369,11 +412,11 @@ EMAILS_CONFIG = [
369
412
  dupe_type='redacted'
370
413
  ),
371
414
  EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
372
- EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='Signature'),
373
- EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='Signature', duplicate_ids=['031120']),
415
+ EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
416
+ EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
374
417
  EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
375
- EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='Reply'),
376
- EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
418
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
419
+ EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
377
420
  EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
378
421
  EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
379
422
  EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
@@ -434,7 +477,7 @@ EMAILS_CONFIG = [
434
477
  EmailCfg(
435
478
  id='029977',
436
479
  author=LAWRANCE_VISOSKI,
437
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
480
+ recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
438
481
  attribution_reason=LARRY_REASON,
439
482
  duplicate_ids=['031129'],
440
483
  ),
@@ -444,11 +487,19 @@ EMAILS_CONFIG = [
444
487
  EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
445
488
  EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
446
489
  EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
490
+ EmailCfg(id='032563', author=MASHA_DROKOVA, attribution_reason='replied to in 033014'),
491
+ EmailCfg(id='032564', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
492
+ EmailCfg(id='031544', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
493
+ EmailCfg(id='032605', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
494
+ EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
495
+ EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
496
+ EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
497
+ EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
498
+ EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
447
499
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
448
500
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
449
501
  EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
450
- EmailCfg(id='022193', author=NADIA_MARCINKO, attribution_reason='reply'),
451
- EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
502
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
452
503
  EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
453
504
  EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
454
505
  EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
@@ -490,11 +541,12 @@ EMAILS_CONFIG = [
490
541
  author=SEAN_BANNON,
491
542
  attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
492
543
  ),
493
- EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
494
- EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
495
- EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
496
- EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
497
- EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
544
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
545
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
546
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
547
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
548
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
549
+ EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
498
550
  EmailCfg(
499
551
  id='019109',
500
552
  author=STEVEN_HOFFENBERG,
@@ -507,13 +559,15 @@ EMAILS_CONFIG = [
507
559
  attribution_reason='ends with "Respectfully, terry"',
508
560
  author=TERRY_KAFKA,
509
561
  fwded_text_after='From: Mike Cohen',
510
- recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
562
+ recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
563
+ subject='Fw: The Iran Nuclear Deal',
511
564
  duplicate_ids=['028482'],
512
565
  ),
513
566
  EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
514
567
  EmailCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
515
568
  EmailCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
516
569
  EmailCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
570
+ EmailCfg(id='033021', recipients=[ANAS_ALRASHEED], attribution_reason='visible in 033022'),
517
571
  EmailCfg(id='027063', recipients=[ANTHONY_BARRETT]),
518
572
  EmailCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
519
573
  EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
@@ -538,6 +592,7 @@ EMAILS_CONFIG = [
538
592
  EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
539
593
  EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
540
594
  EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
595
+ EmailCfg(id='029558', recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING], attribution_reason='BCC', fwded_text_after='Creativity is central'),
541
596
  EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
542
597
  EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
543
598
  EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
@@ -566,10 +621,14 @@ EMAILS_CONFIG = [
566
621
  EmailCfg(id='033466', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature'),
567
622
  EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
568
623
  EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
624
+ EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
625
+ EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
569
626
  EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
570
627
  EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
571
628
  EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
572
629
  EmailCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
630
+ EmailCfg(id='022193', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
631
+ EmailCfg(id='030572', recipients=[PAULA], attribution_reason='quoted in 030482', is_attribution_uncertain=True),
573
632
  EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
574
633
  EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
575
634
  EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
@@ -585,8 +644,9 @@ EMAILS_CONFIG = [
585
644
  EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
586
645
  EmailCfg(id='033050', actual_text='schwartman'),
587
646
  EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
588
- EmailCfg(id='031333', is_fwded_article=True, description='looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
589
- EmailCfg(id='031335', is_fwded_article=True, description='looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
647
+ EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
648
+ EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
649
+ EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
590
650
  EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
591
651
  EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
592
652
  EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
@@ -640,7 +700,6 @@ EMAILS_CONFIG = [
640
700
  EmailCfg(id='031472', is_fwded_article=True), # WSJ: Lawyers for Imam Wanted by Turkish authorities Fear for Their Client's Life
641
701
  EmailCfg(id='012684', is_fwded_article=True), # Trump in talks to buy socialite Kluge's Charlottesville vineyard
642
702
  EmailCfg(id='028536', is_fwded_article=True), # Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?
643
- EmailCfg(id='028524', is_fwded_article=True), # Zach Braff article on Woody Allen in NYT
644
703
  EmailCfg(id='030326', is_fwded_article=True), # NYP Congressional candidate compares Melania Trump to prostitute
645
704
  EmailCfg(id='030519', is_fwded_article=True), # Daily Mail on Prince Andrew
646
705
  EmailCfg(id='030878', is_fwded_article=True), # Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'
@@ -655,6 +714,7 @@ EMAILS_CONFIG = [
655
714
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
656
715
 
657
716
  # Configure duplicates
717
+ EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
658
718
  EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
659
719
  EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
660
720
  EmailCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
@@ -667,6 +727,7 @@ EMAILS_CONFIG = [
667
727
  EmailCfg(id='029841', duplicate_ids=['012711'], dupe_type='redacted'),
668
728
  EmailCfg(id='030414', duplicate_ids=['030578'], dupe_type='redacted'),
669
729
  EmailCfg(id='031135', duplicate_ids=['030634'], dupe_type='redacted'),
730
+ EmailCfg(id='030620', duplicate_ids=['023067']),
670
731
  EmailCfg(id='029835', duplicate_ids=['028968']),
671
732
  EmailCfg(id='033512', duplicate_ids=['033361']),
672
733
  EmailCfg(id='030299', duplicate_ids=['021794']),
@@ -768,7 +829,6 @@ EMAILS_CONFIG = [
768
829
  # Emails that need a little help determining how to separate the actual text from fwded text
769
830
  EmailCfg(id='013415', fwded_text_after='Darren K. Indyke'),
770
831
  EmailCfg(id='024624', fwded_text_after='On Tue, May 14'),
771
- EmailCfg(id='029558', fwded_text_after='Creativity is central'),
772
832
  EmailCfg(id='025888', fwded_text_after='Jul 24, 2015'),
773
833
  EmailCfg(id='016413', fwded_text_after='In a former warehouse'),
774
834
  EmailCfg(id='025548', fwded_text_after='Edward Jay Epstein'),
@@ -1323,8 +1383,6 @@ OTHER_FILES_LETTERS = [
1323
1383
  description=f"letter about algorithmic trading",
1324
1384
  date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
1325
1385
  ),
1326
- DocCfg(id='029304', author=DONALD_TRUMP, description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}"),
1327
- DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the {TRUMP_ORG}", date='2011-08-07'),
1328
1386
  DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
1329
1387
  ]
1330
1388
 
@@ -1475,13 +1533,27 @@ OTHER_FILES_ACADEMIA = [
1475
1533
 
1476
1534
  # resumes and application letters
1477
1535
  OTHER_FILES_RESUMES = [
1536
+ DocCfg(
1537
+ id='029304',
1538
+ attached_to_email_id='029299',
1539
+ author=DONALD_TRUMP,
1540
+ description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}",
1541
+ ),
1478
1542
  DocCfg(id='022367', author='Jack J Grynberg', description=RESUME_OF, date='2014-07-01'),
1479
1543
  DocCfg(
1480
1544
  id='029302',
1545
+ attached_to_email_id='029299',
1481
1546
  author=MICHAEL_J_BOCCIO,
1482
1547
  description=f"{RESUME_OF} (former lawyer at the {TRUMP_ORG})",
1483
1548
  date='2011-08-07',
1484
1549
  ),
1550
+ DocCfg(
1551
+ id='029301',
1552
+ attached_to_email_id='029299',
1553
+ author=MICHAEL_J_BOCCIO,
1554
+ description=f"letter from former lawyer at the {TRUMP_ORG}",
1555
+ date='2011-08-07',
1556
+ ),
1485
1557
  DocCfg(id='029102', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1486
1558
  DocCfg(id='029104', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
1487
1559
  DocCfg(id='015671', author='Robin Solomon', description=RESUME_OF, date='2015-06-02'), # She left Mount Sinai at some point in 2015,
@@ -1622,4 +1694,40 @@ REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_P
1622
1694
  REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1623
1695
  REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1624
1696
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1625
- SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?(Sent (from|via).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1697
+ SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1698
+
1699
+
1700
+ # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
1701
+ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1702
+ 'Alan Dlugash', # CCed with Richard Kahn
1703
+ 'Alan Rogers', # Random CC
1704
+ 'Andrew Friendly', # Presumably some relation of Kelly Friendly
1705
+ 'BS Stern', # A random fwd of email we have
1706
+ 'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
1707
+ 'Connie Zaguirre', # Random CC
1708
+ 'Dan Fleuette', # CC from sean bannon
1709
+ 'Danny Goldberg', # Random Paul Krassner emails
1710
+ GERALD_LEFCOURT, # Single CC
1711
+ GORDON_GETTY, # Random CC
1712
+ JEFF_FULLER, # Random Jean Luc Brunel CC
1713
+ 'Jojo Fontanilla', # Random CC
1714
+ 'Joseph Vinciguerra', # Random CC
1715
+ 'Larry Cohen', # Random Bill Gates CC
1716
+ 'Lyn Fontanilla', # Random CC
1717
+ 'Mark Albert', # Random CC
1718
+ 'Matthew Schafer', # Random CC
1719
+ MICHAEL_BUCHHOLTZ, # Terry Kafka CC
1720
+ 'Nancy Dahl', # covered by Lawrence Krauss (her husband)
1721
+ 'Michael Simmons', # Random CC
1722
+ 'Nancy Portland', # Lawrence Krauss CC
1723
+ 'Oliver Goodenough', # Robert Trivers CC
1724
+ 'Peter Aldhous', # Lawrence Krauss CC
1725
+ 'Players2', # Hoffenberg CC
1726
+ 'Sam Harris', # Lawrence Krauss CC
1727
+ SAMUEL_LEFF, # Random CC
1728
+ 'Sean T Lehane', # Random CC
1729
+ 'Stephen Rubin', # Random CC
1730
+ 'Tim Kane', # Random CC
1731
+ 'Travis Pangburn', # Random CC
1732
+ 'Vahe Stepanian', # Random CC
1733
+ ]
@@ -8,6 +8,7 @@ from dateutil import tz
8
8
  from typing import TypeVar
9
9
 
10
10
  from epstein_files.util.constant import names
11
+ from epstein_files.util.constant.strings import QUESTION_MARKS
11
12
  from epstein_files.util.env import args
12
13
  from epstein_files.util.logging import logger
13
14
 
@@ -37,18 +38,6 @@ def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
37
38
  return {k: sorted(list(v)) for k, v in d.items()}
38
39
 
39
40
 
40
- def extract_last_name(name: str) -> str:
41
- if ' ' not in name:
42
- return name
43
-
44
- names = name.split()
45
-
46
- if names[-1].startswith('Jr') and len(names[-1]) <= 3:
47
- return ' '.join(names[-2:])
48
- else:
49
- return names[-1]
50
-
51
-
52
41
  def flatten(_list: list[list[T]]) -> list[T]:
53
42
  return list(itertools.chain.from_iterable(_list))
54
43
 
@@ -62,7 +62,7 @@ class DocCfg:
62
62
 
63
63
  Attributes:
64
64
  id (str): ID of file
65
- author (str | None): Author of the document (if any)
65
+ author (Name): Author of the document (if any)
66
66
  category (str | None): Type of file
67
67
  date (str | None): If passed will be immediately parsed into the 'timestamp' field
68
68
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
@@ -74,7 +74,7 @@ class DocCfg:
74
74
  """
75
75
  id: str
76
76
  attached_to_email_id: str | None = None
77
- author: str | None = None
77
+ author: Name = None
78
78
  category: str | None = None
79
79
  date: str | None = None
80
80
  description: str | None = None
@@ -94,26 +94,40 @@ class DocCfg:
94
94
 
95
95
  def complete_description(self) -> str | None:
96
96
  """String that summarizes what is known about this document."""
97
+ description = ''
98
+
97
99
  if self.category and not self.description and not self.author:
98
- return self.category
100
+ if self.category == JUNK:
101
+ return None
102
+ else:
103
+ description = self.category
99
104
  elif self.category == REPUTATION:
100
- return f"{REPUTATION_MGMT}: {self.description}"
105
+ author_str = f"{self.author} " if self.author else ''
106
+ description = f"{REPUTATION_MGMT}: {author_str}{self.description}"
101
107
  elif self.category == SKYPE_LOG:
102
108
  msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
103
- return f"{msg} {self.description}" if self.description else msg
109
+ description = f"{msg} {self.description}" if self.description else msg
104
110
  elif self.author and self.description:
105
111
  if self.category in [ACADEMIA, BOOK]:
106
112
  title = self.description if '"' in self.description else f'"{self.description}"'
107
- return f"{title} by {self.author}"
113
+ description = f"{title} by {self.author}"
108
114
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
109
- return f'{self.author} report: "{self.description}"'
115
+ description = f'{self.author} report: "{self.description}"'
110
116
  elif self.category == LEGAL and 'v.' in self.author:
111
- return f"{self.author}: {self.description}"
112
- elif self.category and self.author is None and self.description is None:
113
- return self.category
117
+ description = f"{self.author}: {self.description}"
118
+
119
+ if not description:
120
+ pieces = without_falsey([self.author, self.description])
121
+
122
+ if pieces:
123
+ description = ' '.join(pieces)
124
+ else:
125
+ return None
126
+
127
+ if self.attached_to_email_id:
128
+ description += f" attached to email {self.attached_to_email_id}"
114
129
 
115
- pieces = without_falsey([self.author, self.description])
116
- return ' '.join(pieces) if pieces else None
130
+ return description
117
131
 
118
132
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
119
133
  """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
@@ -205,12 +219,14 @@ class EmailCfg(CommunicationCfg):
205
219
  actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
206
220
  fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
207
221
  is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
208
- recipients (list[str | None]): Who received the email
222
+ recipients (list[Name]): Who received the email
223
+ subject (str): Subject line
209
224
  """
210
225
  actual_text: str | None = None
211
226
  fwded_text_after: str | None = None
212
227
  is_fwded_article: bool = False
213
- recipients: list[str | None] = field(default_factory=list)
228
+ recipients: list[Name] = field(default_factory=list)
229
+ subject: str | None = None
214
230
 
215
231
  # This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
216
232
  def __repr__(self) -> str:
epstein_files/util/env.py CHANGED
@@ -8,7 +8,7 @@ from rich_argparse_plus import RichHelpFormatterPlus
8
8
  from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
9
9
  from epstein_files.util.logging import env_log_level, exit_with_error, logger
10
10
 
11
- DEFAULT_WIDTH = 145
11
+ DEFAULT_WIDTH = 155
12
12
  DEFAULT_FILE = 'default_file'
13
13
  EPSTEIN_GENERATE = 'epstein_generate'
14
14
  HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
@@ -38,6 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
38
38
  output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
39
39
  parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
40
40
  output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
41
+ output.add_argument('--emailers-info-png', action='store_true', help='write a .png of the emeailers info table')
41
42
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
42
43
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
43
44
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
@@ -66,6 +67,7 @@ debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debu
66
67
  args = parser.parse_args()
67
68
  is_html_script = parser.prog in HTML_SCRIPTS
68
69
 
70
+ args.build = args.build or args.emailers_info_png
69
71
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
70
72
  args.names = [None if n == 'None' else n for n in (args.names or [])]
71
73
  args.output_emails = args.output_emails or args.all_emails
@@ -33,10 +33,13 @@ def coerce_file_stem(filename_or_id: int | str) -> str:
33
33
 
34
34
 
35
35
  def extract_file_id(filename_or_id: int | str | Path) -> str:
36
+ if isinstance(filename_or_id, str):
37
+ filename_or_id = filename_or_id.removesuffix(',')
38
+
36
39
  if isinstance(filename_or_id, int) or (isinstance(filename_or_id, str) and len(filename_or_id) <= 6):
37
40
  return id_str(filename_or_id)
38
41
 
39
- file_match = FILE_ID_REGEX.match(str(filename_or_id))
42
+ file_match = FILE_ID_REGEX.match(str(filename_or_id).upper())
40
43
 
41
44
  if not file_match:
42
45
  raise RuntimeError(f"Failed to extract file ID from {filename_or_id}")