epstein-files 1.2.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,13 @@ from dateutil.parser import parse
7
7
  from epstein_files.util.constant.names import *
8
8
  from epstein_files.util.constant.strings import *
9
9
  from epstein_files.util.doc_cfg import DocCfg, EmailCfg, TextCfg
10
+ from epstein_files.util.env import args
10
11
  from epstein_files.util.logging import logger
11
12
 
12
13
  FALLBACK_TIMESTAMP = parse("1/1/2051 12:01:01 AM")
14
+ MAX_CHARS_TO_PRINT = 4000
15
+ NO_TRUNCATE = -1
16
+ TRUNCATED_CHARS = int(MAX_CHARS_TO_PRINT / 3)
13
17
 
14
18
  HEADER_ABBREVIATIONS = {
15
19
  "AD": "Abu Dhabi",
@@ -19,6 +23,7 @@ HEADER_ABBREVIATIONS = {
19
23
  'bgC3': 'Bill Gates Ventures (renamed in 2018)',
20
24
  "Brock": 'Brock Pierce (crypto bro with a very sordid past)',
21
25
  "DB": "Deutsche Bank (maybe??)",
26
+ "GCC": "Gulf Cooperation Council",
22
27
  "GRAT": "Grantor Retained Annuity Trust (tax shelter)",
23
28
  'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
24
29
  'Jabor': '"an influential man in Qatar"',
@@ -39,6 +44,7 @@ HEADER_ABBREVIATIONS = {
39
44
  'MBZ': "Mohamed bin Zayed Al Nahyan (Emirates sheikh)",
40
45
  "Miro": MIROSLAV_LAJCAK,
41
46
  "Mooch": "Anthony 'The Mooch' Scaramucci (Skybridge crypto bro)",
47
+ "NPA": 'non-prosecution agreement',
42
48
  "Terje": TERJE_ROD_LARSEN,
43
49
  "VI": f"U.S. {VIRGIN_ISLANDS}",
44
50
  "Woody": "Woody Allen",
@@ -52,14 +58,14 @@ HEADER_ABBREVIATIONS = {
52
58
 
53
59
  # Emailers
54
60
  EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
55
- ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|tz)|AlanDersh', re.IGNORECASE),
61
+ ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
56
62
  ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
57
63
  AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
58
64
  ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
59
65
  ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
60
66
  ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
61
67
  ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
62
- ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?Rothschild|Ariane', re.IGNORECASE),
68
+ ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Rothsh?ch?ild|Ariane(?!\s+Dwyer)', re.IGNORECASE),
63
69
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
64
70
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
65
71
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
@@ -74,7 +80,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
74
80
  DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
75
81
  DAVID_STERN: re.compile(r'David Stern?', re.IGNORECASE),
76
82
  EDUARDO_ROBLES: re.compile(r'Ed(uardo)?\s*Robles', re.IGNORECASE),
77
- EDWARD_JAY_EPSTEIN: re.compile(r'Edward (Jay )?Epstein', re.IGNORECASE),
83
+ EDWARD_JAY_EPSTEIN: re.compile(r'(?<!Jeffrey )Edward (Jay )?Epstein', re.IGNORECASE),
78
84
  EHUD_BARAK: re.compile(r'(ehud|e?h)\s*barak|\behud', re.IGNORECASE),
79
85
  FAITH_KATES: re.compile(r'faith kates?', re.IGNORECASE),
80
86
  GERALD_BARTON: re.compile(r'Gerald.*Barton', re.IGNORECASE),
@@ -86,9 +92,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
86
92
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
87
93
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
88
94
  JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
89
- JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
95
+ JEAN_HUGUEN: re.compile(r"Jean[\s.]Huguen", re.IGNORECASE),
96
+ JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
90
97
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
91
- JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
98
+ JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeff(rey)? (Edward )?E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
92
99
  JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
93
100
  JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
94
101
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
@@ -104,7 +111,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
104
111
  LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
105
112
  MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
106
113
  MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
107
- MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
114
+ MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
108
115
  MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
109
116
  MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
110
117
  MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -113,7 +120,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
113
120
  MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
114
121
  MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
115
122
  MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
116
- MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]|i)|Wolff', re.IGNORECASE),
123
+ MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
117
124
  MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
118
125
  MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
119
126
  NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
@@ -156,6 +163,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
156
163
  EMAILERS = [
157
164
  'Anne Boyles',
158
165
  AL_SECKEL,
166
+ 'Ariane Dwyer',
159
167
  AZIZA_ALAHMADI,
160
168
  BILL_GATES,
161
169
  BILL_SIEGEL,
@@ -195,10 +203,12 @@ EMAILERS = [
195
203
  'Peter Aldhous',
196
204
  'Peter Green',
197
205
  ROGER_SCHANK,
206
+ 'Roy Black',
198
207
  STEVEN_PFEIFFER,
199
208
  'Steven Victor MD',
200
209
  'Susan Edelman',
201
210
  TOM_BARRACK,
211
+ 'Vahe Stepanian',
202
212
  'Vladimir Yudashkin',
203
213
  ]
204
214
 
@@ -306,15 +316,47 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
306
316
  ################################################ EMAILS ################################################
307
317
  ########################################################################################################
308
318
 
319
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
309
320
  # Some emails have a lot of uninteresting CCs
310
321
  FLIGHT_IN_2012_PEOPLE: list[Name] = ['Francis Derby', JANUSZ_BANASIAK, 'Louella Rabuyo', 'Richard Barnnet']
311
322
  IRAN_DEAL_RECIPIENTS: list[Name] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
312
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
323
+
324
+ TRIVERS_CCS: list[Name] = [
325
+ "Alan Rogers",
326
+ "Anna Dreber",
327
+ "Anula Jayasuriya",
328
+ "Bill Prezant",
329
+ "Bobby McCormick",
330
+ "Clive Crook",
331
+ "Dane Stangler",
332
+ "Ron Bailey",
333
+ "Ditsa Pines",
334
+ "David Darst",
335
+ "Gerry Ohrstrom",
336
+ "Paul Romer",
337
+ "John Mallen",
338
+ "Jim Halligan",
339
+ "Lee Silver",
340
+ "Monika Gruter Cheney",
341
+ "Marguerite Atkins",
342
+ "Matt Ridley",
343
+ "Mike Cagney",
344
+ "Evan Smith",
345
+ "Roger Edelen",
346
+ "Oliver Goodenough",
347
+ "Paul Zak",
348
+ "Peter J Richerson",
349
+ "Clair Brown",
350
+ "Terry Anderson",
351
+ "Tim Kane",
352
+ "Rob Hanson",
353
+ "president@usfca.edu",
354
+ ]
355
+
313
356
 
314
357
  EMAILS_CONFIG = [
315
358
  # 026294 and 026296 might also be Ittihadieh based on timing
316
359
  EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
317
- # 032542 026078 026080 026083 026086 026090 might also be Anas based on discussion of Dubai and Kuwait
318
360
  EmailCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
319
361
  EmailCfg(id='026167', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
320
362
  EmailCfg(id='032571', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
@@ -353,6 +395,12 @@ EMAILS_CONFIG = [
353
395
  EmailCfg(id='032676', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
354
396
  EmailCfg(id='026237', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
355
397
  EmailCfg(id='032682', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
398
+ EmailCfg(id='032542', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
399
+ EmailCfg(id='026078', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
400
+ EmailCfg(id='026080', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
401
+ EmailCfg(id='026083', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
402
+ EmailCfg(id='026086', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
403
+ EmailCfg(id='026090', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
356
404
  EmailCfg(id='026064', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
357
405
  EmailCfg(id='026069', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
358
406
  EmailCfg(id='030741', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
@@ -369,33 +417,40 @@ EMAILS_CONFIG = [
369
417
  fwded_text_after='Transcript: Phone call between President',
370
418
  ),
371
419
  EmailCfg(id='033328', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
372
- EmailCfg(id='026659', author=BARBRO_C_EHNBOM, attribution_reason='Reply'),
373
- EmailCfg(id='031215', author=BARBRO_C_EHNBOM, duplicate_ids=['026745'], dupe_type='redacted'), # the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config
374
- EmailCfg(id='026764', author=BARRY_J_COHEN), # Bad OCR (nofix)
375
- EmailCfg(id='031206', author=BENNET_MOSKOWITZ, duplicate_ids=['031227']),
376
- EmailCfg(id='031442', author=CHRISTINA_GALBRAITH, duplicate_ids=['031996']),
420
+ EmailCfg(
421
+ id='031215',
422
+ author=BARBRO_C_EHNBOM,
423
+ duplicate_ids=['026745'],
424
+ dupe_type='redacted',
425
+ comment="the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config",
426
+ ),
427
+ EmailCfg(id='031206', duplicate_ids=['031227']),
428
+ EmailCfg(id='031591', duplicate_ids=['031442', '031996']),
377
429
  EmailCfg(
378
430
  id='019446',
379
431
  author=CHRISTINA_GALBRAITH,
380
432
  attribution_reason='shows from "Christina media/PR" which fits',
381
433
  is_attribution_uncertain=True,
382
434
  ),
383
- EmailCfg(id='026625', author=DARREN_INDYKE, actual_text='Hysterical.'),
384
435
  EmailCfg(
385
436
  id='026624',
386
437
  author=DARREN_INDYKE,
387
- recipients=[JEFFREY_EPSTEIN],
388
- timestamp=parse('2016-10-01 16:40:00'),
438
+ date='2016-10-01 16:40:00',
389
439
  duplicate_ids=['031708'],
440
+ recipients=[JEFFREY_EPSTEIN],
441
+ subject='Donald Trump Rape Lawsuit Refiled With New Witness I Law News',
390
442
  ),
391
443
  EmailCfg(
392
444
  id='031278',
393
445
  actual_text='',
394
446
  author=DARREN_INDYKE,
395
- description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book",
396
- recipients=['Charles Michael'],
397
- timestamp=parse('2016-08-17 11:26:00'),
447
+ recipients=["Charles Michael"],
398
448
  attribution_reason='Quoted replies are in 019109',
449
+ date='2016-08-17 11:26:00',
450
+ description="heavily redacted email, quoted replies are from Steven Hoffenberg about James Patterson's book",
451
+ subject='FW: Privileged and Confidential - Fwd: JAMES PATTERSON NEW BOOK TELLING FEDS COVER UP OF BILLIONAIRE JEFF EPSTEIN CHILD RAPES RELEASE DATE OCT 10 2016 STEVEN HOFFENBERG IS ON THE BOOK WRITING TEAM !!!!',
452
+ truncate_to=2500,
453
+ comment='Hoffenberg',
399
454
  ),
400
455
  EmailCfg(id='026290', author=DAVID_SCHOEN, attribution_reason='Signature'),
401
456
  EmailCfg(id='031339', author=DAVID_SCHOEN, attribution_reason='Signature'),
@@ -403,43 +458,44 @@ EMAILS_CONFIG = [
403
458
  EmailCfg(id='031560', author=DAVID_SCHOEN, attribution_reason='Signature'),
404
459
  EmailCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
405
460
  EmailCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
406
- EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason=f"quoted reply has edwardjayepstein.com", is_fwded_article=True),
461
+ EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason='quoted reply has edwardjayepstein.com', is_fwded_article=True),
462
+ EmailCfg(id='031607', is_fwded_article=True, comment='Epstein reply to Edward Jay Epstein'),
407
463
  EmailCfg(
408
464
  id='030475',
409
465
  author=FAITH_KATES,
410
- attribution_reason=f'{NEXT_MANAGEMENT} legal signature',
466
+ attribution_reason='Next Management LLC legal signature',
411
467
  duplicate_ids=['030575'],
412
- dupe_type='redacted'
468
+ dupe_type='redacted',
413
469
  ),
414
- EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
470
+ EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]),
415
471
  EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
416
472
  EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
417
- EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
418
- EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
473
+ EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Aging"', truncate_to=400),
474
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"', truncate_to=498),
419
475
  EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
420
- EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
476
+ EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'),
421
477
  EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
422
478
  EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
423
479
  EmailCfg(id='021823', author=JEAN_LUC_BRUNEL, attribution_reason='Reply'),
424
- EmailCfg(id='022949', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
425
- EmailCfg(id='031624', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
426
- EmailCfg(id='031996', author=JEFFREY_EPSTEIN, recipients=[CHRISTINA_GALBRAITH], attribution_reason='bounced', duplicate_ids=['031442']),
427
- EmailCfg(id='018726', author=JEFFREY_EPSTEIN, timestamp=parse('2018-06-08 08:36:00')), # nofix
428
- EmailCfg(id='032283', author=JEFFREY_EPSTEIN, timestamp=parse('2016-09-14 08:04:00')), # nofix
429
- EmailCfg(id='026943', author=JEFFREY_EPSTEIN, timestamp=parse('2019-05-22 05:47:00')), # nofix
480
+ EmailCfg(id='031624', author=JEFFREY_EPSTEIN),
481
+ EmailCfg(id='018726', author=JEFFREY_EPSTEIN, date='2018-06-08 08:36:00'),
482
+ EmailCfg(id='032283', author=JEFFREY_EPSTEIN, date='2016-09-14 08:04:00'),
483
+ EmailCfg(id='026943', author=JEFFREY_EPSTEIN, date='2019-05-22 05:47:00'),
430
484
  EmailCfg(
431
485
  id='023208',
432
486
  author=JEFFREY_EPSTEIN,
433
- description=f"very long email chain about Leon Black's finances and things like Gratitude America",
434
- fwded_text_after='Date: Tue, Oct 27',
435
487
  recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
488
+ description="very long email chain about Leon Black's finances and things like Gratitude America",
436
489
  duplicate_ids=['023291'],
490
+ fwded_text_after='Date: Tue, Oct 27',
491
+ truncate_to=NO_TRUNCATE,
492
+ comment="Long discussion about leon black's finances",
437
493
  ),
438
494
  EmailCfg(
439
495
  id='032214',
440
496
  author=JEFFREY_EPSTEIN,
441
- actual_text='Agreed',
442
497
  recipients=[MIROSLAV_LAJCAK],
498
+ actual_text='Agreed',
443
499
  attribution_reason='Quoted reply has signature',
444
500
  ),
445
501
  EmailCfg(id='029582', author=JEFFREY_EPSTEIN, recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
@@ -448,15 +504,24 @@ EMAILS_CONFIG = [
448
504
  EmailCfg(id='031826', author=JEFFREY_EPSTEIN, actual_text='I have'),
449
505
  EmailCfg(id='030768', author=JEFFREY_EPSTEIN, actual_text='ok'),
450
506
  EmailCfg(id='022938', author=JEFFREY_EPSTEIN, actual_text='what do you suggest?'),
451
- EmailCfg(id='031791', author=JESSICA_CADWELL, attribution_reason='signature'),
452
- EmailCfg(id='028851', author=JOI_ITO, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2014-04-27 06:00:00')),
453
507
  EmailCfg(
454
- id='028849',
455
- attribution_reason='Conversation with Joi Ito',
456
- author=JOI_ITO,
457
- description=f"{JOI_ITO} reaching out to Epstein for an immediate phone call after news about illicit Russian money",
458
- recipients=[JEFFREY_EPSTEIN],
459
- timestamp=parse('2014-04-27 07:41:00'), # Filled in from 028847
508
+ id='031791',
509
+ author=JESSICA_CADWELL,
510
+ attribution_reason='signature',
511
+ truncate_to=NO_TRUNCATE,
512
+ comment='First email in Jessica Cadwell chain about service of legal documents',
513
+ ),
514
+ EmailCfg(
515
+ id='028850',
516
+ description='Joi Ito reaching out to Epstein for an immediate phone call after news about illicit Russian money',
517
+ duplicate_ids=['028851'],
518
+ dupe_type='quoted',
519
+ ),
520
+ EmailCfg(
521
+ id='028848',
522
+ description='Joi Ito reaching out to Epstein for an immediate phone call after news about illicit Russian money',
523
+ duplicate_ids=['028849'],
524
+ dupe_type='quoted',
460
525
  ),
461
526
  EmailCfg(id='028507', author=JONATHAN_FARKAS, attribution_reason='reply signed "best Jonathan"'),
462
527
  EmailCfg(id='033282', author=JONATHAN_FARKAS, attribution_reason='reply signed "thanks Jonathan"', duplicate_ids=['033484']),
@@ -468,9 +533,9 @@ EMAILS_CONFIG = [
468
533
  EmailCfg(id='032386', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
469
534
  EmailCfg(id='032727', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
470
535
  EmailCfg(id='030478', author=LANDON_THOMAS),
471
- EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
536
+ EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]),
472
537
  EmailCfg(id='029196', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN], actual_text='Talk in 40?'),
473
- EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature'),
538
+ EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature', truncate_to=NO_TRUNCATE, comment='visoski email about planes'),
474
539
  EmailCfg(id='033370', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
475
540
  EmailCfg(id='033495', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
476
541
  EmailCfg(id='033487', author=LAWRANCE_VISOSKI, recipients=[JEFFREY_EPSTEIN]),
@@ -494,29 +559,44 @@ EMAILS_CONFIG = [
494
559
  EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
495
560
  EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
496
561
  EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
497
- EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
498
- EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
562
+ EmailCfg(
563
+ id='032604',
564
+ author=MASHA_DROKOVA,
565
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
566
+ is_attribution_uncertain=True,
567
+ ),
568
+ EmailCfg(
569
+ id='032581',
570
+ author=MASHA_DROKOVA,
571
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
572
+ is_attribution_uncertain=True,
573
+ ),
499
574
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
500
575
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
501
576
  EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
502
- EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
503
- EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
504
- EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
577
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
578
+ EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply', truncate_to=NO_TRUNCATE),
505
579
  EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
506
- EmailCfg(id='022197', author=NADIA_MARCINKO, attribution_reason='reply'),
507
580
  EmailCfg(id='022214', author=NADIA_MARCINKO, attribution_reason='Reply header'),
508
581
  EmailCfg(id='021811', author=NADIA_MARCINKO, attribution_reason='Signature and email address in the message'),
509
582
  EmailCfg(id='028487', author=NORMAN_D_RAU, attribution_reason='Fwded from "to" address', duplicate_ids=['026612']),
510
583
  EmailCfg(
511
584
  id='024923',
512
- author=PAUL_KRASSNER,
513
- recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
514
- duplicate_ids=['031973']
585
+ recipients=["George Krassner", "Nick Kazan", "Mrisman02", "Rebecca Risman", "Linda W. Grossman"],
586
+ duplicate_ids=['031973'],
587
+ comment='krassner',
515
588
  ),
516
- EmailCfg(id='032457', author=PAUL_KRASSNER), # Bad OCR (nofix)
589
+ EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, "Nancy Cain"]),
517
590
  EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
518
591
  EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
519
592
  EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
593
+ EmailCfg(
594
+ id='012684',
595
+ author=PAUL_PROSPERI,
596
+ attribution_reason='one week after Epstein asked Prosperi for info on Kluge auction',
597
+ is_attribution_uncertain=True,
598
+ is_fwded_article=True,
599
+ ),
520
600
  EmailCfg(
521
601
  id='033561',
522
602
  author=PAUL_PROSPERI,
@@ -526,8 +606,16 @@ EMAILS_CONFIG = [
526
606
  EmailCfg(id='031694', author=PEGGY_SIEGAL, attribution_reason='quoted', is_attribution_uncertain=True),
527
607
  EmailCfg(id='032219', author=PEGGY_SIEGAL, attribution_reason='Signed "Peggy"'),
528
608
  EmailCfg(id='029020', author=RENATA_BOLOTOVA, attribution_reason='Signature'),
529
- EmailCfg(id='029605', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
530
- EmailCfg(id='029606', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
609
+ EmailCfg(
610
+ id='029605',
611
+ author=RENATA_BOLOTOVA,
612
+ attribution_reason=BOLOTOVA_REASON,
613
+ ),
614
+ EmailCfg(
615
+ id='029606',
616
+ author=RENATA_BOLOTOVA,
617
+ attribution_reason=BOLOTOVA_REASON,
618
+ ),
531
619
  EmailCfg(id='029604', author=RENATA_BOLOTOVA, attribution_reason='Continued in 239606 etc'),
532
620
  EmailCfg(
533
621
  id='033584',
@@ -536,66 +624,77 @@ EMAILS_CONFIG = [
536
624
  attribution_reason='Refs paper by Trivers',
537
625
  duplicate_ids=['033169'],
538
626
  ),
539
- EmailCfg(
540
- id='026320',
541
- author=SEAN_BANNON,
542
- attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
543
- ),
544
- EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
545
- EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
546
- EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
547
- EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
548
- EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
549
- EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
627
+ EmailCfg(id='026320', author=SEAN_BANNON, attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067"),
628
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
629
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
630
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
631
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
632
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
633
+ EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason="mentions \"Woody's movie\"", is_attribution_uncertain=True),
550
634
  EmailCfg(
551
635
  id='019109',
552
636
  author=STEVEN_HOFFENBERG,
553
637
  recipients=["Players2"],
554
- timestamp=parse('2016-08-11 09:36:01'),
555
- attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
638
+ description='looks like a memo from Charles Michael containing copy/paste of email contents?',
639
+ date='2016-08-11 09:36:01',
640
+ subject='FW: Privileged and Confidential - Fwd: JAMES PATTERSON NEW BOOK TELLING FEDS COVER UP OF BILLIONAIRE JEFF EPSTEIN CHILD RAPES RELEASE DATE OCT 10 2016 STEVEN HOFFENBERG IS ON THE BOOK WRITING TEAM !!!!',
556
641
  ),
557
642
  EmailCfg(
558
643
  id='026620',
559
- attribution_reason='ends with "Respectfully, terry"',
560
644
  author=TERRY_KAFKA,
561
- fwded_text_after='From: Mike Cohen',
562
645
  recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
563
- subject='Fw: The Iran Nuclear Deal',
646
+ attribution_reason='ends with "Respectfully, terry"',
564
647
  duplicate_ids=['028482'],
648
+ fwded_text_after='From: Mike Cohen',
649
+ subject='Fw: The Iran Nuclear Deal',
565
650
  ),
566
651
  EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
567
652
  EmailCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
568
653
  EmailCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
569
- EmailCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
570
654
  EmailCfg(id='033021', recipients=[ANAS_ALRASHEED], attribution_reason='visible in 033022'),
571
655
  EmailCfg(id='027063', recipients=[ANTHONY_BARRETT]),
572
656
  EmailCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
573
657
  EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
574
658
  EmailCfg(id='032876', recipients=[CECILIA_STEEN], attribution_reason='unredacted in 032267'),
575
659
  EmailCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
576
- EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason=f"quoted reply has edwardjayepstein.com"),
660
+ EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason='quoted reply has edwardjayepstein.com'),
577
661
  EmailCfg(
578
662
  id='030525',
579
663
  recipients=[FAITH_KATES],
580
- attribution_reason=f'Reply in 030414 has {NEXT_MANAGEMENT} legal signature',
664
+ attribution_reason='Reply in 030414 has Next Management LLC legal signature',
581
665
  duplicate_ids=['030581'],
582
666
  ),
583
- EmailCfg(id='025329', recipients=['George Krassner', 'Nancy Cain', 'Tom', 'Marie Moneysmith', 'Steven Gaydos', 'Linda W. Grossman', 'Holly Krassner Dawson', 'Daniel Dawson', 'Danny Goldberg', 'Caryl Ratner', 'Kevin Bright', 'Michael Simmons', SAMUEL_LEFF, 'Bob Fass', 'Lynnie Tofte Fass', 'Barb Cowles', 'Lee Quarnstrom']),
584
- EmailCfg(id='033568', recipients=['George Krassner', 'Daniel Dawson', 'Danny Goldberg', 'Tom', 'Kevin Bright', 'Walli Leff', 'Michael Simmons', 'Lee Quarnstrom', 'Lanny Swerdlow', 'Larry Sloman', 'W&K', 'Harry Shearer', 'Jay Levin']),
667
+ EmailCfg(
668
+ id='025329',
669
+ recipients=["George Krassner", "Nancy Cain", "Tom", "Marie Moneysmith", "Steven Gaydos", "Linda W. Grossman", "Holly Krassner Dawson", "Daniel Dawson", "Danny Goldberg", "Caryl Ratner", "Kevin Bright", "Michael Simmons", SAMUEL_LEFF, "Bob Fass", "Lynnie Tofte Fass", "Barb Cowles", "Lee Quarnstrom"],
670
+ ),
671
+ EmailCfg(
672
+ id='033568',
673
+ recipients=["George Krassner", "Daniel Dawson", "Danny Goldberg", "Tom", "Kevin Bright", "Walli Leff", "Michael Simmons", "Lee Quarnstrom", "Lanny Swerdlow", "Larry Sloman", "W&K", "Harry Shearer", "Jay Levin"],
674
+ subject="Fwd: Daryl Cagle's Blog",
675
+ ),
585
676
  EmailCfg(id='026426', recipients=[JEAN_HUGUEN], attribution_reason='Reply'),
586
677
  EmailCfg(id='022202', recipients=[JEAN_LUC_BRUNEL], attribution_reason='Follow up / reply', duplicate_ids=['029975']),
587
- EmailCfg(id='022187', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
588
- EmailCfg(id='031489', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (unfixable)
589
- EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
590
- EmailCfg(id='030367', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
591
- EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
592
- EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
593
- EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
594
- EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
595
- EmailCfg(id='029558', recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING], attribution_reason='BCC', fwded_text_after='Creativity is central'),
678
+ EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]),
679
+ EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]),
680
+ EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]),
681
+ EmailCfg(id='033386', recipients=[JEFFREY_EPSTEIN, None], duplicate_ids=['033599']),
682
+ EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]),
683
+ EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True),
684
+ EmailCfg(
685
+ id='029558',
686
+ recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING],
687
+ attribution_reason='BCC',
688
+ fwded_text_after='Creativity is central',
689
+ ),
596
690
  EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
597
691
  EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
598
692
  EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
693
+ EmailCfg(
694
+ id='025589',
695
+ recipients=[GORDON_GETTY, JEFFREY_EPSTEIN] + TRIVERS_CCS,
696
+ attribution_reason='Reply',
697
+ ),
599
698
  EmailCfg(
600
699
  id='021090',
601
700
  recipients=[JONATHAN_FARKAS],
@@ -606,15 +705,15 @@ EMAILS_CONFIG = [
606
705
  id='033073',
607
706
  recipients=[KATHRYN_RUEMMLER],
608
707
  attribution_reason='to "Kathy" about dems, sent from iPad',
609
- is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
708
+ is_attribution_uncertain=True,
610
709
  ),
611
710
  EmailCfg(
612
711
  id='032939',
613
712
  recipients=[KATHRYN_RUEMMLER],
614
713
  attribution_reason='to "Kathy" about dems, sent from iPad',
615
- is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
714
+ is_attribution_uncertain=True,
616
715
  ),
617
- EmailCfg(id='030522', recipients=[LANDON_THOMAS], attribution_reason='reply header', is_fwded_article=True), # Vicky Ward article
716
+ EmailCfg(id='030522', recipients=[LANDON_THOMAS], attribution_reason='reply header', is_fwded_article=True),
618
717
  EmailCfg(id='031413', recipients=[LANDON_THOMAS], attribution_reason='reply header'),
619
718
  EmailCfg(id='033591', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['033591']),
620
719
  EmailCfg(id='027097', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['028787']),
@@ -622,99 +721,159 @@ EMAILS_CONFIG = [
622
721
  EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
623
722
  EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
624
723
  EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
625
- EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
724
+ EmailCfg(
725
+ id='033025',
726
+ recipients=[MASHA_DROKOVA],
727
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
728
+ is_attribution_uncertain=True,
729
+ ),
626
730
  EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
627
731
  EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
628
732
  EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
629
733
  EmailCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
630
734
  EmailCfg(id='022193', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
735
+ EmailCfg(id='022197', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
736
+ EmailCfg(id='022190', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
631
737
  EmailCfg(id='030572', recipients=[PAULA], attribution_reason='quoted in 030482', is_attribution_uncertain=True),
632
- EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
633
- EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
634
- EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
635
- EmailCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
738
+ EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
739
+ EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
740
+ EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
741
+ EmailCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
636
742
  EmailCfg(id='030096', recipients=[PETER_MANDELSON], attribution_reason='reply header'),
637
743
  EmailCfg(id='032951', recipients=[RAAFAT_ALSABBAGH, None], attribution_reason='Redacted'),
638
744
  EmailCfg(id='029581', recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
639
745
  EmailCfg(id='019334', recipients=[STEVE_BANNON], attribution_reason='quoted reply'),
640
746
  EmailCfg(id='021106', recipients=[STEVE_BANNON], attribution_reason='Reply'),
641
-
642
- # Misc configs
643
- EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
644
- EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
645
- EmailCfg(id='033050', actual_text='schwartman'),
646
- EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
647
- EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
648
- EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
649
- EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
650
- EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
651
- EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
652
- EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
653
- EmailCfg(id='022344', is_fwded_article=True, duplicate_ids=['028529']), # Bill Gates is most admired from Nikolic
654
- EmailCfg(id='018197', is_fwded_article=True, duplicate_ids=['028648']), # Ray Takeyh article fwd
655
- EmailCfg(id='028728', is_fwded_article=True, duplicate_ids=['027102']), # WSJ forward to Larry Summers
656
- EmailCfg(id='028781', is_fwded_article=True, duplicate_ids=['013460']), # Atlantic on Jim Yong Kim, Obama's World Bank Pick
657
- EmailCfg(id='025041', is_fwded_article=True, duplicate_ids=['028675']), # Obama agenda
658
- EmailCfg(id='031136', is_fwded_article=True, duplicate_ids=['028791']), # 'Smart Money is Fleeing US Stocks'
659
- EmailCfg(id='031779', is_fwded_article=True, duplicate_ids=['026938']), # Sarah Silverman on AI
660
- EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
661
- EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
662
- EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
663
- EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
664
- EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
665
- EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
666
- EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
667
- EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
668
- EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
669
- EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
670
- EmailCfg(id='026755', is_fwded_article=True), # HuffPo
671
- EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
672
- EmailCfg(id='030528', is_fwded_article=True), # Vicky Ward article
673
- EmailCfg(id='030460', is_fwded_article=True), # Vicky Ward article
674
- EmailCfg(id='028508', is_fwded_article=True), # nanosatellites article
675
- EmailCfg(id='019845', is_fwded_article=True), # Pro Publica article on Preet Bharara
676
- EmailCfg(id='029021', is_fwded_article=True), # article about bannon sent by Alain Forget
677
- EmailCfg(id='031688', is_fwded_article=True), # Bill Siegel fwd of email about hamas
678
- EmailCfg(id='026551', is_fwded_article=True), # Sultan bin Sulayem "Ayatollah between the sheets"
679
- EmailCfg(id='031768', is_fwded_article=True), # Sultan bin Sulayem 'Horseface'
680
- EmailCfg(id='031569', is_fwded_article=True), # Article by Kathryn Alexeeff fwded to Peter Thiel
681
- EmailCfg(id='029689', is_fwded_article=True), # Tunisia article to Larry Summers
682
- EmailCfg(id='014525', is_fwded_article=True), # Really more of a mailing list from Paul Morris?
683
- EmailCfg(id='024384', is_fwded_article=True), # Interview with Bill Siegal re: Islam
684
- EmailCfg(id='030200', is_fwded_article=True), # Lawfare indicting a president
685
- EmailCfg(id='029509', is_fwded_article=True), # Deepak Chopra LSD, Quantum Healing
686
- EmailCfg(id='026778', is_fwded_article=True), # tax alert
687
- EmailCfg(id='023001', is_fwded_article=True), # Miami Herald article timeline of the sex abuse case
688
- EmailCfg(id='013405', is_fwded_article=True), # Articles about epstein case
689
- EmailCfg(id='021740', is_fwded_article=True), # Miami Herald article about Epstein prosecutor
690
- EmailCfg(id='023126', is_fwded_article=True), # Miami Herald on Alex Acosta
691
- EmailCfg(id='029625', is_fwded_article=True), # Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement
692
- EmailCfg(id='029505', is_fwded_article=True), # Foreign Policy Middle Eastern Monarchs Look at the Trump
693
- EmailCfg(id='029859', is_fwded_article=True), # Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits
694
- EmailCfg(id='031988', is_fwded_article=True), # NYT review of Inside Job
695
- EmailCfg(id='029901', is_fwded_article=True), # THE EDGE question
696
- EmailCfg(id='031399', is_fwded_article=True), # Miami U.S. Attorney's Office recuses itself from Jeffrey Epstein case
697
- EmailCfg(id='031705', is_fwded_article=True), # Thomas Friedman why not in vegas?
698
- EmailCfg(id='016801', is_fwded_article=True), # Capital Market Outlook
699
- EmailCfg(id='023564', is_fwded_article=True), # BBG ;Leon Black's Tax-Overhaul Dilemma Could Alter Wall Street Model
700
- EmailCfg(id='025231', is_fwded_article=True), # Newsmax: Laffer, Laffer: Obama Must Use Reaganomics to Save Economy The only way President Barack Obama can solve
701
- EmailCfg(id='031472', is_fwded_article=True), # WSJ: Lawyers for Imam Wanted by Turkish authorities Fear for Their Client's Life
702
- EmailCfg(id='012684', is_fwded_article=True), # Trump in talks to buy socialite Kluge's Charlottesville vineyard
703
- EmailCfg(id='028536', is_fwded_article=True), # Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?
704
- EmailCfg(id='030326', is_fwded_article=True), # NYP Congressional candidate compares Melania Trump to prostitute
705
- EmailCfg(id='030519', is_fwded_article=True), # Daily Mail on Prince Andrew
706
- EmailCfg(id='030878', is_fwded_article=True), # Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'
707
- EmailCfg(id='024300', is_fwded_article=True), # Bookstore owner calls police after customer confronted Steve Bannon
708
- EmailCfg(id='026924', is_fwded_article=True), # The Onion
709
- EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
710
- EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
711
- EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
712
- EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
713
- EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
714
- EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
715
- EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
716
-
717
- # Configure duplicates
747
+ EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan', is_fwded_article=True),
748
+ EmailCfg(id='032358', actual_text=REDACTED),
749
+ EmailCfg(
750
+ id='031036',
751
+ description='Barbro C. Ehnbom related donation and Swedish girls discussion',
752
+ is_interesting=True,
753
+ comment='Barbro Ehnbom talking about Swedish girl',
754
+ ),
755
+ EmailCfg(id='032946', description='discussion of obtaining a Moroccan visa for an unnamed woman', is_interesting=True),
756
+ EmailCfg(id='031320', description='discussion of routing Gratitude foundation money through Peggy Siegal', is_interesting=True),
757
+ EmailCfg(id='023627', description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT, is_fwded_article=True, is_interesting=True, truncate_to=16800),
758
+ EmailCfg(id='032671', description='connections to Fusion GPS who produced the Steele Dossier on Trump'),
759
+ EmailCfg(id='033052', description='connections to Fusion GPS who produced the Steele Dossier on Trump'),
760
+ EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results", truncate_to=2404),
761
+ EmailCfg(id='031472', description="discussion of publishing an audio recording related to the coup in Turkey"),
762
+ EmailCfg(id='031333', description='Fort Knox conspiracy theory, looks like a Russian disinfo article', is_fwded_article=True),
763
+ EmailCfg(id='031335', description='Fort Knox conspiracy theory, looks like a Russian disinfo article', is_fwded_article=True),
764
+ EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
765
+ EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
766
+ EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
767
+ EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
768
+ EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
769
+ EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
770
+ EmailCfg(id='031011', description='jokes about Chicago corruption', duplicate_ids=['031090'], truncate_to=TRUNCATED_CHARS),
771
+ EmailCfg(id='016693', description='signed "MM"'),
772
+ EmailCfg(id='028524', description='Zach Braff op-ed on Woody Allen in NYT', is_fwded_article=True),
773
+ EmailCfg(id='026298', duplicate_ids=['026499'], is_fwded_article=True, comment='Written by someone else?'),
774
+ EmailCfg(id='029692', duplicate_ids=['029779'], is_fwded_article=True, comment='WaPo article'),
775
+ EmailCfg(id='022344', duplicate_ids=['028529'], is_fwded_article=True, comment='Bill Gates is most admired from Nikolic'),
776
+ EmailCfg(id='018197', duplicate_ids=['028648'], is_fwded_article=True, comment='Ray Takeyh article fwd'),
777
+ EmailCfg(id='028728', duplicate_ids=['027102'], is_fwded_article=True, comment='WSJ forward to Larry Summers'),
778
+ EmailCfg(id='028781', duplicate_ids=['013460'], is_fwded_article=True, comment="Atlantic on Jim Yong Kim, Obama's World Bank Pick"),
779
+ EmailCfg(id='025041', duplicate_ids=['028675'], is_fwded_article=True, comment='Obama agenda'),
780
+ EmailCfg(id='031136', duplicate_ids=['028791'], is_fwded_article=True, comment="'Smart Money is Fleeing US Stocks'"),
781
+ EmailCfg(id='031779', duplicate_ids=['026938'], is_fwded_article=True, comment='Sarah Silverman on AI'),
782
+ EmailCfg(id='029849', duplicate_ids=['033482'], is_fwded_article=True, comment='Fareed Zakaria: Trump sells America short),'),
783
+ EmailCfg(id='032023', duplicate_ids=['032012'], is_fwded_article=True, comment='American-Israeli Cooperative Enterprise Newsletter'),
784
+ EmailCfg(id='021758', duplicate_ids=['030616'], is_fwded_article=True, comment="Radar Online article about Epstein's early prison release"),
785
+ EmailCfg(id='033297', duplicate_ids=['033586'], is_fwded_article=True, comment='Sultan Sulayem article about Trump and Russia'),
786
+ EmailCfg(id='031428', duplicate_ids=['031388'], is_fwded_article=True),
787
+ EmailCfg(id='033528', duplicate_ids=['033517'], is_fwded_article=True),
788
+ EmailCfg(id='030238', duplicate_ids=['031130'], is_fwded_article=True),
789
+ EmailCfg(id='031112', duplicate_ids=['030876'], is_fwded_article=True),
790
+ EmailCfg(id='031423', duplicate_ids=['025361'], is_fwded_article=True),
791
+ EmailCfg(id='026755', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Epstein self fwd'),
792
+ EmailCfg(id='026778', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
793
+ EmailCfg(id='032458', truncate_to=NO_TRUNCATE, description='discussion of acquiring pieces for Epstein\'s art collection'),
794
+ EmailCfg(id='032464', truncate_to=NO_TRUNCATE, description='redacted discussion about art advisor Etienne Binant'),
795
+ # TODO: just use a search string for truncation
796
+ EmailCfg(id='032964', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
797
+ EmailCfg(id='032968', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
798
+ EmailCfg(id='032467', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
799
+ EmailCfg(id='032480', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
800
+ EmailCfg(id='032486', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
801
+ EmailCfg(id='032491', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
802
+ EmailCfg(id='032975', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
803
+ EmailCfg(id='032496', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
804
+ EmailCfg(id='025643', is_fwded_article=True, comment='Alice Fisher stated to be in the mix of potential candidates for new FBI director.'),
805
+ EmailCfg(id='014523', is_fwded_article=True, comment='finance research'),
806
+ EmailCfg(id='014857', is_fwded_article=True, comment='finance research'),
807
+ EmailCfg(id='029458', is_fwded_article=True, comment='finance research'),
808
+ EmailCfg(id='026893', is_fwded_article=True, comment='finance research'),
809
+ EmailCfg(id='033362', is_fwded_article=True, comment='finance research'),
810
+ EmailCfg(id='030865', is_fwded_article=True, comment='Deutsche bank research'),
811
+ EmailCfg(id='021231', is_fwded_article=True, comment='11 places with worse economy than US'),
812
+ EmailCfg(id='029905', is_fwded_article=True, comment='Ann Coulter'),
813
+ EmailCfg(id='026829', is_fwded_article=True, comment='Taxes'),
814
+ EmailCfg(id='020443', is_fwded_article=True, comment='WSJ Deplorables Bannon'),
815
+ EmailCfg(id='030372', is_fwded_article=True, comment='Bannon China Iran'),
816
+ EmailCfg(id='030983', is_fwded_article=True, comment='Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis'),
817
+ EmailCfg(id='031774', is_fwded_article=True, comment='Krassner fwd of Palmer Report article'),
818
+ EmailCfg(id='033345', is_fwded_article=True, comment='Krassner fwd of Palmer Report article'),
819
+ EmailCfg(id='029903', is_fwded_article=True, comment='Krassner fwd of Ann Coulter article about Epstein'),
820
+ EmailCfg(id='030266', is_fwded_article=True, comment='Krassner fwd of article about Dershowitz'),
821
+ EmailCfg(id='030868', is_fwded_article=True, comment="'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month"),
822
+ EmailCfg(id='016218', is_fwded_article=True, comment='AT&T confirms it paid Trump lawyer Cohen for insights on Trump'),
823
+ EmailCfg(id='030528', is_fwded_article=True, comment='Vicky Ward article'),
824
+ EmailCfg(id='030460', is_fwded_article=True, comment='Vicky Ward article'),
825
+ EmailCfg(id='028508', is_fwded_article=True, comment='nanosatellites article'),
826
+ EmailCfg(id='019845', is_fwded_article=True, comment='Pro Publica article on Preet Bharara'),
827
+ EmailCfg(id='029021', is_fwded_article=True, comment='article about bannon sent by Alain Forget'),
828
+ EmailCfg(id='031688', is_fwded_article=True, comment='Bill Siegel fwd of email about hamas'),
829
+ EmailCfg(id='026551', is_fwded_article=True, comment='Sultan bin Sulayem "Ayatollah between the sheets"'),
830
+ EmailCfg(id='031768', is_fwded_article=True, comment="Sultan bin Sulayem 'Horseface'"),
831
+ EmailCfg(id='031569', is_fwded_article=True, comment='Article by Kathryn Alexeeff fwded to Peter Thiel'),
832
+ EmailCfg(id='029689', is_fwded_article=True, comment='Tunisia article to Larry Summers'),
833
+ EmailCfg(id='014525', is_fwded_article=True, comment='Really more of a mailing list from Paul Morris?'),
834
+ EmailCfg(id='024384', is_fwded_article=True, comment='Interview with Bill Siegal re: Islam'),
835
+ EmailCfg(id='030200', is_fwded_article=True, comment='Lawfare indicting a president'),
836
+ EmailCfg(id='029509', is_fwded_article=True, comment='Deepak Chopra LSD, Quantum Healing'),
837
+ EmailCfg(id='023001', is_fwded_article=True, comment='Miami Herald article timeline of the sex abuse case'),
838
+ EmailCfg(id='013405', is_fwded_article=True, comment='Articles about epstein case'),
839
+ EmailCfg(id='021740', is_fwded_article=True, comment='Miami Herald article about Epstein prosecutor'),
840
+ EmailCfg(id='023126', is_fwded_article=True, comment='Miami Herald on Alex Acosta'),
841
+ EmailCfg(id='029625', is_fwded_article=True, comment='Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement'),
842
+ EmailCfg(id='029505', is_fwded_article=True, comment='Foreign Policy Middle Eastern Monarchs Look at the Trump'),
843
+ EmailCfg(id='029859', is_fwded_article=True, comment='Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits'),
844
+ EmailCfg(id='031988', is_fwded_article=True, comment='NYT review of Inside Job'),
845
+ EmailCfg(id='029901', is_fwded_article=True, comment='THE EDGE question'),
846
+ EmailCfg(id='031399', is_fwded_article=True, comment="Miami U.S. Attorney's Office recuses itself from Jeffrey Epstein case"),
847
+ EmailCfg(id='031705', is_fwded_article=True, comment='Thomas Friedman why not in vegas?'),
848
+ EmailCfg(id='016801', is_fwded_article=True, comment='Capital Market Outlook'),
849
+ EmailCfg(id='023564', is_fwded_article=True, comment="BBG ;Leon Black's Tax-Overhaul Dilemma Could Alter Wall Street Model"),
850
+ EmailCfg(id='025231', is_fwded_article=True, comment='Newsmax: Laffer, Laffer: Obama Must Use Reaganomics to Save Economy The only way President Barack Obama can solve'),
851
+ EmailCfg(id='028536', is_fwded_article=True, comment='Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?'),
852
+ EmailCfg(id='030326', is_fwded_article=True, comment='NYP Congressional candidate compares Melania Trump to prostitute'),
853
+ EmailCfg(id='030519', is_fwded_article=True, comment='Daily Mail on Prince Andrew'),
854
+ EmailCfg(id='030878', is_fwded_article=True, comment="Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'"),
855
+ EmailCfg(id='024300', is_fwded_article=True, comment='Bookstore owner calls police after customer confronted Steve Bannon'),
856
+ EmailCfg(id='026924', is_fwded_article=True, comment='The Onion'),
857
+ EmailCfg(id='022624', is_fwded_article=True, comment='Disgusting: Clinton Snared In Pedophile Ring'),
858
+ EmailCfg(id='022673', is_fwded_article=True, comment='Epstein + Clinton'),
859
+ EmailCfg(id='033301', is_fwded_article=True, comment='description of Trump jet'),
860
+ EmailCfg(id='021729', is_fwded_article=True, comment='Acosta rebuke'),
861
+ EmailCfg(id='023635', is_fwded_article=True, comment='Landon Thomas finance related'),
862
+ EmailCfg(id='026637', is_fwded_article=True, comment='Landon Thomas finance related'),
863
+ EmailCfg(id='021764', is_fwded_article=True, comment='He was 50 and they were girls'),
864
+ EmailCfg(id='033311', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
865
+ EmailCfg(id='026580', is_fwded_article=True, comment='NPR: Antigua: Land Of Sun, Sand, And Super Cheap'),
866
+ EmailCfg(id='031340', is_fwded_article=True, comment='Article about Alex Jones threatening Robert Mueller'),
867
+ EmailCfg(id='030209', is_fwded_article=True, comment='Atlantic Council Syria: Blackberry Diplomacy'),
868
+ EmailCfg(id='026605', is_fwded_article=True, comment='Article about Ruemmler turning down attorney general job by NEDRA PICKLER'),
869
+ EmailCfg(id='031990', is_fwded_article=True, comment='newsmax on ken starr'),
870
+ EmailCfg(id='029433', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
871
+ EmailCfg(id='030927', is_fwded_article=True, comment="don't talk to the FBI"),
872
+ EmailCfg(id='033329', is_fwded_article=True, comment='Chinese economists'),
873
+ EmailCfg(id='031764', is_fwded_article=True, truncate_to=3500, comment='broidy malaysia'),
874
+ EmailCfg(id='032475', date='2017-02-15 13:31:25'),
875
+ EmailCfg(id='030373', date='2018-10-03 01:49:27'),
876
+ EmailCfg(id='032325', duplicate_ids=['026014'], dupe_type='quoted', is_interesting=True, comment='Zubair'),
718
877
  EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
719
878
  EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
720
879
  EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
@@ -733,19 +892,16 @@ EMAILS_CONFIG = [
733
892
  EmailCfg(id='033512', duplicate_ids=['033361']),
734
893
  EmailCfg(id='030299', duplicate_ids=['021794']),
735
894
  EmailCfg(id='033575', duplicate_ids=['012898']),
736
- EmailCfg(id='031428', duplicate_ids=['031388']),
737
895
  EmailCfg(id='031980', duplicate_ids=['019409']),
738
896
  EmailCfg(id='033486', duplicate_ids=['033156']),
739
897
  EmailCfg(id='025790', duplicate_ids=['031994']),
740
898
  EmailCfg(id='028497', duplicate_ids=['026228']),
741
- EmailCfg(id='033528', duplicate_ids=['033517']),
742
899
  EmailCfg(id='019412', duplicate_ids=['028621']),
743
900
  EmailCfg(id='027053', duplicate_ids=['028765']),
744
901
  EmailCfg(id='027049', duplicate_ids=['028773']),
745
902
  EmailCfg(id='033580', duplicate_ids=['033207']),
746
903
  EmailCfg(id='028506', duplicate_ids=['025547']),
747
- EmailCfg(id='028784', duplicate_ids=['026549']),
748
- EmailCfg(id='033386', duplicate_ids=['033599']),
904
+ EmailCfg(id='028784', duplicate_ids=['026549'], is_interesting=True, comment='seminars: Money / Power'),
749
905
  EmailCfg(id='023024', duplicate_ids=['030622']),
750
906
  EmailCfg(id='030618', duplicate_ids=['023026']),
751
907
  EmailCfg(id='028780', duplicate_ids=['026834']),
@@ -756,15 +912,14 @@ EMAILS_CONFIG = [
756
912
  EmailCfg(id='030587', duplicate_ids=['030514']),
757
913
  EmailCfg(id='031089', duplicate_ids=['018084']),
758
914
  EmailCfg(id='031088', duplicate_ids=['030885']),
759
- EmailCfg(id='030238', duplicate_ids=['031130']),
760
915
  EmailCfg(id='030859', duplicate_ids=['031067']),
761
916
  EmailCfg(id='030635', duplicate_ids=['031134']),
762
- EmailCfg(id='028494', duplicate_ids=['026234']),
917
+ EmailCfg(id='028494', duplicate_ids=['026234'], truncate_to=NO_TRUNCATE, comment='Email about being in palm beach w/trump people'),
763
918
  EmailCfg(id='030311', duplicate_ids=['021790']),
764
919
  EmailCfg(id='033508', duplicate_ids=['029880']),
765
920
  EmailCfg(id='030493', duplicate_ids=['030612']),
766
921
  EmailCfg(id='032051', duplicate_ids=['031771']),
767
- EmailCfg(id='031217', duplicate_ids=['021761']),
922
+ EmailCfg(id='031217', duplicate_ids=['021761'], truncate_to=1800, comment='1st email for dersh, has long article'),
768
923
  EmailCfg(id='031346', duplicate_ids=['031426']),
769
924
  EmailCfg(id='031345', duplicate_ids=['031427']),
770
925
  EmailCfg(id='031343', duplicate_ids=['031432']),
@@ -790,12 +945,10 @@ EMAILS_CONFIG = [
790
945
  EmailCfg(id='027032', duplicate_ids=['028531']),
791
946
  EmailCfg(id='026777', duplicate_ids=['028493']),
792
947
  EmailCfg(id='029837', duplicate_ids=['029255']),
793
- EmailCfg(id='031423', duplicate_ids=['025361']),
794
948
  EmailCfg(id='029299', duplicate_ids=['033594']),
795
949
  EmailCfg(id='030904', duplicate_ids=['031069']),
796
950
  EmailCfg(id='030006', duplicate_ids=['031165']),
797
951
  EmailCfg(id='025215', duplicate_ids=['031159']),
798
- EmailCfg(id='031011', duplicate_ids=['031090']),
799
952
  EmailCfg(id='032068', duplicate_ids=['018158']),
800
953
  EmailCfg(id='031213', duplicate_ids=['031221']),
801
954
  EmailCfg(id='016595', duplicate_ids=['016690']),
@@ -811,7 +964,6 @@ EMAILS_CONFIG = [
811
964
  EmailCfg(id='028620', duplicate_ids=['027094']),
812
965
  EmailCfg(id='032456', duplicate_ids=['033579']),
813
966
  EmailCfg(id='030315', duplicate_ids=['030255']),
814
- EmailCfg(id='031112', duplicate_ids=['030876']),
815
967
  EmailCfg(id='030614', duplicate_ids=['030491']),
816
968
  EmailCfg(id='033585', duplicate_ids=['032279']),
817
969
  EmailCfg(id='031220', duplicate_ids=['031189']),
@@ -819,22 +971,21 @@ EMAILS_CONFIG = [
819
971
  EmailCfg(id='033230', duplicate_ids=['033577']),
820
972
  EmailCfg(id='032125', duplicate_ids=['023971']),
821
973
  EmailCfg(id='031230', duplicate_ids=['031203']),
822
- EmailCfg(id='028752', duplicate_ids=['026569']),
974
+ EmailCfg(id='028752', duplicate_ids=['026569'], subject='Re: Program & Attendee list'),
823
975
  EmailCfg(id='031773', duplicate_ids=['032050']),
824
976
  EmailCfg(id='021400', duplicate_ids=['031983']),
825
- EmailCfg(id='026548', duplicate_ids=['033491']),
977
+ EmailCfg(id='026548', duplicate_ids=['033491', '033495'], comment='033495 is HTML garbage version'),
826
978
  EmailCfg(id='029752', duplicate_ids=['023550']),
827
979
  EmailCfg(id='030339', duplicate_ids=['030592']),
828
- EmailCfg(id='032250', duplicate_ids=['033589']),
829
-
830
- # Emails that need a little help determining how to separate the actual text from fwded text
980
+ EmailCfg(id='032250', duplicate_ids=['033589'], truncate_to=1000, comment='Wolff article'),
831
981
  EmailCfg(id='013415', fwded_text_after='Darren K. Indyke'),
832
982
  EmailCfg(id='024624', fwded_text_after='On Tue, May 14'),
833
- EmailCfg(id='025888', fwded_text_after='Jul 24, 2015'),
983
+ EmailCfg(id='025888', fwded_text_after='Jul 24, 2015', comment='sultan asking about cholesterol drug'),
984
+ EmailCfg(id='026362', fwded_text_after='Jul 24, 2015', comment='sultan asking about cholesterol drug'),
834
985
  EmailCfg(id='016413', fwded_text_after='In a former warehouse'),
835
986
  EmailCfg(id='025548', fwded_text_after='Edward Jay Epstein'),
836
987
  EmailCfg(id='032806', fwded_text_after='• Sep 13, 2018'),
837
- EmailCfg(id='024251', fwded_text_after='Debate Schedule'),
988
+ EmailCfg(id='024251', fwded_text_after='Debate Schedule', truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
838
989
  EmailCfg(id='028943', fwded_text_after='-Lisa'),
839
990
  EmailCfg(id='029431', fwded_text_after='I am writing now'),
840
991
  EmailCfg(id='020437', fwded_text_after='Will Cohen Cooperate'),
@@ -843,7 +994,7 @@ EMAILS_CONFIG = [
843
994
  EmailCfg(id='030324', fwded_text_after='For Federal Programs'),
844
995
  EmailCfg(id='022766', fwded_text_after='--- On Wed, 4/22/15'),
845
996
  EmailCfg(id='025606', fwded_text_after='> On May 6,'),
846
- EmailCfg(id='022977', fwded_text_after='Top of Form'),
997
+ EmailCfg(id='022977', fwded_text_after='Top of Form', truncate_to=1800, comment='Krassner with huge attachments field'),
847
998
  EmailCfg(id='033420', fwded_text_after='Slowing economy could increase pressure on'),
848
999
  EmailCfg(id='019203', fwded_text_after='This end-of-the-year'),
849
1000
  EmailCfg(id='022207', fwded_text_after='Web Images Videos Maps'),
@@ -855,9 +1006,97 @@ EMAILS_CONFIG = [
855
1006
  EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
856
1007
  EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
857
1008
  EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
858
- EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
1009
+ EmailCfg(id='012197_4', fwded_text_after='Thanks -- Jay', is_fwded_article=False),
1010
+ EmailCfg(id='026449', description="comments about Trump's 'former bridge girl and toy'", is_interesting=True),
1011
+ EmailCfg(id='030727', description='Epstein requesting help for Russia and discussion of an unknown dead Chinese shareholder', is_interesting=True),
1012
+ EmailCfg(id='033377', description="Epstein says he knows Russian oligarch Oleg Deripaska", is_interesting=True),
1013
+ EmailCfg(id='029098', description=f"{MASHA_DROKOVA}'s skype contact is in Moscow", is_interesting=True),
1014
+ EmailCfg(id='032229', description='Michael Wolff offers ideas for entrapping Trump', is_interesting=True),
1015
+ EmailCfg(id='023454', description='Email invitation sent to tech CEOs and Epstein', is_interesting=True, truncate_to=1878),
1016
+ EmailCfg(id='032842', description='discussion of "stealing" a casino in Atlantic City', is_interesting=True),
1017
+ EmailCfg(id='030630', is_interesting=True, comment="'What happens with zubair's project?'"),
1018
+ EmailCfg(id='033178', is_interesting=True, comment="'How is it going with Zubair?'"),
1019
+ EmailCfg(id='022396', is_interesting=True, comment='Ukraine friend'),
1020
+ EmailCfg(id='026505', is_interesting=True, comment='I know how dirty trump is'),
1021
+ EmailCfg(id='029679', is_interesting=True, comment="Trump's driver was the bag man"),
1022
+ EmailCfg(id='026258', is_interesting=True, comment='Bannon cripto coin issues'),
1023
+ EmailCfg(id='026260', is_interesting=True, comment='Bannon cripto coin issues'),
1024
+ EmailCfg(id='032359', is_interesting=True, comment='Jabor e-currency'),
1025
+ EmailCfg(id='031451', is_interesting=True, comment='"would you like photso of donald and girls in bikinis in my kitchen."'),
1026
+ EmailCfg(id='031596', is_interesting=True, comment='"would you like photso of donald and girls in bikinis in my kitchen."'),
1027
+ EmailCfg(id='031601', is_interesting=True, comment='Old gf i gave to donald', truncate_to=2000),
1028
+ EmailCfg(id='030725', is_interesting=True, comment='David Stern in Moscow'),
1029
+ EmailCfg(id='030714', is_interesting=True, comment='Bannon, Russian Dugan shout out'),
1030
+ EmailCfg(id='031659', is_interesting=True, comment='"i have met some very bad people „ none as bad as trump"'),
1031
+ EmailCfg(id='030245', is_interesting=True, comment='Epstein rationalizes his behavior in an open letter to the world'),
1032
+ EmailCfg(id='030781', is_interesting=True, comment='Bannon email about crypto coin issues'),
1033
+ EmailCfg(id='032906', is_interesting=True, comment='David Blaine email'),
1034
+ EmailCfg(id='026036', is_interesting=True, truncate_to=6000, comment='Gino Yu blockchain mention'),
1035
+ EmailCfg(id='029609', is_interesting=True, comment='Joi Ito'),
1036
+ EmailCfg(id='025233', is_interesting=True, comment='Reputation.com discussion'),
1037
+ EmailCfg(id='017827', is_interesting=True, comment='Bannon / Peggy Siegal email about netflix doc on Epstein'),
1038
+ EmailCfg(id='030222', is_interesting=True, comment='Ross Gow / Ghislaine correspondence'),
1039
+ EmailCfg(
1040
+ id='026028',
1041
+ description=f"Epstein introduces {LARRY_SUMMERS} to possibly corrupt Senegalese official Karim Wade",
1042
+ is_interesting=True,
1043
+ ),
1044
+ EmailCfg(id='029545', is_interesting=True, comment='Tyler Shears reputation'),
1045
+ EmailCfg(id='025812', is_interesting=True, comment='Tyler Shears reputation'),
1046
+ EmailCfg(
1047
+ id='029914',
1048
+ description='Epstein and Lord Mandelson discuss Russian investments',
1049
+ is_interesting=True,
1050
+ truncate_to=4500,
1051
+ ),
1052
+ EmailCfg(id='033453', is_interesting=True, description='possibly an email from one of the women who sued Trump'),
1053
+ EmailCfg(id='029342', is_interesting=True, truncate_to=2000, comment='Hakeem Jeffries'),
1054
+ EmailCfg(id='031326', is_interesting=True, comment='"dog that hasn\'t barked is trump"'),
1055
+ EmailCfg(id='033171', is_interesting=True, comment='Zubair'),
1056
+ EmailCfg(
1057
+ id='032319',
1058
+ dupe_type='quoted',
1059
+ duplicate_ids=['032283'],
1060
+ is_interesting=True,
1061
+ comment='Zubair',
1062
+ ),
1063
+ EmailCfg(
1064
+ id='031152',
1065
+ description='discussion of notoriously corrupt Kazakh politician Rakhat Aliyev whom Epstein apparently knew',
1066
+ is_interesting=True,
1067
+ ),
1068
+ EmailCfg(id='030745', description="planning a public statement for Ghislaine", truncate_to=NO_TRUNCATE),
1069
+ EmailCfg(id='028589', truncate_to=NO_TRUNCATE, comment='Long thread with Reid Weingarten'),
1070
+ EmailCfg(id='026059', truncate_to=2650, comment='Rothschild'),
1071
+ EmailCfg(id='032643', truncate_to=NO_TRUNCATE, comment='Anas al Rasheed'),
1072
+ EmailCfg(id='031619', truncate_to=652, comment='Reply to grab em by the pussy story'),
1073
+ EmailCfg(id='021096', truncate_to=700, comment='Sinofsky article quote'),
1074
+ EmailCfg(id='032865', truncate_to=445, comment='Barton reply'),
1075
+ EmailCfg(id='027126', truncate_to=1000, comment='Summers'),
1076
+ EmailCfg(id='030950', truncate_to=4500, comment='Ian Osborne'),
1077
+ EmailCfg(id='029684', truncate_to=402, comment='Maldives reply'),
1078
+ EmailCfg(id='018045', truncate_to=TRUNCATED_CHARS, comment='invite'),
1079
+ EmailCfg(id='017574', truncate_to=4000, comment='Lisa Randall invite'),
1080
+ EmailCfg(id='030589', truncate_to=1000, comment='Brett Jaffe Fwd'),
1081
+ EmailCfg(id='025655', truncate_to=400, comment='reply to article'),
1082
+ EmailCfg(id='026451', truncate_to=500, comment='reply to article'),
1083
+ EmailCfg(id='023717', truncate_to=489, comment='reply to article'),
1084
+ EmailCfg(id='022265', truncate_to=NO_TRUNCATE),
1085
+ EmailCfg(id='026243', truncate_to=NO_TRUNCATE),
1086
+ EmailCfg(id='029680', truncate_to=900, comment='Maldives'),
1087
+ EmailCfg(id='029534', truncate_to=900, comment='Maldives'),
1088
+ EmailCfg(id='028760', truncate_to=900, comment='Epstein in an argument with someone'),
1089
+ EmailCfg(id='028757', truncate_to=900, comment='Epstein in an argument with someone'),
1090
+ EmailCfg(id='027059', truncate_to=NO_TRUNCATE, comment='Jean Luc Brunel and Boris Nikolic'),
1091
+ EmailCfg(id='027028', truncate_to=1000, comment='Tom Pritzer penny pritzker'),
1092
+ EmailCfg(id='029910', truncate_to=NO_TRUNCATE, comment='Tom Pritzer Aspen'),
1093
+ EmailCfg(id='025163', truncate_to=NO_TRUNCATE, comment='Tom Pritzer'),
859
1094
  ]
860
1095
 
1096
+ if args.constantize:
1097
+ for email_cfg in EMAILS_CONFIG:
1098
+ print(f"{email_cfg},")
1099
+
861
1100
 
862
1101
  ################################################################################################
863
1102
  ####################################### OTHER FILES ############################################
@@ -894,7 +1133,7 @@ DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
894
1133
  DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
895
1134
  FBI_REPORT = f"report on Epstein investigation (redacted)"
896
1135
  FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
897
- FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
1136
+ FEMALE_HEALTH_COMPANY = 'Female Health Company (FHC)'
898
1137
  FIRE_AND_FURY = f"Fire And Fury"
899
1138
  HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
900
1139
  HBS_APPLICATION = f"{HARVARD} Business School application letter"
@@ -1008,7 +1247,7 @@ OTHER_FILES_ARTICLES = [
1008
1247
  DocCfg(id='013435', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's address book", date='2011-03-11'),
1009
1248
  DocCfg(id='013440', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's gag order", date='2011-07-13'),
1010
1249
  DocCfg(id='029238', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's plea deal"),
1011
- DocCfg(id='021775', author=PALM_BEACH_POST, description="article about 'He Was 50. And They Were Girls'"),
1250
+ DocCfg(id='021775', author=PALM_BEACH_POST, description="article about 'He Was 50. And They Were Girls'", attached_to_email_id='021764'),
1012
1251
  DocCfg(id='022989', author=PALM_BEACH_POST, description="article about alleged rape of 13 year old by Trump"),
1013
1252
  DocCfg(id='022987', author=PALM_BEACH_POST, description="article about just a headline on Trump and Epstein"),
1014
1253
  DocCfg(id='015028', author=PALM_BEACH_POST, description="article about reopening Epstein's criminal case"),
@@ -1026,14 +1265,14 @@ OTHER_FILES_ARTICLES = [
1026
1265
  DocCfg(id='010715', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2012-02-27'),
1027
1266
  DocCfg(id='019849', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2017-02-27', duplicate_ids=['019864']),
1028
1267
  DocCfg(id='026851', author='Politifact', description=f"lying politicians chart", date='2016-07-26'),
1029
- DocCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar'),
1268
+ DocCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar', attached_to_email_id='033252'),
1030
1269
  DocCfg(id='026887', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} "New Tariffs - Trade War"'),
1031
1270
  DocCfg(id='026877', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "New Tariffs - Trade War"'),
1032
1271
  DocCfg(id='026868', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business New China Tariffs — Trade War"', date='2018-09-18'),
1033
1272
  DocCfg(id='023707', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business U.S. and China Agree to Pause Trade War"', date='2018-12-03'),
1034
- DocCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"'),
1035
- DocCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"'),
1036
- DocCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances"),
1273
+ DocCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"', attached_to_email_id='029174'),
1274
+ DocCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"', attached_to_email_id='032637'),
1275
+ DocCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances", date='2018-09-30', attached_to_email_id='033252'),
1037
1276
  DocCfg(id='016996', author=f'SciencExpress', description=f'article "Quantitative Analysis of Culture Using Millions of Digitized Books" by Jean-Baptiste Michel'),
1038
1277
  DocCfg(id='025104', author='SCMP', description=f"article about China and globalisation"),
1039
1278
  DocCfg(id='030030', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-03-29'),
@@ -1057,7 +1296,7 @@ OTHER_FILES_ARTICLES = [
1057
1296
  DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
1058
1297
  DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
1059
1298
  DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
1060
- DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
1299
+ DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents'),
1061
1300
  DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
1062
1301
  DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
1063
1302
  DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1066,6 +1305,8 @@ OTHER_FILES_ARTICLES = [
1066
1305
  DocCfg(id='023046', author=VI_DAILY_NEWS, description='article', date='2019-02-27'),
1067
1306
  DocCfg(id='031170', author=VI_DAILY_NEWS, description='article', date='2019-03-06'),
1068
1307
  DocCfg(id='016506', author=VI_DAILY_NEWS, description='article', date='2019-02-28'),
1308
+ DocCfg(id='018862', author=VI_DAILY_NEWS, description='articles about Sen. Alvin Williams Jr. Fraud case, arson', date='2012-11-09'),
1309
+
1069
1310
  DocCfg(id='016507', author=VI_DAILY_NEWS, description=f'"Perversion of Justice" by {JULIE_K_BROWN}', date='2018-12-19'),
1070
1311
  DocCfg(id='019212', author=WAPO, description=f'and Times Tribune articles about Bannon, Trump, and healthcare execs'),
1071
1312
  DocCfg(id='033379', author=WAPO, description=f'"How Washington Pivoted From Finger-Wagging to Appeasement" (about Viktor Orban)', date='2018-05-25'),
@@ -1187,7 +1428,7 @@ OTHER_FILES_LEGAL = [
1187
1428
  DocCfg(id='013489', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'Affidavit of {BRAD_EDWARDS}', date='2010-07-20'),
1188
1429
  DocCfg(id='029398', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'article in Law.com'),
1189
1430
  DocCfg(id='026854', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Civil Docket"),
1190
- DocCfg(id='026384', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Complaint for rape and sexual abuse", date='2016-06-20'),
1431
+ DocCfg(id='026384', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Complaint for rape and sexual abuse", date='2016-06-20', attached_to_email_id='029837'),
1191
1432
  DocCfg(id='013463', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'Deposition of Scott Rothstein', date='2010-03-23'),
1192
1433
  DocCfg(id='029257', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'allegations and identity of plaintiff Katie Johnson', date='2016-04-26'),
1193
1434
  DocCfg(id='032321', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Notice of Initial Conference", date='2016-10-04'),
@@ -1247,7 +1488,7 @@ OTHER_FILES_CONFERENCES = [
1247
1488
  DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1248
1489
  DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1249
1490
  DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
1250
- DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
1491
+ DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23', attached_to_email_id='031215'),
1251
1492
  DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
1252
1493
  DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
1253
1494
  DocCfg(
@@ -1301,12 +1542,12 @@ OTHER_FILES_FINANCE = [
1301
1542
  DocCfg(id='024302', author='Carvana', description=f"form 14A SEC filing proxy statement", date='2019-04-23'),
1302
1543
  DocCfg(id='029305', author='CCH Tax', description=f"Briefing on end of Defense of Marriage Act", date='2013-06-27'),
1303
1544
  DocCfg(id='026794', author=DEUTSCHE_BANK, description=f"Global Political and Regulatory Risk in 2015/2016"),
1304
- DocCfg(id='022361', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01'),
1545
+ DocCfg(id='022361', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01', attached_to_email_id='022359'),
1305
1546
  DocCfg(id='022325', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20'),
1306
1547
  DocCfg(id='022330', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20', description='table of contents'),
1307
1548
  DocCfg(id='019440', author=DEUTSCHE_BANK_TAX_TOPICS, date='2014-01-29'),
1308
1549
  DocCfg(id='024202', author=ELECTRON_CAPITAL_PARTNERS, description=f"Global Utility White Paper", date='2013-03-08'),
1309
- DocCfg(id='022372', author='Ernst & Young', description=f'2016 election report'),
1550
+ DocCfg(id='022372', author='Ernst & Young', date='2016-11-09', description=f'2016 election report'),
1310
1551
  DocCfg(
1311
1552
  id='025663',
1312
1553
  author=GOLDMAN_INVESTMENT_MGMT,
@@ -1315,7 +1556,13 @@ OTHER_FILES_FINANCE = [
1315
1556
  is_interesting=True,
1316
1557
  ),
1317
1558
  DocCfg(id='014532', author=GOLDMAN_INVESTMENT_MGMT, description=f"Outlook - Half Full", date='2017-01-01'),
1318
- DocCfg(id='026909', author=GOLDMAN_INVESTMENT_MGMT, description=f"The Unsteady Undertow Commands the Seas (Temporarily)", date='2018-10-14'),
1559
+ DocCfg(
1560
+ id='026909',
1561
+ attached_to_email_id='026893',
1562
+ author=GOLDMAN_INVESTMENT_MGMT,
1563
+ description=f"The Unsteady Undertow Commands the Seas (Temporarily)",
1564
+ date='2018-10-14',
1565
+ ),
1319
1566
  DocCfg(id='026944', author=GOLDMAN_INVESTMENT_MGMT, description=f"Risk of a US-Iran Military Conflict", date='2019-05-23'),
1320
1567
  DocCfg(id='018804', author='Integra Realty Resources', description=f"appraisal of going concern for IGY American Yacht Harbor Marina in {VIRGIN_ISLANDS}"),
1321
1568
  DocCfg(id='026679', author='Invesco', description=f"Global Sovereign Asset Management Study 2017"),
@@ -1330,7 +1577,7 @@ OTHER_FILES_FINANCE = [
1330
1577
  DocCfg(id='030840', author=JP_MORGAN, description=f"Market Thoughts"),
1331
1578
  DocCfg(id='022350', author=JP_MORGAN, description=f"tax efficiency of Intentionally Defective Grantor Trusts (IDGT)"),
1332
1579
  DocCfg(id='025242', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-04-09'),
1333
- DocCfg(id='030010', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-06-14'),
1580
+ DocCfg(id='030010', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, attached_to_email_id='030006', date='2011-06-14'),
1334
1581
  DocCfg(id='030808', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-11'),
1335
1582
  DocCfg(id='025221', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-25'),
1336
1583
  DocCfg(id='025229', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-08-04'),
@@ -1355,7 +1602,11 @@ OTHER_FILES_FINANCE = [
1355
1602
  DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
1356
1603
 
1357
1604
  # private placement memoranda
1358
- DocCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
1605
+ DocCfg(
1606
+ id='024432',
1607
+ date='2006-09-27',
1608
+ description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
1609
+ ),
1359
1610
  DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
1360
1611
  ]
1361
1612
 
@@ -1434,12 +1685,12 @@ OTHER_FILES_SOCIAL = [
1434
1685
  DocCfg(id='017787', author=ALAN_DERSHOWITZ, description=DERSH_GIUFFRE_TWEET),
1435
1686
  DocCfg(id='033433', author=ALAN_DERSHOWITZ, description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-03-02'),
1436
1687
  DocCfg(id='033432', author=ALAN_DERSHOWITZ, description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-05-02'),
1437
- DocCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20'),
1438
- DocCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets about #Brexit', date='2016-06-23'),
1439
- DocCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05'),
1440
- DocCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} forecasting election for Trump", date='2016-10-25'),
1441
- DocCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20'),
1442
- DocCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate"),
1688
+ DocCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20', attached_to_email_id='033171'),
1689
+ DocCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets from #Brexit case study', date='2016-06-23', attached_to_email_id='033171'),
1690
+ DocCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05', attached_to_email_id='032323'),
1691
+ DocCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} eleciton social media trends report", date='2016-10-25', attached_to_email_id='032280'),
1692
+ DocCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20', attached_to_email_id='033171'),
1693
+ DocCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate", attached_to_email_id='026626'),
1443
1694
  DocCfg(id='022213', description=f"{SCREENSHOT} Facebook group called 'Shit Pilots Say' disparaging a 'global girl'"),
1444
1695
  DocCfg(id='030884', description=f"{TWEET} by Ed Krassenstein"),
1445
1696
  DocCfg(id='031546', description=f"{TWEET}s by Donald Trump about Russian collusion", date='2018-01-06'),
@@ -1467,7 +1718,7 @@ OTHER_FILES_POLITICS = [
1467
1718
  id='023133',
1468
1719
  author=f"{TERJE_ROD_LARSEN}, Nur Laiq, Fabrice Aidan",
1469
1720
  description=f'The Search for Peace in the Arab-Israeli Conflict',
1470
- date='2019-12-09',
1721
+ date='2014-12-09',
1471
1722
  ),
1472
1723
  DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
1473
1724
  DocCfg(
@@ -1482,8 +1733,8 @@ OTHER_FILES_POLITICS = [
1482
1733
  date='2015-01-15', # TODO: this is just a guess
1483
1734
  duplicate_ids=['028887'],
1484
1735
  ),
1485
- DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1486
- DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1736
+ DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True, attached_to_email_id='033091'),
1737
+ DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True, attached_to_email_id='033091'),
1487
1738
  ]
1488
1739
 
1489
1740
  OTHER_FILES_ACADEMIA = [
@@ -1580,7 +1831,11 @@ OTHER_FILES_ARTS = [
1580
1831
  date='2010-02-01',
1581
1832
  duplicate_ids=['025210']
1582
1833
  ),
1583
- DocCfg(id='028281', description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya'),
1834
+ DocCfg(
1835
+ id='028281',
1836
+ date='2010-10-13',
1837
+ description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya',
1838
+ ),
1584
1839
  ]
1585
1840
 
1586
1841
  OTHER_FILES_MISC = [
@@ -1610,7 +1865,7 @@ OTHER_FILES_MISC = [
1610
1865
  DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1611
1866
  DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1612
1867
  DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
1613
- DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
1868
+ DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019446'),
1614
1869
  DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
1615
1870
  DocCfg(
1616
1871
  id='030142',
@@ -1681,28 +1936,31 @@ for cfg in ALL_CONFIGS:
1681
1936
 
1682
1937
  # Add extra config objects for duplicate files that match the config of file they are duplicating
1683
1938
  for dupe_cfg in cfg.duplicate_cfgs():
1684
- if not isinstance(dupe_cfg, EmailCfg):
1685
- logger.debug(f"Generated synthetic config for dupe: {dupe_cfg}")
1939
+ # if not isinstance(dupe_cfg, EmailCfg):
1940
+ # logger.debug(f"Generated synthetic config for dupe: {dupe_cfg}")
1686
1941
 
1687
1942
  ALL_FILE_CONFIGS[dupe_cfg.id] = dupe_cfg
1688
1943
 
1689
1944
 
1690
1945
  # Email related regexes (have to be here for circular dependencies reasons)
1691
1946
  FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
1947
+ FRENCH_REPLY_PATTERN = r"Le .* a ecrit:"
1948
+ GERMAN_REPLY_PATTERN = r"Am \d\d\.\d\d\..*schrieb.*"
1949
+ NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
1692
1950
  REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
1693
1951
  REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
1694
1952
  REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
1695
- REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1696
- REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1953
+ REPLY_LINE_ON_DATE_PATTERN = fr"^[> •]*On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Fe(b|vr\.)|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1954
+ REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1697
1955
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1698
- SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
1699
-
1956
+ SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
1700
1957
 
1701
1958
  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
1702
- UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1959
+ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + TRIVERS_CCS + [
1703
1960
  'Alan Dlugash', # CCed with Richard Kahn
1704
1961
  'Alan Rogers', # Random CC
1705
1962
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
1963
+ 'Ariane Dwyer', # Sabba CC
1706
1964
  'BS Stern', # A random fwd of email we have
1707
1965
  'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
1708
1966
  'Connie Zaguirre', # Random CC
@@ -1710,9 +1968,11 @@ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1710
1968
  'Danny Goldberg', # Random Paul Krassner emails
1711
1969
  GERALD_LEFCOURT, # Single CC
1712
1970
  GORDON_GETTY, # Random CC
1971
+ 'Grant J. Smith', # Ken Jenne CC
1713
1972
  JEFF_FULLER, # Random Jean Luc Brunel CC
1714
1973
  'Jojo Fontanilla', # Random CC
1715
1974
  'Joseph Vinciguerra', # Random CC
1975
+ 'Kirk Blouin', # John Page / Police Code Enforcement chain
1716
1976
  'Larry Cohen', # Random Bill Gates CC
1717
1977
  'Lyn Fontanilla', # Random CC
1718
1978
  'Mark Albert', # Random CC
@@ -1723,12 +1983,14 @@ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1723
1983
  'Nancy Portland', # Lawrence Krauss CC
1724
1984
  'Oliver Goodenough', # Robert Trivers CC
1725
1985
  'Peter Aldhous', # Lawrence Krauss CC
1986
+ 'Peter Green', # Farkas emailer
1726
1987
  'Players2', # Hoffenberg CC
1727
1988
  'Police Code Enforcement', # Kirk Blouin / John Page CC
1728
1989
  'Sam Harris', # Lawrence Krauss CC
1729
1990
  SAMUEL_LEFF, # Random CC
1730
1991
  'Sean T Lehane', # Random CC
1731
1992
  'Stephen Rubin', # Random CC
1993
+ THANU_BOONYAWATANA, # Eduardo Robles CC
1732
1994
  'Tim Kane', # Random CC
1733
1995
  'Travis Pangburn', # Random CC
1734
1996
  'Vahe Stepanian', # Random CC