epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
@@ -7,9 +7,13 @@ from dateutil.parser import parse
7
7
  from epstein_files.util.constant.names import *
8
8
  from epstein_files.util.constant.strings import *
9
9
  from epstein_files.util.doc_cfg import DocCfg, EmailCfg, TextCfg
10
+ from epstein_files.util.env import args
10
11
  from epstein_files.util.logging import logger
11
12
 
12
13
  FALLBACK_TIMESTAMP = parse("1/1/2051 12:01:01 AM")
14
# Character budget used when printing text (presumably the cap on printed characters -- confirm at call sites).
MAX_CHARS_TO_PRINT = 4000
# Sentinel passed as `truncate_to=NO_TRUNCATE` in the email configs; presumably disables truncation.
NO_TRUNCATE = -1
# One third of MAX_CHARS_TO_PRINT; floor division keeps the arithmetic in integers (no float round trip).
TRUNCATED_CHARS = MAX_CHARS_TO_PRINT // 3
13
17
 
14
18
  HEADER_ABBREVIATIONS = {
15
19
  "AD": "Abu Dhabi",
@@ -19,6 +23,7 @@ HEADER_ABBREVIATIONS = {
19
23
  'bgC3': 'Bill Gates Ventures (renamed in 2018)',
20
24
  "Brock": 'Brock Pierce (crypto bro with a very sordid past)',
21
25
  "DB": "Deutsche Bank (maybe??)",
26
+ "GCC": "Gulf Cooperation Council",
22
27
  "GRAT": "Grantor Retained Annuity Trust (tax shelter)",
23
28
  'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
24
29
  'Jabor': '"an influential man in Qatar"',
@@ -60,7 +65,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
60
65
  ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
61
66
  ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
62
67
  ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
63
- ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Roths(ch|hc)?ild|Ariane', re.IGNORECASE),
68
+ ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Rothsh?ch?ild|Ariane(?!\s+Dwyer)', re.IGNORECASE),
64
69
  BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
65
70
  BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
66
71
  BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
@@ -75,7 +80,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
75
80
  DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
76
81
  DAVID_STERN: re.compile(r'David Stern?', re.IGNORECASE),
77
82
  EDUARDO_ROBLES: re.compile(r'Ed(uardo)?\s*Robles', re.IGNORECASE),
78
- EDWARD_JAY_EPSTEIN: re.compile(r'Edward (Jay )?Epstein', re.IGNORECASE),
83
+ EDWARD_JAY_EPSTEIN: re.compile(r'(?<!Jeffrey )Edward (Jay )?Epstein', re.IGNORECASE),
79
84
  EHUD_BARAK: re.compile(r'(ehud|e?h)\s*barak|\behud', re.IGNORECASE),
80
85
  FAITH_KATES: re.compile(r'faith kates?', re.IGNORECASE),
81
86
  GERALD_BARTON: re.compile(r'Gerald.*Barton', re.IGNORECASE),
@@ -87,9 +92,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
87
92
  JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
88
93
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
89
94
  JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
95
+ JEAN_HUGUEN: re.compile(r"Jean[\s.]Huguen", re.IGNORECASE),
90
96
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
91
97
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
92
- JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
98
+ JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeff(rey)? (Edward )?E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
93
99
  JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
94
100
  JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
95
101
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
@@ -157,6 +163,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
157
163
  EMAILERS = [
158
164
  'Anne Boyles',
159
165
  AL_SECKEL,
166
+ 'Ariane Dwyer',
160
167
  AZIZA_ALAHMADI,
161
168
  BILL_GATES,
162
169
  BILL_SIEGEL,
@@ -201,6 +208,7 @@ EMAILERS = [
201
208
  'Steven Victor MD',
202
209
  'Susan Edelman',
203
210
  TOM_BARRACK,
211
+ 'Vahe Stepanian',
204
212
  'Vladimir Yudashkin',
205
213
  ]
206
214
 
@@ -308,15 +316,47 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
308
316
  ################################################ EMAILS ################################################
309
317
  ########################################################################################################
310
318
 
319
# Shared description string; interpolates the MICHAEL_WOLFF name constant.
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = (
    f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
)

# Some emails have a lot of uninteresting CCs, so the long lists are named once here.
FLIGHT_IN_2012_PEOPLE: list[Name] = [
    'Francis Derby',
    JANUSZ_BANASIAK,
    'Louella Rabuyo',
    'Richard Barnnet',
]

# Appended to the recipients of email 026620 ('Fw: The Iran Nuclear Deal') in EMAILS_CONFIG.
IRAN_DEAL_RECIPIENTS: list[Name] = [
    'Allen West',
    'Rafael Bardaji',
    'Philip Kafka',
    'Herb Goodman',
    'Grant Seeger',
    'Lisa Albert',
    'Janet Kafka',
    'James Ramsey',
    'ACT for America',
    'John Zouzelka',
    'Joel Dunn',
    'Nate McClain',
    'Bennet Greenwald',
    'Taal Safdie',
    'Uri Fouzailov',
    'Neil Anderson',
    'Nate White',
    'Rita Hortenstine',
    'Henry Hortenstine',
    'Gary Gross',
    'Forrest Miller',
    'Bennett Schmidt',
    'Val Sherman',
    'Marcie Brown',
    'Michael Horowitz',
    'Marshall Funk',
]
314
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
323
+
324
# Long CC list appended to the recipients of email 025589 in EMAILS_CONFIG.
# Entry order is kept exactly as originally written; the last entry is an email address, not a name.
TRIVERS_CCS: list[Name] = [
    "Alan Rogers", "Anna Dreber", "Anula Jayasuriya", "Bill Prezant",
    "Bobby McCormick", "Clive Crook", "Dane Stangler", "Ron Bailey",
    "Ditsa Pines", "David Darst", "Gerry Ohrstrom", "Paul Romer",
    "John Mallen", "Jim Halligan", "Lee Silver", "Monika Gruter Cheney",
    "Marguerite Atkins", "Matt Ridley", "Mike Cagney", "Evan Smith",
    "Roger Edelen", "Oliver Goodenough", "Paul Zak", "Peter J Richerson",
    "Clair Brown", "Terry Anderson", "Tim Kane", "Rob Hanson",
    "president@usfca.edu",
]
355
+
315
356
 
316
357
  EMAILS_CONFIG = [
317
358
  # 026294 and 026296 might also be Ittihadieh based on timing
318
359
  EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
319
- # 032542 026078 026080 026083 026086 026090 might also be Anas based on discussion of Dubai and Kuwait
320
360
  EmailCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
321
361
  EmailCfg(id='026167', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
322
362
  EmailCfg(id='032571', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
@@ -355,6 +395,12 @@ EMAILS_CONFIG = [
355
395
  EmailCfg(id='032676', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
356
396
  EmailCfg(id='026237', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
357
397
  EmailCfg(id='032682', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
398
+ EmailCfg(id='032542', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
399
+ EmailCfg(id='026078', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
400
+ EmailCfg(id='026080', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
401
+ EmailCfg(id='026083', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
402
+ EmailCfg(id='026086', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
403
+ EmailCfg(id='026090', author=ANAS_ALRASHEED, attribution_reason='discussion of Kuwait and timing', is_attribution_uncertain=True),
358
404
  EmailCfg(id='026064', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
359
405
  EmailCfg(id='026069', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
360
406
  EmailCfg(id='030741', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
@@ -371,33 +417,40 @@ EMAILS_CONFIG = [
371
417
  fwded_text_after='Transcript: Phone call between President',
372
418
  ),
373
419
  EmailCfg(id='033328', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
374
- EmailCfg(id='026659', author=BARBRO_C_EHNBOM, attribution_reason='Reply'),
375
- EmailCfg(id='031215', author=BARBRO_C_EHNBOM, duplicate_ids=['026745'], dupe_type='redacted'), # the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config
376
- EmailCfg(id='026764', author=BARRY_J_COHEN), # Bad OCR (nofix)
377
- EmailCfg(id='031206', author=BENNET_MOSKOWITZ, duplicate_ids=['031227']),
378
- EmailCfg(id='031442', author=CHRISTINA_GALBRAITH, duplicate_ids=['031996']),
420
+ EmailCfg(
421
+ id='031215',
422
+ author=BARBRO_C_EHNBOM,
423
+ duplicate_ids=['026745'],
424
+ dupe_type='redacted',
425
+ comment="the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config",
426
+ ),
427
+ EmailCfg(id='031206', duplicate_ids=['031227']),
428
+ EmailCfg(id='031591', duplicate_ids=['031442', '031996']),
379
429
  EmailCfg(
380
430
  id='019446',
381
431
  author=CHRISTINA_GALBRAITH,
382
432
  attribution_reason='shows from "Christina media/PR" which fits',
383
433
  is_attribution_uncertain=True,
384
434
  ),
385
- EmailCfg(id='026625', author=DARREN_INDYKE, actual_text='Hysterical.'),
386
435
  EmailCfg(
387
436
  id='026624',
388
437
  author=DARREN_INDYKE,
389
- recipients=[JEFFREY_EPSTEIN],
390
- timestamp=parse('2016-10-01 16:40:00'),
438
+ date='2016-10-01 16:40:00',
391
439
  duplicate_ids=['031708'],
440
+ recipients=[JEFFREY_EPSTEIN],
441
+ subject='Donald Trump Rape Lawsuit Refiled With New Witness I Law News',
392
442
  ),
393
443
  EmailCfg(
394
444
  id='031278',
395
445
  actual_text='',
396
446
  author=DARREN_INDYKE,
397
- description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book",
398
- recipients=['Charles Michael'],
399
- timestamp=parse('2016-08-17 11:26:00'),
447
+ recipients=["Charles Michael"],
400
448
  attribution_reason='Quoted replies are in 019109',
449
+ date='2016-08-17 11:26:00',
450
+ description="heavily redacted email, quoted replies are from Steven Hoffenberg about James Patterson's book",
451
+ subject='FW: Privileged and Confidential - Fwd: JAMES PATTERSON NEW BOOK TELLING FEDS COVER UP OF BILLIONAIRE JEFF EPSTEIN CHILD RAPES RELEASE DATE OCT 10 2016 STEVEN HOFFENBERG IS ON THE BOOK WRITING TEAM !!!!',
452
+ truncate_to=2500,
453
+ comment='Hoffenberg',
401
454
  ),
402
455
  EmailCfg(id='026290', author=DAVID_SCHOEN, attribution_reason='Signature'),
403
456
  EmailCfg(id='031339', author=DAVID_SCHOEN, attribution_reason='Signature'),
@@ -405,43 +458,44 @@ EMAILS_CONFIG = [
405
458
  EmailCfg(id='031560', author=DAVID_SCHOEN, attribution_reason='Signature'),
406
459
  EmailCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
407
460
  EmailCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
408
- EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason=f"quoted reply has edwardjayepstein.com", is_fwded_article=True),
461
+ EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason='quoted reply has edwardjayepstein.com', is_fwded_article=True),
462
+ EmailCfg(id='031607', is_fwded_article=True, comment='Epstein reply to Edward Jay Epstein'),
409
463
  EmailCfg(
410
464
  id='030475',
411
465
  author=FAITH_KATES,
412
- attribution_reason=f'{NEXT_MANAGEMENT} legal signature',
466
+ attribution_reason='Next Management LLC legal signature',
413
467
  duplicate_ids=['030575'],
414
- dupe_type='redacted'
468
+ dupe_type='redacted',
415
469
  ),
416
- EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
470
+ EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]),
417
471
  EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
418
472
  EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
419
- EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
420
- EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
473
+ EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Aging"', truncate_to=400),
474
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"', truncate_to=498),
421
475
  EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
422
- EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
476
+ EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'),
423
477
  EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
424
478
  EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
425
479
  EmailCfg(id='021823', author=JEAN_LUC_BRUNEL, attribution_reason='Reply'),
426
- EmailCfg(id='022949', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
427
- EmailCfg(id='031624', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
428
- EmailCfg(id='031996', author=JEFFREY_EPSTEIN, recipients=[CHRISTINA_GALBRAITH], attribution_reason='bounced', duplicate_ids=['031442']),
429
- EmailCfg(id='018726', author=JEFFREY_EPSTEIN, timestamp=parse('2018-06-08 08:36:00')), # nofix
430
- EmailCfg(id='032283', author=JEFFREY_EPSTEIN, timestamp=parse('2016-09-14 08:04:00')), # nofix
431
- EmailCfg(id='026943', author=JEFFREY_EPSTEIN, timestamp=parse('2019-05-22 05:47:00')), # nofix
480
+ EmailCfg(id='031624', author=JEFFREY_EPSTEIN),
481
+ EmailCfg(id='018726', author=JEFFREY_EPSTEIN, date='2018-06-08 08:36:00'),
482
+ EmailCfg(id='032283', author=JEFFREY_EPSTEIN, date='2016-09-14 08:04:00'),
483
+ EmailCfg(id='026943', author=JEFFREY_EPSTEIN, date='2019-05-22 05:47:00'),
432
484
  EmailCfg(
433
485
  id='023208',
434
486
  author=JEFFREY_EPSTEIN,
435
- description=f"very long email chain about Leon Black's finances and things like Gratitude America",
436
- fwded_text_after='Date: Tue, Oct 27',
437
487
  recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
488
+ description="very long email chain about Leon Black's finances and things like Gratitude America",
438
489
  duplicate_ids=['023291'],
490
+ fwded_text_after='Date: Tue, Oct 27',
491
+ truncate_to=NO_TRUNCATE,
492
+ comment="Long discussion about leon black's finances",
439
493
  ),
440
494
  EmailCfg(
441
495
  id='032214',
442
496
  author=JEFFREY_EPSTEIN,
443
- actual_text='Agreed',
444
497
  recipients=[MIROSLAV_LAJCAK],
498
+ actual_text='Agreed',
445
499
  attribution_reason='Quoted reply has signature',
446
500
  ),
447
501
  EmailCfg(id='029582', author=JEFFREY_EPSTEIN, recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
@@ -450,15 +504,24 @@ EMAILS_CONFIG = [
450
504
  EmailCfg(id='031826', author=JEFFREY_EPSTEIN, actual_text='I have'),
451
505
  EmailCfg(id='030768', author=JEFFREY_EPSTEIN, actual_text='ok'),
452
506
  EmailCfg(id='022938', author=JEFFREY_EPSTEIN, actual_text='what do you suggest?'),
453
- EmailCfg(id='031791', author=JESSICA_CADWELL, attribution_reason='signature'),
454
- EmailCfg(id='028851', author=JOI_ITO, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2014-04-27 06:00:00')),
455
507
  EmailCfg(
456
- id='028849',
457
- attribution_reason='Conversation with Joi Ito',
458
- author=JOI_ITO,
459
- description=f"{JOI_ITO} reaching out to Epstein for an immediate phone call after news about illicit Russian money",
460
- recipients=[JEFFREY_EPSTEIN],
461
- timestamp=parse('2014-04-27 07:41:00'), # Filled in from 028847
508
+ id='031791',
509
+ author=JESSICA_CADWELL,
510
+ attribution_reason='signature',
511
+ truncate_to=NO_TRUNCATE,
512
+ comment='First email in Jessica Cadwell chain about service of legal documents',
513
+ ),
514
+ EmailCfg(
515
+ id='028850',
516
+ description='Joi Ito reaching out to Epstein for an immediate phone call after news about illicit Russian money',
517
+ duplicate_ids=['028851'],
518
+ dupe_type='quoted',
519
+ ),
520
+ EmailCfg(
521
+ id='028848',
522
+ description='Joi Ito reaching out to Epstein for an immediate phone call after news about illicit Russian money',
523
+ duplicate_ids=['028849'],
524
+ dupe_type='quoted',
462
525
  ),
463
526
  EmailCfg(id='028507', author=JONATHAN_FARKAS, attribution_reason='reply signed "best Jonathan"'),
464
527
  EmailCfg(id='033282', author=JONATHAN_FARKAS, attribution_reason='reply signed "thanks Jonathan"', duplicate_ids=['033484']),
@@ -470,9 +533,9 @@ EMAILS_CONFIG = [
470
533
  EmailCfg(id='032386', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
471
534
  EmailCfg(id='032727', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
472
535
  EmailCfg(id='030478', author=LANDON_THOMAS),
473
- EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
536
+ EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]),
474
537
  EmailCfg(id='029196', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN], actual_text='Talk in 40?'),
475
- EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature'),
538
+ EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature', truncate_to=NO_TRUNCATE, comment='visoski email about planes'),
476
539
  EmailCfg(id='033370', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
477
540
  EmailCfg(id='033495', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
478
541
  EmailCfg(id='033487', author=LAWRANCE_VISOSKI, recipients=[JEFFREY_EPSTEIN]),
@@ -496,29 +559,44 @@ EMAILS_CONFIG = [
496
559
  EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
497
560
  EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
498
561
  EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
499
- EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
500
- EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
562
+ EmailCfg(
563
+ id='032604',
564
+ author=MASHA_DROKOVA,
565
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
566
+ is_attribution_uncertain=True,
567
+ ),
568
+ EmailCfg(
569
+ id='032581',
570
+ author=MASHA_DROKOVA,
571
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
572
+ is_attribution_uncertain=True,
573
+ ),
501
574
  EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
502
575
  EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
503
576
  EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
504
- EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
505
- EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
506
- EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
577
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
578
+ EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply', truncate_to=NO_TRUNCATE),
507
579
  EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
508
- EmailCfg(id='022197', author=NADIA_MARCINKO, attribution_reason='reply'),
509
580
  EmailCfg(id='022214', author=NADIA_MARCINKO, attribution_reason='Reply header'),
510
581
  EmailCfg(id='021811', author=NADIA_MARCINKO, attribution_reason='Signature and email address in the message'),
511
582
  EmailCfg(id='028487', author=NORMAN_D_RAU, attribution_reason='Fwded from "to" address', duplicate_ids=['026612']),
512
583
  EmailCfg(
513
584
  id='024923',
514
- author=PAUL_KRASSNER,
515
- recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
516
- duplicate_ids=['031973']
585
+ recipients=["George Krassner", "Nick Kazan", "Mrisman02", "Rebecca Risman", "Linda W. Grossman"],
586
+ duplicate_ids=['031973'],
587
+ comment='krassner',
517
588
  ),
518
- EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, 'Nancy Cain']), # Bad OCR (nofix)
589
+ EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, "Nancy Cain"]),
519
590
  EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
520
591
  EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
521
592
  EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
593
+ EmailCfg(
594
+ id='012684',
595
+ author=PAUL_PROSPERI,
596
+ attribution_reason='one week after Epstein asked Prosperi for info on Kluge auction',
597
+ is_attribution_uncertain=True,
598
+ is_fwded_article=True,
599
+ ),
522
600
  EmailCfg(
523
601
  id='033561',
524
602
  author=PAUL_PROSPERI,
@@ -528,8 +606,16 @@ EMAILS_CONFIG = [
528
606
  EmailCfg(id='031694', author=PEGGY_SIEGAL, attribution_reason='quoted', is_attribution_uncertain=True),
529
607
  EmailCfg(id='032219', author=PEGGY_SIEGAL, attribution_reason='Signed "Peggy"'),
530
608
  EmailCfg(id='029020', author=RENATA_BOLOTOVA, attribution_reason='Signature'),
531
- EmailCfg(id='029605', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
532
- EmailCfg(id='029606', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
609
+ EmailCfg(
610
+ id='029605',
611
+ author=RENATA_BOLOTOVA,
612
+ attribution_reason=BOLOTOVA_REASON,
613
+ ),
614
+ EmailCfg(
615
+ id='029606',
616
+ author=RENATA_BOLOTOVA,
617
+ attribution_reason=BOLOTOVA_REASON,
618
+ ),
533
619
  EmailCfg(id='029604', author=RENATA_BOLOTOVA, attribution_reason='Continued in 239606 etc'),
534
620
  EmailCfg(
535
621
  id='033584',
@@ -538,66 +624,77 @@ EMAILS_CONFIG = [
538
624
  attribution_reason='Refs paper by Trivers',
539
625
  duplicate_ids=['033169'],
540
626
  ),
541
- EmailCfg(
542
- id='026320',
543
- author=SEAN_BANNON,
544
- attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
545
- ),
546
- EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
547
- EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
548
- EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
549
- EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
550
- EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
551
- EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
627
+ EmailCfg(id='026320', author=SEAN_BANNON, attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067"),
628
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
629
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
630
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
631
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
632
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
633
+ EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason="mentions \"Woody's movie\"", is_attribution_uncertain=True),
552
634
  EmailCfg(
553
635
  id='019109',
554
636
  author=STEVEN_HOFFENBERG,
555
637
  recipients=["Players2"],
556
- timestamp=parse('2016-08-11 09:36:01'),
557
- attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
638
+ description='looks like a memo from Charles Michael containing copy/paste of email contents?',
639
+ date='2016-08-11 09:36:01',
640
+ subject='FW: Privileged and Confidential - Fwd: JAMES PATTERSON NEW BOOK TELLING FEDS COVER UP OF BILLIONAIRE JEFF EPSTEIN CHILD RAPES RELEASE DATE OCT 10 2016 STEVEN HOFFENBERG IS ON THE BOOK WRITING TEAM !!!!',
558
641
  ),
559
642
  EmailCfg(
560
643
  id='026620',
561
- attribution_reason='ends with "Respectfully, terry"',
562
644
  author=TERRY_KAFKA,
563
- fwded_text_after='From: Mike Cohen',
564
645
  recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
565
- subject='Fw: The Iran Nuclear Deal',
646
+ attribution_reason='ends with "Respectfully, terry"',
566
647
  duplicate_ids=['028482'],
648
+ fwded_text_after='From: Mike Cohen',
649
+ subject='Fw: The Iran Nuclear Deal',
567
650
  ),
568
651
  EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
569
652
  EmailCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
570
653
  EmailCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
571
- EmailCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
572
654
  EmailCfg(id='033021', recipients=[ANAS_ALRASHEED], attribution_reason='visible in 033022'),
573
655
  EmailCfg(id='027063', recipients=[ANTHONY_BARRETT]),
574
656
  EmailCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
575
657
  EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
576
658
  EmailCfg(id='032876', recipients=[CECILIA_STEEN], attribution_reason='unredacted in 032267'),
577
659
  EmailCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
578
- EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason=f"quoted reply has edwardjayepstein.com"),
660
+ EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason='quoted reply has edwardjayepstein.com'),
579
661
  EmailCfg(
580
662
  id='030525',
581
663
  recipients=[FAITH_KATES],
582
- attribution_reason=f'Reply in 030414 has {NEXT_MANAGEMENT} legal signature',
664
+ attribution_reason='Reply in 030414 has Next Management LLC legal signature',
583
665
  duplicate_ids=['030581'],
584
666
  ),
585
- EmailCfg(id='025329', recipients=['George Krassner', 'Nancy Cain', 'Tom', 'Marie Moneysmith', 'Steven Gaydos', 'Linda W. Grossman', 'Holly Krassner Dawson', 'Daniel Dawson', 'Danny Goldberg', 'Caryl Ratner', 'Kevin Bright', 'Michael Simmons', SAMUEL_LEFF, 'Bob Fass', 'Lynnie Tofte Fass', 'Barb Cowles', 'Lee Quarnstrom']),
586
- EmailCfg(id='033568', recipients=['George Krassner', 'Daniel Dawson', 'Danny Goldberg', 'Tom', 'Kevin Bright', 'Walli Leff', 'Michael Simmons', 'Lee Quarnstrom', 'Lanny Swerdlow', 'Larry Sloman', 'W&K', 'Harry Shearer', 'Jay Levin']),
667
+ EmailCfg(
668
+ id='025329',
669
+ recipients=["George Krassner", "Nancy Cain", "Tom", "Marie Moneysmith", "Steven Gaydos", "Linda W. Grossman", "Holly Krassner Dawson", "Daniel Dawson", "Danny Goldberg", "Caryl Ratner", "Kevin Bright", "Michael Simmons", SAMUEL_LEFF, "Bob Fass", "Lynnie Tofte Fass", "Barb Cowles", "Lee Quarnstrom"],
670
+ ),
671
+ EmailCfg(
672
+ id='033568',
673
+ recipients=["George Krassner", "Daniel Dawson", "Danny Goldberg", "Tom", "Kevin Bright", "Walli Leff", "Michael Simmons", "Lee Quarnstrom", "Lanny Swerdlow", "Larry Sloman", "W&K", "Harry Shearer", "Jay Levin"],
674
+ subject="Fwd: Daryl Cagle's Blog",
675
+ ),
587
676
  EmailCfg(id='026426', recipients=[JEAN_HUGUEN], attribution_reason='Reply'),
588
677
  EmailCfg(id='022202', recipients=[JEAN_LUC_BRUNEL], attribution_reason='Follow up / reply', duplicate_ids=['029975']),
589
- EmailCfg(id='022187', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
590
- EmailCfg(id='031489', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (unfixable)
591
- EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
592
- EmailCfg(id='030367', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
593
- EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
594
- EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
595
- EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
596
- EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
597
- EmailCfg(id='029558', recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING], attribution_reason='BCC', fwded_text_after='Creativity is central'),
678
+ EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]),
679
+ EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]),
680
+ EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]),
681
+ EmailCfg(id='033386', recipients=[JEFFREY_EPSTEIN, None], duplicate_ids=['033599']),
682
+ EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]),
683
+ EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True),
684
+ EmailCfg(
685
+ id='029558',
686
+ recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING],
687
+ attribution_reason='BCC',
688
+ fwded_text_after='Creativity is central',
689
+ ),
598
690
  EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
599
691
  EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
600
692
  EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
693
+ EmailCfg(
694
+ id='025589',
695
+ recipients=[GORDON_GETTY, JEFFREY_EPSTEIN] + TRIVERS_CCS,
696
+ attribution_reason='Reply',
697
+ ),
601
698
  EmailCfg(
602
699
  id='021090',
603
700
  recipients=[JONATHAN_FARKAS],
@@ -608,15 +705,15 @@ EMAILS_CONFIG = [
608
705
  id='033073',
609
706
  recipients=[KATHRYN_RUEMMLER],
610
707
  attribution_reason='to "Kathy" about dems, sent from iPad',
611
- is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
708
+ is_attribution_uncertain=True,
612
709
  ),
613
710
  EmailCfg(
614
711
  id='032939',
615
712
  recipients=[KATHRYN_RUEMMLER],
616
713
  attribution_reason='to "Kathy" about dems, sent from iPad',
617
- is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
714
+ is_attribution_uncertain=True,
618
715
  ),
619
- EmailCfg(id='030522', recipients=[LANDON_THOMAS], attribution_reason='reply header', is_fwded_article=True), # Vicky Ward article
716
+ EmailCfg(id='030522', recipients=[LANDON_THOMAS], attribution_reason='reply header', is_fwded_article=True),
620
717
  EmailCfg(id='031413', recipients=[LANDON_THOMAS], attribution_reason='reply header'),
621
718
  EmailCfg(id='033591', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['033591']),
622
719
  EmailCfg(id='027097', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['028787']),
@@ -624,113 +721,159 @@ EMAILS_CONFIG = [
624
721
  EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
625
722
  EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
626
723
  EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
627
- EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
724
+ EmailCfg(
725
+ id='033025',
726
+ recipients=[MASHA_DROKOVA],
727
+ attribution_reason='timing, subject (interviews/articles), and sequential ID',
728
+ is_attribution_uncertain=True,
729
+ ),
628
730
  EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
629
731
  EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
630
732
  EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
631
733
  EmailCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
632
734
  EmailCfg(id='022193', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
735
+ EmailCfg(id='022197', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
736
+ EmailCfg(id='022190', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
633
737
  EmailCfg(id='030572', recipients=[PAULA], attribution_reason='quoted in 030482', is_attribution_uncertain=True),
634
- EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
635
- EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
636
- EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
637
- EmailCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
738
+ EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
739
+ EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
740
+ EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
741
+ EmailCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True,),
638
742
  EmailCfg(id='030096', recipients=[PETER_MANDELSON], attribution_reason='reply header'),
639
743
  EmailCfg(id='032951', recipients=[RAAFAT_ALSABBAGH, None], attribution_reason='Redacted'),
640
744
  EmailCfg(id='029581', recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
641
745
  EmailCfg(id='019334', recipients=[STEVE_BANNON], attribution_reason='quoted reply'),
642
746
  EmailCfg(id='021106', recipients=[STEVE_BANNON], attribution_reason='Reply'),
643
-
644
- # Misc configs
645
- EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
646
- EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
647
- EmailCfg(id='033050', actual_text='schwartman'),
648
- EmailCfg(id='031036', description=f"{BARBRO_C_EHNBOM} related donation and Swedish girls discussion"),
649
- EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
747
+ EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan', is_fwded_article=True),
748
+ EmailCfg(id='032358', actual_text=REDACTED),
749
+ EmailCfg(
750
+ id='031036',
751
+ description='Barbro C. Ehnbom related donation and Swedish girls discussion',
752
+ is_interesting=True,
753
+ comment='Barbro Ehnbom talking about Swedish girl',
754
+ ),
755
+ EmailCfg(id='032946', description='discussion of obtaining a Moroccan visa for an unnamed woman', is_interesting=True),
756
+ EmailCfg(id='031320', description='discussion of routing Gratitude foundation money through Peggy Siegal', is_interesting=True),
757
+ EmailCfg(id='023627', description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT, is_fwded_article=True, is_interesting=True, truncate_to=16800),
758
+ EmailCfg(id='032671', description='connections to Fusion GPS who produced the Steele Dossier on Trump'),
759
+ EmailCfg(id='033052', description='connections to Fusion GPS who produced the Steele Dossier on Trump'),
760
+ EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results", truncate_to=2404),
761
+ EmailCfg(id='031472', description="discussion of publishing an audio recording related to the coup in Turkey"),
762
+ EmailCfg(id='031333', description='Fort Knox conspiracy theory, looks like a Russian disinfo article', is_fwded_article=True),
763
+ EmailCfg(id='031335', description='Fort Knox conspiracy theory, looks like a Russian disinfo article', is_fwded_article=True),
650
764
  EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
651
765
  EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
652
766
  EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
653
767
  EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
654
768
  EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
655
769
  EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
656
- EmailCfg(id='031320', description=f"Epstein and {RICHARD_KAHN} appear to be discussing routing donatings through {PEGGY_SIEGAL}"),
770
+ EmailCfg(id='031011', description='jokes about Chicago corruption', duplicate_ids=['031090'], truncate_to=TRUNCATED_CHARS),
657
771
  EmailCfg(id='016693', description='signed "MM"'),
658
- EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
659
- EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
660
- EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
661
- EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
662
- EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
663
- EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
664
- EmailCfg(id='022344', is_fwded_article=True, duplicate_ids=['028529']), # Bill Gates is most admired from Nikolic
665
- EmailCfg(id='018197', is_fwded_article=True, duplicate_ids=['028648']), # Ray Takeyh article fwd
666
- EmailCfg(id='028728', is_fwded_article=True, duplicate_ids=['027102']), # WSJ forward to Larry Summers
667
- EmailCfg(id='028781', is_fwded_article=True, duplicate_ids=['013460']), # Atlantic on Jim Yong Kim, Obama's World Bank Pick
668
- EmailCfg(id='025041', is_fwded_article=True, duplicate_ids=['028675']), # Obama agenda
669
- EmailCfg(id='031136', is_fwded_article=True, duplicate_ids=['028791']), # 'Smart Money is Fleeing US Stocks'
670
- EmailCfg(id='031779', is_fwded_article=True, duplicate_ids=['026938']), # Sarah Silverman on AI
671
- EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
672
- EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
673
- EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
674
- EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
675
- EmailCfg(id='026829', is_fwded_article=True), # Taxes
676
- EmailCfg(id='020443', is_fwded_article=True), # WSJ Deplorables Bannon
677
- EmailCfg(id='030372', is_fwded_article=True), # Bannon China Iran
678
- EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
679
- EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
680
- EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
681
- EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
682
- EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
683
- EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
684
- EmailCfg(id='026755', is_fwded_article=True), # HuffPo
685
- EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
686
- EmailCfg(id='030528', is_fwded_article=True), # Vicky Ward article
687
- EmailCfg(id='030460', is_fwded_article=True), # Vicky Ward article
688
- EmailCfg(id='028508', is_fwded_article=True), # nanosatellites article
689
- EmailCfg(id='019845', is_fwded_article=True), # Pro Publica article on Preet Bharara
690
- EmailCfg(id='029021', is_fwded_article=True), # article about bannon sent by Alain Forget
691
- EmailCfg(id='031688', is_fwded_article=True), # Bill Siegel fwd of email about hamas
692
- EmailCfg(id='026551', is_fwded_article=True), # Sultan bin Sulayem "Ayatollah between the sheets"
693
- EmailCfg(id='031768', is_fwded_article=True), # Sultan bin Sulayem 'Horseface'
694
- EmailCfg(id='031569', is_fwded_article=True), # Article by Kathryn Alexeeff fwded to Peter Thiel
695
- EmailCfg(id='029689', is_fwded_article=True), # Tunisia article to Larry Summers
696
- EmailCfg(id='014525', is_fwded_article=True), # Really more of a mailing list from Paul Morris?
697
- EmailCfg(id='024384', is_fwded_article=True), # Interview with Bill Siegal re: Islam
698
- EmailCfg(id='030200', is_fwded_article=True), # Lawfare indicting a president
699
- EmailCfg(id='029509', is_fwded_article=True), # Deepak Chopra LSD, Quantum Healing
700
- EmailCfg(id='026778', is_fwded_article=True), # tax alert
701
- EmailCfg(id='023001', is_fwded_article=True), # Miami Herald article timeline of the sex abuse case
702
- EmailCfg(id='013405', is_fwded_article=True), # Articles about epstein case
703
- EmailCfg(id='021740', is_fwded_article=True), # Miami Herald article about Epstein prosecutor
704
- EmailCfg(id='023126', is_fwded_article=True), # Miami Herald on Alex Acosta
705
- EmailCfg(id='029625', is_fwded_article=True), # Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement
706
- EmailCfg(id='029505', is_fwded_article=True), # Foreign Policy Middle Eastern Monarchs Look at the Trump
707
- EmailCfg(id='029859', is_fwded_article=True), # Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits
708
- EmailCfg(id='031988', is_fwded_article=True), # NYT review of Inside Job
709
- EmailCfg(id='029901', is_fwded_article=True), # THE EDGE question
710
- EmailCfg(id='031399', is_fwded_article=True), # Miami U.S. Attorney's Office recuses itself from Jeffrey Epstein case
711
- EmailCfg(id='031705', is_fwded_article=True), # Thomas Friedman why not in vegas?
712
- EmailCfg(id='016801', is_fwded_article=True), # Capital Market Outlook
713
- EmailCfg(id='023564', is_fwded_article=True), # BBG ;Leon Black's Tax-Overhaul Dilemma Could Alter Wall Street Model
714
- EmailCfg(id='025231', is_fwded_article=True), # Newsmax: Laffer, Laffer: Obama Must Use Reaganomics to Save Economy The only way President Barack Obama can solve
715
- EmailCfg(id='031472', is_fwded_article=True), # WSJ: Lawyers for Imam Wanted by Turkish authorities Fear for Their Client's Life
716
- EmailCfg(id='012684', is_fwded_article=True), # Trump in talks to buy socialite Kluge's Charlottesville vineyard
717
- EmailCfg(id='028536', is_fwded_article=True), # Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?
718
- EmailCfg(id='030326', is_fwded_article=True), # NYP Congressional candidate compares Melania Trump to prostitute
719
- EmailCfg(id='030519', is_fwded_article=True), # Daily Mail on Prince Andrew
720
- EmailCfg(id='030878', is_fwded_article=True), # Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'
721
- EmailCfg(id='024300', is_fwded_article=True), # Bookstore owner calls police after customer confronted Steve Bannon
722
- EmailCfg(id='026924', is_fwded_article=True), # The Onion
723
- EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
724
- EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
725
- EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
726
- EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
727
- EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
728
- EmailCfg(id='031990', is_fwded_article=True), # newsmax on ken starr
729
- EmailCfg(id='029433', is_fwded_article=True), # Estate Planning After the Enactment of the Tax Cuts and Jobs Act
730
- EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
731
- EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
732
-
733
- # Configure duplicates
772
+ EmailCfg(id='028524', description='Zach Braff op-ed on Woody Allen in NYT', is_fwded_article=True),
773
+ EmailCfg(id='026298', duplicate_ids=['026499'], is_fwded_article=True, comment='Written by someone else?'),
774
+ EmailCfg(id='029692', duplicate_ids=['029779'], is_fwded_article=True, comment='WaPo article'),
775
+ EmailCfg(id='022344', duplicate_ids=['028529'], is_fwded_article=True, comment='Bill Gates is most admired from Nikolic'),
776
+ EmailCfg(id='018197', duplicate_ids=['028648'], is_fwded_article=True, comment='Ray Takeyh article fwd'),
777
+ EmailCfg(id='028728', duplicate_ids=['027102'], is_fwded_article=True, comment='WSJ forward to Larry Summers'),
778
+ EmailCfg(id='028781', duplicate_ids=['013460'], is_fwded_article=True, comment="Atlantic on Jim Yong Kim, Obama's World Bank Pick"),
779
+ EmailCfg(id='025041', duplicate_ids=['028675'], is_fwded_article=True, comment='Obama agenda'),
780
+ EmailCfg(id='031136', duplicate_ids=['028791'], is_fwded_article=True, comment="'Smart Money is Fleeing US Stocks'"),
781
+ EmailCfg(id='031779', duplicate_ids=['026938'], is_fwded_article=True, comment='Sarah Silverman on AI'),
782
+ EmailCfg(id='029849', duplicate_ids=['033482'], is_fwded_article=True, comment='Fareed Zakaria: Trump sells America short),'),
783
+ EmailCfg(id='032023', duplicate_ids=['032012'], is_fwded_article=True, comment='American-Israeli Cooperative Enterprise Newsletter'),
784
+ EmailCfg(id='021758', duplicate_ids=['030616'], is_fwded_article=True, comment="Radar Online article about Epstein's early prison release"),
785
+ EmailCfg(id='033297', duplicate_ids=['033586'], is_fwded_article=True, comment='Sultan Sulayem article about Trump and Russia'),
786
+ EmailCfg(id='031428', duplicate_ids=['031388'], is_fwded_article=True),
787
+ EmailCfg(id='033528', duplicate_ids=['033517'], is_fwded_article=True),
788
+ EmailCfg(id='030238', duplicate_ids=['031130'], is_fwded_article=True),
789
+ EmailCfg(id='031112', duplicate_ids=['030876'], is_fwded_article=True),
790
+ EmailCfg(id='031423', duplicate_ids=['025361'], is_fwded_article=True),
791
+ EmailCfg(id='026755', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Epstein self fwd'),
792
+ EmailCfg(id='026778', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
793
+ EmailCfg(id='032458', truncate_to=NO_TRUNCATE, description='discussion of acquiring pieces for Epstein\'s art collection'),
794
+ EmailCfg(id='032464', truncate_to=NO_TRUNCATE, description='redacted discussion about art advisor Etienne Binant'),
795
+ # TODO: just use a search string for truncation
796
+ EmailCfg(id='032964', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
797
+ EmailCfg(id='032968', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
798
+ EmailCfg(id='032467', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
799
+ EmailCfg(id='032480', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
800
+ EmailCfg(id='032486', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
801
+ EmailCfg(id='032491', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
802
+ EmailCfg(id='032975', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
803
+ EmailCfg(id='032496', truncate_to=700, description='redacted discussion about art advisor Etienne Binant'),
804
+ EmailCfg(id='025643', is_fwded_article=True, comment='Alice Fisher stated to be in the mix of potential candidates for new FBI director.'),
805
+ EmailCfg(id='014523', is_fwded_article=True, comment='finance research'),
806
+ EmailCfg(id='014857', is_fwded_article=True, comment='finance research'),
807
+ EmailCfg(id='029458', is_fwded_article=True, comment='finance research'),
808
+ EmailCfg(id='026893', is_fwded_article=True, comment='finance research'),
809
+ EmailCfg(id='033362', is_fwded_article=True, comment='finance research'),
810
+ EmailCfg(id='030865', is_fwded_article=True, comment='Deutsche bank research'),
811
+ EmailCfg(id='021231', is_fwded_article=True, comment='11 places with worse economy than US'),
812
+ EmailCfg(id='029905', is_fwded_article=True, comment='Ann Coulter'),
813
+ EmailCfg(id='026829', is_fwded_article=True, comment='Taxes'),
814
+ EmailCfg(id='020443', is_fwded_article=True, comment='WSJ Deplorables Bannon'),
815
+ EmailCfg(id='030372', is_fwded_article=True, comment='Bannon China Iran'),
816
+ EmailCfg(id='030983', is_fwded_article=True, comment='Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis'),
817
+ EmailCfg(id='031774', is_fwded_article=True, comment='Krassner fwd of Palmer Report article'),
818
+ EmailCfg(id='033345', is_fwded_article=True, comment='Krassner fwd of Palmer Report article'),
819
+ EmailCfg(id='029903', is_fwded_article=True, comment='Krassner fwd of Ann Coulter article about Epstein'),
820
+ EmailCfg(id='030266', is_fwded_article=True, comment='Krassner fwd of article about Dershowitz'),
821
+ EmailCfg(id='030868', is_fwded_article=True, comment="'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month"),
822
+ EmailCfg(id='016218', is_fwded_article=True, comment='AT&T confirms it paid Trump lawyer Cohen for insights on Trump'),
823
+ EmailCfg(id='030528', is_fwded_article=True, comment='Vicky Ward article'),
824
+ EmailCfg(id='030460', is_fwded_article=True, comment='Vicky Ward article'),
825
+ EmailCfg(id='028508', is_fwded_article=True, comment='nanosatellites article'),
826
+ EmailCfg(id='019845', is_fwded_article=True, comment='Pro Publica article on Preet Bharara'),
827
+ EmailCfg(id='029021', is_fwded_article=True, comment='article about bannon sent by Alain Forget'),
828
+ EmailCfg(id='031688', is_fwded_article=True, comment='Bill Siegel fwd of email about hamas'),
829
+ EmailCfg(id='026551', is_fwded_article=True, comment='Sultan bin Sulayem "Ayatollah between the sheets"'),
830
+ EmailCfg(id='031768', is_fwded_article=True, comment="Sultan bin Sulayem 'Horseface'"),
831
+ EmailCfg(id='031569', is_fwded_article=True, comment='Article by Kathryn Alexeeff fwded to Peter Thiel'),
832
+ EmailCfg(id='029689', is_fwded_article=True, comment='Tunisia article to Larry Summers'),
833
+ EmailCfg(id='014525', is_fwded_article=True, comment='Really more of a mailing list from Paul Morris?'),
834
+ EmailCfg(id='024384', is_fwded_article=True, comment='Interview with Bill Siegal re: Islam'),
835
+ EmailCfg(id='030200', is_fwded_article=True, comment='Lawfare indicting a president'),
836
+ EmailCfg(id='029509', is_fwded_article=True, comment='Deepak Chopra LSD, Quantum Healing'),
837
+ EmailCfg(id='023001', is_fwded_article=True, comment='Miami Herald article timeline of the sex abuse case'),
838
+ EmailCfg(id='013405', is_fwded_article=True, comment='Articles about epstein case'),
839
+ EmailCfg(id='021740', is_fwded_article=True, comment='Miami Herald article about Epstein prosecutor'),
840
+ EmailCfg(id='023126', is_fwded_article=True, comment='Miami Herald on Alex Acosta'),
841
+ EmailCfg(id='029625', is_fwded_article=True, comment='Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement'),
842
+ EmailCfg(id='029505', is_fwded_article=True, comment='Foreign Policy Middle Eastern Monarchs Look at the Trump'),
843
+ EmailCfg(id='029859', is_fwded_article=True, comment='Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits'),
844
+ EmailCfg(id='031988', is_fwded_article=True, comment='NYT review of Inside Job'),
845
+ EmailCfg(id='029901', is_fwded_article=True, comment='THE EDGE question'),
846
+ EmailCfg(id='031399', is_fwded_article=True, comment="Miami U.S. Attorney's Office recuses itself from Jeffrey Epstein case"),
847
+ EmailCfg(id='031705', is_fwded_article=True, comment='Thomas Friedman why not in vegas?'),
848
+ EmailCfg(id='016801', is_fwded_article=True, comment='Capital Market Outlook'),
849
+ EmailCfg(id='023564', is_fwded_article=True, comment="BBG ;Leon Black's Tax-Overhaul Dilemma Could Alter Wall Street Model"),
850
+ EmailCfg(id='025231', is_fwded_article=True, comment='Newsmax: Laffer, Laffer: Obama Must Use Reaganomics to Save Economy The only way President Barack Obama can solve'),
851
+ EmailCfg(id='028536', is_fwded_article=True, comment='Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?'),
852
+ EmailCfg(id='030326', is_fwded_article=True, comment='NYP Congressional candidate compares Melania Trump to prostitute'),
853
+ EmailCfg(id='030519', is_fwded_article=True, comment='Daily Mail on Prince Andrew'),
854
+ EmailCfg(id='030878', is_fwded_article=True, comment="Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'"),
855
+ EmailCfg(id='024300', is_fwded_article=True, comment='Bookstore owner calls police after customer confronted Steve Bannon'),
856
+ EmailCfg(id='026924', is_fwded_article=True, comment='The Onion'),
857
+ EmailCfg(id='022624', is_fwded_article=True, comment='Disgusting: Clinton Snared In Pedophile Ring'),
858
+ EmailCfg(id='022673', is_fwded_article=True, comment='Epstein + Clinton'),
859
+ EmailCfg(id='033301', is_fwded_article=True, comment='description of Trump jet'),
860
+ EmailCfg(id='021729', is_fwded_article=True, comment='Acosta rebuke'),
861
+ EmailCfg(id='023635', is_fwded_article=True, comment='Landon Thomas finance related'),
862
+ EmailCfg(id='026637', is_fwded_article=True, comment='Landon Thomas finance related'),
863
+ EmailCfg(id='021764', is_fwded_article=True, comment='He was 50 and they were girls'),
864
+ EmailCfg(id='033311', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
865
+ EmailCfg(id='026580', is_fwded_article=True, comment='NPR: Antigua: Land Of Sun, Sand, And Super Cheap'),
866
+ EmailCfg(id='031340', is_fwded_article=True, comment='Article about Alex Jones threatening Robert Mueller'),
867
+ EmailCfg(id='030209', is_fwded_article=True, comment='Atlantic Council Syria: Blackberry Diplomacy'),
868
+ EmailCfg(id='026605', is_fwded_article=True, comment='Article about Ruemmler turning down attorney general job by NEDRA PICKLER'),
869
+ EmailCfg(id='031990', is_fwded_article=True, comment='newsmax on ken starr'),
870
+ EmailCfg(id='029433', is_fwded_article=True, truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
871
+ EmailCfg(id='030927', is_fwded_article=True, comment="don't talk to the FBI"),
872
+ EmailCfg(id='033329', is_fwded_article=True, comment='Chinese economists'),
873
+ EmailCfg(id='031764', is_fwded_article=True, truncate_to=3500, comment='broidy malaysia'),
874
+ EmailCfg(id='032475', date='2017-02-15 13:31:25'),
875
+ EmailCfg(id='030373', date='2018-10-03 01:49:27'),
876
+ EmailCfg(id='032325', duplicate_ids=['026014'], dupe_type='quoted', is_interesting=True, comment='Zubair'),
734
877
  EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
735
878
  EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
736
879
  EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
@@ -749,19 +892,16 @@ EMAILS_CONFIG = [
749
892
  EmailCfg(id='033512', duplicate_ids=['033361']),
750
893
  EmailCfg(id='030299', duplicate_ids=['021794']),
751
894
  EmailCfg(id='033575', duplicate_ids=['012898']),
752
- EmailCfg(id='031428', is_fwded_article=True, duplicate_ids=['031388']),
753
895
  EmailCfg(id='031980', duplicate_ids=['019409']),
754
896
  EmailCfg(id='033486', duplicate_ids=['033156']),
755
897
  EmailCfg(id='025790', duplicate_ids=['031994']),
756
898
  EmailCfg(id='028497', duplicate_ids=['026228']),
757
- EmailCfg(id='033528', is_fwded_article=True, duplicate_ids=['033517']),
758
899
  EmailCfg(id='019412', duplicate_ids=['028621']),
759
900
  EmailCfg(id='027053', duplicate_ids=['028765']),
760
901
  EmailCfg(id='027049', duplicate_ids=['028773']),
761
902
  EmailCfg(id='033580', duplicate_ids=['033207']),
762
903
  EmailCfg(id='028506', duplicate_ids=['025547']),
763
- EmailCfg(id='028784', duplicate_ids=['026549']),
764
- EmailCfg(id='033386', duplicate_ids=['033599']),
904
+ EmailCfg(id='028784', duplicate_ids=['026549'], is_interesting=True, comment='seminars: Money / Power'),
765
905
  EmailCfg(id='023024', duplicate_ids=['030622']),
766
906
  EmailCfg(id='030618', duplicate_ids=['023026']),
767
907
  EmailCfg(id='028780', duplicate_ids=['026834']),
@@ -772,15 +912,14 @@ EMAILS_CONFIG = [
772
912
  EmailCfg(id='030587', duplicate_ids=['030514']),
773
913
  EmailCfg(id='031089', duplicate_ids=['018084']),
774
914
  EmailCfg(id='031088', duplicate_ids=['030885']),
775
- EmailCfg(id='030238', duplicate_ids=['031130']),
776
915
  EmailCfg(id='030859', duplicate_ids=['031067']),
777
916
  EmailCfg(id='030635', duplicate_ids=['031134']),
778
- EmailCfg(id='028494', duplicate_ids=['026234']),
917
+ EmailCfg(id='028494', duplicate_ids=['026234'], truncate_to=NO_TRUNCATE, comment='Email about being in palm beach w/trump people'),
779
918
  EmailCfg(id='030311', duplicate_ids=['021790']),
780
919
  EmailCfg(id='033508', duplicate_ids=['029880']),
781
920
  EmailCfg(id='030493', duplicate_ids=['030612']),
782
921
  EmailCfg(id='032051', duplicate_ids=['031771']),
783
- EmailCfg(id='031217', duplicate_ids=['021761']),
922
+ EmailCfg(id='031217', duplicate_ids=['021761'], truncate_to=1800, comment='1st email for dersh, has long article'),
784
923
  EmailCfg(id='031346', duplicate_ids=['031426']),
785
924
  EmailCfg(id='031345', duplicate_ids=['031427']),
786
925
  EmailCfg(id='031343', duplicate_ids=['031432']),
@@ -806,12 +945,10 @@ EMAILS_CONFIG = [
806
945
  EmailCfg(id='027032', duplicate_ids=['028531']),
807
946
  EmailCfg(id='026777', duplicate_ids=['028493']),
808
947
  EmailCfg(id='029837', duplicate_ids=['029255']),
809
- EmailCfg(id='031423', duplicate_ids=['025361']),
810
948
  EmailCfg(id='029299', duplicate_ids=['033594']),
811
949
  EmailCfg(id='030904', duplicate_ids=['031069']),
812
950
  EmailCfg(id='030006', duplicate_ids=['031165']),
813
951
  EmailCfg(id='025215', duplicate_ids=['031159']),
814
- EmailCfg(id='031011', duplicate_ids=['031090']),
815
952
  EmailCfg(id='032068', duplicate_ids=['018158']),
816
953
  EmailCfg(id='031213', duplicate_ids=['031221']),
817
954
  EmailCfg(id='016595', duplicate_ids=['016690']),
@@ -827,7 +964,6 @@ EMAILS_CONFIG = [
827
964
  EmailCfg(id='028620', duplicate_ids=['027094']),
828
965
  EmailCfg(id='032456', duplicate_ids=['033579']),
829
966
  EmailCfg(id='030315', duplicate_ids=['030255']),
830
- EmailCfg(id='031112', duplicate_ids=['030876']),
831
967
  EmailCfg(id='030614', duplicate_ids=['030491']),
832
968
  EmailCfg(id='033585', duplicate_ids=['032279']),
833
969
  EmailCfg(id='031220', duplicate_ids=['031189']),
@@ -835,22 +971,21 @@ EMAILS_CONFIG = [
835
971
  EmailCfg(id='033230', duplicate_ids=['033577']),
836
972
  EmailCfg(id='032125', duplicate_ids=['023971']),
837
973
  EmailCfg(id='031230', duplicate_ids=['031203']),
838
- EmailCfg(id='028752', duplicate_ids=['026569']),
974
+ EmailCfg(id='028752', duplicate_ids=['026569'], subject='Re: Program & Attendee list'),
839
975
  EmailCfg(id='031773', duplicate_ids=['032050']),
840
976
  EmailCfg(id='021400', duplicate_ids=['031983']),
841
- EmailCfg(id='026548', duplicate_ids=['033491']),
977
+ EmailCfg(id='026548', duplicate_ids=['033491', '033495'], comment='033495 is HTML garbage version'),
842
978
  EmailCfg(id='029752', duplicate_ids=['023550']),
843
979
  EmailCfg(id='030339', duplicate_ids=['030592']),
844
- EmailCfg(id='032250', duplicate_ids=['033589']),
845
-
846
- # Emails that need a little help determining how to separate the actual text from fwded text
980
+ EmailCfg(id='032250', duplicate_ids=['033589'], truncate_to=1000, comment='Wolff article'),
847
981
  EmailCfg(id='013415', fwded_text_after='Darren K. Indyke'),
848
982
  EmailCfg(id='024624', fwded_text_after='On Tue, May 14'),
849
- EmailCfg(id='025888', fwded_text_after='Jul 24, 2015'),
983
+ EmailCfg(id='025888', fwded_text_after='Jul 24, 2015', comment='sultan asking about cholesterol drug'),
984
+ EmailCfg(id='026362', fwded_text_after='Jul 24, 2015', comment='sultan asking about cholesterol drug'),
850
985
  EmailCfg(id='016413', fwded_text_after='In a former warehouse'),
851
986
  EmailCfg(id='025548', fwded_text_after='Edward Jay Epstein'),
852
987
  EmailCfg(id='032806', fwded_text_after='• Sep 13, 2018'),
853
- EmailCfg(id='024251', fwded_text_after='Debate Schedule'),
988
+ EmailCfg(id='024251', fwded_text_after='Debate Schedule', truncate_to=TRUNCATED_CHARS, comment='Kahn taxes'),
854
989
  EmailCfg(id='028943', fwded_text_after='-Lisa'),
855
990
  EmailCfg(id='029431', fwded_text_after='I am writing now'),
856
991
  EmailCfg(id='020437', fwded_text_after='Will Cohen Cooperate'),
@@ -859,7 +994,7 @@ EMAILS_CONFIG = [
859
994
  EmailCfg(id='030324', fwded_text_after='For Federal Programs'),
860
995
  EmailCfg(id='022766', fwded_text_after='--- On Wed, 4/22/15'),
861
996
  EmailCfg(id='025606', fwded_text_after='> On May 6,'),
862
- EmailCfg(id='022977', fwded_text_after='Top of Form'),
997
+ EmailCfg(id='022977', fwded_text_after='Top of Form', truncate_to=1800, comment='Krassner with huge attachments field'),
863
998
  EmailCfg(id='033420', fwded_text_after='Slowing economy could increase pressure on'),
864
999
  EmailCfg(id='019203', fwded_text_after='This end-of-the-year'),
865
1000
  EmailCfg(id='022207', fwded_text_after='Web Images Videos Maps'),
@@ -871,9 +1006,97 @@ EMAILS_CONFIG = [
871
1006
  EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
872
1007
  EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
873
1008
  EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
874
- EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
1009
+ EmailCfg(id='012197_4', fwded_text_after='Thanks -- Jay', is_fwded_article=False),
1010
+ EmailCfg(id='026449', description="comments about Trump's 'former bridge girl and toy'", is_interesting=True),
1011
+ EmailCfg(id='030727', description='Epstein requesting help for Russia and discussion of an unknown dead Chinese shareholder', is_interesting=True),
1012
+ EmailCfg(id='033377', description="Epstein says he knows Russian oligarch Oleg Deripaska", is_interesting=True),
1013
+ EmailCfg(id='029098', description=f"{MASHA_DROKOVA}'s skype contact is in Moscow", is_interesting=True),
1014
+ EmailCfg(id='032229', description='Michael Wolff offers ideas for entrapping Trump', is_interesting=True),
1015
+ EmailCfg(id='023454', description='Email invitation sent to tech CEOs and Epstein', is_interesting=True, truncate_to=1878),
1016
+ EmailCfg(id='032842', description='discussion of "stealing" a casino in Atlantic City', is_interesting=True),
1017
+ EmailCfg(id='030630', is_interesting=True, comment="'What happens with zubair's project?'"),
1018
+ EmailCfg(id='033178', is_interesting=True, comment="'How is it going with Zubair?'"),
1019
+ EmailCfg(id='022396', is_interesting=True, comment='Ukraine friend'),
1020
+ EmailCfg(id='026505', is_interesting=True, comment='I know how dirty trump is'),
1021
+ EmailCfg(id='029679', is_interesting=True, comment="Trump's driver was the bag man"),
1022
+ EmailCfg(id='026258', is_interesting=True, comment='Bannon cripto coin issues'),
1023
+ EmailCfg(id='026260', is_interesting=True, comment='Bannon cripto coin issues'),
1024
+ EmailCfg(id='032359', is_interesting=True, comment='Jabor e-currency'),
1025
+ EmailCfg(id='031451', is_interesting=True, comment='"would you like photso of donald and girls in bikinis in my kitchen."'),
1026
+ EmailCfg(id='031596', is_interesting=True, comment='"would you like photso of donald and girls in bikinis in my kitchen."'),
1027
+ EmailCfg(id='031601', is_interesting=True, comment='Old gf i gave to donald', truncate_to=2000),
1028
+ EmailCfg(id='030725', is_interesting=True, comment='David Stern in Moscow'),
1029
+ EmailCfg(id='030714', is_interesting=True, comment='Bannon, Russian Dugan shout out'),
1030
+ EmailCfg(id='031659', is_interesting=True, comment='"i have met some very bad people „ none as bad as trump"'),
1031
+ EmailCfg(id='030245', is_interesting=True, comment='Epstein rationalizes his behavior in an open letter to the world'),
1032
+ EmailCfg(id='030781', is_interesting=True, comment='Bannon email about crypto coin issues'),
1033
+ EmailCfg(id='032906', is_interesting=True, comment='David Blaine email'),
1034
+ EmailCfg(id='026036', is_interesting=True, truncate_to=6000, comment='Gino Yu blockchain mention'),
1035
+ EmailCfg(id='029609', is_interesting=True, comment='Joi Ito'),
1036
+ EmailCfg(id='025233', is_interesting=True, comment='Reputation.com discussion'),
1037
+ EmailCfg(id='017827', is_interesting=True, comment='Bannon / Peggy Siegal email about netflix doc on Epstein'),
1038
+ EmailCfg(id='030222', is_interesting=True, comment='Ross Gow / Ghislaine correspondence'),
1039
+ EmailCfg(
1040
+ id='026028',
1041
+ description=f"Epstein introduces {LARRY_SUMMERS} to possibly corrupt Senegalese official Karim Wade",
1042
+ is_interesting=True,
1043
+ ),
1044
+ EmailCfg(id='029545', is_interesting=True, comment='Tyler Shears reputation'),
1045
+ EmailCfg(id='025812', is_interesting=True, comment='Tyler Shears reputation'),
1046
+ EmailCfg(
1047
+ id='029914',
1048
+ description='Epstein and Lord Mandelson discuss Russian investments',
1049
+ is_interesting=True,
1050
+ truncate_to=4500,
1051
+ ),
1052
+ EmailCfg(id='033453', is_interesting=True, description='possibly an email from one of the women who sued Trump'),
1053
+ EmailCfg(id='029342', is_interesting=True, truncate_to=2000, comment='Hakeem Jeffries'),
1054
+ EmailCfg(id='031326', is_interesting=True, comment='"dog that hasn\'t barked is trump"'),
1055
+ EmailCfg(id='033171', is_interesting=True, comment='Zubair'),
1056
+ EmailCfg(
1057
+ id='032319',
1058
+ dupe_type='quoted',
1059
+ duplicate_ids=['032283'],
1060
+ is_interesting=True,
1061
+ comment='Zubair',
1062
+ ),
1063
+ EmailCfg(
1064
+ id='031152',
1065
+ description='discussion of notoriously corrupt Kazakh politician Rakhat Aliyev whom Epstein apparently knew',
1066
+ is_interesting=True,
1067
+ ),
1068
+ EmailCfg(id='030745', description="planning a public statement for Ghislaine", truncate_to=NO_TRUNCATE),
1069
+ EmailCfg(id='028589', truncate_to=NO_TRUNCATE, comment='Long thread with Reid Weingarten'),
1070
+ EmailCfg(id='026059', truncate_to=2650, comment='Rothschild'),
1071
+ EmailCfg(id='032643', truncate_to=NO_TRUNCATE, comment='Anas al Rasheed'),
1072
+ EmailCfg(id='031619', truncate_to=652, comment='Reply to grab em by the pussy story'),
1073
+ EmailCfg(id='021096', truncate_to=700, comment='Sinofsky article quote'),
1074
+ EmailCfg(id='032865', truncate_to=445, comment='Barton reply'),
1075
+ EmailCfg(id='027126', truncate_to=1000, comment='Summers'),
1076
+ EmailCfg(id='030950', truncate_to=4500, comment='Ian Osborne'),
1077
+ EmailCfg(id='029684', truncate_to=402, comment='Maldives reply'),
1078
+ EmailCfg(id='018045', truncate_to=TRUNCATED_CHARS, comment='invite'),
1079
+ EmailCfg(id='017574', truncate_to=4000, comment='Lisa Randall invite'),
1080
+ EmailCfg(id='030589', truncate_to=1000, comment='Brett Jaffe Fwd'),
1081
+ EmailCfg(id='025655', truncate_to=400, comment='reply to article'),
1082
+ EmailCfg(id='026451', truncate_to=500, comment='reply to article'),
1083
+ EmailCfg(id='023717', truncate_to=489, comment='reply to article'),
1084
+ EmailCfg(id='022265', truncate_to=NO_TRUNCATE),
1085
+ EmailCfg(id='026243', truncate_to=NO_TRUNCATE),
1086
+ EmailCfg(id='029680', truncate_to=900, comment='Maldives'),
1087
+ EmailCfg(id='029534', truncate_to=900, comment='Maldives'),
1088
+ EmailCfg(id='028760', truncate_to=900, comment='Epstein in an argument with someone'),
1089
+ EmailCfg(id='028757', truncate_to=900, comment='Epstein in an argument with someone'),
1090
+ EmailCfg(id='027059', truncate_to=NO_TRUNCATE, comment='Jean Luc Brunel and Boris Nikolic'),
1091
+ EmailCfg(id='027028', truncate_to=1000, comment='Tom Pritzer penny pritzker'),
1092
+ EmailCfg(id='029910', truncate_to=NO_TRUNCATE, comment='Tom Pritzer Aspen'),
1093
+ EmailCfg(id='025163', truncate_to=NO_TRUNCATE, comment='Tom Pritzer'),
875
1094
  ]
876
1095
 
1096
+ if args.constantize:
1097
+ for email_cfg in EMAILS_CONFIG:
1098
+ print(f"{email_cfg},")
1099
+
877
1100
 
878
1101
  ################################################################################################
879
1102
  ####################################### OTHER FILES ############################################
@@ -910,7 +1133,7 @@ DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
910
1133
  DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
911
1134
  FBI_REPORT = f"report on Epstein investigation (redacted)"
912
1135
  FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
913
- FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
1136
+ FEMALE_HEALTH_COMPANY = 'Female Health Company (FHC)'
914
1137
  FIRE_AND_FURY = f"Fire And Fury"
915
1138
  HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
916
1139
  HBS_APPLICATION = f"{HARVARD} Business School application letter"
@@ -1024,7 +1247,7 @@ OTHER_FILES_ARTICLES = [
1024
1247
  DocCfg(id='013435', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's address book", date='2011-03-11'),
1025
1248
  DocCfg(id='013440', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's gag order", date='2011-07-13'),
1026
1249
  DocCfg(id='029238', author=PALM_BEACH_DAILY_NEWS, description=f"article about Epstein's plea deal"),
1027
- DocCfg(id='021775', author=PALM_BEACH_POST, description="article about 'He Was 50. And They Were Girls'"),
1250
+ DocCfg(id='021775', author=PALM_BEACH_POST, description="article about 'He Was 50. And They Were Girls'", attached_to_email_id='021764'),
1028
1251
  DocCfg(id='022989', author=PALM_BEACH_POST, description="article about alleged rape of 13 year old by Trump"),
1029
1252
  DocCfg(id='022987', author=PALM_BEACH_POST, description="article about just a headline on Trump and Epstein"),
1030
1253
  DocCfg(id='015028', author=PALM_BEACH_POST, description="article about reopening Epstein's criminal case"),
@@ -1042,14 +1265,14 @@ OTHER_FILES_ARTICLES = [
1042
1265
  DocCfg(id='010715', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2012-02-27'),
1043
1266
  DocCfg(id='019849', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2017-02-27', duplicate_ids=['019864']),
1044
1267
  DocCfg(id='026851', author='Politifact', description=f"lying politicians chart", date='2016-07-26'),
1045
- DocCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar'),
1268
+ DocCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar', attached_to_email_id='033252'),
1046
1269
  DocCfg(id='026887', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} "New Tariffs - Trade War"'),
1047
1270
  DocCfg(id='026877', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "New Tariffs - Trade War"'),
1048
1271
  DocCfg(id='026868', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business New China Tariffs — Trade War"', date='2018-09-18'),
1049
1272
  DocCfg(id='023707', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business U.S. and China Agree to Pause Trade War"', date='2018-12-03'),
1050
- DocCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"'),
1051
- DocCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"'),
1052
- DocCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances"),
1273
+ DocCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"', attached_to_email_id='029174'),
1274
+ DocCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"', attached_to_email_id='032637'),
1275
+ DocCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances", date='2018-09-30', attached_to_email_id='033252'),
1053
1276
  DocCfg(id='016996', author=f'SciencExpress', description=f'article "Quantitative Analysis of Culture Using Millions of Digitized Books" by Jean-Baptiste Michel'),
1054
1277
  DocCfg(id='025104', author='SCMP', description=f"article about China and globalisation"),
1055
1278
  DocCfg(id='030030', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-03-29'),
@@ -1073,7 +1296,7 @@ OTHER_FILES_ARTICLES = [
1073
1296
  DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
1074
1297
  DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
1075
1298
  DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
1076
- DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
1299
+ DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents'),
1077
1300
  DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
1078
1301
  DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
1079
1302
  DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1082,6 +1305,8 @@ OTHER_FILES_ARTICLES = [
1082
1305
  DocCfg(id='023046', author=VI_DAILY_NEWS, description='article', date='2019-02-27'),
1083
1306
  DocCfg(id='031170', author=VI_DAILY_NEWS, description='article', date='2019-03-06'),
1084
1307
  DocCfg(id='016506', author=VI_DAILY_NEWS, description='article', date='2019-02-28'),
1308
+ DocCfg(id='018862', author=VI_DAILY_NEWS, description='articles about Sen. Alvin Williams Jr. Fraud case, arson', date='2012-11-09'),
1309
+
1085
1310
  DocCfg(id='016507', author=VI_DAILY_NEWS, description=f'"Perversion of Justice" by {JULIE_K_BROWN}', date='2018-12-19'),
1086
1311
  DocCfg(id='019212', author=WAPO, description=f'and Times Tribune articles about Bannon, Trump, and healthcare execs'),
1087
1312
  DocCfg(id='033379', author=WAPO, description=f'"How Washington Pivoted From Finger-Wagging to Appeasement" (about Viktor Orban)', date='2018-05-25'),
@@ -1203,7 +1428,7 @@ OTHER_FILES_LEGAL = [
1203
1428
  DocCfg(id='013489', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'Affidavit of {BRAD_EDWARDS}', date='2010-07-20'),
1204
1429
  DocCfg(id='029398', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'article in Law.com'),
1205
1430
  DocCfg(id='026854', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Civil Docket"),
1206
- DocCfg(id='026384', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Complaint for rape and sexual abuse", date='2016-06-20'),
1431
+ DocCfg(id='026384', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Complaint for rape and sexual abuse", date='2016-06-20', attached_to_email_id='029837'),
1207
1432
  DocCfg(id='013463', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'Deposition of Scott Rothstein', date='2010-03-23'),
1208
1433
  DocCfg(id='029257', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f'allegations and identity of plaintiff Katie Johnson', date='2016-04-26'),
1209
1434
  DocCfg(id='032321', author=JANE_DOE_V_EPSTEIN_TRUMP, description=f"Notice of Initial Conference", date='2016-10-04'),
@@ -1263,7 +1488,7 @@ OTHER_FILES_CONFERENCES = [
1263
1488
  DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1264
1489
  DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1265
1490
  DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
1266
- DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
1491
+ DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23', attached_to_email_id='031215'),
1267
1492
  DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
1268
1493
  DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
1269
1494
  DocCfg(
@@ -1317,12 +1542,12 @@ OTHER_FILES_FINANCE = [
1317
1542
  DocCfg(id='024302', author='Carvana', description=f"form 14A SEC filing proxy statement", date='2019-04-23'),
1318
1543
  DocCfg(id='029305', author='CCH Tax', description=f"Briefing on end of Defense of Marriage Act", date='2013-06-27'),
1319
1544
  DocCfg(id='026794', author=DEUTSCHE_BANK, description=f"Global Political and Regulatory Risk in 2015/2016"),
1320
- DocCfg(id='022361', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01'),
1545
+ DocCfg(id='022361', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01', attached_to_email_id='022359'),
1321
1546
  DocCfg(id='022325', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20'),
1322
1547
  DocCfg(id='022330', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20', description='table of contents'),
1323
1548
  DocCfg(id='019440', author=DEUTSCHE_BANK_TAX_TOPICS, date='2014-01-29'),
1324
1549
  DocCfg(id='024202', author=ELECTRON_CAPITAL_PARTNERS, description=f"Global Utility White Paper", date='2013-03-08'),
1325
- DocCfg(id='022372', author='Ernst & Young', description=f'2016 election report'),
1550
+ DocCfg(id='022372', author='Ernst & Young', date='2016-11-09', description=f'2016 election report'),
1326
1551
  DocCfg(
1327
1552
  id='025663',
1328
1553
  author=GOLDMAN_INVESTMENT_MGMT,
@@ -1331,7 +1556,13 @@ OTHER_FILES_FINANCE = [
1331
1556
  is_interesting=True,
1332
1557
  ),
1333
1558
  DocCfg(id='014532', author=GOLDMAN_INVESTMENT_MGMT, description=f"Outlook - Half Full", date='2017-01-01'),
1334
- DocCfg(id='026909', author=GOLDMAN_INVESTMENT_MGMT, description=f"The Unsteady Undertow Commands the Seas (Temporarily)", date='2018-10-14'),
1559
+ DocCfg(
1560
+ id='026909',
1561
+ attached_to_email_id='026893',
1562
+ author=GOLDMAN_INVESTMENT_MGMT,
1563
+ description=f"The Unsteady Undertow Commands the Seas (Temporarily)",
1564
+ date='2018-10-14',
1565
+ ),
1335
1566
  DocCfg(id='026944', author=GOLDMAN_INVESTMENT_MGMT, description=f"Risk of a US-Iran Military Conflict", date='2019-05-23'),
1336
1567
  DocCfg(id='018804', author='Integra Realty Resources', description=f"appraisal of going concern for IGY American Yacht Harbor Marina in {VIRGIN_ISLANDS}"),
1337
1568
  DocCfg(id='026679', author='Invesco', description=f"Global Sovereign Asset Management Study 2017"),
@@ -1346,7 +1577,7 @@ OTHER_FILES_FINANCE = [
1346
1577
  DocCfg(id='030840', author=JP_MORGAN, description=f"Market Thoughts"),
1347
1578
  DocCfg(id='022350', author=JP_MORGAN, description=f"tax efficiency of Intentionally Defective Grantor Trusts (IDGT)"),
1348
1579
  DocCfg(id='025242', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-04-09'),
1349
- DocCfg(id='030010', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-06-14'),
1580
+ DocCfg(id='030010', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, attached_to_email_id='030006', date='2011-06-14'),
1350
1581
  DocCfg(id='030808', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-11'),
1351
1582
  DocCfg(id='025221', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-25'),
1352
1583
  DocCfg(id='025229', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-08-04'),
@@ -1376,7 +1607,6 @@ OTHER_FILES_FINANCE = [
1376
1607
  date='2006-09-27',
1377
1608
  description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
1378
1609
  ),
1379
-
1380
1610
  DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
1381
1611
  ]
1382
1612
 
@@ -1455,12 +1685,12 @@ OTHER_FILES_SOCIAL = [
1455
1685
  DocCfg(id='017787', author=ALAN_DERSHOWITZ, description=DERSH_GIUFFRE_TWEET),
1456
1686
  DocCfg(id='033433', author=ALAN_DERSHOWITZ, description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-03-02'),
1457
1687
  DocCfg(id='033432', author=ALAN_DERSHOWITZ, description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-05-02'),
1458
- DocCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20'),
1459
- DocCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets about #Brexit', date='2016-06-23'),
1460
- DocCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05'),
1461
- DocCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} forecasting election for Trump", date='2016-10-25'),
1462
- DocCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20'),
1463
- DocCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate"),
1688
+ DocCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20', attached_to_email_id='033171'),
1689
+ DocCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets from #Brexit case study', date='2016-06-23', attached_to_email_id='033171'),
1690
+ DocCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05', attached_to_email_id='032323'),
1691
+ DocCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} eleciton social media trends report", date='2016-10-25', attached_to_email_id='032280'),
1692
+ DocCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20', attached_to_email_id='033171'),
1693
+ DocCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate", attached_to_email_id='026626'),
1464
1694
  DocCfg(id='022213', description=f"{SCREENSHOT} Facebook group called 'Shit Pilots Say' disparaging a 'global girl'"),
1465
1695
  DocCfg(id='030884', description=f"{TWEET} by Ed Krassenstein"),
1466
1696
  DocCfg(id='031546', description=f"{TWEET}s by Donald Trump about Russian collusion", date='2018-01-06'),
@@ -1488,7 +1718,7 @@ OTHER_FILES_POLITICS = [
1488
1718
  id='023133',
1489
1719
  author=f"{TERJE_ROD_LARSEN}, Nur Laiq, Fabrice Aidan",
1490
1720
  description=f'The Search for Peace in the Arab-Israeli Conflict',
1491
- date='2019-12-09',
1721
+ date='2014-12-09',
1492
1722
  ),
1493
1723
  DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
1494
1724
  DocCfg(
@@ -1503,8 +1733,8 @@ OTHER_FILES_POLITICS = [
1503
1733
  date='2015-01-15', # TODO: this is just a guess
1504
1734
  duplicate_ids=['028887'],
1505
1735
  ),
1506
- DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1507
- DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1736
+ DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True, attached_to_email_id='033091'),
1737
+ DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True, attached_to_email_id='033091'),
1508
1738
  ]
1509
1739
 
1510
1740
  OTHER_FILES_ACADEMIA = [
@@ -1601,7 +1831,11 @@ OTHER_FILES_ARTS = [
1601
1831
  date='2010-02-01',
1602
1832
  duplicate_ids=['025210']
1603
1833
  ),
1604
- DocCfg(id='028281', description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya'),
1834
+ DocCfg(
1835
+ id='028281',
1836
+ date='2010-10-13',
1837
+ description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya',
1838
+ ),
1605
1839
  ]
1606
1840
 
1607
1841
  OTHER_FILES_MISC = [
@@ -1631,7 +1865,7 @@ OTHER_FILES_MISC = [
1631
1865
  DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1632
1866
  DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1633
1867
  DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
1634
- DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
1868
+ DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019446'),
1635
1869
  DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
1636
1870
  DocCfg(
1637
1871
  id='030142',
@@ -1702,8 +1936,8 @@ for cfg in ALL_CONFIGS:
1702
1936
 
1703
1937
  # Add extra config objects for duplicate files that match the config of file they are duplicating
1704
1938
  for dupe_cfg in cfg.duplicate_cfgs():
1705
- if not isinstance(dupe_cfg, EmailCfg):
1706
- logger.debug(f"Generated synthetic config for dupe: {dupe_cfg}")
1939
+ # if not isinstance(dupe_cfg, EmailCfg):
1940
+ # logger.debug(f"Generated synthetic config for dupe: {dupe_cfg}")
1707
1941
 
1708
1942
  ALL_FILE_CONFIGS[dupe_cfg.id] = dupe_cfg
1709
1943
 
@@ -1716,17 +1950,17 @@ NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
1716
1950
  REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
1717
1951
  REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
1718
1952
  REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
1719
- REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1953
+ REPLY_LINE_ON_DATE_PATTERN = fr"^[> •]*On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Fe(b|vr\.)|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
1720
1954
  REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
1721
1955
  REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
1722
1956
  SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
1723
1957
 
1724
-
1725
1958
  # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
1726
- UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1959
+ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + TRIVERS_CCS + [
1727
1960
  'Alan Dlugash', # CCed with Richard Kahn
1728
1961
  'Alan Rogers', # Random CC
1729
1962
  'Andrew Friendly', # Presumably some relation of Kelly Friendly
1963
+ 'Ariane Dwyer', # Sabba CC
1730
1964
  'BS Stern', # A random fwd of email we have
1731
1965
  'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
1732
1966
  'Connie Zaguirre', # Random CC
@@ -1734,9 +1968,11 @@ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1734
1968
  'Danny Goldberg', # Random Paul Krassner emails
1735
1969
  GERALD_LEFCOURT, # Single CC
1736
1970
  GORDON_GETTY, # Random CC
1971
+ 'Grant J. Smith', # Ken Jenne CC
1737
1972
  JEFF_FULLER, # Random Jean Luc Brunel CC
1738
1973
  'Jojo Fontanilla', # Random CC
1739
1974
  'Joseph Vinciguerra', # Random CC
1975
+ 'Kirk Blouin', # John Page / Police Code Enforcement chain
1740
1976
  'Larry Cohen', # Random Bill Gates CC
1741
1977
  'Lyn Fontanilla', # Random CC
1742
1978
  'Mark Albert', # Random CC
@@ -1747,12 +1983,14 @@ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
1747
1983
  'Nancy Portland', # Lawrence Krauss CC
1748
1984
  'Oliver Goodenough', # Robert Trivers CC
1749
1985
  'Peter Aldhous', # Lawrence Krauss CC
1986
+ 'Peter Green', # Farkas emailer
1750
1987
  'Players2', # Hoffenberg CC
1751
1988
  'Police Code Enforcement', # Kirk Blouin / John Page CC
1752
1989
  'Sam Harris', # Lawrence Krauss CC
1753
1990
  SAMUEL_LEFF, # Random CC
1754
1991
  'Sean T Lehane', # Random CC
1755
1992
  'Stephen Rubin', # Random CC
1993
+ THANU_BOONYAWATANA, # Eduardo Robles CC
1756
1994
  'Tim Kane', # Random CC
1757
1995
  'Travis Pangburn', # Random CC
1758
1996
  'Vahe Stepanian', # Random CC