epstein-files 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +15 -7
- epstein_files/documents/communication.py +3 -3
- epstein_files/documents/document.py +10 -3
- epstein_files/documents/email.py +105 -107
- epstein_files/documents/emails/email_header.py +4 -2
- epstein_files/documents/imessage/text_message.py +8 -12
- epstein_files/documents/messenger_log.py +8 -8
- epstein_files/epstein_files.py +123 -119
- epstein_files/person.py +350 -0
- epstein_files/util/constant/names.py +66 -50
- epstein_files/util/constant/output_files.py +1 -0
- epstein_files/util/constant/strings.py +3 -1
- epstein_files/util/constant/urls.py +14 -2
- epstein_files/util/constants.py +134 -26
- epstein_files/util/data.py +1 -12
- epstein_files/util/doc_cfg.py +30 -14
- epstein_files/util/env.py +3 -1
- epstein_files/util/file_helper.py +4 -1
- epstein_files/util/highlighted_group.py +228 -166
- epstein_files/util/output.py +108 -165
- epstein_files/util/rich.py +23 -45
- epstein_files/util/word_count.py +2 -3
- {epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/METADATA +2 -1
- epstein_files-1.2.0.dist-info/RECORD +34 -0
- epstein_files-1.1.3.dist-info/RECORD +0 -33
- {epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.3.dist-info → epstein_files-1.2.0.dist-info}/entry_points.txt +0 -0
epstein_files/util/constants.py
CHANGED
|
@@ -19,12 +19,13 @@ HEADER_ABBREVIATIONS = {
|
|
|
19
19
|
'bgC3': 'Bill Gates Ventures (renamed in 2018)',
|
|
20
20
|
"Brock": 'Brock Pierce (crypto bro with a very sordid past)',
|
|
21
21
|
"DB": "Deutsche Bank (maybe??)",
|
|
22
|
+
"GRAT": "Grantor Retained Annuity Trust (tax shelter)",
|
|
22
23
|
'HBJ': "Sheikh Hamad bin Jassim (former Qatari prime minister)",
|
|
23
24
|
'Jabor': '"an influential man in Qatar"',
|
|
24
25
|
'Jared': "Jared Kushner",
|
|
25
26
|
'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
|
|
26
27
|
'JEGE': "Epstein's airplane holding company",
|
|
27
|
-
|
|
28
|
+
JEFFREY_WERNICK: 'right wing crypto bro, former COO of Parler',
|
|
28
29
|
'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
|
|
29
30
|
"Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
|
|
30
31
|
'KSA': "Kingdom of Saudi Arabia",
|
|
@@ -62,6 +63,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
62
63
|
BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
|
|
63
64
|
BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
|
|
64
65
|
BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
|
|
66
|
+
BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
|
|
65
67
|
BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
|
|
66
68
|
BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
|
|
67
69
|
BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
|
|
@@ -83,13 +85,14 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
83
85
|
JACKIE_PERCZEK: re.compile(r'jackie percze[kl]?', re.IGNORECASE),
|
|
84
86
|
JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
|
|
85
87
|
JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
|
|
88
|
+
JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
|
|
86
89
|
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
|
|
87
90
|
JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
|
|
88
91
|
JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
|
|
89
92
|
JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
|
|
90
93
|
JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
|
|
91
94
|
JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
|
|
92
|
-
JONATHAN_FARKAS: re.compile(r'Jonathan
|
|
95
|
+
JONATHAN_FARKAS: re.compile(r'Jonathan Fark(a|u)(s|il)', re.IGNORECASE),
|
|
93
96
|
KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
|
|
94
97
|
KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
|
|
95
98
|
LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
|
|
@@ -157,6 +160,7 @@ EMAILERS = [
|
|
|
157
160
|
BILL_GATES,
|
|
158
161
|
BILL_SIEGEL,
|
|
159
162
|
BRAD_WECHSLER,
|
|
163
|
+
CHRISTINA_GALBRAITH,
|
|
160
164
|
DANIEL_SABBA,
|
|
161
165
|
'Danny Goldberg',
|
|
162
166
|
DAVID_SCHOEN,
|
|
@@ -302,15 +306,53 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
|
|
|
302
306
|
################################################ EMAILS ################################################
|
|
303
307
|
########################################################################################################
|
|
304
308
|
|
|
305
|
-
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
306
|
-
|
|
307
309
|
# Some emails have a lot of uninteresting CCs
|
|
308
|
-
|
|
309
|
-
|
|
310
|
+
FLIGHT_IN_2012_PEOPLE: list[Name] = ['Francis Derby', JANUSZ_BANASIAK, 'Louella Rabuyo', 'Richard Barnnet']
|
|
311
|
+
IRAN_DEAL_RECIPIENTS: list[Name] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
|
|
312
|
+
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
310
313
|
|
|
311
314
|
EMAILS_CONFIG = [
|
|
315
|
+
# 026294 and 026296 might also be Ittihadieh based on timing
|
|
312
316
|
EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
|
|
317
|
+
# 032542 026078 026080 026083 026086 026090 might also be Anas based on discussion of Dubai and Kuwait
|
|
313
318
|
EmailCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
|
|
319
|
+
EmailCfg(id='026167', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
320
|
+
EmailCfg(id='032571', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
321
|
+
EmailCfg(id='032573', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
322
|
+
EmailCfg(id='032575', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
323
|
+
EmailCfg(id='032577', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
324
|
+
EmailCfg(id='032579', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
325
|
+
EmailCfg(id='032582', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
326
|
+
EmailCfg(id='032585', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
327
|
+
EmailCfg(id='032588', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
328
|
+
EmailCfg(id='032591', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
329
|
+
EmailCfg(id='032595', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
330
|
+
EmailCfg(id='032599', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
331
|
+
EmailCfg(id='032611', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
332
|
+
EmailCfg(id='023661', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
333
|
+
EmailCfg(id='032616', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
334
|
+
EmailCfg(id='032622', author=ANAS_ALRASHEED, attribution_reason='name visible in 033022 reply'),
|
|
335
|
+
EmailCfg(id='032628', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
336
|
+
EmailCfg(id='032629', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
337
|
+
EmailCfg(id='032631', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
338
|
+
EmailCfg(id='026168', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
339
|
+
EmailCfg(id='026170', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
340
|
+
EmailCfg(id='026173', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
341
|
+
EmailCfg(id='026176', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
342
|
+
EmailCfg(id='026180', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
343
|
+
EmailCfg(id='026184', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
344
|
+
EmailCfg(id='026188', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
345
|
+
EmailCfg(id='026193', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
346
|
+
EmailCfg(id='026198', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
347
|
+
EmailCfg(id='026210', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
348
|
+
EmailCfg(id='026204', author=ANAS_ALRASHEED, attribution_reason='name visible in 026180 reply'),
|
|
349
|
+
EmailCfg(id='032660', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
350
|
+
EmailCfg(id='032663', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
351
|
+
EmailCfg(id='032667', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
352
|
+
EmailCfg(id='032672', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
353
|
+
EmailCfg(id='032676', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
354
|
+
EmailCfg(id='026237', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
355
|
+
EmailCfg(id='032682', author=ANAS_ALRASHEED, attribution_reason='name visible in 029113 reply'),
|
|
314
356
|
EmailCfg(id='026064', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
|
|
315
357
|
EmailCfg(id='026069', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
|
|
316
358
|
EmailCfg(id='030741', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
|
|
@@ -351,6 +393,7 @@ EMAILS_CONFIG = [
|
|
|
351
393
|
actual_text='',
|
|
352
394
|
author=DARREN_INDYKE,
|
|
353
395
|
description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book",
|
|
396
|
+
recipients=['Charles Michael'],
|
|
354
397
|
timestamp=parse('2016-08-17 11:26:00'),
|
|
355
398
|
attribution_reason='Quoted replies are in 019109',
|
|
356
399
|
),
|
|
@@ -369,11 +412,11 @@ EMAILS_CONFIG = [
|
|
|
369
412
|
dupe_type='redacted'
|
|
370
413
|
),
|
|
371
414
|
EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
|
|
372
|
-
EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='
|
|
373
|
-
EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='
|
|
415
|
+
EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='signature "Longevity & Successful Aging"'),
|
|
416
|
+
EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='signature "beckresearchlabs.com"', duplicate_ids=['031120']),
|
|
374
417
|
EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
|
|
375
|
-
EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='
|
|
376
|
-
EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
|
|
418
|
+
EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
|
|
419
|
+
EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='signature "Beck Center for Longevity & Aging"'),
|
|
377
420
|
EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
|
|
378
421
|
EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
|
|
379
422
|
EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
|
|
@@ -434,7 +477,7 @@ EMAILS_CONFIG = [
|
|
|
434
477
|
EmailCfg(
|
|
435
478
|
id='029977',
|
|
436
479
|
author=LAWRANCE_VISOSKI,
|
|
437
|
-
recipients=
|
|
480
|
+
recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
|
|
438
481
|
attribution_reason=LARRY_REASON,
|
|
439
482
|
duplicate_ids=['031129'],
|
|
440
483
|
),
|
|
@@ -444,11 +487,19 @@ EMAILS_CONFIG = [
|
|
|
444
487
|
EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
|
|
445
488
|
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
|
|
446
489
|
EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
|
|
490
|
+
EmailCfg(id='032563', author=MASHA_DROKOVA, attribution_reason='replied to in 033014'),
|
|
491
|
+
EmailCfg(id='032564', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
|
|
492
|
+
EmailCfg(id='031544', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
|
|
493
|
+
EmailCfg(id='032605', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
494
|
+
EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
495
|
+
EmailCfg(id='032607', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
496
|
+
EmailCfg(id='032609', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
497
|
+
EmailCfg(id='032604', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
|
|
498
|
+
EmailCfg(id='032581', author=MASHA_DROKOVA, attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
|
|
447
499
|
EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
|
|
448
500
|
EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
|
|
449
501
|
EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
|
|
450
|
-
EmailCfg(id='
|
|
451
|
-
EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
|
|
502
|
+
EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'), #, actual_text="I'm a pilot...I prefer sex slave to copilot ;)"),
|
|
452
503
|
EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
|
|
453
504
|
EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
|
|
454
505
|
EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
|
|
@@ -490,11 +541,12 @@ EMAILS_CONFIG = [
|
|
|
490
541
|
author=SEAN_BANNON,
|
|
491
542
|
attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
|
|
492
543
|
),
|
|
493
|
-
EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="
|
|
494
|
-
EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="
|
|
495
|
-
EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="
|
|
496
|
-
EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="
|
|
497
|
-
EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="
|
|
544
|
+
EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
|
|
545
|
+
EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
|
|
546
|
+
EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
|
|
547
|
+
EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
|
|
548
|
+
EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason='"Sent from Soon-Yi\'s iPhone"'),
|
|
549
|
+
EmailCfg(id='033292', author=SOON_YI_PREVIN, attribution_reason='mentions "Woody\'s movie"', is_attribution_uncertain=True),
|
|
498
550
|
EmailCfg(
|
|
499
551
|
id='019109',
|
|
500
552
|
author=STEVEN_HOFFENBERG,
|
|
@@ -507,13 +559,15 @@ EMAILS_CONFIG = [
|
|
|
507
559
|
attribution_reason='ends with "Respectfully, terry"',
|
|
508
560
|
author=TERRY_KAFKA,
|
|
509
561
|
fwded_text_after='From: Mike Cohen',
|
|
510
|
-
recipients=
|
|
562
|
+
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
|
|
563
|
+
subject='Fw: The Iran Nuclear Deal',
|
|
511
564
|
duplicate_ids=['028482'],
|
|
512
565
|
),
|
|
513
566
|
EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
|
|
514
567
|
EmailCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
|
|
515
568
|
EmailCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
|
|
516
569
|
EmailCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
|
|
570
|
+
EmailCfg(id='033021', recipients=[ANAS_ALRASHEED], attribution_reason='visible in 033022'),
|
|
517
571
|
EmailCfg(id='027063', recipients=[ANTHONY_BARRETT]),
|
|
518
572
|
EmailCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
|
|
519
573
|
EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
|
|
@@ -538,6 +592,7 @@ EMAILS_CONFIG = [
|
|
|
538
592
|
EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
539
593
|
EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
|
|
540
594
|
EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
|
|
595
|
+
EmailCfg(id='029558', recipients=[JEFFREY_EPSTEIN, KATHERINE_KEATING], attribution_reason='BCC', fwded_text_after='Creativity is central'),
|
|
541
596
|
EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
|
|
542
597
|
EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
|
|
543
598
|
EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
|
|
@@ -566,10 +621,14 @@ EMAILS_CONFIG = [
|
|
|
566
621
|
EmailCfg(id='033466', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature'),
|
|
567
622
|
EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
|
|
568
623
|
EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
|
|
624
|
+
EmailCfg(id='033027', recipients=[MASHA_DROKOVA], attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
625
|
+
EmailCfg(id='033025', recipients=[MASHA_DROKOVA], attribution_reason="timing, subject (interviews/articles), and sequential ID", is_attribution_uncertain=True),
|
|
569
626
|
EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
|
|
570
627
|
EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
|
|
571
628
|
EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
|
|
572
629
|
EmailCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
|
|
630
|
+
EmailCfg(id='022193', recipients=[NADIA_MARCINKO], attribution_reason='reply'),
|
|
631
|
+
EmailCfg(id='030572', recipients=[PAULA], attribution_reason='quoted in 030482', is_attribution_uncertain=True),
|
|
573
632
|
EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
|
|
574
633
|
EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
|
|
575
634
|
EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
|
|
@@ -585,8 +644,9 @@ EMAILS_CONFIG = [
|
|
|
585
644
|
EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
|
|
586
645
|
EmailCfg(id='033050', actual_text='schwartman'),
|
|
587
646
|
EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
|
|
588
|
-
EmailCfg(id='
|
|
589
|
-
EmailCfg(id='
|
|
647
|
+
EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
|
|
648
|
+
EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
|
|
649
|
+
EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
|
|
590
650
|
EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
591
651
|
EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
|
|
592
652
|
EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
|
|
@@ -640,7 +700,6 @@ EMAILS_CONFIG = [
|
|
|
640
700
|
EmailCfg(id='031472', is_fwded_article=True), # WSJ: Lawyers for Imam Wanted by Turkish authorities Fear for Their Client's Life
|
|
641
701
|
EmailCfg(id='012684', is_fwded_article=True), # Trump in talks to buy socialite Kluge's Charlottesville vineyard
|
|
642
702
|
EmailCfg(id='028536', is_fwded_article=True), # Palm Beach Post FBI Epstein files say he gave info. Does it explain sweetheart deal?
|
|
643
|
-
EmailCfg(id='028524', is_fwded_article=True), # Zach Braff article on Woody Allen in NYT
|
|
644
703
|
EmailCfg(id='030326', is_fwded_article=True), # NYP Congressional candidate compares Melania Trump to prostitute
|
|
645
704
|
EmailCfg(id='030519', is_fwded_article=True), # Daily Mail on Prince Andrew
|
|
646
705
|
EmailCfg(id='030878', is_fwded_article=True), # Steve Bannon almost appeared in Michael Moore's 'Fahrenheit 11/9'
|
|
@@ -655,6 +714,7 @@ EMAILS_CONFIG = [
|
|
|
655
714
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
656
715
|
|
|
657
716
|
# Configure duplicates
|
|
717
|
+
EmailCfg(id='026631', duplicate_ids=['026632'], dupe_type='quoted'),
|
|
658
718
|
EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
|
|
659
719
|
EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
|
|
660
720
|
EmailCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
|
|
@@ -667,6 +727,7 @@ EMAILS_CONFIG = [
|
|
|
667
727
|
EmailCfg(id='029841', duplicate_ids=['012711'], dupe_type='redacted'),
|
|
668
728
|
EmailCfg(id='030414', duplicate_ids=['030578'], dupe_type='redacted'),
|
|
669
729
|
EmailCfg(id='031135', duplicate_ids=['030634'], dupe_type='redacted'),
|
|
730
|
+
EmailCfg(id='030620', duplicate_ids=['023067']),
|
|
670
731
|
EmailCfg(id='029835', duplicate_ids=['028968']),
|
|
671
732
|
EmailCfg(id='033512', duplicate_ids=['033361']),
|
|
672
733
|
EmailCfg(id='030299', duplicate_ids=['021794']),
|
|
@@ -768,7 +829,6 @@ EMAILS_CONFIG = [
|
|
|
768
829
|
# Emails that need a little help determining how to separate the actual text from fwded text
|
|
769
830
|
EmailCfg(id='013415', fwded_text_after='Darren K. Indyke'),
|
|
770
831
|
EmailCfg(id='024624', fwded_text_after='On Tue, May 14'),
|
|
771
|
-
EmailCfg(id='029558', fwded_text_after='Creativity is central'),
|
|
772
832
|
EmailCfg(id='025888', fwded_text_after='Jul 24, 2015'),
|
|
773
833
|
EmailCfg(id='016413', fwded_text_after='In a former warehouse'),
|
|
774
834
|
EmailCfg(id='025548', fwded_text_after='Edward Jay Epstein'),
|
|
@@ -1323,8 +1383,6 @@ OTHER_FILES_LETTERS = [
|
|
|
1323
1383
|
description=f"letter about algorithmic trading",
|
|
1324
1384
|
date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
|
|
1325
1385
|
),
|
|
1326
|
-
DocCfg(id='029304', author=DONALD_TRUMP, description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}"),
|
|
1327
|
-
DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the {TRUMP_ORG}", date='2011-08-07'),
|
|
1328
1386
|
DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
|
|
1329
1387
|
]
|
|
1330
1388
|
|
|
@@ -1475,13 +1533,27 @@ OTHER_FILES_ACADEMIA = [
|
|
|
1475
1533
|
|
|
1476
1534
|
# resumes and application letters
|
|
1477
1535
|
OTHER_FILES_RESUMES = [
|
|
1536
|
+
DocCfg(
|
|
1537
|
+
id='029304',
|
|
1538
|
+
attached_to_email_id='029299',
|
|
1539
|
+
author=DONALD_TRUMP,
|
|
1540
|
+
description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}",
|
|
1541
|
+
),
|
|
1478
1542
|
DocCfg(id='022367', author='Jack J Grynberg', description=RESUME_OF, date='2014-07-01'),
|
|
1479
1543
|
DocCfg(
|
|
1480
1544
|
id='029302',
|
|
1545
|
+
attached_to_email_id='029299',
|
|
1481
1546
|
author=MICHAEL_J_BOCCIO,
|
|
1482
1547
|
description=f"{RESUME_OF} (former lawyer at the {TRUMP_ORG})",
|
|
1483
1548
|
date='2011-08-07',
|
|
1484
1549
|
),
|
|
1550
|
+
DocCfg(
|
|
1551
|
+
id='029301',
|
|
1552
|
+
attached_to_email_id='029299',
|
|
1553
|
+
author=MICHAEL_J_BOCCIO,
|
|
1554
|
+
description=f"letter from former lawyer at the {TRUMP_ORG}",
|
|
1555
|
+
date='2011-08-07',
|
|
1556
|
+
),
|
|
1485
1557
|
DocCfg(id='029102', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
|
|
1486
1558
|
DocCfg(id='029104', author=NERIO_ALESSANDRI, description=HBS_APPLICATION),
|
|
1487
1559
|
DocCfg(id='015671', author='Robin Solomon', description=RESUME_OF, date='2015-06-02'), # She left Mount Sinai at some point in 2015,
|
|
@@ -1622,4 +1694,40 @@ REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_P
|
|
|
1622
1694
|
REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
|
|
1623
1695
|
REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1624
1696
|
REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
|
|
1625
|
-
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?(Sent (from|via).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
|
|
1697
|
+
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
|
|
1698
|
+
|
|
1699
|
+
|
|
1700
|
+
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
1701
|
+
UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
|
|
1702
|
+
'Alan Dlugash', # CCed with Richard Kahn
|
|
1703
|
+
'Alan Rogers', # Random CC
|
|
1704
|
+
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
1705
|
+
'BS Stern', # A random fwd of email we have
|
|
1706
|
+
'Cheryl Kleen', # Single email from Anne Boyles, displayed under Anne Boyles
|
|
1707
|
+
'Connie Zaguirre', # Random CC
|
|
1708
|
+
'Dan Fleuette', # CC from sean bannon
|
|
1709
|
+
'Danny Goldberg', # Random Paul Krassner emails
|
|
1710
|
+
GERALD_LEFCOURT, # Single CC
|
|
1711
|
+
GORDON_GETTY, # Random CC
|
|
1712
|
+
JEFF_FULLER, # Random Jean Luc Brunel CC
|
|
1713
|
+
'Jojo Fontanilla', # Random CC
|
|
1714
|
+
'Joseph Vinciguerra', # Random CC
|
|
1715
|
+
'Larry Cohen', # Random Bill Gates CC
|
|
1716
|
+
'Lyn Fontanilla', # Random CC
|
|
1717
|
+
'Mark Albert', # Random CC
|
|
1718
|
+
'Matthew Schafer', # Random CC
|
|
1719
|
+
MICHAEL_BUCHHOLTZ, # Terry Kafka CC
|
|
1720
|
+
'Nancy Dahl', # covered by Lawrence Krauss (her husband)
|
|
1721
|
+
'Michael Simmons', # Random CC
|
|
1722
|
+
'Nancy Portland', # Lawrence Krauss CC
|
|
1723
|
+
'Oliver Goodenough', # Robert Trivers CC
|
|
1724
|
+
'Peter Aldhous', # Lawrence Krauss CC
|
|
1725
|
+
'Players2', # Hoffenberg CC
|
|
1726
|
+
'Sam Harris', # Lawrence Krauss CC
|
|
1727
|
+
SAMUEL_LEFF, # Random CC
|
|
1728
|
+
'Sean T Lehane', # Random CC
|
|
1729
|
+
'Stephen Rubin', # Random CC
|
|
1730
|
+
'Tim Kane', # Random CC
|
|
1731
|
+
'Travis Pangburn', # Random CC
|
|
1732
|
+
'Vahe Stepanian', # Random CC
|
|
1733
|
+
]
|
epstein_files/util/data.py
CHANGED
|
@@ -8,6 +8,7 @@ from dateutil import tz
|
|
|
8
8
|
from typing import TypeVar
|
|
9
9
|
|
|
10
10
|
from epstein_files.util.constant import names
|
|
11
|
+
from epstein_files.util.constant.strings import QUESTION_MARKS
|
|
11
12
|
from epstein_files.util.env import args
|
|
12
13
|
from epstein_files.util.logging import logger
|
|
13
14
|
|
|
@@ -37,18 +38,6 @@ def dict_sets_to_lists(d: dict[str, set]) -> dict[str, list]:
|
|
|
37
38
|
return {k: sorted(list(v)) for k, v in d.items()}
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
def extract_last_name(name: str) -> str:
|
|
41
|
-
if ' ' not in name:
|
|
42
|
-
return name
|
|
43
|
-
|
|
44
|
-
names = name.split()
|
|
45
|
-
|
|
46
|
-
if names[-1].startswith('Jr') and len(names[-1]) <= 3:
|
|
47
|
-
return ' '.join(names[-2:])
|
|
48
|
-
else:
|
|
49
|
-
return names[-1]
|
|
50
|
-
|
|
51
|
-
|
|
52
41
|
def flatten(_list: list[list[T]]) -> list[T]:
|
|
53
42
|
return list(itertools.chain.from_iterable(_list))
|
|
54
43
|
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -62,7 +62,7 @@ class DocCfg:
|
|
|
62
62
|
|
|
63
63
|
Attributes:
|
|
64
64
|
id (str): ID of file
|
|
65
|
-
author (
|
|
65
|
+
author (Name): Author of the document (if any)
|
|
66
66
|
category (str | None): Type of file
|
|
67
67
|
date (str | None): If passed will be immediated parsed into the 'timestamp' field
|
|
68
68
|
dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
|
|
@@ -74,7 +74,7 @@ class DocCfg:
|
|
|
74
74
|
"""
|
|
75
75
|
id: str
|
|
76
76
|
attached_to_email_id: str | None = None
|
|
77
|
-
author:
|
|
77
|
+
author: Name = None
|
|
78
78
|
category: str | None = None
|
|
79
79
|
date: str | None = None
|
|
80
80
|
description: str | None = None
|
|
@@ -94,26 +94,40 @@ class DocCfg:
|
|
|
94
94
|
|
|
95
95
|
def complete_description(self) -> str | None:
|
|
96
96
|
"""String that summarizes what is known about this document."""
|
|
97
|
+
description = ''
|
|
98
|
+
|
|
97
99
|
if self.category and not self.description and not self.author:
|
|
98
|
-
|
|
100
|
+
if self.category == JUNK:
|
|
101
|
+
return None
|
|
102
|
+
else:
|
|
103
|
+
description = self.category
|
|
99
104
|
elif self.category == REPUTATION:
|
|
100
|
-
|
|
105
|
+
author_str = f"{self.author} " if self.author else ''
|
|
106
|
+
description = f"{REPUTATION_MGMT}: {author_str}{self.description}"
|
|
101
107
|
elif self.category == SKYPE_LOG:
|
|
102
108
|
msg = f"{self.category} of conversation with {self.author}" if self.author else self.category
|
|
103
|
-
|
|
109
|
+
description = f"{msg} {self.description}" if self.description else msg
|
|
104
110
|
elif self.author and self.description:
|
|
105
111
|
if self.category in [ACADEMIA, BOOK]:
|
|
106
112
|
title = self.description if '"' in self.description else f'"{self.description}"'
|
|
107
|
-
|
|
113
|
+
description = f"{title} by {self.author}"
|
|
108
114
|
elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
|
|
109
|
-
|
|
115
|
+
description = f'{self.author} report: "{self.description}"'
|
|
110
116
|
elif self.category == LEGAL and 'v.' in self.author:
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
117
|
+
description = f"{self.author}: {self.description}"
|
|
118
|
+
|
|
119
|
+
if not description:
|
|
120
|
+
pieces = without_falsey([self.author, self.description])
|
|
121
|
+
|
|
122
|
+
if pieces:
|
|
123
|
+
description = ' '.join(pieces)
|
|
124
|
+
else:
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
if self.attached_to_email_id:
|
|
128
|
+
description += f" attached to email {self.attached_to_email_id}"
|
|
114
129
|
|
|
115
|
-
|
|
116
|
-
return ' '.join(pieces) if pieces else None
|
|
130
|
+
return description
|
|
117
131
|
|
|
118
132
|
def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
|
|
119
133
|
"""Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
|
|
@@ -205,12 +219,14 @@ class EmailCfg(CommunicationCfg):
|
|
|
205
219
|
actual_text (str | None): In dire cases of broken OCR we just configure the body of the email as a string.
|
|
206
220
|
fwded_text_after (str | None): If set, any text after this is a fwd of an article or similar
|
|
207
221
|
is_fwded_article (bool): True if this is a newspaper article someone fwded. Used to exclude articles from word counting.
|
|
208
|
-
recipients (list[
|
|
222
|
+
recipients (list[Name]): Who received the email
|
|
223
|
+
subject (str): Subject line
|
|
209
224
|
"""
|
|
210
225
|
actual_text: str | None = None
|
|
211
226
|
fwded_text_after: str | None = None
|
|
212
227
|
is_fwded_article: bool = False
|
|
213
|
-
recipients: list[
|
|
228
|
+
recipients: list[Name] = field(default_factory=list)
|
|
229
|
+
subject: str | None = None
|
|
214
230
|
|
|
215
231
|
# This is necessary because for some dumb reason @dataclass(repr=False) doesn't cut it
|
|
216
232
|
def __repr__(self) -> str:
|
epstein_files/util/env.py
CHANGED
|
@@ -8,7 +8,7 @@ from rich_argparse_plus import RichHelpFormatterPlus
|
|
|
8
8
|
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
|
|
9
9
|
from epstein_files.util.logging import env_log_level, exit_with_error, logger
|
|
10
10
|
|
|
11
|
-
DEFAULT_WIDTH =
|
|
11
|
+
DEFAULT_WIDTH = 155
|
|
12
12
|
DEFAULT_FILE = 'default_file'
|
|
13
13
|
EPSTEIN_GENERATE = 'epstein_generate'
|
|
14
14
|
HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
|
|
@@ -38,6 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
|
|
|
38
38
|
output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
|
|
39
39
|
parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
|
|
40
40
|
output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
|
|
41
|
+
output.add_argument('--emailers-info-png', action='store_true', help='write a .png of the emeailers info table')
|
|
41
42
|
output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
|
|
42
43
|
output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
|
|
43
44
|
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
@@ -66,6 +67,7 @@ debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debu
|
|
|
66
67
|
args = parser.parse_args()
|
|
67
68
|
is_html_script = parser.prog in HTML_SCRIPTS
|
|
68
69
|
|
|
70
|
+
args.build = args.build or args.emailers_info_png
|
|
69
71
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
70
72
|
args.names = [None if n == 'None' else n for n in (args.names or [])]
|
|
71
73
|
args.output_emails = args.output_emails or args.all_emails
|
|
@@ -33,10 +33,13 @@ def coerce_file_stem(filename_or_id: int | str) -> str:
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def extract_file_id(filename_or_id: int | str | Path) -> str:
|
|
36
|
+
if isinstance(filename_or_id, str):
|
|
37
|
+
filename_or_id = filename_or_id.removesuffix(',')
|
|
38
|
+
|
|
36
39
|
if isinstance(filename_or_id, int) or (isinstance(filename_or_id, str) and len(filename_or_id) <= 6):
|
|
37
40
|
return id_str(filename_or_id)
|
|
38
41
|
|
|
39
|
-
file_match = FILE_ID_REGEX.match(str(filename_or_id))
|
|
42
|
+
file_match = FILE_ID_REGEX.match(str(filename_or_id).upper())
|
|
40
43
|
|
|
41
44
|
if not file_match:
|
|
42
45
|
raise RuntimeError(f"Failed to extract file ID from {filename_or_id}")
|