epstein-files 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +11 -24
- epstein_files/documents/communication.py +0 -3
- epstein_files/documents/document.py +61 -18
- epstein_files/documents/email.py +11 -5
- epstein_files/documents/emails/email_header.py +10 -2
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/other_file.py +16 -34
- epstein_files/epstein_files.py +23 -33
- epstein_files/person.py +39 -65
- epstein_files/util/constant/names.py +13 -6
- epstein_files/util/constant/strings.py +0 -1
- epstein_files/util/constant/urls.py +1 -0
- epstein_files/util/constants.py +3 -1
- epstein_files/util/data.py +1 -1
- epstein_files/util/doc_cfg.py +3 -3
- epstein_files/util/env.py +4 -4
- epstein_files/util/highlighted_group.py +112 -94
- epstein_files/util/logging.py +1 -1
- epstein_files/util/output.py +36 -12
- epstein_files/util/rich.py +14 -14
- epstein_files/util/timer.py +14 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.1.dist-info}/METADATA +5 -2
- epstein_files-1.2.1.dist-info/RECORD +34 -0
- epstein_files-1.2.0.dist-info/RECORD +0 -34
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.1.dist-info}/WHEEL +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.1.dist-info}/entry_points.txt +0 -0
|
@@ -10,7 +10,7 @@ from epstein_files.util.constant.names import *
|
|
|
10
10
|
from epstein_files.util.constant.strings import *
|
|
11
11
|
from epstein_files.util.constant.urls import ARCHIVE_LINK_COLOR
|
|
12
12
|
from epstein_files.util.constants import (EMAILER_ID_REGEXES, EPSTEIN_V_ROTHSTEIN_EDWARDS,
|
|
13
|
-
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX
|
|
13
|
+
OSBORNE_LLP, REPLY_REGEX, SENT_FROM_REGEX)
|
|
14
14
|
from epstein_files.util.doc_cfg import *
|
|
15
15
|
from epstein_files.util.data import without_falsey
|
|
16
16
|
from epstein_files.util.env import args
|
|
@@ -20,17 +20,18 @@ CIVIL_ATTORNEY = 'civil attorney'
|
|
|
20
20
|
CRIMINAL_DEFENSE_ATTORNEY = 'criminal defense attorney'
|
|
21
21
|
CRIMINAL_DEFENSE_2008 = f"{CRIMINAL_DEFENSE_ATTORNEY} on 2008 case"
|
|
22
22
|
EPSTEIN_LAWYER = 'lawyer'
|
|
23
|
-
EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY = f"{CIVIL_ATTORNEY}
|
|
23
|
+
EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY = f"{CIVIL_ATTORNEY} {EPSTEIN_V_ROTHSTEIN_EDWARDS}"
|
|
24
24
|
ESTATE_EXECUTOR = 'estate executor'
|
|
25
25
|
EPSTEIN_ESTATE_EXECUTOR = f"Epstein {ESTATE_EXECUTOR}"
|
|
26
|
+
MC2_MODEL_MANAGEMENT = f"{JEAN_LUC_BRUNEL}'s MC2 Model Management"
|
|
26
27
|
MIDEAST = 'mideast'
|
|
27
28
|
QUESTION_MARKS_TXT = Text(QUESTION_MARKS, style='grey50')
|
|
28
29
|
REGEX_STYLE_PREFIX = 'regex'
|
|
29
|
-
SIMPLE_NAME_REGEX = re.compile(r"^[-\w ]+$", re.IGNORECASE)
|
|
30
|
+
SIMPLE_NAME_REGEX = re.compile(r"^[-\w, ]+$", re.IGNORECASE)
|
|
31
|
+
TECH_BRO = 'tech bro'
|
|
30
32
|
|
|
31
33
|
CATEGORY_STYLE_MAPPING = {
|
|
32
34
|
ARTICLE: JOURNALIST,
|
|
33
|
-
ARTS: ENTERTAINER,
|
|
34
35
|
BOOK: JOURNALIST,
|
|
35
36
|
LEGAL: EPSTEIN_LAWYER,
|
|
36
37
|
POLITICS: LOBBYIST,
|
|
@@ -152,7 +153,7 @@ class HighlightedNames(HighlightedText):
|
|
|
152
153
|
name_patterns = [remove_question_marks(name).replace(' ', r"\s+")]
|
|
153
154
|
|
|
154
155
|
if ' ' in name:
|
|
155
|
-
for partial_name in [extract_first_name(name), extract_last_name(name)]:
|
|
156
|
+
for partial_name in [extract_first_name(name), extract_last_name(name), reversed_name(name)]:
|
|
156
157
|
if partial_name.lower() not in NAMES_TO_NOT_HIGHLIGHT and SIMPLE_NAME_REGEX.match(partial_name):
|
|
157
158
|
name_patterns.append(partial_name.replace(' ', r"\s+"))
|
|
158
159
|
|
|
@@ -219,17 +220,19 @@ HIGHLIGHTED_NAMES = [
|
|
|
219
220
|
label=ACADEMIA,
|
|
220
221
|
style='light_goldenrod2',
|
|
221
222
|
emailers={
|
|
222
|
-
DAVID_HAIG: 'evolutionary geneticist?',
|
|
223
|
-
JOSCHA_BACH: 'cognitive science / AI research',
|
|
224
223
|
'Daniel Kahneman': 'Nobel economic sciences laureate and cognitivie psychologist (?)',
|
|
225
|
-
|
|
224
|
+
DAVID_HAIG: 'evolutionary geneticist?',
|
|
225
|
+
'David Grosof': 'MIT Sloan School of Management',
|
|
226
|
+
'Ed Boyden': f'{MIT_MEDIA_LAB} neurobiology professor',
|
|
226
227
|
'Harry Fisch': "men's health expert at New York-Presbyterian / Weill Cornell (?)",
|
|
227
|
-
|
|
228
|
+
JOSCHA_BACH: 'cognitive science / AI research',
|
|
229
|
+
LAWRENCE_KRAUSS: 'theoretical physicist with #MeToo problems',
|
|
228
230
|
LINDA_STONE: f'ex-Microsoft, {MIT_MEDIA_LAB}',
|
|
229
231
|
MARK_TRAMO: 'professor of neurology at UCLA',
|
|
230
232
|
'Nancy Dahl': f'wife of {LAWRENCE_KRAUSS}',
|
|
231
233
|
NEAL_KASSELL: 'professor of neurosurgery at University of Virginia',
|
|
232
234
|
NOAM_CHOMSKY: f"professor of linguistics at MIT",
|
|
235
|
+
'Norman Finkelstein': 'scholar, well known critic of Israel',
|
|
233
236
|
PETER_ATTIA: 'longevity medicine',
|
|
234
237
|
ROBERT_TRIVERS: 'evolutionary biology',
|
|
235
238
|
ROGER_SCHANK: 'Teachers College, Columbia University',
|
|
@@ -241,7 +244,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
241
244
|
r"Carl\s*Sagan",
|
|
242
245
|
r"Columbia",
|
|
243
246
|
r"Dan(iel|ny) Kahneman",
|
|
244
|
-
r"David Grosof",
|
|
245
247
|
r"J(ames|im)\s*Watson",
|
|
246
248
|
r"(Lord\s*)?Martin\s*Rees",
|
|
247
249
|
r"Massachusetts\s*Institute\s*of\s*Technology",
|
|
@@ -290,10 +292,48 @@ HIGHLIGHTED_NAMES = [
|
|
|
290
292
|
r"Zimbabwe(an)?",
|
|
291
293
|
],
|
|
292
294
|
),
|
|
295
|
+
HighlightedNames(
|
|
296
|
+
label=ARTS,
|
|
297
|
+
style='light_steel_blue3',
|
|
298
|
+
emailers={
|
|
299
|
+
ANDRES_SERRANO: "'Piss Christ' artist",
|
|
300
|
+
'Barry Josephson': 'American film producer, editor FamilySecurityMatters.org',
|
|
301
|
+
BILL_SIEGEL: 'documentary film producer and director',
|
|
302
|
+
DAVID_BLAINE: 'famous magician',
|
|
303
|
+
'Richard Merkin': 'painter, illustrator and arts educator',
|
|
304
|
+
STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
|
|
305
|
+
'Steven Gaydos': 'American screenwriter and journalist',
|
|
306
|
+
},
|
|
307
|
+
patterns=[
|
|
308
|
+
r"(Art )?Spiegelman",
|
|
309
|
+
r"Artspace",
|
|
310
|
+
r"Bobby slayton",
|
|
311
|
+
r"bono\s*mick",
|
|
312
|
+
r"Errol(\s*Morris)?",
|
|
313
|
+
r"Etienne Binant",
|
|
314
|
+
r"(Frank\s)?Gehry",
|
|
315
|
+
r"Jagger",
|
|
316
|
+
r"(Jeffrey\s*)?Katzenberg",
|
|
317
|
+
r"(Johnny\s*)?Depp",
|
|
318
|
+
r"Kid Rock",
|
|
319
|
+
r"(Larry\s*)?Gagosian",
|
|
320
|
+
r"Lena\s*Dunham",
|
|
321
|
+
r"Madonna",
|
|
322
|
+
r"Mark\s*Burnett",
|
|
323
|
+
r"Phaidon",
|
|
324
|
+
r"Ramsey Elkholy",
|
|
325
|
+
r"Regan arts",
|
|
326
|
+
r"shirley maclaine",
|
|
327
|
+
r"Woody( Allen)?",
|
|
328
|
+
r"Zach Braff",
|
|
329
|
+
],
|
|
330
|
+
),
|
|
293
331
|
HighlightedNames(
|
|
294
332
|
label=BILL_GATES,
|
|
295
333
|
style='turquoise4',
|
|
334
|
+
category=TECH_BRO,
|
|
296
335
|
emailers={
|
|
336
|
+
BILL_GATES: 'ex-Microsoft, Gates Foundation, bgC3',
|
|
297
337
|
BORIS_NIKOLIC: f'biotech VC partner of {BILL_GATES}, {EPSTEIN_ESTATE_EXECUTOR}',
|
|
298
338
|
},
|
|
299
339
|
patterns=[
|
|
@@ -344,6 +384,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
344
384
|
emailers={
|
|
345
385
|
ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
|
|
346
386
|
BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
|
|
387
|
+
BARRY_J_COHEN: None,
|
|
347
388
|
'David Mitchell': 'Mitchell Holdings New York real estate developer',
|
|
348
389
|
FRED_HADDAD: "co-founder of Heck's in West Virginia",
|
|
349
390
|
GERALD_BARTON: "Maryland property developer Landmark Land Company",
|
|
@@ -482,7 +523,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
482
523
|
MERWIN_DELA_CRUZ: None, # HOUSE_OVERSIGHT_032652 Groff says "Jojo and Merwin both requested off Nov. 25 and 26"
|
|
483
524
|
NADIA_MARCINKO: "Epstein's pilot",
|
|
484
525
|
'Sean J. Lancaster': 'airplane reseller',
|
|
485
|
-
ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, Islamabad/Dubai',
|
|
526
|
+
ZUBAIR_KHAN: 'cybersecurity firm Tranchulas CEO, InsightsPod founder, Islamabad / Dubai',
|
|
486
527
|
},
|
|
487
528
|
patterns=[
|
|
488
529
|
r"Adriana\s*Ross",
|
|
@@ -490,42 +531,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
490
531
|
r"(Sarah\s*)?Kellen", r"Vickers", # Married name is Metiers
|
|
491
532
|
],
|
|
492
533
|
),
|
|
493
|
-
HighlightedNames(
|
|
494
|
-
label=ENTERTAINER,
|
|
495
|
-
style='light_steel_blue3',
|
|
496
|
-
emailers={
|
|
497
|
-
ANDRES_SERRANO: "'Piss Christ' artist",
|
|
498
|
-
'Barry Josephson': 'American film producer, editor FamilySecurityMatters.org',
|
|
499
|
-
BILL_SIEGEL: 'documentary film producer and director',
|
|
500
|
-
DAVID_BLAINE: 'famous magician',
|
|
501
|
-
'Richard Merkin': 'painter, illustrator and arts educator',
|
|
502
|
-
STEVEN_PFEIFFER: 'Associate Director at Independent Filmmaker Project (IFP)',
|
|
503
|
-
},
|
|
504
|
-
patterns=[
|
|
505
|
-
r"(Art )?Spiegelman",
|
|
506
|
-
r"Artspace",
|
|
507
|
-
r"Bobby slayton",
|
|
508
|
-
r"bono\s*mick",
|
|
509
|
-
r"Errol(\s*Morris)?",
|
|
510
|
-
r"Etienne Binant",
|
|
511
|
-
r"(Frank\s)?Gehry",
|
|
512
|
-
r"Jagger",
|
|
513
|
-
r"(Jeffrey\s*)?Katzenberg",
|
|
514
|
-
r"(Johnny\s*)?Depp",
|
|
515
|
-
r"Kid Rock",
|
|
516
|
-
r"(Larry\s*)?Gagosian",
|
|
517
|
-
r"Lena\s*Dunham",
|
|
518
|
-
r"Madonna",
|
|
519
|
-
r"Mark\s*Burnett",
|
|
520
|
-
r"Phaidon",
|
|
521
|
-
r"Ramsey Elkholy",
|
|
522
|
-
r"Regan arts",
|
|
523
|
-
r"shirley maclaine",
|
|
524
|
-
r"Steven Gaydos?",
|
|
525
|
-
r"Woody( Allen)?",
|
|
526
|
-
r"Zach Braff",
|
|
527
|
-
],
|
|
528
|
-
),
|
|
529
534
|
HighlightedNames(
|
|
530
535
|
label='Epstein',
|
|
531
536
|
style='blue1',
|
|
@@ -565,7 +570,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
565
570
|
'Robert Gold': 'helped Epstein track down money belonging to Spanish families',
|
|
566
571
|
'Roy Black': CRIMINAL_DEFENSE_2008,
|
|
567
572
|
SCOTT_J_LINK: CRIMINAL_DEFENSE_ATTORNEY,
|
|
568
|
-
TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}, relation of Fred Haddad?
|
|
573
|
+
TONJA_HADDAD_COLEMAN: f'{EPSTEIN_V_ROTHSTEIN_EDWARDS_ATTORNEY}', # relation of Fred Haddad?
|
|
569
574
|
},
|
|
570
575
|
patterns=[
|
|
571
576
|
r"(Barry (E. )?)?Krischer",
|
|
@@ -593,8 +598,10 @@ HIGHLIGHTED_NAMES = [
|
|
|
593
598
|
style='light_sky_blue3',
|
|
594
599
|
emailers={
|
|
595
600
|
ANDRZEJ_DUDA: 'former president of Poland',
|
|
601
|
+
'Caroline Lang': 'daughter of Jack Lang',
|
|
596
602
|
"Edward Rod Larsen": f"son of {TERJE_ROD_LARSEN}",
|
|
597
603
|
'Fabrice Aidan': f'diplomat who worked with {TERJE_ROD_LARSEN}',
|
|
604
|
+
'Jack Lang': 'former French Minister of National Education',
|
|
598
605
|
MIROSLAV_LAJCAK: 'Russia-friendly Slovakian politician, friend of Steve Bannon',
|
|
599
606
|
PETER_MANDELSON: 'UK politics',
|
|
600
607
|
TERJE_ROD_LARSEN: 'Norwegian diplomat',
|
|
@@ -611,7 +618,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
611
618
|
r"Brit(ain|ish)",
|
|
612
619
|
r"Brussels",
|
|
613
620
|
r"Cannes",
|
|
614
|
-
r"(Caroline|Jack)?\s*Lang(, Caroline)?",
|
|
615
621
|
r"Cypr(iot|us)",
|
|
616
622
|
r"Davos",
|
|
617
623
|
r"ECB",
|
|
@@ -662,12 +668,13 @@ HIGHLIGHTED_NAMES = [
|
|
|
662
668
|
emailers={
|
|
663
669
|
AMANDA_ENS: 'Citigroup',
|
|
664
670
|
BRAD_WECHSLER: f"head of {LEON_BLACK}'s personal investment vehicle according to FT",
|
|
671
|
+
CECILIA_STEEN: None,
|
|
665
672
|
DANIEL_SABBA: 'UBS Investment Bank',
|
|
666
673
|
DAVID_FISZEL: 'CIO Honeycomb Asset Management',
|
|
667
674
|
JES_STALEY: 'former CEO of Barclays',
|
|
668
675
|
JIDE_ZEITLIN: 'former partner at Goldman Sachs, allegations of sexual misconduct',
|
|
669
676
|
'Laurie Cameron': 'currency trading',
|
|
670
|
-
LEON_BLACK: 'Apollo CEO',
|
|
677
|
+
LEON_BLACK: 'Apollo CEO who paid Epstein tens of millions for tax advice',
|
|
671
678
|
MARC_LEON: 'Luxury Properties Sari Morrocco',
|
|
672
679
|
MELANIE_SPINELLA: 'representative of Leon Black',
|
|
673
680
|
MORTIMER_ZUCKERMAN: 'business partner of Epstein, newspaper publisher',
|
|
@@ -683,6 +690,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
683
690
|
r"Apollo",
|
|
684
691
|
r"Ari\s*Glass",
|
|
685
692
|
r"Bank(\s*of\s*Scotland)",
|
|
693
|
+
r"Bear\s*Stearns",
|
|
686
694
|
r"(Bernie\s*)?Madoff",
|
|
687
695
|
r"Black(rock|stone)",
|
|
688
696
|
r"B\s*of\s*A",
|
|
@@ -826,19 +834,21 @@ HIGHLIGHTED_NAMES = [
|
|
|
826
834
|
style='bright_yellow',
|
|
827
835
|
emailers={
|
|
828
836
|
'Alain Forget': 'author of "How To Get Out Of This World ALIVE"',
|
|
837
|
+
'Alex Yablon': 'New York Magazine fact checker (?)',
|
|
829
838
|
EDWARD_JAY_EPSTEIN: 'no relation, wrote books about spies',
|
|
830
839
|
HENRY_HOLT: f"{MICHAEL_WOLFF}'s book publisher",
|
|
831
840
|
JAMES_HILL: 'ABC News',
|
|
832
|
-
JENNIFER_JACQUET: 'Future Science',
|
|
841
|
+
JENNIFER_JACQUET: 'Future Science magazine',
|
|
833
842
|
JOHN_BROCKMAN: 'literary agent and author specializing in scientific literature',
|
|
834
|
-
LANDON_THOMAS: 'New York Times',
|
|
843
|
+
LANDON_THOMAS: 'New York Times financial reporter',
|
|
835
844
|
MICHAEL_WOLFF: 'Author of "Fire and Fury: Inside the Trump White House"',
|
|
836
845
|
PAUL_KRASSNER: '60s counterculture guy',
|
|
846
|
+
'Peter Aldhous': 'Buzzfeed science reporter',
|
|
847
|
+
"Susan Edelman": 'New York Post reporter',
|
|
837
848
|
'Tim Zagat': 'Zagat restaurant guide CEO',
|
|
838
849
|
},
|
|
839
850
|
patterns=[
|
|
840
851
|
r"ABC(\s*News)?",
|
|
841
|
-
r"Alex\s*Yablon",
|
|
842
852
|
r"Arianna(\s*Huffington)?",
|
|
843
853
|
r"(Arthur\s*)?Kretchmer",
|
|
844
854
|
r'Associated\s*Press',
|
|
@@ -872,6 +882,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
872
882
|
r"Huff(ington)?(\s*Po(st)?)?",
|
|
873
883
|
r"Ingram, David",
|
|
874
884
|
r"(James\s*)?(Hill|Patterson)",
|
|
885
|
+
r"Jesse Kornbluth",
|
|
875
886
|
r"Jonathan\s*Karl",
|
|
876
887
|
r"Julie\s*(K.?\s*)?Brown", r'jbrown@miamiherald.com',
|
|
877
888
|
r"(Katie\s*)?Couric",
|
|
@@ -895,7 +906,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
895
906
|
r"(Sean\s*)?Hannity",
|
|
896
907
|
r"Sulzberger",
|
|
897
908
|
r"SunSentinel",
|
|
898
|
-
r"Susan Edelman",
|
|
899
909
|
r"(The\s*)?Financial\s*Times",
|
|
900
910
|
r"The\s*Guardian",
|
|
901
911
|
r"TheHill",
|
|
@@ -956,6 +966,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
956
966
|
emailers={
|
|
957
967
|
ANN_MARIE_VILLAFANA: 'Southern District of Florida (SDFL) U.S. Attorney',
|
|
958
968
|
DANNY_FROST: 'Director of Communications at Manhattan D.A.',
|
|
969
|
+
'Police Code Enforcement': f"{PALM_BEACH} buildings code enforcement",
|
|
959
970
|
},
|
|
960
971
|
patterns=[
|
|
961
972
|
r"AG",
|
|
@@ -984,7 +995,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
984
995
|
r"(General\s*)?P(a|e)traeus",
|
|
985
996
|
r"IRS",
|
|
986
997
|
r"(James\s*)?Comey",
|
|
987
|
-
r"Jeff(rey)?\s*Sessions",
|
|
988
998
|
r"(Jennifer\s*Shasky\s*)?Calvery",
|
|
989
999
|
r"((Judge|Mark)\s*)?(Carney|Filip)",
|
|
990
1000
|
r"(Judge\s*)?(Kenneth\s*)?(A\.?\s*)?Marra",
|
|
@@ -999,14 +1009,14 @@ HIGHLIGHTED_NAMES = [
|
|
|
999
1009
|
r"(Michael\s*)?Reiter",
|
|
1000
1010
|
r"OGE",
|
|
1001
1011
|
r"Office\s*of\s*Government\s*Ethics",
|
|
1002
|
-
r"
|
|
1012
|
+
r"police",
|
|
1003
1013
|
r"(Preet\s*)?Bharara",
|
|
1004
1014
|
r"SCOTUS",
|
|
1005
1015
|
r"SD(FL|NY)",
|
|
1006
1016
|
r"SEC",
|
|
1007
1017
|
r"Secret\s*Service",
|
|
1008
1018
|
r"Securities\s*and\s*Exchange\s*Commission",
|
|
1009
|
-
r"Southern\s*District\s*of\s*(Florida|New\s*York)",
|
|
1019
|
+
r"Southern\s*District(\s*of\s*(Florida|New\s*York))?",
|
|
1010
1020
|
r"State\s*Dep(artmen)?t",
|
|
1011
1021
|
r"Strzok",
|
|
1012
1022
|
r"Supreme\s*Court",
|
|
@@ -1136,16 +1146,18 @@ HIGHLIGHTED_NAMES = [
|
|
|
1136
1146
|
label='modeling',
|
|
1137
1147
|
style='pale_violet_red1',
|
|
1138
1148
|
emailers={
|
|
1139
|
-
'Abi Schwinck': '
|
|
1149
|
+
'Abi Schwinck': f'{MC2_MODEL_MANAGEMENT} {QUESTION_MARKS}',
|
|
1140
1150
|
DANIEL_SIAD: None,
|
|
1141
1151
|
FAITH_KATES: 'Next Models co-founder',
|
|
1142
1152
|
'Gianni Serazzi': 'fashion consultant?',
|
|
1143
1153
|
HEATHER_MANN: 'South African former model, ex-girlfriend of Prince Andrew (?)',
|
|
1144
|
-
JEAN_LUC_BRUNEL: 'MC2 Model Management founder, died by suicide in French jail',
|
|
1145
|
-
JEFF_FULLER: 'president of
|
|
1154
|
+
JEAN_LUC_BRUNEL: f'MC2 Model Management founder, died by suicide in French jail',
|
|
1155
|
+
JEFF_FULLER: f'president of {MC2_MODEL_MANAGEMENT} USA',
|
|
1156
|
+
'lorraine@mc2mm.com': f'{MC2_MODEL_MANAGEMENT}',
|
|
1157
|
+
'pink@mc2mm.com': f'{MC2_MODEL_MANAGEMENT}',
|
|
1146
1158
|
MANUELA_MARTINEZ: 'Mega Partners (Brazilian agency)',
|
|
1147
1159
|
MARIANA_IDZKOWSKA: None,
|
|
1148
|
-
'Michael Sanka': '
|
|
1160
|
+
'Michael Sanka': f'{MC2_MODEL_MANAGEMENT} {QUESTION_MARKS}',
|
|
1149
1161
|
'Vladimir Yudashkin': 'director of the 1 Mother Agency',
|
|
1150
1162
|
},
|
|
1151
1163
|
patterns=[
|
|
@@ -1202,6 +1214,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1202
1214
|
r"George\s*(H\.?\s*)?(W\.?\s*)?Bush",
|
|
1203
1215
|
r"(George\s*)?Nader",
|
|
1204
1216
|
r"GOP",
|
|
1217
|
+
r"Jeff(rey)?\s*Sessions",
|
|
1205
1218
|
r"(John\s*(R.?\s*)?)Bolton",
|
|
1206
1219
|
r"Kissinger",
|
|
1207
1220
|
r"Kobach",
|
|
@@ -1245,7 +1258,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1245
1258
|
emailers={
|
|
1246
1259
|
'Dasha Zhukova': 'art collector, daughter of Alexander Zhukov',
|
|
1247
1260
|
MASHA_DROKOVA: 'silicon valley VC, former Putin Youth member',
|
|
1248
|
-
RENATA_BOLOTOVA: 'former
|
|
1261
|
+
RENATA_BOLOTOVA: 'former model, fund manager at New York State Insurance Fund',
|
|
1249
1262
|
SVETLANA_POZHIDAEVA: "Epstein's Russian assistant who was recommended for a visa by Sergei Belyakov (FSB) and David Blaine",
|
|
1250
1263
|
},
|
|
1251
1264
|
patterns=[
|
|
@@ -1302,7 +1315,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
1302
1315
|
],
|
|
1303
1316
|
),
|
|
1304
1317
|
HighlightedNames(
|
|
1305
|
-
label=
|
|
1318
|
+
label=TECH_BRO,
|
|
1306
1319
|
style='bright_cyan',
|
|
1307
1320
|
emailers={
|
|
1308
1321
|
'Auren Hoffman': 'CEO of SafeGraph (firm that gathers location data from mobile devices) and LiveRamp',
|
|
@@ -1369,6 +1382,33 @@ HIGHLIGHTED_NAMES = [
|
|
|
1369
1382
|
r"(The\s*)?Art\s*of\s*the\s*Deal",
|
|
1370
1383
|
],
|
|
1371
1384
|
),
|
|
1385
|
+
HighlightedNames(
|
|
1386
|
+
label='USVI',
|
|
1387
|
+
style='sea_green1',
|
|
1388
|
+
emailers={
|
|
1389
|
+
CECILE_DE_JONGH: 'Virgin Islands first lady 2007-2015',
|
|
1390
|
+
KENNETH_E_MAPP: 'Virgin Islands Governor',
|
|
1391
|
+
STACEY_PLASKETT: 'Virgin Islands non-voting member of Congress',
|
|
1392
|
+
},
|
|
1393
|
+
patterns=[
|
|
1394
|
+
r"Antigua",
|
|
1395
|
+
r"Bahamas",
|
|
1396
|
+
r"BVI",
|
|
1397
|
+
r"Caribb?ean",
|
|
1398
|
+
r"Dominican\s*Republic",
|
|
1399
|
+
r"(Great|Little)\s*St.?\s*James",
|
|
1400
|
+
r"Haiti(an)?",
|
|
1401
|
+
r"(John\s*)deJongh(\s*Jr\.?)",
|
|
1402
|
+
r"(Kenneth E\. )?Mapp",
|
|
1403
|
+
r"PBI",
|
|
1404
|
+
r"Puerto\s*Ric(an|o)",
|
|
1405
|
+
r"S(ain)?t.?\s*Thomas",
|
|
1406
|
+
r"USVI",
|
|
1407
|
+
r"(?<!Epstein )VI",
|
|
1408
|
+
r"(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?", # Hard to make this work right
|
|
1409
|
+
r"(West\s*)?Palm\s*Beach(?!\s*(Daily|Post))",
|
|
1410
|
+
],
|
|
1411
|
+
),
|
|
1372
1412
|
HighlightedNames(
|
|
1373
1413
|
label='victim',
|
|
1374
1414
|
style='orchid1',
|
|
@@ -1399,35 +1439,6 @@ HIGHLIGHTED_NAMES = [
|
|
|
1399
1439
|
r"(Scott\s*)?Rothstein",
|
|
1400
1440
|
],
|
|
1401
1441
|
),
|
|
1402
|
-
HighlightedNames(
|
|
1403
|
-
label='USVI',
|
|
1404
|
-
style='sea_green1',
|
|
1405
|
-
emailers={
|
|
1406
|
-
CECILE_DE_JONGH: 'Virgin Islands first lady 2007-2015',
|
|
1407
|
-
KENNETH_E_MAPP: 'Virgin Islands Governor',
|
|
1408
|
-
STACEY_PLASKETT: 'Virgin Islands non-voting member of Congress',
|
|
1409
|
-
},
|
|
1410
|
-
patterns=[
|
|
1411
|
-
r"Antigua",
|
|
1412
|
-
r"Bahamas",
|
|
1413
|
-
r"BVI",
|
|
1414
|
-
r"Caribb?ean",
|
|
1415
|
-
r"Dominican\s*Republic",
|
|
1416
|
-
r"(Great|Little)\s*St.?\s*James",
|
|
1417
|
-
r"Haiti(an)?",
|
|
1418
|
-
r"(John\s*)deJongh(\s*Jr\.?)",
|
|
1419
|
-
r"(Kenneth E\. )?Mapp",
|
|
1420
|
-
r"PBI",
|
|
1421
|
-
r"Puerto\s*Ric(an|o)",
|
|
1422
|
-
r"S(ain)?t.?\s*Thomas",
|
|
1423
|
-
r"USVI",
|
|
1424
|
-
r"(?<!Epstein )VI",
|
|
1425
|
-
r"(The\s*)?Virgin\s*Islands(\s*Daily\s*News)?", # Hard to make this work right
|
|
1426
|
-
r"(West\s*)?Palm\s*Beach(?!\s*(Daily|Post))",
|
|
1427
|
-
],
|
|
1428
|
-
),
|
|
1429
|
-
|
|
1430
|
-
# Individuals
|
|
1431
1442
|
HighlightedNames(
|
|
1432
1443
|
label=STEVE_BANNON,
|
|
1433
1444
|
style='color(58)',
|
|
@@ -1441,13 +1452,20 @@ HIGHLIGHTED_NAMES = [
|
|
|
1441
1452
|
r"Biosphere",
|
|
1442
1453
|
],
|
|
1443
1454
|
),
|
|
1455
|
+
|
|
1456
|
+
# Individuals
|
|
1444
1457
|
HighlightedNames(
|
|
1458
|
+
emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi scheme partner, prison 18 years"},
|
|
1445
1459
|
style='dark_olive_green3',
|
|
1446
1460
|
category=FINANCE,
|
|
1447
|
-
emailers={STEVEN_HOFFENBERG: "Epstein's Towers Financial ponzi scheme partner, prison 18 years"},
|
|
1448
1461
|
patterns=[r"(steven?\s*)?hoffenberg?w?"],
|
|
1449
1462
|
),
|
|
1450
|
-
HighlightedNames(
|
|
1463
|
+
HighlightedNames(
|
|
1464
|
+
emailers={GHISLAINE_MAXWELL: "Epstein's girlfriend, daughter of the spy Robert Maxwell"},
|
|
1465
|
+
category='Epstein',
|
|
1466
|
+
patterns=[r"gmax(1@ellmax.com)?", r"(The )?TerraMar Project"],
|
|
1467
|
+
style='deep_pink3',
|
|
1468
|
+
),
|
|
1451
1469
|
HighlightedNames(emailers={JABOR_Y: '"an influential man in Qatar"'}, category=MIDEAST, style='spring_green1'),
|
|
1452
1470
|
HighlightedNames(emailers={KATHRYN_RUEMMLER: 'former Obama legal counsel'}, style='magenta2', category=FRIEND),
|
|
1453
1471
|
HighlightedNames(emailers={MELANIE_WALKER: f"doctor, friend of {BILL_GATES}"}, style='pale_violet_red1', category=FRIEND),
|
epstein_files/util/logging.py
CHANGED
|
@@ -33,7 +33,7 @@ LOG_LEVEL_ENV_VAR = 'LOG_LEVEL'
|
|
|
33
33
|
# Augment the standard log highlighter with 'epstein_filename' matcher
|
|
34
34
|
class LogHighlighter(ReprHighlighter):
|
|
35
35
|
highlights = ReprHighlighter.highlights + [
|
|
36
|
-
*[fr"(?P<{doc_type}>{doc_type}(Cfg)?)" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
36
|
+
*[fr"(?P<{doc_type}>{doc_type}(Cfg|s)?)" for doc_type in DOC_TYPE_STYLES.keys()],
|
|
37
37
|
"(?P<epstein_filename>" + FILE_NAME_REGEX.pattern + ')',
|
|
38
38
|
]
|
|
39
39
|
|
epstein_files/util/output.py
CHANGED
|
@@ -18,7 +18,7 @@ from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
|
|
|
18
18
|
from epstein_files.util.data import dict_sets_to_lists, uniquify
|
|
19
19
|
from epstein_files.util.env import args
|
|
20
20
|
from epstein_files.util.file_helper import log_file_write
|
|
21
|
-
from epstein_files.util.logging import logger
|
|
21
|
+
from epstein_files.util.logging import logger, exit_with_error
|
|
22
22
|
from epstein_files.util.rich import *
|
|
23
23
|
|
|
24
24
|
DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
|
|
@@ -69,6 +69,11 @@ INTERESTING_EMAIL_IDS = [
|
|
|
69
69
|
#'023208', # Extremely long Leon Black email chain
|
|
70
70
|
]
|
|
71
71
|
|
|
72
|
+
INTERESTING_TEXT_IDS = [
|
|
73
|
+
'027275', # "Crypto- Kerry- Qatar -sessions"
|
|
74
|
+
'027165', # melaniee walker crypto health
|
|
75
|
+
]
|
|
76
|
+
|
|
72
77
|
|
|
73
78
|
def print_email_timeline(epstein_files: EpsteinFiles) -> None:
|
|
74
79
|
"""Print a table of all emails in chronological order."""
|
|
@@ -83,11 +88,17 @@ def print_email_timeline(epstein_files: EpsteinFiles) -> None:
|
|
|
83
88
|
console.print(email)
|
|
84
89
|
|
|
85
90
|
|
|
86
|
-
def
|
|
91
|
+
def print_emailers_info(epstein_files: EpsteinFiles) -> None:
|
|
92
|
+
"""Print tbe summary table of everyone in the files to an image."""
|
|
87
93
|
print_color_key()
|
|
88
94
|
console.line()
|
|
89
95
|
all_emailers = sorted(epstein_files.emailers(), key=lambda person: person.sort_key())
|
|
90
|
-
console.print(Person.emailer_info_table(all_emailers))
|
|
96
|
+
console.print(Person.emailer_info_table(all_emailers, show_epstein_total=True))
|
|
97
|
+
|
|
98
|
+
if not args.build:
|
|
99
|
+
logger.warning(f"Not writing .png file because --build is not set")
|
|
100
|
+
return
|
|
101
|
+
|
|
91
102
|
svg_path = f"{EMAILERS_TABLE_PNG_PATH}.svg"
|
|
92
103
|
console.save_svg(svg_path, theme=HTML_TERMINAL_THEME, title="Epstein Emailers")
|
|
93
104
|
log_file_write(svg_path)
|
|
@@ -106,7 +117,10 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
|
|
|
106
117
|
people_to_print: list[Person]
|
|
107
118
|
|
|
108
119
|
if args.names:
|
|
109
|
-
|
|
120
|
+
try:
|
|
121
|
+
people_to_print = epstein_files.person_objs(args.names)
|
|
122
|
+
except Exception as e:
|
|
123
|
+
exit_with_error(str(e))
|
|
110
124
|
else:
|
|
111
125
|
if args.all_emails:
|
|
112
126
|
people_to_print = all_emailers
|
|
@@ -147,10 +161,10 @@ def print_emails_section(epstein_files: EpsteinFiles) -> list[Email]:
|
|
|
147
161
|
if args.all_emails:
|
|
148
162
|
_verify_all_emails_were_printed(epstein_files, printed_emails)
|
|
149
163
|
|
|
150
|
-
|
|
164
|
+
_print_email_device_signature_info(epstein_files)
|
|
151
165
|
fwded_articles = [e for e in printed_emails if e.config and e.is_fwded_article()]
|
|
152
166
|
log_msg = f"Rewrote {len(Email.rewritten_header_ids)} of {len(printed_emails)} email headers"
|
|
153
|
-
logger.warning(f"{log_msg}, {len(fwded_articles)} of the
|
|
167
|
+
logger.warning(f" -> {log_msg}, {len(fwded_articles)} of the Emails printed were forwarded articles.")
|
|
154
168
|
return printed_emails
|
|
155
169
|
|
|
156
170
|
|
|
@@ -193,22 +207,30 @@ def print_json_stats(epstein_files: EpsteinFiles) -> None:
|
|
|
193
207
|
print_json("count_by_month", count_by_month(epstein_files.all_documents()))
|
|
194
208
|
|
|
195
209
|
|
|
196
|
-
def print_other_files_section(
|
|
210
|
+
def print_other_files_section(epstein_files: EpsteinFiles) -> list[OtherFile]:
|
|
197
211
|
"""Returns the OtherFile objects that were interesting enough to print."""
|
|
212
|
+
if args.uninteresting:
|
|
213
|
+
files = [f for f in epstein_files.other_files if not f.is_interesting()]
|
|
214
|
+
else:
|
|
215
|
+
files = [f for f in epstein_files.other_files if args.all_other_files or f.is_interesting()]
|
|
216
|
+
|
|
198
217
|
title_pfx = '' if args.all_other_files else 'Selected '
|
|
199
|
-
category_table = OtherFile.
|
|
218
|
+
category_table = OtherFile.summary_table(files, title_pfx=title_pfx)
|
|
200
219
|
other_files_preview_table = OtherFile.files_preview_table(files, title_pfx=title_pfx)
|
|
201
220
|
print_section_header(f"{FIRST_FEW_LINES} of {len(files)} {title_pfx}Files That Are Neither Emails Nor Text Messages")
|
|
202
221
|
print_other_page_link(epstein_files)
|
|
203
222
|
print_centered(Padding(category_table, (2, 0)))
|
|
204
223
|
console.print(other_files_preview_table)
|
|
224
|
+
return files
|
|
205
225
|
|
|
206
226
|
|
|
207
|
-
def print_text_messages_section(
|
|
227
|
+
def print_text_messages_section(epstein_files: EpsteinFiles) -> list[MessengerLog]:
|
|
208
228
|
"""Print summary table and stats for text messages."""
|
|
229
|
+
imessage_logs = [log for log in epstein_files.imessage_logs if not args.names or log.author in args.names]
|
|
230
|
+
|
|
209
231
|
if not imessage_logs:
|
|
210
|
-
logger.warning(f"No
|
|
211
|
-
return
|
|
232
|
+
logger.warning(f"No MessengerLogs found for {args.names}")
|
|
233
|
+
return imessage_logs
|
|
212
234
|
|
|
213
235
|
print_section_header('All of His Text Messages')
|
|
214
236
|
print_centered("(conversations are sorted chronologically based on timestamp of first message in the log file)", style='dim')
|
|
@@ -222,6 +244,8 @@ def print_text_messages_section(imessage_logs: list[MessengerLog]) -> None:
|
|
|
222
244
|
console.print(Padding(log_file))
|
|
223
245
|
console.line(2)
|
|
224
246
|
|
|
247
|
+
return imessage_logs
|
|
248
|
+
|
|
225
249
|
|
|
226
250
|
def write_urls() -> None:
|
|
227
251
|
"""Write _URL style constant variables to URLS_ENV file so bash scripts can load as env vars."""
|
|
@@ -245,7 +269,7 @@ def write_urls() -> None:
|
|
|
245
269
|
logger.warning(f"Wrote {len(url_vars)} URL variables to '{URLS_ENV}'\n")
|
|
246
270
|
|
|
247
271
|
|
|
248
|
-
def
|
|
272
|
+
def _print_email_device_signature_info(epstein_files: EpsteinFiles) -> None:
|
|
249
273
|
print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE)
|
|
250
274
|
console.print(_signature_table(epstein_files.email_device_signatures_to_authors(), (DEVICE_SIGNATURE, AUTHOR), ', '))
|
|
251
275
|
console.print(_signature_table(epstein_files.email_authors_to_device_signatures(), (AUTHOR, DEVICE_SIGNATURE)))
|
epstein_files/util/rich.py
CHANGED
|
@@ -70,7 +70,7 @@ CONSOLE_ARGS = {
|
|
|
70
70
|
'color_system': '256',
|
|
71
71
|
'highlighter': EpsteinHighlighter(),
|
|
72
72
|
'record': args.build,
|
|
73
|
-
'safe_box':
|
|
73
|
+
'safe_box': True,
|
|
74
74
|
'theme': Theme(THEME_STYLES),
|
|
75
75
|
'width': args.width,
|
|
76
76
|
}
|
|
@@ -83,21 +83,21 @@ console = Console(**CONSOLE_ARGS)
|
|
|
83
83
|
highlighter = CONSOLE_ARGS['highlighter']
|
|
84
84
|
|
|
85
85
|
|
|
86
|
-
def add_cols_to_table(table: Table,
|
|
86
|
+
def add_cols_to_table(table: Table, cols: list[str | dict], justify: str = 'center') -> None:
|
|
87
87
|
"""Left most col will be left justified, rest are center justified."""
|
|
88
|
-
for i, col in enumerate(
|
|
89
|
-
|
|
88
|
+
for i, col in enumerate(cols):
|
|
89
|
+
col_justify = 'left' if i == 0 else justify
|
|
90
90
|
|
|
91
91
|
if isinstance(col, dict):
|
|
92
92
|
col_name = col['name']
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
del
|
|
93
|
+
col_kwargs = deepcopy(col)
|
|
94
|
+
col_kwargs['justify'] = col_kwargs.get('justify', col_justify)
|
|
95
|
+
del col_kwargs['name']
|
|
96
96
|
else:
|
|
97
97
|
col_name = col
|
|
98
|
-
|
|
98
|
+
col_kwargs = {'justify': col_justify}
|
|
99
99
|
|
|
100
|
-
table.add_column(col_name, **
|
|
100
|
+
table.add_column(col_name, **col_kwargs)
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
def build_highlighter(pattern: str) -> EpsteinHighlighter:
|
|
@@ -187,7 +187,7 @@ def print_title_page_header() -> None:
|
|
|
187
187
|
Text.from_markup(link_markup(other_site_url(), other_site_msg, f"{OTHER_SITE_LINK_STYLE} bold")),
|
|
188
188
|
link_text_obj(WORD_COUNT_URL, 'most frequently used words in the emails and texts', AUX_SITE_LINK_STYLE),
|
|
189
189
|
link_text_obj(JSON_METADATA_URL, 'author attribution explanations', AUX_SITE_LINK_STYLE),
|
|
190
|
-
link_text_obj(
|
|
190
|
+
link_text_obj(JSON_FILES_URL, "epstein's json files", AUX_SITE_LINK_STYLE),
|
|
191
191
|
]
|
|
192
192
|
|
|
193
193
|
for link in links:
|
|
@@ -199,7 +199,8 @@ def print_title_page_tables(epstein_files: 'EpsteinFiles') -> None:
|
|
|
199
199
|
_print_external_links()
|
|
200
200
|
console.line()
|
|
201
201
|
_print_abbreviations_table()
|
|
202
|
-
epstein_files.
|
|
202
|
+
print_centered(epstein_files.overview_table())
|
|
203
|
+
console.line()
|
|
203
204
|
print_color_key()
|
|
204
205
|
print_centered(f"if you think there's an attribution error or can deanonymize an {UNKNOWN} contact {CRYPTADAMUS_TWITTER}", 'grey46')
|
|
205
206
|
print_centered('note this site is based on the OCR text provided by Congress which is not always the greatest', 'grey23')
|
|
@@ -252,7 +253,7 @@ def print_subtitle_panel(msg: str, style: str = 'black on white') -> None:
|
|
|
252
253
|
def print_section_header(msg: str, style: str = SECTION_HEADER_STYLE, is_centered: bool = False) -> None:
|
|
253
254
|
panel = Panel(Text(msg, justify='center'), expand=True, padding=(1, 1), style=style)
|
|
254
255
|
panel = Align.center(panel) if is_centered else panel
|
|
255
|
-
console.print(Padding(panel, (3,
|
|
256
|
+
console.print(Padding(panel, (3, 5, 1, 5)))
|
|
256
257
|
|
|
257
258
|
|
|
258
259
|
def print_starred_header(msg: str, num_stars: int = 7, num_spaces: int = 2, style: str = TITLE_STYLE) -> None:
|
|
@@ -311,8 +312,7 @@ def _print_external_links() -> None:
|
|
|
311
312
|
raw_docs_link = join_texts([link_text_obj(RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL, 'raw files', style=f"{ARCHIVE_LINK_COLOR} dim")], encloser='()')
|
|
312
313
|
print_centered(join_texts([presser_link, raw_docs_link]))
|
|
313
314
|
print_centered(link_markup(JMAIL_URL, JMAIL) + " (read His Emails via Gmail interface)")
|
|
314
|
-
print_centered(link_markup(
|
|
315
|
-
print_centered(link_markup(COURIER_NEWSROOM_ARCHIVE_URL, 'Searchable Archive') + " (Courier Newsroom)")
|
|
315
|
+
print_centered(link_markup(EPSTEIN_DOCS_URL) + " (searchable archive)")
|
|
316
316
|
print_centered(link_markup(EPSTEINIFY_URL) + " (raw document images)")
|
|
317
317
|
print_centered(link_markup(EPSTEIN_WEB_URL) + " (character summaries)")
|
|
318
318
|
print_centered(link_markup(EPSTEIN_MEDIA_URL) + " (raw document images)")
|
epstein_files/util/timer.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Type
|
|
3
4
|
|
|
4
5
|
from epstein_files.util.logging import logger
|
|
5
6
|
|
|
@@ -10,6 +11,19 @@ class Timer:
|
|
|
10
11
|
checkpoint_at: float = field(default_factory=lambda: time.perf_counter())
|
|
11
12
|
decimals: int = 2
|
|
12
13
|
|
|
14
|
+
def log_section_complete(self, label: str, all_docs: list, printed_docs: list) -> None:
    """Log a one-line summary of how many `label` documents were printed.

    Compares the number of printed documents against the number available and
    hands the resulting message to `print_at_checkpoint()`.

    Args:
        label: Singular name of the document type (an 's' is appended), e.g. 'email'.
        all_docs: Every document of this type that could have been printed.
        printed_docs: The documents that actually were printed.
    """
    num_printed = len(printed_docs)
    num_skipped = len(all_docs) - num_printed
    prefix = suffix = ''

    if num_skipped == 0:
        prefix = 'all '
    elif num_skipped < 0:
        # More documents were printed than exist, so some were printed repeatedly.
        # Negate the (negative) difference so the surplus reads as a positive count.
        suffix = f"(at least {-num_skipped} {label}s printed more than once)"
    else:
        suffix = f"(skipped {num_skipped})"

    # strip() drops the trailing space left behind when there is no suffix.
    self.print_at_checkpoint(f"Printed {prefix}{num_printed} {label}s {suffix}".strip())
|
|
26
|
+
|
|
13
27
|
def print_at_checkpoint(self, msg: str) -> None:
|
|
14
28
|
logger.warning(f"{msg} in {self.seconds_since_checkpoint_str()}...")
|
|
15
29
|
self.checkpoint_at = time.perf_counter()
|