epstein-files 1.2.0__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +42 -30
- epstein_files/documents/communication.py +0 -3
- epstein_files/documents/document.py +66 -19
- epstein_files/documents/email.py +203 -208
- epstein_files/documents/emails/email_header.py +10 -2
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/other_file.py +16 -34
- epstein_files/epstein_files.py +24 -35
- epstein_files/person.py +67 -73
- epstein_files/util/constant/names.py +21 -12
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +2 -2
- epstein_files/util/constant/urls.py +14 -2
- epstein_files/util/constants.py +38 -12
- epstein_files/util/data.py +2 -1
- epstein_files/util/doc_cfg.py +3 -3
- epstein_files/util/env.py +10 -7
- epstein_files/util/highlighted_group.py +366 -202
- epstein_files/util/logging.py +1 -1
- epstein_files/util/output.py +54 -21
- epstein_files/util/rich.py +21 -16
- epstein_files/util/timer.py +14 -0
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/METADATA +5 -2
- epstein_files-1.2.5.dist-info/RECORD +34 -0
- epstein_files-1.2.0.dist-info/RECORD +0 -34
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.2.0.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0
epstein_files/util/constants.py
CHANGED
|
@@ -39,6 +39,7 @@ HEADER_ABBREVIATIONS = {
|
|
|
39
39
|
'MBZ': "Mohamed bin Zayed Al Nahyan (Emirates sheikh)",
|
|
40
40
|
"Miro": MIROSLAV_LAJCAK,
|
|
41
41
|
"Mooch": "Anthony 'The Mooch' Scaramucci (Skybridge crypto bro)",
|
|
42
|
+
"NPA": 'non-prosecution agreement',
|
|
42
43
|
"Terje": TERJE_ROD_LARSEN,
|
|
43
44
|
"VI": f"U.S. {VIRGIN_ISLANDS}",
|
|
44
45
|
"Woody": "Woody Allen",
|
|
@@ -52,14 +53,14 @@ HEADER_ABBREVIATIONS = {
|
|
|
52
53
|
|
|
53
54
|
# Emailers
|
|
54
55
|
EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
55
|
-
ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|
|
|
56
|
+
ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
|
|
56
57
|
ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
|
|
57
58
|
AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
|
|
58
59
|
ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
|
|
59
60
|
ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
|
|
60
61
|
ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
|
|
61
62
|
ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
|
|
62
|
-
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?
|
|
63
|
+
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Roths(ch|hc)?ild|Ariane', re.IGNORECASE),
|
|
63
64
|
BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
|
|
64
65
|
BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
|
|
65
66
|
BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
|
|
@@ -86,7 +87,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
86
87
|
JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
|
|
87
88
|
JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
|
|
88
89
|
JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
|
|
89
|
-
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel
|
|
90
|
+
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
|
|
90
91
|
JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
|
|
91
92
|
JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
|
|
92
93
|
JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
|
|
@@ -104,7 +105,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
104
105
|
LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
|
|
105
106
|
MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
|
|
106
107
|
MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
|
|
107
|
-
MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
|
|
108
|
+
MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
|
|
108
109
|
MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
|
|
109
110
|
MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
|
|
110
111
|
MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
|
|
@@ -113,7 +114,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
113
114
|
MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
|
|
114
115
|
MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
|
|
115
116
|
MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
|
|
116
|
-
MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]
|
|
117
|
+
MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
|
|
117
118
|
MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
|
|
118
119
|
MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
|
|
119
120
|
NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
|
|
@@ -195,6 +196,7 @@ EMAILERS = [
|
|
|
195
196
|
'Peter Aldhous',
|
|
196
197
|
'Peter Green',
|
|
197
198
|
ROGER_SCHANK,
|
|
199
|
+
'Roy Black',
|
|
198
200
|
STEVEN_PFEIFFER,
|
|
199
201
|
'Steven Victor MD',
|
|
200
202
|
'Susan Edelman',
|
|
@@ -513,7 +515,7 @@ EMAILS_CONFIG = [
|
|
|
513
515
|
recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
|
|
514
516
|
duplicate_ids=['031973']
|
|
515
517
|
),
|
|
516
|
-
EmailCfg(id='032457', author=PAUL_KRASSNER), # Bad OCR (nofix)
|
|
518
|
+
EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, 'Nancy Cain']), # Bad OCR (nofix)
|
|
517
519
|
EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
|
|
518
520
|
EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
|
|
519
521
|
EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
|
|
@@ -643,7 +645,16 @@ EMAILS_CONFIG = [
|
|
|
643
645
|
EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
|
|
644
646
|
EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
|
|
645
647
|
EmailCfg(id='033050', actual_text='schwartman'),
|
|
648
|
+
EmailCfg(id='031036', description=f"{BARBRO_C_EHNBOM} related donation and Swedish girls discussion"),
|
|
646
649
|
EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
|
|
650
|
+
EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
651
|
+
EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
652
|
+
EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
653
|
+
EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
654
|
+
EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
655
|
+
EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
|
|
656
|
+
EmailCfg(id='031320', description=f"Epstein and {RICHARD_KAHN} appear to be discussing routing donatings through {PEGGY_SIEGAL}"),
|
|
657
|
+
EmailCfg(id='016693', description='signed "MM"'),
|
|
647
658
|
EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
|
|
648
659
|
EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
|
|
649
660
|
EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
|
|
@@ -661,6 +672,10 @@ EMAILS_CONFIG = [
|
|
|
661
672
|
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
|
|
662
673
|
EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
|
|
663
674
|
EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
|
|
675
|
+
EmailCfg(id='026829', is_fwded_article=True), # Taxes
|
|
676
|
+
EmailCfg(id='020443', is_fwded_article=True), # WSJ Deplorables Bannon
|
|
677
|
+
EmailCfg(id='030372', is_fwded_article=True), # Bannon China Iran
|
|
678
|
+
EmailCfg(id='030983', is_fwded_article=True), # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
|
|
664
679
|
EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
665
680
|
EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
666
681
|
EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
|
|
@@ -710,6 +725,8 @@ EMAILS_CONFIG = [
|
|
|
710
725
|
EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
|
|
711
726
|
EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
|
|
712
727
|
EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
|
|
728
|
+
EmailCfg(id='031990', is_fwded_article=True), # newsmax on ken starr
|
|
729
|
+
EmailCfg(id='029433', is_fwded_article=True), # Estate Planning After the Enactment of the Tax Cuts and Jobs Act
|
|
713
730
|
EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
|
|
714
731
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
715
732
|
|
|
@@ -732,12 +749,12 @@ EMAILS_CONFIG = [
|
|
|
732
749
|
EmailCfg(id='033512', duplicate_ids=['033361']),
|
|
733
750
|
EmailCfg(id='030299', duplicate_ids=['021794']),
|
|
734
751
|
EmailCfg(id='033575', duplicate_ids=['012898']),
|
|
735
|
-
EmailCfg(id='031428', duplicate_ids=['031388']),
|
|
752
|
+
EmailCfg(id='031428', is_fwded_article=True, duplicate_ids=['031388']),
|
|
736
753
|
EmailCfg(id='031980', duplicate_ids=['019409']),
|
|
737
754
|
EmailCfg(id='033486', duplicate_ids=['033156']),
|
|
738
755
|
EmailCfg(id='025790', duplicate_ids=['031994']),
|
|
739
756
|
EmailCfg(id='028497', duplicate_ids=['026228']),
|
|
740
|
-
EmailCfg(id='033528', duplicate_ids=['033517']),
|
|
757
|
+
EmailCfg(id='033528', is_fwded_article=True, duplicate_ids=['033517']),
|
|
741
758
|
EmailCfg(id='019412', duplicate_ids=['028621']),
|
|
742
759
|
EmailCfg(id='027053', duplicate_ids=['028765']),
|
|
743
760
|
EmailCfg(id='027049', duplicate_ids=['028773']),
|
|
@@ -874,7 +891,7 @@ TWEET = 'tweet'
|
|
|
874
891
|
# Legal cases
|
|
875
892
|
BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
|
|
876
893
|
EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
|
|
877
|
-
EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS},
|
|
894
|
+
EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, & L.M."
|
|
878
895
|
GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
|
|
879
896
|
GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
|
|
880
897
|
GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
|
|
@@ -1354,7 +1371,12 @@ OTHER_FILES_FINANCE = [
|
|
|
1354
1371
|
DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
|
|
1355
1372
|
|
|
1356
1373
|
# private placement memoranda
|
|
1357
|
-
DocCfg(
|
|
1374
|
+
DocCfg(
|
|
1375
|
+
id='024432',
|
|
1376
|
+
date='2006-09-27',
|
|
1377
|
+
description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
|
|
1378
|
+
),
|
|
1379
|
+
|
|
1358
1380
|
DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
|
|
1359
1381
|
]
|
|
1360
1382
|
|
|
@@ -1688,13 +1710,16 @@ for cfg in ALL_CONFIGS:
|
|
|
1688
1710
|
|
|
1689
1711
|
# Email-related regexes (defined here to avoid circular dependencies)
|
|
1690
1712
|
FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
|
|
1713
|
+
# Reply attribution line written by French mail clients ("Le <date>, <sender> a écrit :").
# Accepts the accented and the accent-stripped spelling, and an optional plain or
# non-breaking space before the colon.
FRENCH_REPLY_PATTERN = r"Le .* a [eé]crit[ \xa0]?:"
|
|
1714
|
+
GERMAN_REPLY_PATTERN = r"Am \d\d\.\d\d\..*schrieb.*"
|
|
1715
|
+
# Reply attribution lines of the form "Den ... folgende:" or "<fre|lor|son>. ... skrev ...:"
# (presumably produced by Scandinavian-language mail clients — confirm against sample emails).
# NOTE(review): "NORWEGAIN" looks like a misspelling of "NORWEGIAN"; REPLY_LINE_PATTERN
# interpolates this exact name, so any rename has to change both places together.
NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
|
|
1691
1716
|
REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
|
|
1692
1717
|
REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
|
|
1693
1718
|
REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
|
|
1694
1719
|
REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
|
|
1695
|
-
REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1720
|
+
REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1696
1721
|
REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
|
|
1697
|
-
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
|
|
1722
|
+
# Matches mobile-client signature lines ("Sent from/via ...", "Envoyé de mon ..."), optionally
# preceded by a "Please forgive / Sorry for all the typos" apology, plus the literal
# "Co-authored with iPhone auto-correct". Compiled with MULTILINE and IGNORECASE.
# NOTE(review): the leading ^ binds only to the first top-level alternative, so the
# "Co-authored ..." branch can match anywhere in a line — confirm that is intended.
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
|
|
1698
1723
|
|
|
1699
1724
|
|
|
1700
1725
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
@@ -1723,6 +1748,7 @@ UNINTERESTING_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + [
|
|
|
1723
1748
|
'Oliver Goodenough', # Robert Trivers CC
|
|
1724
1749
|
'Peter Aldhous', # Lawrence Krauss CC
|
|
1725
1750
|
'Players2', # Hoffenberg CC
|
|
1751
|
+
'Police Code Enforcement', # Kirk Blouin / John Page CC
|
|
1726
1752
|
'Sam Harris', # Lawrence Krauss CC
|
|
1727
1753
|
SAMUEL_LEFF, # Random CC
|
|
1728
1754
|
'Sean T Lehane', # Random CC
|
epstein_files/util/data.py
CHANGED
|
@@ -22,6 +22,7 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
|
|
|
22
22
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
23
23
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
24
24
|
|
|
25
|
+
# True when every element equals the first one; vacuously True for an empty sequence.
all_elements_same = lambda _list: all(element == _list[0] for element in _list)
|
|
25
26
|
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
26
27
|
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
27
28
|
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
|
@@ -29,7 +30,7 @@ escape_single_quotes = lambda text: text.replace("'", r"\'")
|
|
|
29
30
|
iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
|
|
30
31
|
days_between = lambda dt1, dt2: (dt2 - dt1).days + 1
|
|
31
32
|
# Human-readable inclusive day count, e.g. "1 day" / "3 days".
# Every count other than exactly 1 is pluralized, so a count of 0 reads "0 days".
days_between_str = lambda dt1, dt2: f"{days_between(dt1, dt2)} day" + ('' if days_between(dt1, dt2) == 1 else 's')
|
|
32
|
-
|
|
33
|
+
remove_zero_time = lambda dt: dt.isoformat().removesuffix('T00:00:00')
|
|
33
34
|
uniquify = lambda _list: list(set(_list))
|
|
34
35
|
# Copy of the input with every falsey element (None, '', 0, empty containers, ...) dropped; order is kept.
without_falsey = lambda _list: list(filter(None, _list))
|
|
35
36
|
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -8,7 +8,7 @@ from dateutil.parser import parse
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.util.constant.names import *
|
|
10
10
|
from epstein_files.util.constant.strings import *
|
|
11
|
-
from epstein_files.util.data import
|
|
11
|
+
from epstein_files.util.data import remove_zero_time, without_falsey
|
|
12
12
|
|
|
13
13
|
DuplicateType = Literal['earlier', 'quoted', 'redacted', 'same']
|
|
14
14
|
Metadata = dict[str, bool | datetime | int | str | list[str | None] |dict[str, bool | str]]
|
|
@@ -81,6 +81,7 @@ class DocCfg:
|
|
|
81
81
|
dupe_type: DuplicateType | None = None
|
|
82
82
|
duplicate_ids: list[str] = field(default_factory=list)
|
|
83
83
|
duplicate_of_id: str | None = None
|
|
84
|
+
is_attribution_uncertain: bool = False
|
|
84
85
|
is_interesting: bool | None = None
|
|
85
86
|
is_synthetic: bool = False
|
|
86
87
|
timestamp: datetime | None = None
|
|
@@ -162,7 +163,7 @@ class DocCfg:
|
|
|
162
163
|
elif _field.name == 'timestamp' and self.date is not None:
|
|
163
164
|
continue # Don't print both timestamp and date
|
|
164
165
|
elif isinstance(value, datetime):
|
|
165
|
-
value_str =
|
|
166
|
+
value_str = remove_zero_time(value)
|
|
166
167
|
add_prop(_field, f"parse('{value_str}')" if CONSTANTIZE_NAMES else f"'{value}'")
|
|
167
168
|
elif isinstance(value, str):
|
|
168
169
|
if "'" in value:
|
|
@@ -206,7 +207,6 @@ class CommunicationCfg(DocCfg):
|
|
|
206
207
|
is_attribution_uncertain (bool): True if we have a good idea of who the author is but are not 100% certain
|
|
207
208
|
"""
|
|
208
209
|
attribution_reason: str | None = None
|
|
209
|
-
is_attribution_uncertain: bool = False
|
|
210
210
|
|
|
211
211
|
def __repr__(self) -> str:
|
|
212
212
|
return super().__repr__()
|
epstein_files/util/env.py
CHANGED
|
@@ -38,7 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
|
|
|
38
38
|
output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
|
|
39
39
|
parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
|
|
40
40
|
output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
|
|
41
|
-
output.add_argument('--emailers-info-
|
|
41
|
+
# Boolean output flag: request a .png rendering of the emailers info table.
output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the emailers info table')
|
|
42
42
|
output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
|
|
43
43
|
output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
|
|
44
44
|
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
@@ -61,31 +61,34 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
|
|
|
61
61
|
debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
|
|
62
62
|
debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
|
|
63
63
|
debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
64
|
+
debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
|
|
65
|
+
debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plain text version of output')
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
# Parse args
|
|
67
69
|
args = parser.parse_args()
|
|
68
70
|
is_html_script = parser.prog in HTML_SCRIPTS
|
|
69
71
|
|
|
70
|
-
args.build = args.build or args.emailers_info_png
|
|
71
72
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
72
|
-
args.names = [None if n == 'None' else n for n in (args.names or [])]
|
|
73
|
+
args.names = [None if n == 'None' else n.strip() for n in (args.names or [])]
|
|
73
74
|
args.output_emails = args.output_emails or args.all_emails
|
|
74
75
|
args.output_other = args.output_other or args.all_other_files or args.uninteresting
|
|
75
76
|
args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
|
|
76
77
|
args.width = args.width if is_html_script else None
|
|
78
|
+
# True when at least one argument classified by is_output_arg() carries a truthy value.
# A generator lets any() stop at the first hit instead of building a throwaway list.
args.any_output_selected = any(is_output_arg(arg) and val for arg, val in vars(args).items())
|
|
79
|
+
|
|
80
|
+
if not (args.any_output_selected or args.email_timeline or args.emailers_info):
|
|
81
|
+
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
82
|
+
args.output_emails = args.output_other = args.output_texts = True
|
|
77
83
|
|
|
78
84
|
if is_html_script:
|
|
79
85
|
if args.positional_args:
|
|
80
86
|
# HTML-generating scripts take options only; abort and echo the stray positional arguments.
exit_with_error(f"{parser.prog} does not accept positional arguments (received {args.positional_args})")
|
|
81
87
|
|
|
82
88
|
if parser.prog == EPSTEIN_GENERATE:
|
|
83
|
-
if
|
|
89
|
+
if args.any_output_selected:
|
|
84
90
|
if args.email_timeline:
|
|
85
91
|
# --email-timeline cannot be combined with any other output selection.
exit_with_error("--email-timeline option is mutually exclusive with other output options")
|
|
86
|
-
elif not args.email_timeline:
|
|
87
|
-
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
88
|
-
args.output_texts = args.output_emails = args.output_other = True
|
|
89
92
|
|
|
90
93
|
if args.build == DEFAULT_FILE:
|
|
91
94
|
if args.all_emails:
|