epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +31 -18
- epstein_files/documents/communication.py +9 -5
- epstein_files/documents/document.py +225 -136
- epstein_files/documents/doj_file.py +242 -0
- epstein_files/documents/doj_files/full_text.py +166 -0
- epstein_files/documents/email.py +138 -163
- epstein_files/documents/emails/email_header.py +21 -11
- epstein_files/documents/emails/emailers.py +223 -0
- epstein_files/documents/imessage/text_message.py +2 -3
- epstein_files/documents/json_file.py +18 -14
- epstein_files/documents/messenger_log.py +23 -39
- epstein_files/documents/other_file.py +48 -44
- epstein_files/epstein_files.py +54 -33
- epstein_files/person.py +142 -110
- epstein_files/util/constant/names.py +29 -6
- epstein_files/util/constant/output_files.py +2 -0
- epstein_files/util/constant/strings.py +12 -6
- epstein_files/util/constant/urls.py +17 -0
- epstein_files/util/constants.py +101 -174
- epstein_files/util/data.py +2 -0
- epstein_files/util/doc_cfg.py +20 -15
- epstein_files/util/env.py +24 -16
- epstein_files/util/file_helper.py +28 -6
- epstein_files/util/helpers/debugging_helper.py +13 -0
- epstein_files/util/helpers/env_helpers.py +21 -0
- epstein_files/util/highlighted_group.py +57 -16
- epstein_files/util/layout/left_bar_panel.py +26 -0
- epstein_files/util/logging.py +28 -13
- epstein_files/util/output.py +33 -10
- epstein_files/util/rich.py +28 -2
- epstein_files/util/word_count.py +7 -7
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
- epstein_files-1.5.0.dist-info/RECORD +40 -0
- epstein_files-1.4.1.dist-info/RECORD +0 -34
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
- {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -177,6 +177,16 @@ ZUBAIR_KHAN = 'Zubair Khan'
|
|
|
177
177
|
|
|
178
178
|
UNKNOWN = '(unknown)'
|
|
179
179
|
|
|
180
|
+
# DOJ files emails
|
|
181
|
+
ALISON_J_NATHAN = 'Alison J. Nathan'
|
|
182
|
+
AMIR_TAAKI = 'Amir Taaki'
|
|
183
|
+
BROCK_PIERCE = 'Brock Pierce'
|
|
184
|
+
CHRISTIAN_EVERDELL = 'Christian Everdell'
|
|
185
|
+
CHRISTOPHER_DILORIO = 'Christopher Dilorio'
|
|
186
|
+
DOUGLAS_WIGDOR = 'Douglas Wigdor'
|
|
187
|
+
KARYNA_SHULIAK = 'Karyna Shuliak'
|
|
188
|
+
STACEY_RICHMAN = 'Stacey Richman'
|
|
189
|
+
|
|
180
190
|
# No communications but name is in the files
|
|
181
191
|
BILL_GATES = 'Bill Gates'
|
|
182
192
|
DONALD_TRUMP = 'Donald Trump'
|
|
@@ -216,10 +226,10 @@ UBS = 'UBS'
|
|
|
216
226
|
|
|
217
227
|
# First and last names that should be made part of a highlighting regex for emailers
|
|
218
228
|
NAMES_TO_NOT_HIGHLIGHT = """
|
|
219
|
-
al alain alan alfredo allen alex alexander amanda andres andrew anthony
|
|
229
|
+
al alain alan alison alfredo allen alex alexander amanda andres andrew anthony
|
|
220
230
|
bard barrett barry bennet bernard bill black bob boris brad brenner bruce
|
|
221
|
-
cameron caroline carolyn chris christina cohen
|
|
222
|
-
dan daniel danny darren dave david debbie donald
|
|
231
|
+
cameron caroline carolyn chris christian christina cohen
|
|
232
|
+
dan daniel danny darren dave david debbie donald douglas
|
|
223
233
|
ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
|
|
224
234
|
faith fisher forget fred friendly frost fuller
|
|
225
235
|
gates gerald george gold gordon
|
|
@@ -229,11 +239,11 @@ NAMES_TO_NOT_HIGHLIGHT = """
|
|
|
229
239
|
kafka kahn karl kate katherine kelly ken kevin krassner
|
|
230
240
|
larry larsen laurie lawrence leon lesley linda link lisa
|
|
231
241
|
mann marc marie mark martin matthew melanie michael mike miller mitchell miles morris moskowitz
|
|
232
|
-
nancy neal new nicole norman
|
|
242
|
+
nancy nathan neal new nicole norman
|
|
233
243
|
owen
|
|
234
|
-
paul paula pen peter philip prince
|
|
244
|
+
paul paula pen peter philip pierce prince
|
|
235
245
|
randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
|
|
236
|
-
scott sean skip smith stanley stern stephen steve steven stone susan
|
|
246
|
+
scott sean skip smith stacey stanley stern stephen steve steven stone susan
|
|
237
247
|
terry the thomas tim tom tony tyler
|
|
238
248
|
victor
|
|
239
249
|
wade waters
|
|
@@ -304,7 +314,20 @@ def extract_last_name(name: str) -> str:
|
|
|
304
314
|
return first_last_names[-1]
|
|
305
315
|
|
|
306
316
|
|
|
317
|
+
def reverse_first_and_last_names(name: str) -> str:
    """
    Convert a 'Lastname, Firstname' style name to 'Firstname Lastname'.

    Anything containing '@' is treated as an email address and returned lowercased
    (no reordering). Strings without a ', ' separator are returned unchanged. Only the
    first two comma separated pieces are used when building the reordered name.
    """
    # Email addresses are never reordered, just normalized to lowercase.
    if '@' in name:
        return name.lower()

    # Nothing to flip if there's no 'Lastname, Firstname' separator.
    if ', ' not in name:
        return name

    pieces = name.split(', ')
    last_name, first_name = pieces[0], pieces[1]
    return f"{first_name} {last_name}"
|
|
327
|
+
|
|
328
|
+
|
|
307
329
|
def reversed_name(name: str) -> str:
|
|
330
|
+
"""'Jeffrey Epstein' becomes 'Epstein Jeffrey'."""
|
|
308
331
|
if ' ' not in name:
|
|
309
332
|
return name
|
|
310
333
|
|
|
@@ -13,6 +13,7 @@ JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.
|
|
|
13
13
|
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
14
14
|
WORD_COUNT_HTML_PATH = HTML_DIR.joinpath(f'communication_word_count_{EPSTEIN_FILES_NOV_2025}.html')
|
|
15
15
|
# EPSTEIN_WORD_COUNT_HTML_PATH = HTML_DIR.joinpath('epstein_texts_and_emails_word_count.html')
|
|
16
|
+
DOJ_2026_HTML_PATH = HTML_DIR.joinpath('doj_2026-01-30_files.html')
|
|
16
17
|
URLS_ENV = '.urls.env'
|
|
17
18
|
EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
|
|
18
19
|
|
|
@@ -26,6 +27,7 @@ CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
|
|
|
26
27
|
JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
|
|
27
28
|
JSON_METADATA_URL = f"{TEXT_MSGS_URL}/{JSON_METADATA_PATH.name}"
|
|
28
29
|
WORD_COUNT_URL = f"{TEXT_MSGS_URL}/{WORD_COUNT_HTML_PATH.name}"
|
|
30
|
+
DOJ_2026_URL = f"{TEXT_MSGS_URL}/{DOJ_2026_HTML_PATH.name}"
|
|
29
31
|
|
|
30
32
|
SITE_URLS: dict[SiteType, str] = {
|
|
31
33
|
EMAIL: ALL_EMAILS_URL,
|
|
@@ -57,25 +57,31 @@ TIMESTAMP_DIM = f"turquoise4 dim"
|
|
|
57
57
|
# Misc
|
|
58
58
|
AUTHOR = 'author'
|
|
59
59
|
DEFAULT = 'default'
|
|
60
|
+
EFTA_PREFIX = 'EFTA'
|
|
60
61
|
HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
|
|
61
62
|
JSON = 'json'
|
|
62
63
|
NA = 'n/a'
|
|
63
64
|
REDACTED = '<REDACTED>'
|
|
64
65
|
QUESTION_MARKS = '(???)'
|
|
65
66
|
|
|
66
|
-
# Regexes
|
|
67
|
-
ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
|
|
68
|
-
FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
|
|
69
|
-
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
70
|
-
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
71
|
-
|
|
72
67
|
# Document subclass names (this sucks)
|
|
73
68
|
DOCUMENT_CLASS = 'Document'
|
|
69
|
+
DOJ_FILE_CLASS = 'DojFile'
|
|
74
70
|
EMAIL_CLASS = 'Email'
|
|
75
71
|
JSON_FILE_CLASS = 'JsonFile'
|
|
76
72
|
MESSENGER_LOG_CLASS = 'MessengerLog'
|
|
77
73
|
OTHER_FILE_CLASS = 'OtherFile'
|
|
78
74
|
|
|
75
|
+
# Regexes
|
|
76
|
+
DOJ_FILE_STEM_REGEX = re.compile(fr"{EFTA_PREFIX}\d{{8}}")
|
|
77
|
+
DOJ_FILE_NAME_REGEX = re.compile(fr"{DOJ_FILE_STEM_REGEX.pattern}(\.txt)?")
|
|
78
|
+
|
|
79
|
+
HOUSE_OVERSIGHT_NOV_2025_ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
|
|
80
|
+
HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({HOUSE_OVERSIGHT_NOV_2025_ID_REGEX.pattern})")
|
|
81
|
+
HOUSE_OVERSIGHT_NOV_2025_FILE_NAME_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_NOV_2025_FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
82
|
+
|
|
83
|
+
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
84
|
+
|
|
79
85
|
|
|
80
86
|
remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name).strip()
|
|
81
87
|
|
|
@@ -12,6 +12,7 @@ from epstein_files.util.file_helper import coerce_file_stem
|
|
|
12
12
|
|
|
13
13
|
# Style stuff
|
|
14
14
|
ARCHIVE_LINK_COLOR = 'slate_blue3'
|
|
15
|
+
ARCHIVE_ALT_LINK_STYLE = 'medium_purple4 italic'
|
|
15
16
|
TEXT_LINK = 'text_link'
|
|
16
17
|
|
|
17
18
|
# External site names
|
|
@@ -39,6 +40,9 @@ EPSTEIN_DOCS_URL = 'https://epstein-docs.github.io'
|
|
|
39
40
|
OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
|
|
40
41
|
RAW_OVERSIGHT_DOCS_GOOGLE_DRIVE_URL = 'https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_'
|
|
41
42
|
SUBSTACK_URL = 'https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great'
|
|
43
|
+
# DOJ docs
|
|
44
|
+
DOJ_2026_URL = 'https://www.justice.gov/epstein/doj-disclosures'
|
|
45
|
+
DOJ_SEARCH_URL = 'https://www.justice.gov/epstein/search'
|
|
42
46
|
|
|
43
47
|
# Document source sites
|
|
44
48
|
EPSTEINIFY_URL = 'https://epsteinify.com'
|
|
@@ -53,6 +57,9 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
|
|
|
53
57
|
ROLLCALL: f'https://rollcall.com/factbase/epstein/file?id=',
|
|
54
58
|
}
|
|
55
59
|
|
|
60
|
+
# Example: https://www.justice.gov/epstein/files/DataSet%208/EFTA00009802.pdf
|
|
61
|
+
DOJ_2026_FILE_BASE_URL = "https://www.justice.gov/epstein/files/DataSet%20"
|
|
62
|
+
|
|
56
63
|
|
|
57
64
|
epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
|
|
58
65
|
epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
|
|
@@ -90,6 +97,16 @@ def build_doc_url(base_url: str, filename_or_id: int | str, case: Literal['lower
|
|
|
90
97
|
return f"{base_url}{file_stem}"
|
|
91
98
|
|
|
92
99
|
|
|
100
|
+
def doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
    """
    Build the justice.gov link to the PDF of a DOJ file.

    The URL is DOJ_2026_FILE_BASE_URL followed by the data set number, a slash,
    and the file stem with a '.pdf' extension.
    """
    pdf_filename = f"{file_stem}.pdf"
    return f"{DOJ_2026_FILE_BASE_URL}{dataset_id}/{pdf_filename}"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def jmail_doj_2026_file_url(dataset_id: int, file_stem: str) -> str:
    """
    Build the link to the Jmail backup copy of a DOJ file.

    The path is '/drive/vol<dataset_id zero padded to 5 digits>-<lowercased file stem>-pdf'
    appended to JMAIL_URL.
    """
    volume = f"vol{dataset_id:05}"
    slug = f"{volume}-{file_stem.lower()}-pdf"
    return f"{JMAIL_URL}/drive/{slug}"
|
|
108
|
+
|
|
109
|
+
|
|
93
110
|
def external_doc_link_markup(site: ExternalSite, filename_or_id: int | str, style: str = TEXT_LINK) -> str:
    """
    Return link markup pointing at a document on one of the external sites.

    The URL is built from the site's entry in DOC_LINK_BASE_URLS and the link text
    is the coerced file stem of 'filename_or_id'.
    """
    return link_markup(
        build_doc_url(DOC_LINK_BASE_URLS[site], filename_or_id),
        coerce_file_stem(filename_or_id),
        style,
    )
|
epstein_files/util/constants.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import cast
|
|
|
4
4
|
|
|
5
5
|
from dateutil.parser import parse
|
|
6
6
|
|
|
7
|
+
from epstein_files.documents.doj_files.full_text import EFTA00009622_TEXT
|
|
7
8
|
from epstein_files.util.constant.names import *
|
|
8
9
|
from epstein_files.util.constant.strings import *
|
|
9
10
|
from epstein_files.util.doc_cfg import DocCfg, EmailCfg, TextCfg
|
|
@@ -56,172 +57,6 @@ HEADER_ABBREVIATIONS = {
|
|
|
56
57
|
# Emailers Config Stuff #
|
|
57
58
|
#########################
|
|
58
59
|
|
|
59
|
-
# Emailers
|
|
60
|
-
EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
61
|
-
ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
|
|
62
|
-
ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
|
|
63
|
-
AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
|
|
64
|
-
ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
|
|
65
|
-
ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
|
|
66
|
-
ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
|
|
67
|
-
ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
|
|
68
|
-
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Rothsh?ch?ild|Ariane(?!\s+Dwyer)', re.IGNORECASE),
|
|
69
|
-
BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
|
|
70
|
-
BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
|
|
71
|
-
BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
|
|
72
|
-
BOB_CROWE: re.compile(r"[BR]ob Crowe", re.IGNORECASE),
|
|
73
|
-
BORIS_NIKOLIC: re.compile(r'(boris )?nikolic?', re.IGNORECASE),
|
|
74
|
-
BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
|
|
75
|
-
BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
|
|
76
|
-
DANGENE_AND_JENNIE_ENTERPRISE: re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
|
|
77
|
-
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
|
|
78
|
-
DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
|
|
79
|
-
DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
|
|
80
|
-
DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
|
|
81
|
-
DAVID_STERN: re.compile(r'David Stern?', re.IGNORECASE),
|
|
82
|
-
EDUARDO_ROBLES: re.compile(r'Ed(uardo)?\s*Robles', re.IGNORECASE),
|
|
83
|
-
EDWARD_JAY_EPSTEIN: re.compile(r'(?<!Jeffrey )Edward (Jay )?Epstein', re.IGNORECASE),
|
|
84
|
-
EHUD_BARAK: re.compile(r'(ehud|e?h)\s*barak|\behud', re.IGNORECASE),
|
|
85
|
-
FAITH_KATES: re.compile(r'faith kates?', re.IGNORECASE),
|
|
86
|
-
GERALD_BARTON: re.compile(r'Gerald.*Barton', re.IGNORECASE),
|
|
87
|
-
GERALD_LEFCOURT: re.compile(r'Gerald\s*(B\.?\s*)?Lefcourt', re.IGNORECASE),
|
|
88
|
-
GHISLAINE_MAXWELL: re.compile(r'g ?max(well)?|Ghislaine|Maxwell', re.IGNORECASE),
|
|
89
|
-
HEATHER_MANN: re.compile(r'Heather Mann?', re.IGNORECASE),
|
|
90
|
-
INTELLIGENCE_SQUARED: re.compile(r'intelligence\s*squared', re.IGNORECASE),
|
|
91
|
-
JACKIE_PERCZEK: re.compile(r'jackie percze[kl]?', re.IGNORECASE),
|
|
92
|
-
JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
|
|
93
|
-
JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
|
|
94
|
-
JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
|
|
95
|
-
JEAN_HUGUEN: re.compile(r"Jean[\s.]Huguen", re.IGNORECASE),
|
|
96
|
-
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
|
|
97
|
-
JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
|
|
98
|
-
JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeff(rey)? (Edward )?E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
|
|
99
|
-
JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
|
|
100
|
-
JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
|
|
101
|
-
JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
|
|
102
|
-
JONATHAN_FARKAS: re.compile(r'Jonathan Fark(a|u)(s|il)', re.IGNORECASE),
|
|
103
|
-
KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
|
|
104
|
-
KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
|
|
105
|
-
LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
|
|
106
|
-
LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|[Il]hsofficel?', re.IGNORECASE),
|
|
107
|
-
LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
|
|
108
|
-
LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
|
|
109
|
-
LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
|
|
110
|
-
LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
|
|
111
|
-
LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
|
|
112
|
-
MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
|
|
113
|
-
MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
|
|
114
|
-
MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
|
|
115
|
-
MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
|
|
116
|
-
MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
|
|
117
|
-
MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
|
|
118
|
-
"Matthew Schafer": re.compile(r"matthew\.?schafer?", re.IGNORECASE),
|
|
119
|
-
MELANIE_SPINELLA: re.compile(r'M?elanie Spine[Il]{2}a', re.IGNORECASE),
|
|
120
|
-
MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
|
|
121
|
-
MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
|
|
122
|
-
MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
|
|
123
|
-
MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
|
|
124
|
-
MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
|
|
125
|
-
MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
|
|
126
|
-
NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
|
|
127
|
-
NEAL_KASSELL: re.compile(r'Neal\s*Kassell?', re.IGNORECASE),
|
|
128
|
-
NICHOLAS_RIBIS: re.compile(r'Nic(holas|k)[\s._]Ribi?s?|Ribbis', re.IGNORECASE),
|
|
129
|
-
OLIVIER_COLOM: re.compile(fr'Colom, Olivier|{OLIVIER_COLOM}', re.IGNORECASE),
|
|
130
|
-
PAUL_BARRETT: re.compile(r'Paul Barre(d|tt)', re.IGNORECASE),
|
|
131
|
-
PAUL_KRASSNER: re.compile(r'Pa\s?ul Krassner', re.IGNORECASE),
|
|
132
|
-
PAUL_MORRIS: re.compile(r'morris, paul|Paul Morris', re.IGNORECASE),
|
|
133
|
-
PAULA: re.compile(r'^Paula( Heil Fisher)?$', re.IGNORECASE),
|
|
134
|
-
PEGGY_SIEGAL: re.compile(r'Peggy Siegal?', re.IGNORECASE),
|
|
135
|
-
PETER_ATTIA: re.compile(r'Peter Attia?', re.IGNORECASE),
|
|
136
|
-
PETER_MANDELSON: re.compile(r"((Lord|Peter) )?Mandelson", re.IGNORECASE),
|
|
137
|
-
'pink@mc2mm.com': re.compile(r"^Pink$|pink@mc2mm\.com", re.IGNORECASE),
|
|
138
|
-
PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
|
|
139
|
-
REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
|
|
140
|
-
RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
|
|
141
|
-
ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
|
|
142
|
-
ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
|
|
143
|
-
ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
|
|
144
|
-
ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
|
|
145
|
-
SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
|
|
146
|
-
SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
|
|
147
|
-
SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
|
|
148
|
-
SHAHER_ABDULHAK_BESHER: re.compile(r'\bShaher( Abdulhak Besher)?\b', re.IGNORECASE),
|
|
149
|
-
SOON_YI_PREVIN: re.compile(r'Soon[- ]Yi Previn?', re.IGNORECASE),
|
|
150
|
-
STEPHEN_HANSON: re.compile(r'ste(phen|ve) hanson?|Shanson900', re.IGNORECASE),
|
|
151
|
-
STEVE_BANNON: re.compile(r'steve banno[nr]?', re.IGNORECASE),
|
|
152
|
-
STEVEN_SINOFSKY: re.compile(r'Steven Sinofsky?', re.IGNORECASE),
|
|
153
|
-
SULTAN_BIN_SULAYEM: re.compile(r'Sultan (Ahmed )?bin Sulaye?m?', re.IGNORECASE),
|
|
154
|
-
TERJE_ROD_LARSEN: re.compile(r"Terje(( (R[øo]e?d[- ])?)?Lars[eo]n)?", re.IGNORECASE),
|
|
155
|
-
TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
|
|
156
|
-
THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
|
|
157
|
-
THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
|
|
158
|
-
TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
|
|
159
|
-
VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
# If found as substring consider them the author
|
|
163
|
-
EMAILERS = [
|
|
164
|
-
'Anne Boyles',
|
|
165
|
-
AL_SECKEL,
|
|
166
|
-
'Ariane Dwyer',
|
|
167
|
-
AZIZA_ALAHMADI,
|
|
168
|
-
BILL_GATES,
|
|
169
|
-
BILL_SIEGEL,
|
|
170
|
-
BRAD_WECHSLER,
|
|
171
|
-
CHRISTINA_GALBRAITH,
|
|
172
|
-
DANIEL_SABBA,
|
|
173
|
-
'Danny Goldberg',
|
|
174
|
-
DAVID_SCHOEN,
|
|
175
|
-
DEBBIE_FEIN,
|
|
176
|
-
DEEPAK_CHOPRA,
|
|
177
|
-
GLENN_DUBIN,
|
|
178
|
-
GORDON_GETTY,
|
|
179
|
-
'Kevin Bright',
|
|
180
|
-
'Jack Lang',
|
|
181
|
-
JACK_SCAROLA,
|
|
182
|
-
JAY_LEFKOWITZ,
|
|
183
|
-
JES_STALEY,
|
|
184
|
-
JOHN_PAGE,
|
|
185
|
-
'Jokeland',
|
|
186
|
-
JOSCHA_BACH,
|
|
187
|
-
'Kathleen Ruderman',
|
|
188
|
-
KENNETH_E_MAPP,
|
|
189
|
-
'Larry Cohen',
|
|
190
|
-
LESLEY_GROFF,
|
|
191
|
-
'lorraine@mc2mm.com',
|
|
192
|
-
LINDA_STONE,
|
|
193
|
-
'Lyn Fontanilla',
|
|
194
|
-
MARK_TRAMO,
|
|
195
|
-
MELANIE_WALKER,
|
|
196
|
-
MERWIN_DELA_CRUZ,
|
|
197
|
-
'Michael Simmons', # Not the only "To:"
|
|
198
|
-
'middle.east.update@hotmail.com',
|
|
199
|
-
'Nancy Cain',
|
|
200
|
-
'Nancy Dahl',
|
|
201
|
-
'Nancy Portland',
|
|
202
|
-
'Oliver Goodenough',
|
|
203
|
-
'Peter Aldhous',
|
|
204
|
-
'Peter Green',
|
|
205
|
-
ROGER_SCHANK,
|
|
206
|
-
'Roy Black',
|
|
207
|
-
STEVEN_PFEIFFER,
|
|
208
|
-
'Steven Victor MD',
|
|
209
|
-
'Susan Edelman',
|
|
210
|
-
TOM_BARRACK,
|
|
211
|
-
'Vahe Stepanian',
|
|
212
|
-
'Vladimir Yudashkin',
|
|
213
|
-
]
|
|
214
|
-
|
|
215
|
-
EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES) # Keep a copy without the simple EMAILERS regexes
|
|
216
|
-
|
|
217
|
-
# Add simple matching regexes for EMAILERS entries to EMAILER_REGEXES
|
|
218
|
-
for emailer in EMAILERS:
|
|
219
|
-
if emailer in EMAILER_REGEXES:
|
|
220
|
-
raise RuntimeError(f"Can't overwrite emailer regex for '{emailer}'")
|
|
221
|
-
|
|
222
|
-
EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
|
|
223
|
-
|
|
224
|
-
|
|
225
60
|
# Attribution reasons
|
|
226
61
|
BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
|
|
227
62
|
KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
|
|
@@ -459,7 +294,6 @@ EMAILS_CONFIG = [
|
|
|
459
294
|
EmailCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
|
|
460
295
|
EmailCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
|
|
461
296
|
EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason='quoted reply has edwardjayepstein.com', is_fwded_article=True),
|
|
462
|
-
EmailCfg(id='031607', is_fwded_article=True, comment='Epstein reply to Edward Jay Epstein'),
|
|
463
297
|
EmailCfg(
|
|
464
298
|
id='030475',
|
|
465
299
|
author=FAITH_KATES,
|
|
@@ -553,7 +387,12 @@ EMAILS_CONFIG = [
|
|
|
553
387
|
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
|
|
554
388
|
EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
|
|
555
389
|
EmailCfg(id='032563', author=MASHA_DROKOVA, attribution_reason='replied to in 033014'),
|
|
556
|
-
EmailCfg(
|
|
390
|
+
EmailCfg(
|
|
391
|
+
id='032564',
|
|
392
|
+
attribution_reason='follow up to 032563 about huffpo article with link',
|
|
393
|
+
author=MASHA_DROKOVA,
|
|
394
|
+
description='an archived version of the HuffPost link is here: https://archive.is/hJxT3 '
|
|
395
|
+
),
|
|
557
396
|
EmailCfg(id='031544', author=MASHA_DROKOVA, attribution_reason='follow up to 032563 about huffpo article with link'),
|
|
558
397
|
EmailCfg(id='032605', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
559
398
|
EmailCfg(id='032606', author=MASHA_DROKOVA, attribution_reason="re: PR interview, 031544 says she'll be in NY at that time"),
|
|
@@ -657,7 +496,7 @@ EMAILS_CONFIG = [
|
|
|
657
496
|
EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
|
|
658
497
|
EmailCfg(id='032876', recipients=[CECILIA_STEEN], attribution_reason='unredacted in 032267'),
|
|
659
498
|
EmailCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
|
|
660
|
-
EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason='quoted reply has edwardjayepstein.com'),
|
|
499
|
+
EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], is_fwded_article=True, attribution_reason='quoted reply has edwardjayepstein.com'),
|
|
661
500
|
EmailCfg(
|
|
662
501
|
id='030525',
|
|
663
502
|
recipients=[FAITH_KATES],
|
|
@@ -1091,6 +930,40 @@ EMAILS_CONFIG = [
|
|
|
1091
930
|
EmailCfg(id='027028', truncate_to=1000, comment='Tom Pritzer penny pritzker'),
|
|
1092
931
|
EmailCfg(id='029910', truncate_to=NO_TRUNCATE, comment='Tom Pritzer Aspen'),
|
|
1093
932
|
EmailCfg(id='025163', truncate_to=NO_TRUNCATE, comment='Tom Pritzer'),
|
|
933
|
+
|
|
934
|
+
# DOJ files
|
|
935
|
+
EmailCfg(id='EFTA00935996', recipients=[RENATA_BOLOTOVA], attribution_reason='"sneaky dog"'),
|
|
936
|
+
EmailCfg(id='EFTA02731737', date='2023-06-30T16:05:00'),
|
|
937
|
+
EmailCfg(id='EFTA02731689', author=UNKNOWN, recipients=[None], date='2023-06-09 20:14:00'),
|
|
938
|
+
EmailCfg(id='EFTA02731475', date='2023-05-31T20:53:00'),
|
|
939
|
+
EmailCfg(id='EFTA02731732', date='2024-03-06T12:21:00'),
|
|
940
|
+
EmailCfg(id='EFTA02731485', date='2023-06-12T13:53:00'),
|
|
941
|
+
EmailCfg(id='EFTA02731617', date='2021-04-28T15:05:41'),
|
|
942
|
+
EmailCfg(id='EFTA02730483', date='2023-07-11T08:25:00'), # TODO: actually reply timewtamp
|
|
943
|
+
EmailCfg(id='EFTA02730481', date='2023-07-07T11:01:00'), # TODO: actually reply timewtamp
|
|
944
|
+
EmailCfg(id='EFTA02731754', date='2024-03-06T23:24:00'), # TODO: actually reply timewtamp
|
|
945
|
+
EmailCfg(id='EFTA02731735', date='2024-03-04T05:04:00'), # TODO: actually reply timewtamp
|
|
946
|
+
EmailCfg(id='EFTA02731577', date='2024-10-16T00:00:00'), # TODO: actually reply timewtamp
|
|
947
|
+
EmailCfg(id='EFTA02730468', date='2019-07-11T08:25:00'), # TODO: This is just wrong
|
|
948
|
+
# Generated based on OtherFile extract_timestamp()
|
|
949
|
+
EmailCfg(id='EFTA02731783', date='2022-01-21 17:28:00'),
|
|
950
|
+
EmailCfg(id='EFTA02731587', date='2022-01-21 17:28:00'),
|
|
951
|
+
EmailCfg(id='EFTA02731729', date='2021-08-17 00:00:00'),
|
|
952
|
+
EmailCfg(id='EFTA02731578', date='2021-05-28 10:00:00'),
|
|
953
|
+
EmailCfg(id='EFTA02730473', date='2013-04-24 16:32:00'),
|
|
954
|
+
EmailCfg(id='EFTA02731699', date='2021-05-27 10:19:00'),
|
|
955
|
+
EmailCfg(id='EFTA02731583', date='2022-01-21 17:28:00'),
|
|
956
|
+
EmailCfg(id='EFTA02731552', date='2021-05-26 16:12:00'),
|
|
957
|
+
EmailCfg(id='EFTA00039888', date='2019-05-14 16:49:00'),
|
|
958
|
+
EmailCfg(id='EFTA02731684', date='2021-05-11 15:27:00'),
|
|
959
|
+
EmailCfg(id='EFTA02731697', date='2021-06-07 17:33:00'),
|
|
960
|
+
EmailCfg(id='EFTA02731733', date='2021-05-17 17:29:00'),
|
|
961
|
+
EmailCfg(id='EFTA00040145', date='2021-11-09 17:24:30'),
|
|
962
|
+
# EmailCfg(id='EFTA02730468', date='2004-02-03 00:00:00'), # TODO: ???
|
|
963
|
+
EmailCfg(id='EFTA02731528', date='2021-05-06 09:39:15'),
|
|
964
|
+
EmailCfg(id='EFTA02730485', date='2021-12-03 00:00:00'),
|
|
965
|
+
EmailCfg(id='EFTA00039689', truncate_to=NO_TRUNCATE),
|
|
966
|
+
EmailCfg(id='EFTA00995559', author=RENATA_BOLOTOVA, attribution_reason='poorly redacted signature'),
|
|
1094
967
|
]
|
|
1095
968
|
|
|
1096
969
|
if args.constantize:
|
|
@@ -1163,7 +1036,7 @@ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
|
|
|
1163
1036
|
OTHER_FILES_BOOKS = [
|
|
1164
1037
|
DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
|
|
1165
1038
|
DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
|
|
1166
|
-
DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy'),
|
|
1039
|
+
DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy', date='2013-09-19'),
|
|
1167
1040
|
DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
|
|
1168
1041
|
DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
|
|
1169
1042
|
DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
|
|
@@ -1475,6 +1348,10 @@ OTHER_FILES_LEGAL = [
|
|
|
1475
1348
|
DocCfg(id='028540', author='SCOTUS', description=f"decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
|
|
1476
1349
|
DocCfg(id='012197', author='SDFL', description=f"response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
|
|
1477
1350
|
DocCfg(id='022277', description=f"text of National Labour Relations Board (NLRB) law", is_interesting=False),
|
|
1351
|
+
|
|
1352
|
+
# DOJ files
|
|
1353
|
+
DocCfg(id='EFTA00007157', description='victim list and police log'),
|
|
1354
|
+
DocCfg(id='EFTA02730274', description='evidence inventory that appears to have since been deleted from the DOJ website'),
|
|
1478
1355
|
]
|
|
1479
1356
|
|
|
1480
1357
|
OTHER_FILES_CONFERENCES = [
|
|
@@ -1585,7 +1462,12 @@ OTHER_FILES_FINANCE = [
|
|
|
1585
1462
|
DocCfg(id='024132', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-03-15'),
|
|
1586
1463
|
DocCfg(id='024194', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-10-22'),
|
|
1587
1464
|
DocCfg(id='025296', author='Laffer Associates', description=f'report predicting Trump win', date='2016-07-06'),
|
|
1588
|
-
DocCfg(
|
|
1465
|
+
DocCfg(
|
|
1466
|
+
id='020824',
|
|
1467
|
+
author='Mary Meeker',
|
|
1468
|
+
date='2011-02-01',
|
|
1469
|
+
description=f"USA Inc: A Basic Summary of America's Financial Statements compiled",
|
|
1470
|
+
),
|
|
1589
1471
|
DocCfg(id='025551', author='Morgan Stanley', description=f'report about alternative asset managers', date='2018-01-30'),
|
|
1590
1472
|
DocCfg(id='019856', author='Sadis Goldberg LLP', description=f"report on SCOTUS ruling about insider trading", is_interesting=True),
|
|
1591
1473
|
DocCfg(id='025763', author='S&P', description=f"Economic Research: How Increasing Income Inequality Is Dampening U.S. Growth", date='2014-08-05'),
|
|
@@ -1594,12 +1476,20 @@ OTHER_FILES_FINANCE = [
|
|
|
1594
1476
|
DocCfg(id='026584', description=f"article about tax implications of disregarded entities", date='2009-07-01', is_interesting=True),
|
|
1595
1477
|
DocCfg(
|
|
1596
1478
|
id='024271',
|
|
1597
|
-
description=f"Blockchain Capital and Brock Pierce pitch deck",
|
|
1598
1479
|
date='2015-10-01',
|
|
1480
|
+
description=f"Blockchain Capital and Brock Pierce pitch deck",
|
|
1599
1481
|
is_interesting=True,
|
|
1600
1482
|
),
|
|
1601
|
-
DocCfg(
|
|
1602
|
-
|
|
1483
|
+
DocCfg(
|
|
1484
|
+
id='024817',
|
|
1485
|
+
date='2019-02-25',
|
|
1486
|
+
description=f"Cowen's Collective View of CBD / Cannabis report",
|
|
1487
|
+
is_interesting=True
|
|
1488
|
+
),
|
|
1489
|
+
DocCfg(
|
|
1490
|
+
id='012048',
|
|
1491
|
+
description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"
|
|
1492
|
+
),
|
|
1603
1493
|
|
|
1604
1494
|
# private placement memoranda
|
|
1605
1495
|
DocCfg(
|
|
@@ -1668,6 +1558,11 @@ OTHER_FILES_PROPERTY = [
|
|
|
1668
1558
|
description=f"{VIRGIN_ISLANDS} property deal pitch deck, building will be leased to the U.S. govt GSA",
|
|
1669
1559
|
date='2014-06-01',
|
|
1670
1560
|
),
|
|
1561
|
+
|
|
1562
|
+
# DOJ files
|
|
1563
|
+
DocCfg(id='EFTA00001884', date='2019-03-14', description='photo of letter from Virgin Islands DOJ to Epstein'),
|
|
1564
|
+
DocCfg(id='EFTA00005783', date='2019-08-29', description='heavily redacted handwritten note and 30+ completely blacked out redacted pages'),
|
|
1565
|
+
|
|
1671
1566
|
]
|
|
1672
1567
|
|
|
1673
1568
|
OTHER_FILES_REPUTATION = [
|
|
@@ -1881,6 +1776,32 @@ OTHER_FILES_MISC = [
|
|
|
1881
1776
|
DocCfg(id='033434', description=f"{SCREENSHOT} iPhone chat labeled 'Edwards' at the top"),
|
|
1882
1777
|
DocCfg(id='029475', description=f'{VIRGIN_ISLANDS} Twin City Mobile Integrated Health Services (TCMIH) proposal/request for donation'),
|
|
1883
1778
|
DocCfg(id='029448', description=f"weird short essay titled 'President Obama and Self-Deception'"),
|
|
1779
|
+
|
|
1780
|
+
# DOJ files
|
|
1781
|
+
|
|
1782
|
+
DocCfg(id='EFTA00007781', description='paychecks signed by Epstein deposited at Colonial Bank'),
|
|
1783
|
+
DocCfg(id='EFTA00009622', description='handwritten note transcribed by Claude AI', date='2006-07-19', replace_text_with=EFTA00009622_TEXT),
|
|
1784
|
+
DocCfg(id='EFTA00039295', replace_text_with='Bureau of Prisons inmate telephone privileges Program Statement'),
|
|
1785
|
+
DocCfg(
|
|
1786
|
+
id='EFTA00004477',
|
|
1787
|
+
replace_text_with='Epstein 50th birthday photo book 12 "THAIS, MOSCOW GIRLS, AFRICA, HAWAII, [REDACTED] [REDACTED], Zorro, [REDACTED] [REDACTED] [REDACTED], CRACK WHOLE PROPOSAL, BALI/THAILAND/ASIA, RUSSIA, [REDACTED], [REDACTED], NUDES, YOGAL GIRLS',
|
|
1788
|
+
),
|
|
1789
|
+
DocCfg(id='EFTA00008120', replace_text_with='"Part II: The Art of Receiving a Massage"'),
|
|
1790
|
+
DocCfg(id='EFTA00008020', replace_text_with='"Massage for Dummies"'),
|
|
1791
|
+
DocCfg(id='EFTA00008220', replace_text_with='"Massage book: Chapter 11: Putting the Moves Together"'),
|
|
1792
|
+
DocCfg(id='EFTA00008320', replace_text_with='"Massage for Dummies (???)"'),
|
|
1793
|
+
DocCfg(id='EFTA00000476', replace_text_with='photo of JEFFREY EPSTEIN CASH DISBURSEMENTS for the month 2006-09'),
|
|
1794
|
+
DocCfg(id='EFTA00039312', replace_text_with='Bureau of Prisons Program Statement / Memo about BOP Pharmacy Program'),
|
|
1795
|
+
# Phone bills. TODO: some kind of special handling?
|
|
1796
|
+
DocCfg(id='EFTA00006387', replace_text_with='T-Mobile phone bill covering 2006-06-15 to 2006-07-23'),
|
|
1797
|
+
DocCfg(id='EFTA00007501', replace_text_with='T-Mobile phone bill from 2005'),
|
|
1798
|
+
DocCfg(id='EFTA00006587', replace_text_with='T-Mobile phone bill from 2006-09-04 to 2006-10-15'),
|
|
1799
|
+
DocCfg(id='EFTA00006687', replace_text_with='T-Mobile phone bill from 2006-10-31 to 2006-12-25'),
|
|
1800
|
+
DocCfg(id='EFTA00007401', replace_text_with='T-Mobile phone bill from 2004-08-25 to 2005-07-13'),
|
|
1801
|
+
DocCfg(id='EFTA00007301', replace_text_with='T-Mobile response to subpoena March 23, 2007 - Blackberry phone logs for 2005'),
|
|
1802
|
+
DocCfg(id='EFTA00006487', replace_text_with='T-Mobile phone bill 2006-08-26'),
|
|
1803
|
+
DocCfg(id='EFTA00006100', replace_text_with='Palm Beach Police fax machine activity log 2005-12-28 to 2006-01-04'),
|
|
1804
|
+
DocCfg(id='EFTA00007253', replace_text_with='T-Mobile response to subpoena March 23, 2007 - phone bill '),
|
|
1884
1805
|
]
|
|
1885
1806
|
|
|
1886
1807
|
OTHER_FILES_JUNK = [
|
|
@@ -1894,6 +1815,12 @@ OTHER_FILES_JUNK = [
|
|
|
1894
1815
|
DocCfg(id='029351', description=OBAMA_JOKE, date='2013-07-26'),
|
|
1895
1816
|
DocCfg(id='029354', description=OBAMA_JOKE, date='2013-07-26'),
|
|
1896
1817
|
DocCfg(id='031293'),
|
|
1818
|
+
|
|
1819
|
+
# Completely redacted DOJ emails, no timestamp at all
|
|
1820
|
+
DocCfg(id='EFTA02731726'),
|
|
1821
|
+
DocCfg(id='EFTA02731728'),
|
|
1822
|
+
# Almost no timestamp
|
|
1823
|
+
DocCfg(id='EFTA00003154'),
|
|
1897
1824
|
]
|
|
1898
1825
|
|
|
1899
1826
|
OTHER_FILES_CATEGORIES = [
|
epstein_files/util/data.py
CHANGED
|
@@ -19,6 +19,8 @@ MULTINEWLINE_REGEX = re.compile(r"\n{2,}")
|
|
|
19
19
|
CONSTANT_VAR_REGEX = re.compile(r"^[A-Z_]+$")
|
|
20
20
|
ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTANT_VAR_REGEX.match(k)]
|
|
21
21
|
|
|
22
|
+
# strptime/strftime format: month/day/2-digit year, then 12-hour clock H:M:S and an AM/PM marker.
AMERICAN_DATE_FORMAT = r"%m/%d/%y %I:%M:%S %p"
|
|
23
|
+
# Captures (as group 1) an M/D/Y date followed by H:MM, with optional :SS and optional AM/PM.
# NOTE(review): this accepts 4-digit years and timestamps lacking seconds or AM/PM, none of which
# AMERICAN_DATE_FORMAT can parse as written - confirm callers handle those matches separately.
AMERICAN_TIME_REGEX = re.compile(r"(\d{1,2}/\d{1,2}/\d{2,4}\s+\d{1,2}:\d{2}(?::\d{2})?\s*(?:AM|PM)?)")
|
|
22
24
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
23
25
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
24
26
|
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -74,6 +74,7 @@ class DocCfg:
|
|
|
74
74
|
duplicate_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
|
|
75
75
|
is_interesting (bool | None): Override other considerations and always consider this file interesting (or not)
|
|
76
76
|
is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
|
|
77
|
+
replace_text_with (str): If non-empty, this text replaces the body of the document when printing.
|
|
77
78
|
"""
|
|
78
79
|
id: str
|
|
79
80
|
attached_to_email_id: str | None = None
|
|
@@ -88,11 +89,9 @@ class DocCfg:
|
|
|
88
89
|
is_attribution_uncertain: bool = False
|
|
89
90
|
is_interesting: bool | None = None
|
|
90
91
|
is_synthetic: bool = False
|
|
92
|
+
replace_text_with: str = ''
|
|
91
93
|
|
|
92
|
-
|
|
93
|
-
if self.duplicate_of_id or self.duplicate_ids:
|
|
94
|
-
self.dupe_type = self.dupe_type or SAME
|
|
95
|
-
|
|
94
|
+
@property
|
|
96
95
|
def complete_description(self) -> str | None:
|
|
97
96
|
"""String that summarizes what is known about this document."""
|
|
98
97
|
description = ''
|
|
@@ -130,17 +129,7 @@ class DocCfg:
|
|
|
130
129
|
|
|
131
130
|
return description
|
|
132
131
|
|
|
133
|
-
|
|
134
|
-
"""Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
|
|
135
|
-
for id in self.duplicate_ids:
|
|
136
|
-
dupe_cfg = deepcopy(self)
|
|
137
|
-
dupe_cfg.id = id
|
|
138
|
-
dupe_cfg.duplicate_of_id = self.id
|
|
139
|
-
dupe_cfg.duplicate_ids = []
|
|
140
|
-
dupe_cfg.dupe_type = self.dupe_type
|
|
141
|
-
dupe_cfg.is_synthetic = True
|
|
142
|
-
yield dupe_cfg
|
|
143
|
-
|
|
132
|
+
@property
|
|
144
133
|
def metadata(self) -> Metadata:
|
|
145
134
|
metadata = {k: v for k, v in asdict(self).items() if k not in NON_METADATA_FIELDS and v}
|
|
146
135
|
|
|
@@ -149,10 +138,26 @@ class DocCfg:
|
|
|
149
138
|
|
|
150
139
|
return metadata
|
|
151
140
|
|
|
141
|
+
@property
def timestamp(self) -> datetime | None:
    """The configured `date` string run through `parse()`, or None when no date is set."""
    return parse(self.date) if self.date else None
|
|
155
145
|
|
|
146
|
+
def __post_init__(self):
    """Fall back to the SAME dupe type when this config is linked to duplicates but has no dupe_type."""
    linked_to_duplicates = bool(self.duplicate_of_id or self.duplicate_ids)

    # Only fill in the default; an explicitly configured dupe_type is left untouched.
    if linked_to_duplicates and not self.dupe_type:
        self.dupe_type = SAME
|
|
149
|
+
|
|
150
|
+
def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
    """
    Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object.

    Yields one deep copy of this config per entry in `duplicate_ids`. Each copy has its `id` set to
    the duplicate's ID, `duplicate_of_id` set to this config's `id`, an empty `duplicate_ids` list,
    and `is_synthetic` set to True. Yields nothing when `duplicate_ids` is empty.
    """
    # Named `duplicate_id` rather than `id` so the builtin id() is not shadowed.
    for duplicate_id in self.duplicate_ids:
        dupe_cfg = deepcopy(self)
        dupe_cfg.id = duplicate_id
        dupe_cfg.duplicate_of_id = self.id
        dupe_cfg.duplicate_ids = []
        dupe_cfg.dupe_type = self.dupe_type
        dupe_cfg.is_synthetic = True
        yield dupe_cfg
|
|
160
|
+
|
|
156
161
|
def _props_strs(self) -> list[str]:
|
|
157
162
|
props = []
|
|
158
163
|
add_prop = lambda f, value: props.append(f"{f.name}={value}")
|