epstein-files 1.0.14__py3-none-any.whl → 1.0.16__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +8 -8
- epstein_files/documents/document.py +7 -5
- epstein_files/documents/email.py +8 -3
- epstein_files/documents/imessage/text_message.py +1 -1
- epstein_files/documents/messenger_log.py +10 -11
- epstein_files/documents/other_file.py +33 -32
- epstein_files/epstein_files.py +2 -43
- epstein_files/util/constant/names.py +3 -0
- epstein_files/util/constant/strings.py +0 -2
- epstein_files/util/constant/urls.py +8 -5
- epstein_files/util/constants.py +82 -87
- epstein_files/util/doc_cfg.py +4 -3
- epstein_files/util/env.py +10 -8
- epstein_files/util/file_helper.py +8 -4
- epstein_files/util/highlighted_group.py +35 -31
- epstein_files/util/output.py +47 -14
- epstein_files/util/rich.py +17 -13
- epstein_files/util/word_count.py +6 -8
- {epstein_files-1.0.14.dist-info → epstein_files-1.0.16.dist-info}/METADATA +1 -1
- epstein_files-1.0.16.dist-info/RECORD +33 -0
- epstein_files-1.0.14.dist-info/RECORD +0 -33
- {epstein_files-1.0.14.dist-info → epstein_files-1.0.16.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.14.dist-info → epstein_files-1.0.16.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.14.dist-info → epstein_files-1.0.16.dist-info}/entry_points.txt +0 -0
epstein_files/util/constants.py
CHANGED
|
@@ -198,7 +198,7 @@ EMAILERS = [
|
|
|
198
198
|
'Vladimir Yudashkin',
|
|
199
199
|
]
|
|
200
200
|
|
|
201
|
-
EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES)
|
|
201
|
+
EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES) # Keep a copy without the simple EMAILERS regexes
|
|
202
202
|
|
|
203
203
|
# Add simple matching regexes for EMAILERS entries to EMAILER_REGEXES
|
|
204
204
|
for emailer in EMAILERS:
|
|
@@ -208,73 +208,6 @@ for emailer in EMAILERS:
|
|
|
208
208
|
EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
|
|
209
209
|
|
|
210
210
|
|
|
211
|
-
##########################
|
|
212
|
-
# OtherFile config stuff #
|
|
213
|
-
##########################
|
|
214
|
-
|
|
215
|
-
# strings
|
|
216
|
-
FBI = 'FBI'
|
|
217
|
-
MEME = 'meme of'
|
|
218
|
-
PRESS_RELEASE = 'press release'
|
|
219
|
-
RESUME_OF = 'professional resumé'
|
|
220
|
-
SCREENSHOT = 'screenshot of'
|
|
221
|
-
TRANSLATION = 'translation of'
|
|
222
|
-
TWEET = 'tweet'
|
|
223
|
-
|
|
224
|
-
# Legal cases
|
|
225
|
-
BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
|
|
226
|
-
EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
|
|
227
|
-
EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
|
|
228
|
-
GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
|
|
229
|
-
GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
|
|
230
|
-
GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
|
|
231
|
-
JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
|
|
232
|
-
JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
|
|
233
|
-
NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
|
|
234
|
-
|
|
235
|
-
# Descriptions of non-email, non-text message files
|
|
236
|
-
ARTICLE_DRAFT = 'draft of an article about'
|
|
237
|
-
BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
|
|
238
|
-
BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
|
|
239
|
-
BROCKMAN_INC = 'Brockman, Inc.'
|
|
240
|
-
CVRA = "Crime Victims' Rights Act [CVRA]"
|
|
241
|
-
DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
|
|
242
|
-
DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
|
|
243
|
-
DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
|
|
244
|
-
DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
|
|
245
|
-
EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
|
|
246
|
-
FBI_REPORT = f"report on Epstein investigation (redacted)"
|
|
247
|
-
FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
|
|
248
|
-
FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
|
|
249
|
-
FIRE_AND_FURY = f"Fire And Fury"
|
|
250
|
-
HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
|
|
251
|
-
HBS_APPLICATION = f"{HARVARD} Business School application letter"
|
|
252
|
-
JASTA = 'JASTA'
|
|
253
|
-
JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
|
|
254
|
-
JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
|
|
255
|
-
LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
|
|
256
|
-
LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
|
|
257
|
-
KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
|
|
258
|
-
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
259
|
-
NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
|
|
260
|
-
NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
|
|
261
|
-
OBAMA_JOKE = 'joke about Obama'
|
|
262
|
-
PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
|
|
263
|
-
PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
|
|
264
|
-
PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
|
|
265
|
-
PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
|
|
266
|
-
PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
|
|
267
|
-
REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
268
|
-
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
269
|
-
SINGLE_PAGE = 'single page of'
|
|
270
|
-
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
271
|
-
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
272
|
-
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
273
|
-
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
274
|
-
UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
|
|
275
|
-
WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
|
|
276
|
-
ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
|
|
277
|
-
|
|
278
211
|
# Attribution reasons
|
|
279
212
|
BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
|
|
280
213
|
KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
|
|
@@ -369,6 +302,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
|
|
|
369
302
|
################################################ EMAILS ################################################
|
|
370
303
|
########################################################################################################
|
|
371
304
|
|
|
305
|
+
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
306
|
+
|
|
372
307
|
# Some emails have a lot of uninteresting CCs
|
|
373
308
|
IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
|
|
374
309
|
FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
|
|
@@ -601,6 +536,7 @@ EMAILS_CONFIG = [
|
|
|
601
536
|
EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
|
|
602
537
|
EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
603
538
|
EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
|
|
539
|
+
EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
|
|
604
540
|
EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
|
|
605
541
|
EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
|
|
606
542
|
EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
|
|
@@ -691,7 +627,6 @@ EMAILS_CONFIG = [
|
|
|
691
627
|
EmailCfg(id='021740', is_fwded_article=True), # Miami Herald article about Epstein prosecutor
|
|
692
628
|
EmailCfg(id='023126', is_fwded_article=True), # Miami Herald on Alex Acosta
|
|
693
629
|
EmailCfg(id='029625', is_fwded_article=True), # Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement
|
|
694
|
-
EmailCfg(id='013482', is_fwded_article=True), # The view from the US: Stem cell therapy steps up a gear with firs
|
|
695
630
|
EmailCfg(id='029505', is_fwded_article=True), # Foreign Policy Middle Eastern Monarchs Look at the Trump
|
|
696
631
|
EmailCfg(id='029859', is_fwded_article=True), # Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits
|
|
697
632
|
EmailCfg(id='031988', is_fwded_article=True), # NYT review of Inside Job
|
|
@@ -866,6 +801,65 @@ EMAILS_CONFIG = [
|
|
|
866
801
|
####################################### OTHER FILES ############################################
|
|
867
802
|
################################################################################################
|
|
868
803
|
|
|
804
|
+
# strings
|
|
805
|
+
FBI = 'FBI'
|
|
806
|
+
MEME = 'meme of'
|
|
807
|
+
PRESS_RELEASE = 'press release'
|
|
808
|
+
RESUME_OF = 'professional resumé'
|
|
809
|
+
SCREENSHOT = 'screenshot of'
|
|
810
|
+
TRANSLATION = 'translation of'
|
|
811
|
+
TWEET = 'tweet'
|
|
812
|
+
|
|
813
|
+
# Legal cases
|
|
814
|
+
BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
|
|
815
|
+
EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
|
|
816
|
+
EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
|
|
817
|
+
GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
|
|
818
|
+
GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
|
|
819
|
+
GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
|
|
820
|
+
JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
|
|
821
|
+
JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
|
|
822
|
+
NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
|
|
823
|
+
|
|
824
|
+
# Descriptions of non-email, non-text message files
|
|
825
|
+
ARTICLE_DRAFT = 'draft of an article about'
|
|
826
|
+
BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
|
|
827
|
+
BROCKMAN_INC = 'Brockman, Inc.'
|
|
828
|
+
CVRA = "Crime Victims' Rights Act [CVRA]"
|
|
829
|
+
DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
|
|
830
|
+
DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
|
|
831
|
+
DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
|
|
832
|
+
DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
|
|
833
|
+
FBI_REPORT = f"report on Epstein investigation (redacted)"
|
|
834
|
+
FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
|
|
835
|
+
FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
|
|
836
|
+
FIRE_AND_FURY = f"Fire And Fury"
|
|
837
|
+
HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
|
|
838
|
+
HBS_APPLICATION = f"{HARVARD} Business School application letter"
|
|
839
|
+
JASTA = 'JASTA'
|
|
840
|
+
JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
|
|
841
|
+
JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
|
|
842
|
+
LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
|
|
843
|
+
KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
|
|
844
|
+
LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
|
|
845
|
+
NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
|
|
846
|
+
OBAMA_JOKE = 'joke about Obama'
|
|
847
|
+
PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
|
|
848
|
+
PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
|
|
849
|
+
PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
|
|
850
|
+
PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
|
|
851
|
+
PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
|
|
852
|
+
REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
853
|
+
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
854
|
+
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
855
|
+
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
856
|
+
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
857
|
+
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
858
|
+
UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
|
|
859
|
+
WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
|
|
860
|
+
ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
|
|
861
|
+
|
|
862
|
+
|
|
869
863
|
OTHER_FILES_BOOKS = [
|
|
870
864
|
DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
|
|
871
865
|
DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
|
|
@@ -873,7 +867,7 @@ OTHER_FILES_BOOKS = [
|
|
|
873
867
|
DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
|
|
874
868
|
DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
|
|
875
869
|
DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
|
|
876
|
-
DocCfg(id='011472', author=EHUD_BARAK, description=
|
|
870
|
+
DocCfg(id='011472', author=EHUD_BARAK, description=f'"Night Flight" (draft)', date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
|
|
877
871
|
DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
|
|
878
872
|
DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
|
|
879
873
|
DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
|
|
@@ -1002,7 +996,7 @@ OTHER_FILES_ARTICLES = [
|
|
|
1002
996
|
DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
|
|
1003
997
|
DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
|
|
1004
998
|
DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
|
|
1005
|
-
DocCfg(id='021092', author='Tatler', description=f'
|
|
999
|
+
DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
|
|
1006
1000
|
DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
|
|
1007
1001
|
DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
|
|
1008
1002
|
DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
|
|
@@ -1011,7 +1005,7 @@ OTHER_FILES_ARTICLES = [
|
|
|
1011
1005
|
DocCfg(id='023046', author=VI_DAILY_NEWS, description='article', date='2019-02-27'),
|
|
1012
1006
|
DocCfg(id='031170', author=VI_DAILY_NEWS, description='article', date='2019-03-06'),
|
|
1013
1007
|
DocCfg(id='016506', author=VI_DAILY_NEWS, description='article', date='2019-02-28'),
|
|
1014
|
-
DocCfg(id='016507', author=VI_DAILY_NEWS, description=f"
|
|
1008
|
+
DocCfg(id='016507', author=VI_DAILY_NEWS, description=f'"Perversion of Justice" by {JULIE_K_BROWN}', date='2018-12-19'),
|
|
1015
1009
|
DocCfg(id='019212', author=WAPO, description=f'and Times Tribune articles about Bannon, Trump, and healthcare execs'),
|
|
1016
1010
|
DocCfg(id='033379', author=WAPO, description=f'"How Washington Pivoted From Finger-Wagging to Appeasement" (about Viktor Orban)', date='2018-05-25'),
|
|
1017
1011
|
DocCfg(
|
|
@@ -1026,12 +1020,12 @@ OTHER_FILES_ARTICLES = [
|
|
|
1026
1020
|
DocCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
|
|
1027
1021
|
DocCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
|
|
1028
1022
|
DocCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
|
|
1029
|
-
DocCfg(id='027051', description=f"German
|
|
1023
|
+
DocCfg(id='027051', description=f"German article about the 2013 Lifeball / AIDS Gala", date='2013-01-01', attached_to_email_id='027049'),
|
|
1030
1024
|
DocCfg(id='033480', description=f"John Bolton press clipping", date='2018-04-06', duplicate_ids=['033481']),
|
|
1031
1025
|
DocCfg(id='013403', description=f"{LEXIS_NEXIS} result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
|
|
1032
1026
|
DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
|
|
1033
|
-
DocCfg(id='031191', description=f"
|
|
1034
|
-
DocCfg(id='026520', description=f'Spanish
|
|
1027
|
+
DocCfg(id='031191', description=f"single page of unknown article about Epstein and Trump's relationship in 1997"),
|
|
1028
|
+
DocCfg(id='026520', description=f'Spanish article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
|
|
1035
1029
|
DocCfg(
|
|
1036
1030
|
id='031736',
|
|
1037
1031
|
description=f"{TRANSLATION} Arabic article by Abdulnaser Salamah 'Trump; Prince of Believers (Caliph)!'",
|
|
@@ -1186,16 +1180,16 @@ OTHER_FILES_LEGAL = [
|
|
|
1186
1180
|
]
|
|
1187
1181
|
|
|
1188
1182
|
OTHER_FILES_CONFERENCES = [
|
|
1189
|
-
DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference,
|
|
1183
|
+
DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference', attached_to_email_id='014312'),
|
|
1190
1184
|
DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
|
|
1191
1185
|
DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
|
|
1192
1186
|
DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
|
|
1193
|
-
DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f"Earth Environment Convention about ESG investing
|
|
1187
|
+
DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f'"Earth Environment Convention" about ESG investing'),
|
|
1194
1188
|
DocCfg(id='031354', author=NOBEL_CHARITABLE_TRUST, description=f'"Thinking About the Environment and Technology" report 2011'),
|
|
1195
1189
|
DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
|
|
1196
1190
|
DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
|
|
1197
1191
|
DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
|
|
1198
|
-
DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program
|
|
1192
|
+
DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
|
|
1199
1193
|
DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
|
|
1200
1194
|
DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
|
|
1201
1195
|
DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
|
|
@@ -1411,7 +1405,7 @@ OTHER_FILES_POLITICS = [
|
|
|
1411
1405
|
description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
|
|
1412
1406
|
date='2017-06-02',
|
|
1413
1407
|
),
|
|
1414
|
-
DocCfg(id='026856', author='Kevin Rudd', description=f
|
|
1408
|
+
DocCfg(id='026856', author='Kevin Rudd', description=f'speech "Xi Jinping, China And The Global Order"', date='2018-06-26'),
|
|
1415
1409
|
DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
|
|
1416
1410
|
DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
|
|
1417
1411
|
DocCfg(
|
|
@@ -1424,7 +1418,7 @@ OTHER_FILES_POLITICS = [
|
|
|
1424
1418
|
DocCfg(
|
|
1425
1419
|
id='025849',
|
|
1426
1420
|
author='US Office of Government Information Services',
|
|
1427
|
-
description=f"Building a Bridge Between FOIA Requesters & Agencies",
|
|
1421
|
+
description=f'"Building a Bridge Between FOIA Requesters & Agencies"',
|
|
1428
1422
|
),
|
|
1429
1423
|
DocCfg(id='031670', description=f"letter from General Mike Flynn's lawyers to senators Mark Warner & Richard Burr about subpoena"),
|
|
1430
1424
|
DocCfg(
|
|
@@ -1502,6 +1496,13 @@ OTHER_FILES_RESUMES = [
|
|
|
1502
1496
|
OTHER_FILES_ARTS = [
|
|
1503
1497
|
DocCfg(id='018703', author=ANDRES_SERRANO, description=f"artist statement about Trump objects"),
|
|
1504
1498
|
DocCfg(id='023438', author=BROCKMAN_INC, description=f"announcement of auction of 'Noise' by Daniel Kahneman, Olivier Sibony, and Cass Sunstein"),
|
|
1499
|
+
DocCfg(
|
|
1500
|
+
id='025147',
|
|
1501
|
+
author=BROCKMAN_INC,
|
|
1502
|
+
description=f'hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
|
|
1503
|
+
date='2016-10-23',
|
|
1504
|
+
is_interesting=True,
|
|
1505
|
+
),
|
|
1505
1506
|
DocCfg(id='030769', author='Independent Filmmaker Project (IFP)', description=f"2017 Gotham Awards invitation"),
|
|
1506
1507
|
DocCfg(
|
|
1507
1508
|
id='025205',
|
|
@@ -1526,12 +1527,6 @@ OTHER_FILES_MISC = [
|
|
|
1526
1527
|
author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
|
|
1527
1528
|
is_interesting=True, # we don't know who linkspirit is yet
|
|
1528
1529
|
),
|
|
1529
|
-
DocCfg(
|
|
1530
|
-
id='025147',
|
|
1531
|
-
author=BROCKMAN_INC,
|
|
1532
|
-
description=f'hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
|
|
1533
|
-
date='2016-10-23',
|
|
1534
|
-
),
|
|
1535
1530
|
DocCfg(id='022494', author='DOJ', description=f'Foreign Corrupt Practices Act (FCPA) Resource Guide'),
|
|
1536
1531
|
DocCfg(id='023096', author=EPSTEIN_FOUNDATION, description=f'blog post', date='2012-11-15'),
|
|
1537
1532
|
DocCfg(id='029326', author=EPSTEIN_FOUNDATION, description=f'{PRESS_RELEASE}', date='2013-02-15'),
|
|
@@ -1539,7 +1534,7 @@ OTHER_FILES_MISC = [
|
|
|
1539
1534
|
DocCfg(id='027071', author=FEMALE_HEALTH_COMPANY, description=f"brochure requesting donations for female condoms in Uganda"),
|
|
1540
1535
|
DocCfg(id='027074', author=FEMALE_HEALTH_COMPANY, description=f"pitch deck (USAID was a customer)"),
|
|
1541
1536
|
DocCfg(id='032735', author=GORDON_GETTY, description=f"on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
|
|
1542
|
-
DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of
|
|
1537
|
+
DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of his side of the story"),
|
|
1543
1538
|
DocCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
|
|
1544
1539
|
DocCfg(id='031425', author=SCOTT_J_LINK, description=f'completely redacted email from', is_interesting=False),
|
|
1545
1540
|
DocCfg(id='020447', author='Working Group on Chinese Influence Activities in the U.S.', description=f'Promoting Constructive Vigilance'),
|
epstein_files/util/doc_cfg.py
CHANGED
|
@@ -36,7 +36,7 @@ FIELD_SORT_KEY = {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
FINANCIAL_REPORTS_AUTHORS = [
|
|
39
|
-
|
|
39
|
+
BOFA_MERRILL,
|
|
40
40
|
DEUTSCHE_BANK,
|
|
41
41
|
ELECTRON_CAPITAL_PARTNERS,
|
|
42
42
|
GOLDMAN_INVESTMENT_MGMT,
|
|
@@ -73,6 +73,7 @@ class DocCfg:
|
|
|
73
73
|
is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
|
|
74
74
|
"""
|
|
75
75
|
id: str
|
|
76
|
+
attached_to_email_id: str | None = None
|
|
76
77
|
author: str | None = None
|
|
77
78
|
category: str | None = None
|
|
78
79
|
date: str | None = None
|
|
@@ -102,10 +103,10 @@ class DocCfg:
|
|
|
102
103
|
return f"{msg} {self.description}" if self.description else msg
|
|
103
104
|
elif self.author and self.description:
|
|
104
105
|
if self.category in [ACADEMIA, BOOK]:
|
|
105
|
-
title = self.description if '"' in self.description else f"
|
|
106
|
+
title = self.description if '"' in self.description else f'"{self.description}"'
|
|
106
107
|
return f"{title} by {self.author}"
|
|
107
108
|
elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
|
|
108
|
-
return f
|
|
109
|
+
return f'{self.author} report: "{self.description}"'
|
|
109
110
|
elif self.category == LEGAL and 'v.' in self.author:
|
|
110
111
|
return f"{self.author}: {self.description}"
|
|
111
112
|
elif self.category and self.author is None and self.description is None:
|
epstein_files/util/env.py
CHANGED
|
@@ -8,10 +8,8 @@ from rich_argparse_plus import RichHelpFormatterPlus
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.util.logging import env_log_level, logger
|
|
10
10
|
|
|
11
|
-
COUNT_WORDS_SCRIPT = 'epstein_word_count'
|
|
12
11
|
DEFAULT_WIDTH = 145
|
|
13
|
-
HTML_SCRIPTS = ['epstein_generate',
|
|
14
|
-
EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
|
|
12
|
+
HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
RichHelpFormatterPlus.choose_theme('morning_glory')
|
|
@@ -50,6 +48,7 @@ args = parser.parse_args()
|
|
|
50
48
|
|
|
51
49
|
|
|
52
50
|
# Verify Epstein docs can be found
|
|
51
|
+
EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
|
|
53
52
|
DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
|
|
54
53
|
DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
|
|
55
54
|
|
|
@@ -65,13 +64,13 @@ is_env_var_set = lambda s: len(environ.get(s) or '') > 0
|
|
|
65
64
|
is_html_script = current_script in HTML_SCRIPTS
|
|
66
65
|
|
|
67
66
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
67
|
+
args.names = [None if n == 'None' else n for n in (args.names or [])]
|
|
68
68
|
args.output_emails = args.output_emails or args.all_emails
|
|
69
69
|
args.output_other = args.output_other or args.all_other_files or args.uninteresting
|
|
70
70
|
args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
|
|
71
71
|
args.width = args.width if is_html_script else None
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
|
|
72
|
+
is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
|
|
73
|
+
is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
|
|
75
74
|
|
|
76
75
|
# Log level args
|
|
77
76
|
if args.deep_debug:
|
|
@@ -86,9 +85,12 @@ elif not env_log_level:
|
|
|
86
85
|
logger.info(f'Log level set to {logger.level}...')
|
|
87
86
|
|
|
88
87
|
# Massage args that depend on other args to the appropriate state
|
|
89
|
-
if current_script == 'epstein_generate' and not (
|
|
88
|
+
if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
|
|
90
89
|
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
91
90
|
args.output_texts = args.output_emails = args.output_other = True
|
|
92
91
|
|
|
93
92
|
if args.debug:
|
|
94
|
-
logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={
|
|
93
|
+
logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
|
|
94
|
+
|
|
95
|
+
if args.names:
|
|
96
|
+
logger.warning(f"Output restricted to {args.names}")
|
|
@@ -12,7 +12,6 @@ KB = 1024
|
|
|
12
12
|
MB = KB * KB
|
|
13
13
|
|
|
14
14
|
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
15
|
-
file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
|
|
16
15
|
|
|
17
16
|
# Coerce methods handle both string and int arguments.
|
|
18
17
|
coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
|
|
@@ -46,8 +45,12 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
|
|
|
46
45
|
return file_match.group(1)
|
|
47
46
|
|
|
48
47
|
|
|
49
|
-
def
|
|
50
|
-
|
|
48
|
+
def file_size_str(file_path, digits: int | None = None):
|
|
49
|
+
return file_size_to_str(file_size(file_path), digits)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def file_size_to_str(size: int, digits: int | None = None) -> str:
|
|
53
|
+
_digits = 2
|
|
51
54
|
|
|
52
55
|
if size > MB:
|
|
53
56
|
size_num = float(size) / MB
|
|
@@ -55,10 +58,11 @@ def file_size_to_str(size: int) -> str:
|
|
|
55
58
|
elif size > KB:
|
|
56
59
|
size_num = float(size) / KB
|
|
57
60
|
size_str = 'kb'
|
|
58
|
-
|
|
61
|
+
_digits = 1
|
|
59
62
|
else:
|
|
60
63
|
return f"{size} b"
|
|
61
64
|
|
|
65
|
+
digits = _digits if digits is None else digits
|
|
62
66
|
return f"{size_num:,.{digits}f} {size_str}"
|
|
63
67
|
|
|
64
68
|
|