epstein-files 1.0.14__py3-none-any.whl → 1.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -198,7 +198,7 @@ EMAILERS = [
198
198
  'Vladimir Yudashkin',
199
199
  ]
200
200
 
201
- EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES)
201
+ EMAILER_REGEXES = deepcopy(EMAILER_ID_REGEXES) # Keep a copy without the simple EMAILERS regexes
202
202
 
203
203
  # Add simple matching regexes for EMAILERS entries to EMAILER_REGEXES
204
204
  for emailer in EMAILERS:
@@ -208,73 +208,6 @@ for emailer in EMAILERS:
208
208
  EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
209
209
 
210
210
 
211
- ##########################
212
- # OtherFile config stuff #
213
- ##########################
214
-
215
- # strings
216
- FBI = 'FBI'
217
- MEME = 'meme of'
218
- PRESS_RELEASE = 'press release'
219
- RESUME_OF = 'professional resumé'
220
- SCREENSHOT = 'screenshot of'
221
- TRANSLATION = 'translation of'
222
- TWEET = 'tweet'
223
-
224
- # Legal cases
225
- BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
226
- EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
227
- EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
228
- GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
229
- GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
230
- GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
231
- JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
232
- JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
233
- NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
234
-
235
- # Descriptions of non-email, non-text message files
236
- ARTICLE_DRAFT = 'draft of an article about'
237
- BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
238
- BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
239
- BROCKMAN_INC = 'Brockman, Inc.'
240
- CVRA = "Crime Victims' Rights Act [CVRA]"
241
- DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
242
- DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
243
- DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
244
- DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
245
- EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
246
- FBI_REPORT = f"report on Epstein investigation (redacted)"
247
- FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
248
- FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
249
- FIRE_AND_FURY = f"Fire And Fury"
250
- HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
251
- HBS_APPLICATION = f"{HARVARD} Business School application letter"
252
- JASTA = 'JASTA'
253
- JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
254
- JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
255
- LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
256
- LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
257
- KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
258
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
259
- NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
260
- NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
261
- OBAMA_JOKE = 'joke about Obama'
262
- PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
263
- PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
264
- PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
265
- PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
266
- PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
267
- REAL_DEAL_ARTICLE = 'article by Keith Larsen'
268
- SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
269
- SINGLE_PAGE = 'single page of'
270
- STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
271
- SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
272
- TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
273
- UBS_CIO_REPORT = 'CIO Monthly Extended report'
274
- UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
275
- WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
276
- ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
277
-
278
211
  # Attribution reasons
279
212
  BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
280
213
  KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
@@ -369,6 +302,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
369
302
  ################################################ EMAILS ################################################
370
303
  ########################################################################################################
371
304
 
305
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
+
372
307
  # Some emails have a lot of uninteresting CCs
373
308
  IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
374
309
  FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
@@ -601,6 +536,7 @@ EMAILS_CONFIG = [
601
536
  EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
602
537
  EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
603
538
  EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
539
+ EmailCfg(id='013482', recipients=[JEFFREY_EPSTEIN], is_fwded_article=True), # other recipients redacted. "The view from the US: Stem cell therapy steps up a gear with firs"
604
540
  EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
605
541
  EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
606
542
  EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
@@ -691,7 +627,6 @@ EMAILS_CONFIG = [
691
627
  EmailCfg(id='021740', is_fwded_article=True), # Miami Herald article about Epstein prosecutor
692
628
  EmailCfg(id='023126', is_fwded_article=True), # Miami Herald on Alex Acosta
693
629
  EmailCfg(id='029625', is_fwded_article=True), # Conchita Sarnoff Daily Beast Articles - Epstein Sex Trafficking Investigation and Settlement
694
- EmailCfg(id='013482', is_fwded_article=True), # The view from the US: Stem cell therapy steps up a gear with firs
695
630
  EmailCfg(id='029505', is_fwded_article=True), # Foreign Policy Middle Eastern Monarchs Look at the Trump
696
631
  EmailCfg(id='029859', is_fwded_article=True), # Palm Beach Post: Epstein paid three women $5.5 million to end lawsuits
697
632
  EmailCfg(id='031988', is_fwded_article=True), # NYT review of Inside Job
@@ -866,6 +801,65 @@ EMAILS_CONFIG = [
866
801
  ####################################### OTHER FILES ############################################
867
802
  ################################################################################################
868
803
 
804
+ # strings
805
+ FBI = 'FBI'
806
+ MEME = 'meme of'
807
+ PRESS_RELEASE = 'press release'
808
+ RESUME_OF = 'professional resumé'
809
+ SCREENSHOT = 'screenshot of'
810
+ TRANSLATION = 'translation of'
811
+ TWEET = 'tweet'
812
+
813
+ # Legal cases
814
+ BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
815
+ EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
816
+ EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
817
+ GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
818
+ GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
819
+ GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
820
+ JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
821
+ JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
822
+ NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
823
+
824
+ # Descriptions of non-email, non-text message files
825
+ ARTICLE_DRAFT = 'draft of an article about'
826
+ BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
827
+ BROCKMAN_INC = 'Brockman, Inc.'
828
+ CVRA = "Crime Victims' Rights Act [CVRA]"
829
+ DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
830
+ DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
831
+ DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
832
+ DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
833
+ FBI_REPORT = f"report on Epstein investigation (redacted)"
834
+ FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
835
+ FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
836
+ FIRE_AND_FURY = f"Fire And Fury"
837
+ HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
838
+ HBS_APPLICATION = f"{HARVARD} Business School application letter"
839
+ JASTA = 'JASTA'
840
+ JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
841
+ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
842
+ LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
843
+ KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
844
+ LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
845
+ NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
846
+ OBAMA_JOKE = 'joke about Obama'
847
+ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
848
+ PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
849
+ PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
850
+ PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
851
+ PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
852
+ REAL_DEAL_ARTICLE = 'article by Keith Larsen'
853
+ SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
854
+ STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
855
+ SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
856
+ TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
857
+ UBS_CIO_REPORT = 'CIO Monthly Extended report'
858
+ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
859
+ WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
860
+ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
861
+
862
+
869
863
  OTHER_FILES_BOOKS = [
870
864
  DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
871
865
  DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -873,7 +867,7 @@ OTHER_FILES_BOOKS = [
873
867
  DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
874
868
  DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
875
869
  DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
876
- DocCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK, date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
870
+ DocCfg(id='011472', author=EHUD_BARAK, description=f'"Night Flight" (draft)', date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
877
871
  DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
878
872
  DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
879
873
  DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
@@ -1002,7 +996,7 @@ OTHER_FILES_ARTICLES = [
1002
996
  DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
1003
997
  DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
1004
998
  DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
1005
- DocCfg(id='021092', author='Tatler', description=f'{SINGLE_PAGE} of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
999
+ DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
1006
1000
  DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
1007
1001
  DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
1008
1002
  DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1011,7 +1005,7 @@ OTHER_FILES_ARTICLES = [
1011
1005
  DocCfg(id='023046', author=VI_DAILY_NEWS, description='article', date='2019-02-27'),
1012
1006
  DocCfg(id='031170', author=VI_DAILY_NEWS, description='article', date='2019-03-06'),
1013
1007
  DocCfg(id='016506', author=VI_DAILY_NEWS, description='article', date='2019-02-28'),
1014
- DocCfg(id='016507', author=VI_DAILY_NEWS, description=f"'Perversion of Justice' by {JULIE_K_BROWN}", date='2018-12-19'),
1008
+ DocCfg(id='016507', author=VI_DAILY_NEWS, description=f'"Perversion of Justice" by {JULIE_K_BROWN}', date='2018-12-19'),
1015
1009
  DocCfg(id='019212', author=WAPO, description=f'and Times Tribune articles about Bannon, Trump, and healthcare execs'),
1016
1010
  DocCfg(id='033379', author=WAPO, description=f'"How Washington Pivoted From Finger-Wagging to Appeasement" (about Viktor Orban)', date='2018-05-25'),
1017
1011
  DocCfg(
@@ -1026,12 +1020,12 @@ OTHER_FILES_ARTICLES = [
1026
1020
  DocCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
1027
1021
  DocCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
1028
1022
  DocCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
1029
- DocCfg(id='027051', description=f"German language article about the 2013 Lifeball / AIDS Gala", date='2013-01-01'),
1023
+ DocCfg(id='027051', description=f"German article about the 2013 Lifeball / AIDS Gala", date='2013-01-01', attached_to_email_id='027049'),
1030
1024
  DocCfg(id='033480', description=f"John Bolton press clipping", date='2018-04-06', duplicate_ids=['033481']),
1031
1025
  DocCfg(id='013403', description=f"{LEXIS_NEXIS} result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
1032
1026
  DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
1033
- DocCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
1034
- DocCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
1027
+ DocCfg(id='031191', description=f"single page of unknown article about Epstein and Trump's relationship in 1997"),
1028
+ DocCfg(id='026520', description=f'Spanish article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
1035
1029
  DocCfg(
1036
1030
  id='031736',
1037
1031
  description=f"{TRANSLATION} Arabic article by Abdulnaser Salamah 'Trump; Prince of Believers (Caliph)!'",
@@ -1186,16 +1180,16 @@ OTHER_FILES_LEGAL = [
1186
1180
  ]
1187
1181
 
1188
1182
  OTHER_FILES_CONFERENCES = [
1189
- DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
1183
+ DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference', attached_to_email_id='014312'),
1190
1184
  DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1191
1185
  DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
1192
1186
  DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
1193
- DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f"Earth Environment Convention about ESG investing"),
1187
+ DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f'"Earth Environment Convention" about ESG investing'),
1194
1188
  DocCfg(id='031354', author=NOBEL_CHARITABLE_TRUST, description=f'"Thinking About the Environment and Technology" report 2011'),
1195
1189
  DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
1196
1190
  DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1197
1191
  DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1198
- DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program emailed to epstein BY {BARBRO_C_EHNBOM} in 031226", date='2012-08-18'),
1192
+ DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
1199
1193
  DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
1200
1194
  DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
1201
1195
  DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
@@ -1411,7 +1405,7 @@ OTHER_FILES_POLITICS = [
1411
1405
  description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
1412
1406
  date='2017-06-02',
1413
1407
  ),
1414
- DocCfg(id='026856', author='Kevin Rudd', description=f"speech 'Xi Jinping, China And The Global Order'", date='2018-06-26'),
1408
+ DocCfg(id='026856', author='Kevin Rudd', description=f'speech "Xi Jinping, China And The Global Order"', date='2018-06-26'),
1415
1409
  DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
1416
1410
  DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
1417
1411
  DocCfg(
@@ -1424,7 +1418,7 @@ OTHER_FILES_POLITICS = [
1424
1418
  DocCfg(
1425
1419
  id='025849',
1426
1420
  author='US Office of Government Information Services',
1427
- description=f"Building a Bridge Between FOIA Requesters & Agencies",
1421
+ description=f'"Building a Bridge Between FOIA Requesters & Agencies"',
1428
1422
  ),
1429
1423
  DocCfg(id='031670', description=f"letter from General Mike Flynn's lawyers to senators Mark Warner & Richard Burr about subpoena"),
1430
1424
  DocCfg(
@@ -1502,6 +1496,13 @@ OTHER_FILES_RESUMES = [
1502
1496
  OTHER_FILES_ARTS = [
1503
1497
  DocCfg(id='018703', author=ANDRES_SERRANO, description=f"artist statement about Trump objects"),
1504
1498
  DocCfg(id='023438', author=BROCKMAN_INC, description=f"announcement of auction of 'Noise' by Daniel Kahneman, Olivier Sibony, and Cass Sunstein"),
1499
+ DocCfg(
1500
+ id='025147',
1501
+ author=BROCKMAN_INC,
1502
+ description=f'hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
1503
+ date='2016-10-23',
1504
+ is_interesting=True,
1505
+ ),
1505
1506
  DocCfg(id='030769', author='Independent Filmmaker Project (IFP)', description=f"2017 Gotham Awards invitation"),
1506
1507
  DocCfg(
1507
1508
  id='025205',
@@ -1526,12 +1527,6 @@ OTHER_FILES_MISC = [
1526
1527
  author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
1527
1528
  is_interesting=True, # we don't know who linkspirit is yet
1528
1529
  ),
1529
- DocCfg(
1530
- id='025147',
1531
- author=BROCKMAN_INC,
1532
- description=f'hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
1533
- date='2016-10-23',
1534
- ),
1535
1530
  DocCfg(id='022494', author='DOJ', description=f'Foreign Corrupt Practices Act (FCPA) Resource Guide'),
1536
1531
  DocCfg(id='023096', author=EPSTEIN_FOUNDATION, description=f'blog post', date='2012-11-15'),
1537
1532
  DocCfg(id='029326', author=EPSTEIN_FOUNDATION, description=f'{PRESS_RELEASE}', date='2013-02-15'),
@@ -1539,7 +1534,7 @@ OTHER_FILES_MISC = [
1539
1534
  DocCfg(id='027071', author=FEMALE_HEALTH_COMPANY, description=f"brochure requesting donations for female condoms in Uganda"),
1540
1535
  DocCfg(id='027074', author=FEMALE_HEALTH_COMPANY, description=f"pitch deck (USAID was a customer)"),
1541
1536
  DocCfg(id='032735', author=GORDON_GETTY, description=f"on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
1542
- DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of Epstein's side of the story?"),
1537
+ DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of his side of the story"),
1543
1538
  DocCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
1544
1539
  DocCfg(id='031425', author=SCOTT_J_LINK, description=f'completely redacted email from', is_interesting=False),
1545
1540
  DocCfg(id='020447', author='Working Group on Chinese Influence Activities in the U.S.', description=f'Promoting Constructive Vigilance'),
@@ -36,7 +36,7 @@ FIELD_SORT_KEY = {
36
36
  }
37
37
 
38
38
  FINANCIAL_REPORTS_AUTHORS = [
39
- BOFA,
39
+ BOFA_MERRILL,
40
40
  DEUTSCHE_BANK,
41
41
  ELECTRON_CAPITAL_PARTNERS,
42
42
  GOLDMAN_INVESTMENT_MGMT,
@@ -73,6 +73,7 @@ class DocCfg:
73
73
  is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
74
74
  """
75
75
  id: str
76
+ attached_to_email_id: str | None = None
76
77
  author: str | None = None
77
78
  category: str | None = None
78
79
  date: str | None = None
@@ -102,10 +103,10 @@ class DocCfg:
102
103
  return f"{msg} {self.description}" if self.description else msg
103
104
  elif self.author and self.description:
104
105
  if self.category in [ACADEMIA, BOOK]:
105
- title = self.description if '"' in self.description else f"'{self.description}'"
106
+ title = self.description if '"' in self.description else f'"{self.description}"'
106
107
  return f"{title} by {self.author}"
107
108
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
108
- return f"{self.author} report: '{self.description}'"
109
+ return f'{self.author} report: "{self.description}"'
109
110
  elif self.category == LEGAL and 'v.' in self.author:
110
111
  return f"{self.author}: {self.description}"
111
112
  elif self.category and self.author is None and self.description is None:
epstein_files/util/env.py CHANGED
@@ -8,10 +8,8 @@ from rich_argparse_plus import RichHelpFormatterPlus
8
8
 
9
9
  from epstein_files.util.logging import env_log_level, logger
10
10
 
11
- COUNT_WORDS_SCRIPT = 'epstein_word_count'
12
11
  DEFAULT_WIDTH = 145
13
- HTML_SCRIPTS = ['epstein_generate', COUNT_WORDS_SCRIPT]
14
- EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
12
+ HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
15
13
 
16
14
 
17
15
  RichHelpFormatterPlus.choose_theme('morning_glory')
@@ -50,6 +48,7 @@ args = parser.parse_args()
50
48
 
51
49
 
52
50
  # Verify Epstein docs can be found
51
+ EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
53
52
  DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
54
53
  DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
55
54
 
@@ -65,13 +64,13 @@ is_env_var_set = lambda s: len(environ.get(s) or '') > 0
65
64
  is_html_script = current_script in HTML_SCRIPTS
66
65
 
67
66
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
67
+ args.names = [None if n == 'None' else n for n in (args.names or [])]
68
68
  args.output_emails = args.output_emails or args.all_emails
69
69
  args.output_other = args.output_other or args.all_other_files or args.uninteresting
70
70
  args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
71
71
  args.width = args.width if is_html_script else None
72
- is_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
73
- is_output_selected = is_output_selected or args.json_metadata or args.colors_only
74
- specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
72
+ is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
73
+ is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
75
74
 
76
75
  # Log level args
77
76
  if args.deep_debug:
@@ -86,9 +85,12 @@ elif not env_log_level:
86
85
  logger.info(f'Log level set to {logger.level}...')
87
86
 
88
87
  # Massage args that depend on other args to the appropriate state
89
- if current_script == 'epstein_generate' and not (is_output_selected or args.make_clean):
88
+ if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
90
89
  logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
91
90
  args.output_texts = args.output_emails = args.output_other = True
92
91
 
93
92
  if args.debug:
94
- logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_output_selected}\nspecified_names={specified_names},\nargs={args}")
93
+ logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
94
+
95
+ if args.names:
96
+ logger.warning(f"Output restricted to {args.names}")
@@ -12,7 +12,6 @@ KB = 1024
12
12
  MB = KB * KB
13
13
 
14
14
  file_size = lambda file_path: Path(file_path).stat().st_size
15
- file_size_str = lambda file_path: file_size_to_str(file_size(file_path))
16
15
 
17
16
  # Coerce methods handle both string and int arguments.
18
17
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
@@ -46,8 +45,12 @@ def extract_file_id(filename_or_id: int | str | Path) -> str:
46
45
  return file_match.group(1)
47
46
 
48
47
 
49
- def file_size_to_str(size: int) -> str:
50
- digits = 2
48
+ def file_size_str(file_path, digits: int | None = None):
49
+ return file_size_to_str(file_size(file_path), digits)
50
+
51
+
52
+ def file_size_to_str(size: int, digits: int | None = None) -> str:
53
+ _digits = 2
51
54
 
52
55
  if size > MB:
53
56
  size_num = float(size) / MB
@@ -55,10 +58,11 @@ def file_size_to_str(size: int) -> str:
55
58
  elif size > KB:
56
59
  size_num = float(size) / KB
57
60
  size_str = 'kb'
58
- digits = 1
61
+ _digits = 1
59
62
  else:
60
63
  return f"{size} b"
61
64
 
65
+ digits = _digits if digits is None else digits
62
66
  return f"{size_num:,.{digits}f} {size_str}"
63
67
 
64
68