epstein-files 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,6 @@ PUBLICIST = 'publicist'
22
22
  REPUTATION = 'reputation'
23
23
  SKYPE_LOG = 'Skype log'
24
24
  SOCIAL = 'social'
25
- SPEECH = 'speech'
26
25
 
27
26
  # Locations
28
27
  PALM_BEACH = 'Palm Beach'
@@ -35,6 +34,7 @@ CHINA_DAILY = "China Daily"
35
34
  DAILY_MAIL = 'Daily Mail'
36
35
  DAILY_TELEGRAPH = "Daily Telegraph"
37
36
  LA_TIMES = 'LA Times'
37
+ LEXIS_NEXIS = 'Lexis Nexis'
38
38
  MIAMI_HERALD = 'Miami Herald'
39
39
  NYT = "New York Times"
40
40
  PALM_BEACH_DAILY_NEWS = f'{PALM_BEACH} Daily News'
@@ -57,12 +57,10 @@ TIMESTAMP_DIM = f"turquoise4 dim"
57
57
  AUTHOR = 'author'
58
58
  DEFAULT = 'default'
59
59
  EVERYONE = 'everyone'
60
- FIRST_FEW_LINES = 'First Few Lines'
61
60
  HOUSE_OVERSIGHT_PREFIX = 'HOUSE_OVERSIGHT_'
62
61
  JSON = 'json'
63
62
  NA = 'n/a'
64
63
  REDACTED = '<REDACTED>'
65
- URL_SIGNIFIERS = ['gclid', 'htm', 'ref=', 'utm']
66
64
  QUESTION_MARKS = '(???)'
67
65
 
68
66
  # Regexes
@@ -49,7 +49,7 @@ DOC_LINK_BASE_URLS: dict[ExternalSite, str] = {
49
49
  }
50
50
 
51
51
 
52
- epsteinify_api_url = lambda file_id: f"{EPSTEINIFY_URL}/api/documents/HOUSE_OVERSIGHT_{file_id}"
52
+ epsteinify_api_url = lambda file_stem: f"{EPSTEINIFY_URL}/api/documents/{file_stem}"
53
53
  epsteinify_doc_link_markup = lambda filename_or_id, style = TEXT_LINK: external_doc_link_markup(EPSTEINIFY, filename_or_id, style)
54
54
  epsteinify_doc_link_txt = lambda filename_or_id, style = TEXT_LINK: Text.from_markup(external_doc_link_markup(filename_or_id, style))
55
55
  epsteinify_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[EPSTEINIFY], file_stem)
@@ -66,8 +66,6 @@ epstein_web_search_url = lambda s: f"{EPSTEIN_WEB_URL}/?ewmfileq={urllib.parse.q
66
66
 
67
67
  rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL], file_stem, 'title')
68
68
 
69
- search_archive_url = lambda txt: f"{COURIER_NEWSROOM_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
70
- search_coffeezilla_url = lambda txt: f"{COFFEEZILLA_ARCHIVE_URL}&q={urllib.parse.quote(txt)}&p=1"
71
69
  search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
72
70
  search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
73
71
 
@@ -103,8 +101,4 @@ def link_text_obj(url: str, link_text: str | None = None, style: str = ARCHIVE_L
103
101
  return Text.from_markup(link_markup(url, link_text, style))
104
102
 
105
103
 
106
- def search_coffeezilla_link(text: str, link_txt: str, style: str = ARCHIVE_LINK_COLOR) -> Text:
107
- return link_text_obj(search_coffeezilla_url(text), link_txt or text, style)
108
-
109
-
110
104
  CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
@@ -45,9 +45,9 @@ HEADER_ABBREVIATIONS = {
45
45
  }
46
46
 
47
47
 
48
- #######################
49
- # Emails Config Stuff #
50
- #######################
48
+ #########################
49
+ # Emailers Config Stuff #
50
+ #########################
51
51
 
52
52
  # Emailers
53
53
  EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
@@ -85,7 +85,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
85
85
  JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
86
86
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
87
87
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
88
- JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
88
+ JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
89
89
  JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
90
90
  JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
91
91
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
@@ -208,74 +208,6 @@ for emailer in EMAILERS:
208
208
  EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
209
209
 
210
210
 
211
- ##########################
212
- # OtherFile config stuff #
213
- ##########################
214
-
215
- # strings
216
- FBI = 'FBI'
217
- MEME = 'meme of'
218
- PRESS_RELEASE = 'press release'
219
- RESUME_OF = 'professional resumé'
220
- SCREENSHOT = 'screenshot of'
221
- TRANSLATION = 'translation of'
222
- TWEET = 'tweet'
223
- TEXT_OF_US_LAW = 'text of U.S. law:'
224
-
225
- # Legal cases
226
- BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
227
- EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
228
- EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
229
- GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
230
- GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
231
- GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
232
- JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
233
- JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
234
- NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
235
-
236
- # Descriptions of non-email, non-text message files
237
- ARTICLE_DRAFT = 'draft of an article about'
238
- BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
239
- BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
240
- BROCKMAN_INC = 'Brockman, Inc.'
241
- CVRA = "Crime Victims' Rights Act [CVRA]"
242
- CVRA_LEXIS_SEARCH = f"Lexis Nexis search for case law around the {CVRA}"
243
- DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
244
- DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
245
- DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
246
- DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
247
- EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
248
- FBI_REPORT = f"report on Epstein investigation (redacted)"
249
- FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
250
- FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
251
- FIRE_AND_FURY = f"Fire And Fury"
252
- HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
253
- HBS_APPLICATION = f"{HARVARD} Business School application letter"
254
- JASTA = 'JASTA'
255
- JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
256
- JOHN_BOLTON_PRESS_CLIPPING = 'John Bolton press clipping'
257
- JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
258
- LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
259
- KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
260
- MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
261
- NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
262
- NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
263
- OBAMA_JOKE = 'joke about Obama'
264
- PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
265
- PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
266
- PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
267
- PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
268
- REAL_DEAL_ARTICLE = 'article by Keith Larsen'
269
- SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
270
- SINGLE_PAGE = 'single page of'
271
- STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
272
- SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
273
- TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
274
- UBS_CIO_REPORT = 'CIO Monthly Extended report'
275
- UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
276
- WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
277
- ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
278
-
279
211
  # Attribution reasons
280
212
  BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
281
213
  KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
@@ -370,6 +302,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
370
302
  ################################################ EMAILS ################################################
371
303
  ########################################################################################################
372
304
 
305
+ MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
306
+
373
307
  # Some emails have a lot of uninteresting CCs
374
308
  IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
375
309
  FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
@@ -649,9 +583,9 @@ EMAILS_CONFIG = [
649
583
  EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
650
584
  EmailCfg(id='033050', actual_text='schwartman'),
651
585
  EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
652
- EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
653
586
  EmailCfg(id='031333', is_fwded_article=True, description='looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
654
587
  EmailCfg(id='031335', is_fwded_article=True, description='looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
588
+ EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
655
589
  EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
656
590
  EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
657
591
  EmailCfg(id='022344', is_fwded_article=True, duplicate_ids=['028529']), # Bill Gates is most admired from Nikolic
@@ -664,6 +598,7 @@ EMAILS_CONFIG = [
664
598
  EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
665
599
  EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
666
600
  EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
601
+ EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
667
602
  EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
668
603
  EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
669
604
  EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
@@ -715,7 +650,6 @@ EMAILS_CONFIG = [
715
650
  EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
716
651
  EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
717
652
  EmailCfg(id='026605', is_fwded_article=True), # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
718
- EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
719
653
  EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
720
654
  EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
721
655
 
@@ -867,15 +801,73 @@ EMAILS_CONFIG = [
867
801
  ####################################### OTHER FILES ############################################
868
802
  ################################################################################################
869
803
 
804
+ # strings
805
+ FBI = 'FBI'
806
+ MEME = 'meme of'
807
+ PRESS_RELEASE = 'press release'
808
+ RESUME_OF = 'professional resumé'
809
+ SCREENSHOT = 'screenshot of'
810
+ TRANSLATION = 'translation of'
811
+ TWEET = 'tweet'
812
+
813
+ # Legal cases
814
+ BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN} and Tyler McDonald d/b/a YI.org"
815
+ EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}"
816
+ EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M."
817
+ GIUFFRE_V_DERSHOWITZ = f"{VIRGINIA_GIUFFRE} v. {ALAN_DERSHOWITZ}"
818
+ GIUFFRE_V_EPSTEIN = f"{VIRGINIA_GIUFFRE} v. {JEFFREY_EPSTEIN}"
819
+ GIUFFRE_V_MAXWELL = f"{VIRGINIA_GIUFFRE} v. {GHISLAINE_MAXWELL}"
820
+ JANE_DOE_V_EPSTEIN_TRUMP = f"Jane Doe v. Donald Trump and {JEFFREY_EPSTEIN}"
821
+ JANE_DOE_V_USA = 'Jane Doe #1 and Jane Doe #2 v. United States'
822
+ NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}"
823
+
824
+ # Descriptions of non-email, non-text message files
825
+ ARTICLE_DRAFT = 'draft of an article about'
826
+ BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
827
+ BROCKMAN_INC = 'Brockman, Inc.'
828
+ CVRA = "Crime Victims' Rights Act [CVRA]"
829
+ DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
830
+ DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
831
+ DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
832
+ DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
833
+ FBI_REPORT = f"report on Epstein investigation (redacted)"
834
+ FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
835
+ FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
836
+ FIRE_AND_FURY = f"Fire And Fury"
837
+ HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
838
+ HBS_APPLICATION = f"{HARVARD} Business School application letter"
839
+ JASTA = 'JASTA'
840
+ JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
841
+ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
842
+ LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
843
+ KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
844
+ LEXIS_NEXIS_CVRA_SEARCH = f"{LEXIS_NEXIS} search for case law around the {CVRA}"
845
+ NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
846
+ OBAMA_JOKE = 'joke about Obama'
847
+ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
848
+ PALM_BEACH_PROPERTY_INFO = f"{PALM_BEACH} property info"
849
+ PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
850
+ PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
851
+ PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
852
+ REAL_DEAL_ARTICLE = 'article by Keith Larsen'
853
+ SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
854
+ STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
855
+ SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
856
+ TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
857
+ UBS_CIO_REPORT = 'CIO Monthly Extended report'
858
+ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
859
+ WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
860
+ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
861
+
862
+
870
863
  OTHER_FILES_BOOKS = [
871
- DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
864
+ DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
872
865
  DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
873
866
  DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy'),
874
867
  DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
875
868
  DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
876
869
  DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
877
- DocCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK),
878
- DocCfg(id='027849', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK),
870
+ DocCfg(id='011472', author=EHUD_BARAK, description=f'"Night Flight" (draft)', date='2006-07-12', duplicate_ids=['027849']), # date from _extract_timestamp()
879
871
  DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
880
872
  DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
881
873
  DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
@@ -899,9 +891,6 @@ OTHER_FILES_BOOKS = [
899
891
  ]
900
892
 
901
893
  OTHER_FILES_ARTICLES = [
902
- DocCfg(id='030258', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030248'),
903
- DocCfg(id='030248', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
904
- DocCfg(id='029165', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
905
894
  DocCfg(id='030013', author=f'Aviation International News', description=f'article', date='2012-07-01'),
906
895
  DocCfg(id='013275', author=BLOOMBERG, description=f"article on notable 2013 obituaries", date='2013-12-26'),
907
896
  DocCfg(id='026543', author=BLOOMBERG, description=f"BNA article about taxes"),
@@ -943,6 +932,7 @@ OTHER_FILES_ARTICLES = [
943
932
  DocCfg(id='022952', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
944
933
  DocCfg(id='024229', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
945
934
  DocCfg(id='031198', author='Morning News USA', description=f"article about identify of Jane Doe in {JANE_DOE_V_EPSTEIN_TRUMP}"),
935
+ DocCfg(id='015462', author='Nautilus Education', description=f'magazine (?) issue'),
946
936
  DocCfg(id='031972', author=NYT, description=f"article about #MeToo allegations against {LAWRENCE_KRAUSS}", date='2018-03-07'),
947
937
  DocCfg(id='032435', author=NYT, description=f'article about Chinese butlers'),
948
938
  DocCfg(id='029452', author=NYT, description=f"article about {PETER_THIEL}"),
@@ -1006,7 +996,7 @@ OTHER_FILES_ARTICLES = [
1006
996
  DocCfg(id='024997', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-09-08'),
1007
997
  DocCfg(id='031941', author=SHIMON_POST, description=SHIMON_POST_ARTICLE, date='2011-11-17'),
1008
998
  DocCfg(id='030829', author=f'South Florida Sun Sentinel', description=f'article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
1009
- DocCfg(id='021092', author='Tatler', description=f'{SINGLE_PAGE} of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
999
+ DocCfg(id='021092', author='Tatler', description=f'single page of article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
1010
1000
  DocCfg(id='030333', author=f'The Independent', description=f'article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
1011
1001
  DocCfg(id='010754', author=f'U.S. News', description=f"article about Yitzhak Rabin"),
1012
1002
  DocCfg(id='014498', author=VI_DAILY_NEWS, description='article', date='2016-12-13'),
@@ -1029,14 +1019,12 @@ OTHER_FILES_ARTICLES = [
1029
1019
  DocCfg(id='031725', description=f"article about Gloria Allred and Trump allegations", date='2016-10-10'),
1030
1020
  DocCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
1031
1021
  DocCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
1032
- DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
1033
1022
  DocCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
1034
1023
  DocCfg(id='027051', description=f"German language article about the 2013 Lifeball / AIDS Gala", date='2013-01-01'),
1035
- DocCfg(id='033480', description=f"{JOHN_BOLTON_PRESS_CLIPPING}", date='2018-04-06', duplicate_ids=['033481']),
1036
- DocCfg(id='013403', description=f"Lexis Nexis result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
1037
- DocCfg(id='015462', description=f'Nautilus Education magazine (?) issue'),
1024
+ DocCfg(id='033480', description=f"John Bolton press clipping", date='2018-04-06', duplicate_ids=['033481']),
1025
+ DocCfg(id='013403', description=f"{LEXIS_NEXIS} result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
1038
1026
  DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
1039
- DocCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
1027
+ DocCfg(id='031191', description=f"single page of unknown article about Epstein and Trump's relationship in 1997"),
1040
1028
  DocCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
1041
1029
  DocCfg(
1042
1030
  id='031736',
@@ -1044,16 +1032,16 @@ OTHER_FILES_ARTICLES = [
1044
1032
  date='2017-05-13',
1045
1033
  ),
1046
1034
  DocCfg(id='025094', description=f'{TRANSLATION} Spanish article about Cuba', date='2015-11-08'),
1047
- DocCfg(id='031794', description=f"very short French magazine clipping", is_interesting=False),
1035
+ DocCfg(id='031794', description=f"very short French magazine clipping"),
1048
1036
  ]
1049
1037
 
1050
1038
  OTHER_FILES_LEGAL = [
1051
1039
  DocCfg(id='017789', author=ALAN_DERSHOWITZ, description=f'letter to {HARVARD} Crimson complaining he was defamed'),
1052
1040
  DocCfg(id='011908', author=BRUNEL_V_EPSTEIN, description=f"court filing"),
1053
- DocCfg(id='017603', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
1054
- DocCfg(id='017635', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
1055
- DocCfg(id='016509', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
1056
- DocCfg(id='017714', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
1041
+ DocCfg(id='017603', author=DAVID_SCHOEN, description=LEXIS_NEXIS_CVRA_SEARCH, date='2019-02-28'),
1042
+ DocCfg(id='017635', author=DAVID_SCHOEN, description=LEXIS_NEXIS_CVRA_SEARCH, date='2019-02-28'),
1043
+ DocCfg(id='016509', author=DAVID_SCHOEN, description=LEXIS_NEXIS_CVRA_SEARCH, date='2019-02-28'),
1044
+ DocCfg(id='017714', author=DAVID_SCHOEN, description=LEXIS_NEXIS_CVRA_SEARCH, date='2019-02-28'),
1057
1045
  DocCfg(id='021824', author=EDWARDS_V_DERSHOWITZ, description=f"deposition of {PAUL_G_CASSELL}"),
1058
1046
  DocCfg(
1059
1047
  id='010757',
@@ -1187,21 +1175,21 @@ OTHER_FILES_LEGAL = [
1187
1175
  date='2019-01-11',
1188
1176
  ),
1189
1177
  DocCfg(id='028540', author='SCOTUS', description=f"decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
1190
- DocCfg(id='012197', author='SDFL', description=f"Response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
1191
- DocCfg(id='022277', description=f"{TEXT_OF_US_LAW} National Labour Relations Board (NLRB)"),
1178
+ DocCfg(id='012197', author='SDFL', description=f"response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
1179
+ DocCfg(id='022277', description=f"text of National Labour Relations Board (NLRB) law", is_interesting=False),
1192
1180
  ]
1193
1181
 
1194
1182
  OTHER_FILES_CONFERENCES = [
1195
- DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference, attached to 014312'),
1183
+ DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference', attached_to_email_id='014312'),
1196
1184
  DocCfg(id='026825', author=DEUTSCHE_BANK, description=f"Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1197
1185
  DocCfg(id='023123', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=f"{STRANGE_BEDFELLOWS} (old draft)"),
1198
1186
  DocCfg(id='023120', author=LAWRENCE_KRAUSS_ASU_ORIGINS, description=STRANGE_BEDFELLOWS, duplicate_ids=['023121'], dupe_type='earlier'),
1199
- DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f"Earth Environment Convention about ESG investing"),
1187
+ DocCfg(id='031359', author=NOBEL_CHARITABLE_TRUST, description=f'"Earth Environment Convention" about ESG investing'),
1200
1188
  DocCfg(id='031354', author=NOBEL_CHARITABLE_TRUST, description=f'"Thinking About the Environment and Technology" report 2011'),
1201
1189
  DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
1202
1190
  DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1203
1191
  DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1204
- DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program emailed to epstein BY {BARBRO_C_EHNBOM} in 031226", date='2012-08-18'),
1192
+ DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program", date='2012-08-18', attached_to_email_id='031226'),
1205
1193
  DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
1206
1194
  DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
1207
1195
  DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
@@ -1210,10 +1198,20 @@ OTHER_FILES_CONFERENCES = [
1210
1198
  author=UN_GENERAL_ASSEMBLY,
1211
1199
  description=f'schedule including "Presidents Private Dinner - Jeffrey Epstine (sic)"',
1212
1200
  date='2012-09-21',
1201
+ is_interesting=True,
1213
1202
  ),
1214
1203
  DocCfg(id='017526', description=f'Intellectual Jazz conference brochure f. {DAVID_BLAINE}'),
1215
- DocCfg(id='029427', description=f"seems related to an IRL meeting about concerns China will attempt to absorb Mongolia"),
1216
- DocCfg(id='025797', description=f'someone\'s notes from Aspen Strategy Group', date='2013-05-29'),
1204
+ DocCfg(
1205
+ id='029427',
1206
+ description=f"seems related to an IRL meeting about concerns China will attempt to absorb Mongolia",
1207
+ is_interesting=True,
1208
+ ),
1209
+ DocCfg(
1210
+ id='025797',
1211
+ date='2013-05-29',
1212
+ description=f"someone's notes from Aspen Strategy Group",
1213
+ is_interesting=True,
1214
+ ),
1217
1215
  DocCfg(
1218
1216
  id='017060',
1219
1217
  description=f'World Economic Forum (WEF) Annual Meeting 2011 List of Participants',
@@ -1264,7 +1262,12 @@ OTHER_FILES_FINANCE = [
1264
1262
  DocCfg(id='026944', author=GOLDMAN_INVESTMENT_MGMT, description=f"Risk of a US-Iran Military Conflict", date='2019-05-23'),
1265
1263
  DocCfg(id='018804', author='Integra Realty Resources', description=f"appraisal of going concern for IGY American Yacht Harbor Marina in {VIRGIN_ISLANDS}"),
1266
1264
  DocCfg(id='026679', author='Invesco', description=f"Global Sovereign Asset Management Study 2017"),
1267
- DocCfg(id='033220', author='Joseph G. Carson', description=f"short economic report on defense spending under Trump"),
1265
+ DocCfg(
1266
+ id='033220',
1267
+ author='Joseph G. Carson',
1268
+ description=f"short economic report on defense spending under Trump",
1269
+ is_interesting=True,
1270
+ ),
1268
1271
  DocCfg(id='026572', author=JP_MORGAN, description=f"Global Asset Allocation report", date='2012-11-09'),
1269
1272
  DocCfg(id='030848', author=JP_MORGAN, description=f"Global Asset Allocation report", date='2013-03-28'),
1270
1273
  DocCfg(id='030840', author=JP_MORGAN, description=f"Market Thoughts"),
@@ -1284,7 +1287,7 @@ OTHER_FILES_FINANCE = [
1284
1287
  DocCfg(id='025763', author='S&P', description=f"Economic Research: How Increasing Income Inequality Is Dampening U.S. Growth", date='2014-08-05'),
1285
1288
  DocCfg(id='024135', author=UBS, description=UBS_CIO_REPORT, date='2012-06-29'),
1286
1289
  DocCfg(id='025247', author=UBS, description=UBS_CIO_REPORT, date='2012-10-25'),
1287
- DocCfg(id='026584', description=f"article about tax implications of disregarded entities", date='2009-07-01'),
1290
+ DocCfg(id='026584', description=f"article about tax implications of disregarded entities", date='2009-07-01', is_interesting=True),
1288
1291
  DocCfg(
1289
1292
  id='024271',
1290
1293
  description=f"Blockchain Capital and Brock Pierce pitch deck",
@@ -1345,8 +1348,8 @@ OTHER_FILES_PROPERTY = [
1345
1348
  DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
1346
1349
  DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
1347
1350
  DocCfg(id='018743', description=f"Las Vegas property listing"),
1348
- DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
1349
- DocCfg(id='016697', description=f"{PALM_BEACH} property tax info (?) that mentions Trump"),
1351
+ DocCfg(id='016695', description=f"{PALM_BEACH_PROPERTY_INFO} (?)"),
1352
+ DocCfg(id='016697', description=f"{PALM_BEACH_PROPERTY_INFO} (?) that mentions Trump"),
1350
1353
  DocCfg(id='016599', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1351
1354
  DocCfg(id='016600', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1352
1355
  DocCfg(id='016601', description=f"{PALM_BEACH_TSV} consumption (water?)"),
@@ -1389,15 +1392,20 @@ OTHER_FILES_SOCIAL = [
1389
1392
  ]
1390
1393
 
1391
1394
  OTHER_FILES_POLITICS = [
1395
+ DocCfg(id='030258', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030248'),
1396
+ DocCfg(id='030248', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
1397
+ DocCfg(id='029165', author=ALAN_DERSHOWITZ, description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
1392
1398
  DocCfg(id='029918', author=DIANA_DEGETTE_CAMPAIGN, description=f"bio", date='2012-09-27'),
1393
1399
  DocCfg(id='031184', author=DIANA_DEGETTE_CAMPAIGN, description=f"invitation to fundraiser hosted by {BARBRO_C_EHNBOM}", date='2012-09-27'),
1394
1400
  DocCfg(id='026248', author='Don McGahn', description=f'letter from Trump lawyer to Devin Nunes (R-CA) about FISA courts and Trump'),
1401
+ DocCfg(id='027009', author=EHUD_BARAK, description=f"speech to AIPAC", date='2013-03-03'),
1395
1402
  DocCfg(
1396
1403
  id='019233',
1397
1404
  author='Freedom House',
1398
1405
  description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
1399
1406
  date='2017-06-02',
1400
1407
  ),
1408
+ DocCfg(id='026856', author='Kevin Rudd', description=f'speech "Xi Jinping, China And The Global Order"', date='2018-06-26'),
1401
1409
  DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
1402
1410
  DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
1403
1411
  DocCfg(
@@ -1406,6 +1414,7 @@ OTHER_FILES_POLITICS = [
1406
1414
  description=f'The Search for Peace in the Arab-Israeli Conflict',
1407
1415
  date='2019-12-09',
1408
1416
  ),
1417
+ DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
1409
1418
  DocCfg(
1410
1419
  id='025849',
1411
1420
  author='US Office of Government Information Services',
@@ -1418,8 +1427,8 @@ OTHER_FILES_POLITICS = [
1418
1427
  date='2015-01-15', # TODO: this is just a guess
1419
1428
  duplicate_ids=['028887'],
1420
1429
  ),
1421
- DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1422
- DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1430
+ DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1431
+ DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20', is_interesting=True),
1423
1432
  ]
1424
1433
 
1425
1434
  OTHER_FILES_ACADEMIA = [
@@ -1431,6 +1440,7 @@ OTHER_FILES_ACADEMIA = [
1431
1440
  description=f'report: "Challenges of AI: Envisioning and Addressing Adverse Outcomes"',
1432
1441
  duplicate_ids=['011284']
1433
1442
  ),
1443
+ DocCfg(id='026731', author='Lord Martin Rees', description=f"speech at first inaugural Cornell Carl Sagan Lecture"),
1434
1444
  DocCfg(id='015501', author=f"{MOSHE_HOFFMAN}, Erez Yoeli, and Carlos David Navarrete", description=f"Game Theory and Morality"),
1435
1445
  DocCfg(
1436
1446
  id='026521',
@@ -1458,13 +1468,13 @@ OTHER_FILES_ACADEMIA = [
1458
1468
  DocCfg(id='029592', description=HARVARD_POETRY),
1459
1469
  DocCfg(id='019396', description=f'{HARVARD} Economics 1545 Professor Kenneth Rogoff syllabus'),
1460
1470
  DocCfg(id='022445', description=f"Inference: International Review of Science Feedback & Comments", date='2018-11-01'),
1461
- DocCfg(id='029355', description=f'{SCREENSHOT} quote in book about {LARRY_SUMMERS}', duplicate_ids=['029356'], dupe_type='quoted'), # 029356 is zoomed in corner
1462
- ]
1463
-
1464
- OTHER_FILES_SPEECH = [
1465
- DocCfg(id='027009', author=EHUD_BARAK, description=f"speech to AIPAC", date='2013-03-03'),
1466
- DocCfg(id='026856', author='Kevin Rudd', description=f"speech 'Xi Jinping, China And The Global Order'", date='2018-06-26'),
1467
- DocCfg(id='026731', author='Lord Martin Rees', description=f"speech at first inaugural Cornell Carl Sagan Lecture"),
1471
+ DocCfg(
1472
+ id='029355',
1473
+ description=f'{SCREENSHOT} quote in book about {LARRY_SUMMERS}',
1474
+ duplicate_ids=['029356'], # 029356 is zoomed in corner
1475
+ dupe_type='quoted',
1476
+ is_interesting=False,
1477
+ ),
1468
1478
  ]
1469
1479
 
1470
1480
  # resumes and application letters
@@ -1525,7 +1535,7 @@ OTHER_FILES_MISC = [
1525
1535
  DocCfg(id='032735', author=GORDON_GETTY, description=f"on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
1526
1536
  DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of Epstein's side of the story?"),
1527
1537
  DocCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
1528
- DocCfg(id='031425', author=SCOTT_J_LINK, description=f'completely redacted email from'),
1538
+ DocCfg(id='031425', author=SCOTT_J_LINK, description=f'completely redacted email from', is_interesting=False),
1529
1539
  DocCfg(id='020447', author='Working Group on Chinese Influence Activities in the U.S.', description=f'Promoting Constructive Vigilance'),
1530
1540
  DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1531
1541
  DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
@@ -1542,7 +1552,7 @@ OTHER_FILES_MISC = [
1542
1552
  date='2000-06-07',
1543
1553
  description=f"{PRESS_RELEASE} announcing Donald Trump & {NICHOLAS_RIBIS} ended their working relationship at Trump's casino",
1544
1554
  ),
1545
- DocCfg(id='029328', description=f"Rafanelli Events promotional deck"),
1555
+ DocCfg(id='029328', description=f"Rafanelli Events promotional deck", is_interesting=False),
1546
1556
  DocCfg(id='033434', description=f"{SCREENSHOT} iPhone chat labeled 'Edwards' at the top"),
1547
1557
  DocCfg(id='029475', description=f'{VIRGIN_ISLANDS} Twin City Mobile Integrated Health Services (TCMIH) proposal/request for donation'),
1548
1558
  DocCfg(id='029448', description=f"weird short essay titled 'President Obama and Self-Deception'"),
@@ -1577,7 +1587,6 @@ OTHER_FILES_CATEGORIES = [
1577
1587
  REPUTATION,
1578
1588
  'RESUMES',
1579
1589
  SOCIAL,
1580
- SPEECH,
1581
1590
  ]
1582
1591
 
1583
1592
  OTHER_FILES_CONFIG = []
@@ -1602,6 +1611,9 @@ for cfg in ALL_CONFIGS:
1602
1611
 
1603
1612
  # Add extra config objects for duplicate files that match the config of the file they are duplicating
1604
1613
  for dupe_cfg in cfg.duplicate_cfgs():
1614
+ if not isinstance(dupe_cfg, EmailCfg):
1615
+ logger.debug(f"Generated synthetic config for dupe: {dupe_cfg}")
1616
+
1605
1617
  ALL_FILE_CONFIGS[dupe_cfg.id] = dupe_cfg
1606
1618
 
1607
1619
 
@@ -26,6 +26,8 @@ date_str = lambda dt: dt.isoformat()[0:10] if dt else None
26
26
  escape_double_quotes = lambda text: text.replace('"', r'\"')
27
27
  escape_single_quotes = lambda text: text.replace("'", r"\'")
28
28
  iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
29
+ days_between = lambda dt1, dt2: (dt2 - dt1).days + 1
30
+ days_between_str = lambda dt1, dt2: f"{days_between(dt1, dt2)} day" + ('s' if days_between(dt1, dt2) > 1 else '')
29
31
  remove_zero_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
30
32
  uniquify = lambda _list: list(set(_list))
31
33
  without_falsey = lambda _list: [e for e in _list if e]
@@ -36,7 +36,7 @@ FIELD_SORT_KEY = {
36
36
  }
37
37
 
38
38
  FINANCIAL_REPORTS_AUTHORS = [
39
- BOFA,
39
+ BOFA_MERRILL,
40
40
  DEUTSCHE_BANK,
41
41
  ELECTRON_CAPITAL_PARTNERS,
42
42
  GOLDMAN_INVESTMENT_MGMT,
@@ -65,22 +65,23 @@ class DocCfg:
65
65
  author (str | None): Author of the document (if any)
66
66
  category (str | None): Type of file
67
67
  date (str | None): If passed will be immediately parsed into the 'timestamp' field
68
- dupe_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
69
68
  dupe_type (DuplicateType | None): The type of duplicate this file is or its 'duplicate_ids' are
70
69
  duplicate_ids (list[str]): IDs of *other* documents that are dupes of this document
71
- is_interesting (bool): Override other considerations and always consider this file interesting
70
+ duplicate_of_id (str | None): If this is a dupe the ID of the duplicated file. This file will be suppressed
71
+ is_interesting (bool | None): Override other considerations and always consider this file interesting (or not)
72
72
  timestamp (datetime | None): Time this email was sent, file was created, article published, etc.
73
73
  is_synthetic (bool): True if this config was generated by the duplicate_cfgs() method
74
74
  """
75
75
  id: str
76
+ attached_to_email_id: str | None = None
76
77
  author: str | None = None
77
78
  category: str | None = None
78
79
  date: str | None = None
79
80
  description: str | None = None
80
- dupe_of_id: str | None = None
81
81
  dupe_type: DuplicateType | None = None
82
82
  duplicate_ids: list[str] = field(default_factory=list)
83
- is_interesting: bool = False
83
+ duplicate_of_id: str | None = None
84
+ is_interesting: bool | None = None
84
85
  is_synthetic: bool = False
85
86
  timestamp: datetime | None = None
86
87
 
@@ -88,7 +89,7 @@ class DocCfg:
88
89
  if self.date:
89
90
  self.timestamp = parse(self.date)
90
91
 
91
- if self.dupe_of_id or self.duplicate_ids:
92
+ if self.duplicate_of_id or self.duplicate_ids:
92
93
  self.dupe_type = self.dupe_type or SAME
93
94
 
94
95
  def complete_description(self) -> str | None:
@@ -102,10 +103,10 @@ class DocCfg:
102
103
  return f"{msg} {self.description}" if self.description else msg
103
104
  elif self.author and self.description:
104
105
  if self.category in [ACADEMIA, BOOK]:
105
- title = self.description if '"' in self.description else f"'{self.description}'"
106
+ title = self.description if '"' in self.description else f'"{self.description}"'
106
107
  return f"{title} by {self.author}"
107
108
  elif self.category == FINANCE and self.author in FINANCIAL_REPORTS_AUTHORS:
108
- return f"{self.author} report: '{self.description}'"
109
+ return f'{self.author} report: "{self.description}"'
109
110
  elif self.category == LEGAL and 'v.' in self.author:
110
111
  return f"{self.author}: {self.description}"
111
112
  elif self.category and self.author is None and self.description is None:
@@ -115,11 +116,11 @@ class DocCfg:
115
116
  return ' '.join(pieces) if pieces else None
116
117
 
117
118
  def duplicate_cfgs(self) -> Generator['DocCfg', None, None]:
118
- """Create synthetic DocCfg objects that set the 'dupe_of_id' field to point back to this object."""
119
+ """Create synthetic DocCfg objects that set the 'duplicate_of_id' field to point back to this object."""
119
120
  for id in self.duplicate_ids:
120
121
  dupe_cfg = deepcopy(self)
121
122
  dupe_cfg.id = id
122
- dupe_cfg.dupe_of_id = self.id
123
+ dupe_cfg.duplicate_of_id = self.id
123
124
  dupe_cfg.duplicate_ids = []
124
125
  dupe_cfg.dupe_type = self.dupe_type
125
126
  dupe_cfg.is_synthetic = True