epstein-files 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +7 -9
- epstein_files/documents/communication.py +2 -2
- epstein_files/documents/document.py +94 -81
- epstein_files/documents/email.py +47 -5
- epstein_files/documents/imessage/text_message.py +4 -13
- epstein_files/documents/json_file.py +13 -1
- epstein_files/documents/messenger_log.py +32 -19
- epstein_files/documents/other_file.py +67 -44
- epstein_files/epstein_files.py +22 -15
- epstein_files/util/constant/names.py +11 -10
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constants.py +98 -88
- epstein_files/util/data.py +1 -1
- epstein_files/util/doc_cfg.py +32 -62
- epstein_files/util/env.py +29 -17
- epstein_files/util/file_helper.py +12 -29
- epstein_files/util/highlighted_group.py +34 -17
- epstein_files/util/logging.py +1 -7
- epstein_files/util/output.py +13 -8
- epstein_files/util/rich.py +15 -10
- epstein_files/util/word_count.py +65 -5
- {epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/METADATA +1 -1
- epstein_files-1.0.12.dist-info/RECORD +33 -0
- epstein_files/count_words.py +0 -72
- epstein_files-1.0.10.dist-info/RECORD +0 -34
- {epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/WHEEL +0 -0
- {epstein_files-1.0.10.dist-info → epstein_files-1.0.12.dist-info}/entry_points.txt +0 -0
epstein_files/util/constants.py
CHANGED
|
@@ -56,7 +56,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
56
56
|
AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
|
|
57
57
|
ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
|
|
58
58
|
ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
|
|
59
|
-
ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(n|ri)a', re.IGNORECASE),
|
|
59
|
+
ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
|
|
60
60
|
ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
|
|
61
61
|
ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?Rothschild|Ariane', re.IGNORECASE),
|
|
62
62
|
BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
|
|
@@ -66,7 +66,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
66
66
|
BRAD_EDWARDS: re.compile(r'Brad(ley)?(\s*J(.?|ames))?\s*Edwards', re.IGNORECASE),
|
|
67
67
|
BRAD_KARP: re.compile(r'Brad (S.? )?Karp|Karp, Brad', re.IGNORECASE),
|
|
68
68
|
'Dangene and Jennie Enterprise': re.compile(r'Dangene and Jennie Enterprise?', re.IGNORECASE),
|
|
69
|
-
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
|
|
69
|
+
DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov|Danny\s*Frost', re.IGNORECASE),
|
|
70
70
|
DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
|
|
71
71
|
DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
|
|
72
72
|
DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
|
|
@@ -128,7 +128,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
128
128
|
PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
|
|
129
129
|
REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
|
|
130
130
|
RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
|
|
131
|
-
|
|
131
|
+
ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
|
|
132
132
|
ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
|
|
133
133
|
ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
|
|
134
134
|
ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
|
|
@@ -163,6 +163,7 @@ EMAILERS = [
|
|
|
163
163
|
DEEPAK_CHOPRA,
|
|
164
164
|
GLENN_DUBIN,
|
|
165
165
|
GORDON_GETTY,
|
|
166
|
+
'Kevin Bright',
|
|
166
167
|
'Jack Lang',
|
|
167
168
|
JACK_SCAROLA,
|
|
168
169
|
JAY_LEFKOWITZ,
|
|
@@ -238,14 +239,14 @@ BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
|
|
|
238
239
|
BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
|
|
239
240
|
BROCKMAN_INC = 'Brockman, Inc.'
|
|
240
241
|
CVRA = "Crime Victims' Rights Act [CVRA]"
|
|
242
|
+
CVRA_LEXIS_SEARCH = f"Lexis Nexis search for case law around the {CVRA}"
|
|
241
243
|
DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
|
|
242
|
-
DAVID_SCHOEN_CVRA_LEXIS_SEARCH = f"Lexis Nexis search for case law around the {CVRA} by {DAVID_SCHOEN}"
|
|
243
244
|
DERSH_GIUFFRE_TWEET = f"{TWEET} about {VIRGINIA_GIUFFRE}"
|
|
244
245
|
DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
|
|
245
246
|
DIANA_DEGETTE_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
|
|
246
247
|
EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
|
|
247
|
-
FBI_REPORT = f"
|
|
248
|
-
FBI_SEIZED_PROPERTY = f"
|
|
248
|
+
FBI_REPORT = f"report on Epstein investigation (redacted)"
|
|
249
|
+
FBI_SEIZED_PROPERTY = f"seized property inventory (redacted)"
|
|
249
250
|
FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
|
|
250
251
|
FIRE_AND_FURY = f"Fire And Fury"
|
|
251
252
|
HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
|
|
@@ -256,8 +257,7 @@ JOHN_BOLTON_PRESS_CLIPPING = 'John Bolton press clipping'
|
|
|
256
257
|
JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
|
|
257
258
|
LAWRENCE_KRAUSS_ASU_ORIGINS = f"{LAWRENCE_KRAUSS}'s ASU Origins Project"
|
|
258
259
|
KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
|
|
259
|
-
|
|
260
|
-
NERIO_ALESSANDRI = 'Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)'
|
|
260
|
+
MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
|
|
261
261
|
NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
|
|
262
262
|
NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
|
|
263
263
|
OBAMA_JOKE = 'joke about Obama'
|
|
@@ -265,12 +265,11 @@ PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} Code Enforcement'
|
|
|
265
265
|
PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
|
|
266
266
|
PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
|
|
267
267
|
PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
|
|
268
|
-
|
|
268
|
+
REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
269
269
|
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
270
270
|
SINGLE_PAGE = 'single page of'
|
|
271
271
|
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
272
272
|
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
273
|
-
THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
274
273
|
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
275
274
|
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
276
275
|
UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
|
|
@@ -500,7 +499,7 @@ EMAILS_CONFIG = [
|
|
|
500
499
|
EmailCfg(
|
|
501
500
|
id='029977',
|
|
502
501
|
author=LAWRANCE_VISOSKI,
|
|
503
|
-
recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
|
|
502
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE),
|
|
504
503
|
attribution_reason=LARRY_REASON,
|
|
505
504
|
duplicate_ids=['031129'],
|
|
506
505
|
),
|
|
@@ -508,7 +507,7 @@ EMAILS_CONFIG = [
|
|
|
508
507
|
EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
|
|
509
508
|
EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
|
|
510
509
|
EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
|
|
511
|
-
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
|
|
510
|
+
EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd, Mark Green is in signature'),
|
|
512
511
|
EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
|
|
513
512
|
EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
|
|
514
513
|
EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
|
|
@@ -573,7 +572,7 @@ EMAILS_CONFIG = [
|
|
|
573
572
|
attribution_reason='ends with "Respectfully, terry"',
|
|
574
573
|
author=TERRY_KAFKA,
|
|
575
574
|
fwded_text_after='From: Mike Cohen',
|
|
576
|
-
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
|
|
575
|
+
recipients=cast(list[str | None], [JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS),
|
|
577
576
|
duplicate_ids=['028482'],
|
|
578
577
|
),
|
|
579
578
|
EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
|
|
@@ -646,10 +645,11 @@ EMAILS_CONFIG = [
|
|
|
646
645
|
EmailCfg(id='021106', recipients=[STEVE_BANNON], attribution_reason='Reply'),
|
|
647
646
|
|
|
648
647
|
# Misc configs
|
|
649
|
-
EmailCfg(id='033050', actual_text='schwartman'),
|
|
650
648
|
EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
|
|
649
|
+
EmailCfg(id='032358', actual_text=REDACTED), # Completely redacted
|
|
650
|
+
EmailCfg(id='033050', actual_text='schwartman'),
|
|
651
651
|
EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
|
|
652
|
-
EmailCfg(id='023627', is_fwded_article=True, description=
|
|
652
|
+
EmailCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
653
653
|
EmailCfg(id='031333', is_fwded_article=True, description='looks like a Russian disinfo article'), # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
|
|
654
654
|
EmailCfg(id='031335', is_fwded_article=True, description='looks like a Russian disinfo article'), # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
|
|
655
655
|
EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
|
|
@@ -661,8 +661,13 @@ EMAILS_CONFIG = [
|
|
|
661
661
|
EmailCfg(id='025041', is_fwded_article=True, duplicate_ids=['028675']), # Obama agenda
|
|
662
662
|
EmailCfg(id='031136', is_fwded_article=True, duplicate_ids=['028791']), # 'Smart Money is Fleeing US Stocks'
|
|
663
663
|
EmailCfg(id='031779', is_fwded_article=True, duplicate_ids=['026938']), # Sarah Silverman on AI
|
|
664
|
-
EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']),
|
|
665
|
-
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']),
|
|
664
|
+
EmailCfg(id='029849', is_fwded_article=True, duplicate_ids=['033482']), # Fareed Zakaria: Trump sells America short),
|
|
665
|
+
EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']), # American-Israeli Cooperative Enterprise Newsletter
|
|
666
|
+
EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']), # Radar Online article about Epstein's early prison release
|
|
667
|
+
EmailCfg(id='031774', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
668
|
+
EmailCfg(id='033345', is_fwded_article=True), # Krassner fwd of Palmer Report article
|
|
669
|
+
EmailCfg(id='029903', is_fwded_article=True), # Krassner fwd of Ann Coulter article about Epstein
|
|
670
|
+
EmailCfg(id='030266', is_fwded_article=True), # Krassner fwd of article about Dershowitz
|
|
666
671
|
EmailCfg(id='030868', is_fwded_article=True), # 'He doesn't like this sh*t': Trump reportedly hates his job and his staff after 1 month
|
|
667
672
|
EmailCfg(id='026755', is_fwded_article=True), # HuffPo
|
|
668
673
|
EmailCfg(id='016218', is_fwded_article=True), # AT&T confirms it paid Trump lawyer Cohen for insights on Trump
|
|
@@ -707,6 +712,9 @@ EMAILS_CONFIG = [
|
|
|
707
712
|
EmailCfg(id='026924', is_fwded_article=True), # The Onion
|
|
708
713
|
EmailCfg(id='033311', is_fwded_article=True), # 2016 election polls
|
|
709
714
|
EmailCfg(id='026580', is_fwded_article=True), # NPR: Antigua: Land Of Sun, Sand, And Super Cheap
|
|
715
|
+
EmailCfg(id='031340', is_fwded_article=True), # Article about Alex Jones threatening Robert Mueller
|
|
716
|
+
EmailCfg(id='030209', is_fwded_article=True), # Atlantic Council Syria: Blackberry Diplomacy
|
|
717
|
+
EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']), # Sultan Sulayem fwding article about Trump and Russia
|
|
710
718
|
EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
|
|
711
719
|
EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
|
|
712
720
|
|
|
@@ -748,8 +756,6 @@ EMAILS_CONFIG = [
|
|
|
748
756
|
EmailCfg(id='031118', duplicate_ids=['019465']),
|
|
749
757
|
EmailCfg(id='031912', duplicate_ids=['032158']),
|
|
750
758
|
EmailCfg(id='030587', duplicate_ids=['030514']),
|
|
751
|
-
EmailCfg(id='029773', duplicate_ids=['012685'], fwded_text_after='Omar Quadhafi'),
|
|
752
|
-
EmailCfg(id='033297', duplicate_ids=['033586']),
|
|
753
759
|
EmailCfg(id='031089', duplicate_ids=['018084']),
|
|
754
760
|
EmailCfg(id='031088', duplicate_ids=['030885']),
|
|
755
761
|
EmailCfg(id='030238', duplicate_ids=['031130']),
|
|
@@ -803,7 +809,6 @@ EMAILS_CONFIG = [
|
|
|
803
809
|
EmailCfg(id='026618', duplicate_ids=['028485']),
|
|
804
810
|
EmailCfg(id='030609', duplicate_ids=['030495']),
|
|
805
811
|
EmailCfg(id='029831', duplicate_ids=['028972']),
|
|
806
|
-
EmailCfg(id='021758', duplicate_ids=['030616']),
|
|
807
812
|
EmailCfg(id='033498', duplicate_ids=['029884']),
|
|
808
813
|
EmailCfg(id='028620', duplicate_ids=['027094']),
|
|
809
814
|
EmailCfg(id='032456', duplicate_ids=['033579']),
|
|
@@ -851,7 +856,9 @@ EMAILS_CONFIG = [
|
|
|
851
856
|
EmailCfg(id='030015', fwded_text_after='Bill Clinton reportedly'),
|
|
852
857
|
EmailCfg(id='026312', fwded_text_after='Steve Bannon trying to get on disgraced'),
|
|
853
858
|
EmailCfg(id='031742', fwded_text_after="Trump's former campaign manager Paul Manafort"),
|
|
854
|
-
|
|
859
|
+
EmailCfg(id='028925', fwded_text_after='> on Jan 4, 2015'),
|
|
860
|
+
EmailCfg(id='029773', fwded_text_after='Omar Quadhafi', duplicate_ids=['012685']),
|
|
861
|
+
EmailCfg(id='012197_4', fwded_text_after="Thanks -- Jay"),
|
|
855
862
|
]
|
|
856
863
|
|
|
857
864
|
|
|
@@ -925,14 +932,14 @@ OTHER_FILES_ARTICLES = [
|
|
|
925
932
|
DocCfg(id='031776', author='Law360', description=f"article about Michael Avenatti by Andrew Strickler"),
|
|
926
933
|
DocCfg(id='023102', author=f'Litigation Daily', description=f"article about {REID_WEINGARTEN}", date='2015-09-04'),
|
|
927
934
|
DocCfg(id='029340', author=f'MarketWatch', description=f'article about estate taxes, particularly Epstein\'s favoured GRATs'),
|
|
928
|
-
DocCfg(id='022707', author=MICHAEL_WOLFF, description=
|
|
929
|
-
DocCfg(id='022727', author=MICHAEL_WOLFF, description=
|
|
930
|
-
DocCfg(id='022746', author=MICHAEL_WOLFF, description=
|
|
931
|
-
DocCfg(id='022844', author=MICHAEL_WOLFF, description=
|
|
932
|
-
DocCfg(id='022863', author=MICHAEL_WOLFF, description=
|
|
933
|
-
DocCfg(id='022894', author=MICHAEL_WOLFF, description=
|
|
934
|
-
DocCfg(id='022952', author=MICHAEL_WOLFF, description=
|
|
935
|
-
DocCfg(id='024229', author=MICHAEL_WOLFF, description=
|
|
935
|
+
DocCfg(id='022707', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
936
|
+
DocCfg(id='022727', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
937
|
+
DocCfg(id='022746', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
938
|
+
DocCfg(id='022844', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
939
|
+
DocCfg(id='022863', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
940
|
+
DocCfg(id='022894', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
941
|
+
DocCfg(id='022952', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
942
|
+
DocCfg(id='024229', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_EPSTEIN_ARTICLE_DRAFT),
|
|
936
943
|
DocCfg(id='031198', author='Morning News USA', description=f"article about identify of Jane Doe in {JANE_DOE_V_EPSTEIN_TRUMP}"),
|
|
937
944
|
DocCfg(id='031972', author=NYT, description=f"article about #MeToo allegations against {LAWRENCE_KRAUSS}", date='2018-03-07'),
|
|
938
945
|
DocCfg(id='032435', author=NYT, description=f'article about Chinese butlers'),
|
|
@@ -1016,22 +1023,12 @@ OTHER_FILES_ARTICLES = [
|
|
|
1016
1023
|
date='2019-02-06',
|
|
1017
1024
|
duplicate_ids=['031415'],
|
|
1018
1025
|
),
|
|
1019
|
-
|
|
1020
|
-
DocCfg(
|
|
1021
|
-
id='030199',
|
|
1022
|
-
description=f'article about allegations Trump raped a 13 year old girl {JANE_DOE_V_EPSTEIN_TRUMP}',
|
|
1023
|
-
date='2017-11-16',
|
|
1024
|
-
),
|
|
1026
|
+
DocCfg(id='030199', description=f'article about Trump rape allegations in {JANE_DOE_V_EPSTEIN_TRUMP}', date='2017-11-16'),
|
|
1025
1027
|
DocCfg(id='031725', description=f"article about Gloria Allred and Trump allegations", date='2016-10-10'),
|
|
1026
1028
|
DocCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
|
|
1027
1029
|
DocCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
|
|
1028
1030
|
DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
|
|
1029
1031
|
DocCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
|
|
1030
|
-
DocCfg(
|
|
1031
|
-
id='019233',
|
|
1032
|
-
description=f"Freedom House: 'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
|
|
1033
|
-
date='2017-06-02',
|
|
1034
|
-
),
|
|
1035
1032
|
DocCfg(id='027051', description=f"German language article about the 2013 Lifeball / AIDS Gala", date='2013-01-01'),
|
|
1036
1033
|
DocCfg(id='033480', description=f"{JOHN_BOLTON_PRESS_CLIPPING}", date='2018-04-06', duplicate_ids=['033481']),
|
|
1037
1034
|
DocCfg(id='013403', description=f"Lexis Nexis result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
|
|
@@ -1045,14 +1042,16 @@ OTHER_FILES_ARTICLES = [
|
|
|
1045
1042
|
date='2017-05-13',
|
|
1046
1043
|
),
|
|
1047
1044
|
DocCfg(id='025094', description=f'{TRANSLATION} Spanish article about Cuba', date='2015-11-08'),
|
|
1048
|
-
DocCfg(id='031794', description=f"very short French magazine clipping"),
|
|
1045
|
+
DocCfg(id='031794', description=f"very short French magazine clipping", is_interesting=False),
|
|
1049
1046
|
]
|
|
1050
1047
|
|
|
1051
1048
|
OTHER_FILES_LEGAL = [
|
|
1049
|
+
DocCfg(id='017789', author=ALAN_DERSHOWITZ, description=f'letter to {HARVARD} Crimson complaining he was defamed'),
|
|
1052
1050
|
DocCfg(id='011908', author=BRUNEL_V_EPSTEIN, description=f"court filing"),
|
|
1053
|
-
DocCfg(id='
|
|
1054
|
-
DocCfg(id='
|
|
1055
|
-
DocCfg(id='
|
|
1051
|
+
DocCfg(id='017603', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1052
|
+
DocCfg(id='017635', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1053
|
+
DocCfg(id='016509', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1054
|
+
DocCfg(id='017714', author=DAVID_SCHOEN, description=CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1056
1055
|
DocCfg(id='021824', author=EDWARDS_V_DERSHOWITZ, description=f"deposition of {PAUL_G_CASSELL}"),
|
|
1057
1056
|
DocCfg(
|
|
1058
1057
|
id='010757',
|
|
@@ -1084,10 +1083,10 @@ OTHER_FILES_LEGAL = [
|
|
|
1084
1083
|
DocCfg(id='017488', author=EPSTEIN_V_ROTHSTEIN_EDWARDS, description=f"Deposition of Scott Rothstein", date='2012-06-22'),
|
|
1085
1084
|
DocCfg(id='029315', author=EPSTEIN_V_ROTHSTEIN_EDWARDS, description=f"Plaintiff Motion for Summary Judgment by {JACK_SCAROLA}", date='2013-09-13'),
|
|
1086
1085
|
DocCfg(id='013304', author=EPSTEIN_V_ROTHSTEIN_EDWARDS, description=f"Plaintiff Response to Epstein's Motion for Summary Judgment", date='2014-04-17'),
|
|
1087
|
-
DocCfg(id='019352', description=FBI_REPORT,),
|
|
1088
|
-
DocCfg(id='021434', description=FBI_REPORT,),
|
|
1089
|
-
DocCfg(id='018872', description=FBI_SEIZED_PROPERTY,),
|
|
1090
|
-
DocCfg(id='021569', description=FBI_SEIZED_PROPERTY,),
|
|
1086
|
+
DocCfg(id='019352', author=FBI, description=FBI_REPORT,),
|
|
1087
|
+
DocCfg(id='021434', author=FBI, description=FBI_REPORT,),
|
|
1088
|
+
DocCfg(id='018872', author=FBI, description=FBI_SEIZED_PROPERTY,),
|
|
1089
|
+
DocCfg(id='021569', author=FBI, description=FBI_SEIZED_PROPERTY,),
|
|
1091
1090
|
DocCfg(id='017792', author=GIUFFRE_V_DERSHOWITZ, description=f"article about {ALAN_DERSHOWITZ}'s appearance on Wolf Blitzer"),
|
|
1092
1091
|
DocCfg(id='017767', author=GIUFFRE_V_DERSHOWITZ, description=f"article about {ALAN_DERSHOWITZ} working with {JEFFREY_EPSTEIN}"),
|
|
1093
1092
|
DocCfg(id='017796', author=GIUFFRE_V_DERSHOWITZ, description=f"article about {ALAN_DERSHOWITZ}"),
|
|
@@ -1147,6 +1146,9 @@ OTHER_FILES_LEGAL = [
|
|
|
1147
1146
|
DocCfg(id='017830', author=JASTA_SAUDI_LAWSUIT, description=f"legal text and court documents"),
|
|
1148
1147
|
DocCfg(id='017904', author=JASTA_SAUDI_LAWSUIT, description=f"Westlaw search results", date='2019-01-01'),
|
|
1149
1148
|
DocCfg(id='014037', author='Journal of Criminal Law and Criminology', description=f"article on {CVRA}"),
|
|
1149
|
+
DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
|
|
1150
|
+
DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
|
|
1151
|
+
DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
|
|
1150
1152
|
DocCfg(
|
|
1151
1153
|
id='031447',
|
|
1152
1154
|
author=MARTIN_WEINBERG,
|
|
@@ -1158,6 +1160,17 @@ OTHER_FILES_LEGAL = [
|
|
|
1158
1160
|
description=f"letter from to ABC / Good Morning America threatening libel lawsuit",
|
|
1159
1161
|
duplicate_ids=['028928']
|
|
1160
1162
|
),
|
|
1163
|
+
DocCfg(
|
|
1164
|
+
id='026793',
|
|
1165
|
+
author='Mintz Fraade',
|
|
1166
|
+
description=f"letter from {STEVEN_HOFFENBERG}'s lawyers offering to take over Epstein's business and resolve his legal issues",
|
|
1167
|
+
date='2018-03-23',
|
|
1168
|
+
),
|
|
1169
|
+
DocCfg(
|
|
1170
|
+
id='020662',
|
|
1171
|
+
author='Mishcon de Reya',
|
|
1172
|
+
description=f"letter from {ALAN_DERSHOWITZ}'s British lawyers to Daily Mail threatening libel suit",
|
|
1173
|
+
),
|
|
1161
1174
|
DocCfg(
|
|
1162
1175
|
id='029416',
|
|
1163
1176
|
author="National Enquirer / Radar Online v. FBI",
|
|
@@ -1173,12 +1186,6 @@ OTHER_FILES_LEGAL = [
|
|
|
1173
1186
|
),
|
|
1174
1187
|
DocCfg(id='028540', author='SCOTUS', description=f"decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
|
|
1175
1188
|
DocCfg(id='012197', author='SDFL', description=f"Response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
|
|
1176
|
-
DocCfg(id='020662', author='Mishcon de Reya', description=f"letter from {ALAN_DERSHOWITZ}'s British lawyers to Daily Mail threatening libel suit"),
|
|
1177
|
-
DocCfg(
|
|
1178
|
-
id='026793',
|
|
1179
|
-
description=f"letter from {STEVEN_HOFFENBERG}'s lawyers at Mintz Fraade offering to take over Epstein's business and resolve his legal issues",
|
|
1180
|
-
date='2018-03-23',
|
|
1181
|
-
),
|
|
1182
1189
|
DocCfg(id='022277', description=f"{TEXT_OF_US_LAW} National Labour Relations Board (NLRB)"),
|
|
1183
1190
|
]
|
|
1184
1191
|
|
|
@@ -1202,10 +1209,6 @@ OTHER_FILES_CONFERENCES = [
|
|
|
1202
1209
|
description=f'schedule including "Presidents Private Dinner - Jeffrey Epstine (sic)"',
|
|
1203
1210
|
date='2012-09-21',
|
|
1204
1211
|
),
|
|
1205
|
-
DocCfg(id='017603', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1206
|
-
DocCfg(id='017635', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1207
|
-
DocCfg(id='016509', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1208
|
-
DocCfg(id='017714', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
|
|
1209
1212
|
DocCfg(id='017526', description=f'Intellectual Jazz conference brochure f. {DAVID_BLAINE}'),
|
|
1210
1213
|
DocCfg(id='029427', description=f"seems related to an IRL meeting about concerns China will attempt to absorb Mongolia"),
|
|
1211
1214
|
DocCfg(id='025797', description=f'someone\'s notes from Aspen Strategy Group', date='2013-05-29'),
|
|
@@ -1218,6 +1221,7 @@ OTHER_FILES_CONFERENCES = [
|
|
|
1218
1221
|
|
|
1219
1222
|
# All authors of documents in this category will be marked uninteresting
|
|
1220
1223
|
OTHER_FILES_FINANCE = [
|
|
1224
|
+
DocCfg(id='024631', author='Ackrell Capital', description=f"Cannabis Investment Report 2018", is_interesting=True),
|
|
1221
1225
|
DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
|
|
1222
1226
|
DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
|
|
1223
1227
|
DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
|
|
@@ -1236,6 +1240,7 @@ OTHER_FILES_FINANCE = [
|
|
|
1236
1240
|
DocCfg(id='023575', author=BOFA_MERRILL, description=f"Global Equity Volatility Insights", date='2017-06-01'),
|
|
1237
1241
|
DocCfg(id='014518', author=BOFA_WEALTH_MGMT, description=f'tax alert', date='2016-05-02'),
|
|
1238
1242
|
DocCfg(id='029438', author=BOFA_WEALTH_MGMT, description=f'tax report', date='2018-01-02'),
|
|
1243
|
+
DocCfg(id='026668', author="Boothbay Fund Management", description=f"2016-Q4 earnings report signed by Ari Glass"),
|
|
1239
1244
|
DocCfg(id='024302', author='Carvana', description=f"form 14A SEC filing proxy statement", date='2019-04-23'),
|
|
1240
1245
|
DocCfg(id='029305', author='CCH Tax', description=f"Briefing on end of Defense of Marriage Act", date='2013-06-27'),
|
|
1241
1246
|
DocCfg(id='026794', author=DEUTSCHE_BANK, description=f"Global Political and Regulatory Risk in 2015/2016"),
|
|
@@ -1273,24 +1278,26 @@ OTHER_FILES_FINANCE = [
|
|
|
1273
1278
|
DocCfg(id='025296', author='Laffer Associates', description=f'report predicting Trump win', date='2016-07-06'),
|
|
1274
1279
|
DocCfg(id='020824', author='Mary Meeker', description=f"USA Inc: A Basic Summary of America's Financial Statements compiled", date='2011-02-01'),
|
|
1275
1280
|
DocCfg(id='025551', author='Morgan Stanley', description=f'report about alternative asset managers', date='2018-01-30'),
|
|
1281
|
+
DocCfg(id='019856', author='Sadis Goldberg LLP', description=f"report on SCOTUS ruling about insider trading", is_interesting=True),
|
|
1276
1282
|
DocCfg(id='025763', author='S&P', description=f"Economic Research: How Increasing Income Inequality Is Dampening U.S. Growth", date='2014-08-05'),
|
|
1277
1283
|
DocCfg(id='024135', author=UBS, description=UBS_CIO_REPORT, date='2012-06-29'),
|
|
1278
1284
|
DocCfg(id='025247', author=UBS, description=UBS_CIO_REPORT, date='2012-10-25'),
|
|
1279
|
-
DocCfg(id='024631', description=f"Ackrell Capital report: Cannabis Investment Report 2018"),
|
|
1280
1285
|
DocCfg(id='026584', description=f"article about tax implications of disregarded entities", date='2009-07-01'),
|
|
1281
|
-
DocCfg(
|
|
1286
|
+
DocCfg(
|
|
1287
|
+
id='024271',
|
|
1288
|
+
description=f"Blockchain Capital and Brock Pierce pitch deck",
|
|
1289
|
+
date='2015-10-01',
|
|
1290
|
+
is_interesting=True,
|
|
1291
|
+
),
|
|
1282
1292
|
DocCfg(id='024817', description=f"Cowen's Collective View of CBD / Cannabis report"),
|
|
1283
1293
|
DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
|
|
1284
|
-
DocCfg(id='019856', description=f"Sadis Goldberg LLP report on SCOTUS ruling about insider trading"),
|
|
1285
1294
|
|
|
1286
1295
|
# private placement memoranda
|
|
1287
1296
|
DocCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
|
|
1288
|
-
DocCfg(id='024003', description=f"New Leaf Ventures private placement memorandum"),
|
|
1297
|
+
DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
|
|
1289
1298
|
]
|
|
1290
1299
|
|
|
1291
1300
|
OTHER_FILES_LETTERS = [
|
|
1292
|
-
DocCfg(id='017789', author=ALAN_DERSHOWITZ, description=f'letter to {HARVARD} Crimson complaining he was defamed'),
|
|
1293
|
-
DocCfg(id='026668', author="Boothbay Fund Management", description=f"2016-Q4 earnings report signed by Ari Glass"),
|
|
1294
1301
|
DocCfg(
|
|
1295
1302
|
id='019086',
|
|
1296
1303
|
author=DAVID_BLAINE,
|
|
@@ -1315,19 +1322,25 @@ OTHER_FILES_LETTERS = [
|
|
|
1315
1322
|
description=f"letter about algorithmic trading",
|
|
1316
1323
|
date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
|
|
1317
1324
|
),
|
|
1318
|
-
DocCfg(id='026248', author='Don McGahn', description=f'letter from Trump lawyer to Devin Nunes (R-CA) about FISA courts and Trump'),
|
|
1319
1325
|
DocCfg(id='029304', author=DONALD_TRUMP, description=f"recommendation letter for recently departed {TRUMP_ORG} lawyer {MICHAEL_J_BOCCIO}"),
|
|
1320
1326
|
DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the {TRUMP_ORG}", date='2011-08-07'),
|
|
1321
|
-
DocCfg(id='022405', author=NOAM_CHOMSKY, description=f"letter attesting to Epstein's good character"),
|
|
1322
1327
|
DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
|
|
1323
1328
|
]
|
|
1324
1329
|
|
|
1325
1330
|
OTHER_FILES_PROPERTY = [
|
|
1326
|
-
DocCfg(
|
|
1331
|
+
DocCfg(
|
|
1332
|
+
id='026759',
|
|
1333
|
+
author='Great Bay Condominium Owners Association',
|
|
1334
|
+
description=f'{PRESS_RELEASE} about Hurricane Irma damage',
|
|
1335
|
+
date='2017-09-13',
|
|
1336
|
+
is_interesting=False,
|
|
1337
|
+
),
|
|
1327
1338
|
DocCfg(id='016602', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-04-17'),
|
|
1328
1339
|
DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
|
|
1329
|
-
DocCfg(id='
|
|
1330
|
-
DocCfg(id='
|
|
1340
|
+
DocCfg(id='016636', author=PALM_BEACH_WATER_COMMITTEE, description=f"Meeting on January 29, 2009"),
|
|
1341
|
+
DocCfg(id='022417', author='Park Partners NYC', description=f"letter to partners in real estate project with architectural plans"),
|
|
1342
|
+
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
|
|
1343
|
+
DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
|
|
1331
1344
|
DocCfg(id='016597', author='Trump Properties LLC', description=f'appeal of some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
|
|
1332
1345
|
DocCfg(id='018743', description=f"Las Vegas property listing"),
|
|
1333
1346
|
DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
|
|
@@ -1339,8 +1352,6 @@ OTHER_FILES_PROPERTY = [
|
|
|
1339
1352
|
DocCfg(id='016552', description=f"{PALM_BEACH_TSV} info"),
|
|
1340
1353
|
DocCfg(id='016698', description=f"{PALM_BEACH_TSV} info (broken?)"),
|
|
1341
1354
|
DocCfg(id='016696', description=f"{PALM_BEACH_TSV} info (water quality?"),
|
|
1342
|
-
DocCfg(id='016636', description=f"{PALM_BEACH_WATER_COMMITTEE} Meeting on January 29, 2009"),
|
|
1343
|
-
DocCfg(id='022417', description=f"Park Partners NYC letter to partners in real estate project with architectural plans"),
|
|
1344
1355
|
DocCfg(
|
|
1345
1356
|
id='018727',
|
|
1346
1357
|
description=f"{VIRGIN_ISLANDS} property deal pitch deck, building will be leased to the U.S. govt GSA",
|
|
@@ -1378,6 +1389,13 @@ OTHER_FILES_SOCIAL = [
|
|
|
1378
1389
|
OTHER_FILES_POLITICS = [
|
|
1379
1390
|
DocCfg(id='029918', author=DIANA_DEGETTE_CAMPAIGN, description=f"bio", date='2012-09-27'),
|
|
1380
1391
|
DocCfg(id='031184', author=DIANA_DEGETTE_CAMPAIGN, description=f"invitation to fundraiser hosted by {BARBRO_C_EHNBOM}", date='2012-09-27'),
|
|
1392
|
+
DocCfg(id='026248', author='Don McGahn', description=f'letter from Trump lawyer to Devin Nunes (R-CA) about FISA courts and Trump'),
|
|
1393
|
+
DocCfg(
|
|
1394
|
+
id='019233',
|
|
1395
|
+
author='Freedom House',
|
|
1396
|
+
description=f"'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
|
|
1397
|
+
date='2017-06-02',
|
|
1398
|
+
),
|
|
1381
1399
|
DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
|
|
1382
1400
|
DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
|
|
1383
1401
|
DocCfg(
|
|
@@ -1417,6 +1435,7 @@ OTHER_FILES_ACADEMIA = [
|
|
|
1417
1435
|
author=f"{MOSHE_HOFFMAN}, Erez Yoeli, and {MARTIN_NOWAK}",
|
|
1418
1436
|
description=f"Cooperating Without Looking: Game Theory Model of Trust and Reciprocal Cooperation"
|
|
1419
1437
|
),
|
|
1438
|
+
DocCfg(id='022405', author=NOAM_CHOMSKY, description=f"letter attesting to Epstein's good character"),
|
|
1420
1439
|
DocCfg(id='025143', author=ROBERT_TRIVERS, description=f"Africa, Parasites, Intelligence", date='2018-06-25'),
|
|
1421
1440
|
DocCfg(id='029155', author=ROBERT_TRIVERS, description=f'response sent to the Gruterites ({GORDON_GETTY} fans)', date='2018-03-19'),
|
|
1422
1441
|
DocCfg(
|
|
@@ -1482,8 +1501,13 @@ OTHER_FILES_MISC = [
|
|
|
1482
1501
|
DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1483
1502
|
DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1484
1503
|
DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1485
|
-
DocCfg(id='
|
|
1486
|
-
DocCfg(
|
|
1504
|
+
DocCfg(id='032210', category=SKYPE_LOG, author='linkspirit', is_interesting=True),
|
|
1505
|
+
DocCfg(
|
|
1506
|
+
id='018224',
|
|
1507
|
+
category=SKYPE_LOG,
|
|
1508
|
+
author=f'linkspirit (French?) and {LAWRENCE_KRAUSS}',
|
|
1509
|
+
is_interesting=True, # we don't know who linkspirit is yet
|
|
1510
|
+
),
|
|
1487
1511
|
DocCfg(
|
|
1488
1512
|
id='025147',
|
|
1489
1513
|
author=BROCKMAN_INC,
|
|
@@ -1532,6 +1556,7 @@ OTHER_FILES_JUNK = [
|
|
|
1532
1556
|
DocCfg(id='029352', description=OBAMA_JOKE, date='2013-07-26'),
|
|
1533
1557
|
DocCfg(id='029351', description=OBAMA_JOKE, date='2013-07-26'),
|
|
1534
1558
|
DocCfg(id='029354', description=OBAMA_JOKE, date='2013-07-26'),
|
|
1559
|
+
DocCfg(id='031293'),
|
|
1535
1560
|
]
|
|
1536
1561
|
|
|
1537
1562
|
OTHER_FILES_CATEGORIES = [
|
|
@@ -1587,18 +1612,3 @@ REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sa
|
|
|
1587
1612
|
REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
|
|
1588
1613
|
REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
|
|
1589
1614
|
SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?(Sent (from|via).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
# Error checking.
|
|
1593
|
-
if len(OTHER_FILES_CONFIG) != 442:
|
|
1594
|
-
logger.warning(f"Found {len(OTHER_FILES_CONFIG)} configured other files!")
|
|
1595
|
-
|
|
1596
|
-
encountered_file_ids = set()
|
|
1597
|
-
|
|
1598
|
-
for cfg in ALL_CONFIGS:
|
|
1599
|
-
if cfg.id in encountered_file_ids:
|
|
1600
|
-
raise ValueError(f"{cfg.id} configured twice!\n\n{cfg}\n")
|
|
1601
|
-
elif cfg.dupe_of_id and cfg.dupe_of_id == cfg.id:
|
|
1602
|
-
raise ValueError(f"Invalid config!\n\n{cfg}\n")
|
|
1603
|
-
|
|
1604
|
-
encountered_file_ids.add(cfg.id)
|
epstein_files/util/data.py
CHANGED
|
@@ -21,12 +21,12 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
|
|
|
21
21
|
PACIFIC_TZ = tz.gettz("America/Los_Angeles")
|
|
22
22
|
TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ} # Suppresses annoying warnings from parse() calls
|
|
23
23
|
|
|
24
|
-
|
|
25
24
|
collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
|
|
26
25
|
date_str = lambda dt: dt.isoformat()[0:10] if dt else None
|
|
27
26
|
escape_double_quotes = lambda text: text.replace('"', r'\"')
|
|
28
27
|
escape_single_quotes = lambda text: text.replace("'", r"\'")
|
|
29
28
|
iso_timestamp = lambda dt: dt.isoformat().replace('T', ' ')
|
|
29
|
+
remove_time_from_timestamp_str = lambda dt: dt.isoformat().removesuffix('T00:00:00')
|
|
30
30
|
uniquify = lambda _list: list(set(_list))
|
|
31
31
|
without_falsey = lambda _list: [e for e in _list if e]
|
|
32
32
|
|