epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,22 +5,9 @@ from dateutil.parser import parse
5
5
 
6
6
  from epstein_files.util.constant.names import *
7
7
  from epstein_files.util.constant.strings import *
8
- from epstein_files.util.file_cfg import MessageCfg, FileCfg
8
+ from epstein_files.util.doc_cfg import DocCfg, EmailCfg, TextCfg
9
9
 
10
- # Misc
11
10
  FALLBACK_TIMESTAMP = parse("1/1/2051 12:01:01 AM")
12
- RESUME_OF = 'professional resumé of'
13
- SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?(Sent (from|via).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
14
-
15
- # Email reply regexes (has to be here for circular dependencies reasons)
16
- FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
17
- REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
18
- REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
19
- REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
20
- REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
21
- REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
22
- REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
23
-
24
11
 
25
12
  HEADER_ABBREVIATIONS = {
26
13
  "AD": "Abu Dhabi",
@@ -80,6 +67,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
80
67
  DANNY_FROST: re.compile(r'Frost, Danny|frostd@dany.nyc.gov', re.IGNORECASE),
81
68
  DARREN_INDYKE: re.compile(r'darren$|Darren\s*(K\.?\s*)?[il]n[dq]_?yke?|dkiesq', re.IGNORECASE),
82
69
  DAVID_FISZEL: re.compile(r'David\s*Fis?zel', re.IGNORECASE),
70
+ DAVID_HAIG: re.compile(fr'{DAVID_HAIG}|Haig, David', re.IGNORECASE),
83
71
  DAVID_STERN: re.compile(r'David Stern?', re.IGNORECASE),
84
72
  EDUARDO_ROBLES: re.compile(r'Ed(uardo)?\s*Robles', re.IGNORECASE),
85
73
  EDWARD_JAY_EPSTEIN: re.compile(r'Edward (Jay )?Epstein', re.IGNORECASE),
@@ -96,6 +84,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
96
84
  JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
97
85
  JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
98
86
  JEFFREY_EPSTEIN: re.compile(r'[djl]ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!Mark L. )Epstein', re.IGNORECASE),
87
+ JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
99
88
  JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
100
89
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
101
90
  JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
@@ -168,6 +157,7 @@ EMAILERS = [
168
157
  DANIEL_SABBA,
169
158
  'Danny Goldberg',
170
159
  DAVID_SCHOEN,
160
+ DEBBIE_FEIN,
171
161
  DEEPAK_CHOPRA,
172
162
  GLENN_DUBIN,
173
163
  GORDON_GETTY,
@@ -175,7 +165,6 @@ EMAILERS = [
175
165
  JACK_SCAROLA,
176
166
  JAY_LEFKOWITZ,
177
167
  JES_STALEY,
178
- JESSICA_CADWELL,
179
168
  JOHN_PAGE,
180
169
  'Jokeland',
181
170
  JOSCHA_BACH,
@@ -215,29 +204,22 @@ for emailer in EMAILERS:
215
204
 
216
205
  EMAILER_REGEXES[emailer] = re.compile(emailer, re.IGNORECASE)
217
206
 
218
- # Some emails have a lot of uninteresting CCs
219
- IRAN_NUCLEAR_DEAL_SPAM_EMAIL_RECIPIENTS: list[str | None] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
220
- KRASSNER_MANSON_RECIPIENTS: list[str | None] = ['Nancy Cain', 'Tom', 'Marie Moneysmith', 'Steven Gaydos', 'George Krassner', 'Linda W. Grossman', 'Holly Krassner Dawson', 'Daniel Dawson', 'Danny Goldberg', 'Caryl Ratner', 'Kevin Bright', 'Michael Simmons', SAMUEL_LEFF, 'Bob Fass', 'Lynnie Tofte Fass', 'Barb Cowles', 'Lee Quarnstrom']
221
- KRASSNER_024923_RECIPIENTS: list[str | None] = ['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman']
222
- KRASSNER_033568_RECIPIENTS: list[str | None] = ['George Krassner', 'Daniel Dawson', 'Danny Goldberg', 'Tom', 'Kevin Bright', 'Walli Leff', 'Michael Simmons', 'Lee Quarnstrom', 'Lanny Swerdlow', 'Larry Sloman', 'W&K', 'Harry Shearer', 'Jay Levin']
223
- FLIGHT_IN_2012_PEOPLE: list[str | None] = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
224
-
225
207
 
226
208
  ##########################
227
- # OtherFile Config Stuff #
209
+ # OtherFile config stuff #
228
210
  ##########################
229
- BOOK = 'book:'
211
+
212
+ # strings
230
213
  FBI = 'FBI'
231
- FLIGHT_LOGS = 'flight logs'
232
214
  MEME = 'meme of'
233
215
  PRESS_RELEASE = 'press release'
234
- REPUTATION_MGMT = 'reputation management:'
216
+ RESUME_OF = 'professional resumé of'
235
217
  SCREENSHOT = 'screenshot of'
236
218
  TRANSLATION = 'translation of'
237
219
  TWEET = 'tweet'
238
220
  TEXT_OF_US_LAW = 'text of U.S. law:'
239
221
 
240
- # Court cases
222
+ # Legal cases
241
223
  BRUNEL_V_EPSTEIN = f"{JEAN_LUC_BRUNEL} v. {JEFFREY_EPSTEIN}"
242
224
  EDWARDS_V_DERSHOWITZ = f"{BRAD_EDWARDS} & {PAUL_G_CASSELL} v. {ALAN_DERSHOWITZ}:"
243
225
  EPSTEIN_V_ROTHSTEIN_EDWARDS = f"Epstein v. Scott Rothstein, {BRAD_EDWARDS}, and L.M.:"
@@ -250,14 +232,12 @@ NEW_YORK_V_EPSTEIN = f"New York v. {JEFFREY_EPSTEIN}:"
250
232
 
251
233
  # Descriptions of non-email, non-text message files
252
234
  ARTICLE_DRAFT = 'draft of an article about'
253
- BOFA = 'BofA'
254
235
  BOFA_MERRILL = f'{BOFA} / Merrill Lynch Report'
255
236
  BOFA_WEALTH_MGMT = f'{BOFA} Wealth Management'
256
- CHALLENGES_OF_AI = f'ASU Origins Project ({LAWRENCE_KRAUSS}) report "Challenges of AI: Envisioning and Addressing Adverse Outcomes"'
237
+ BROCKMAN_INC = 'Brockman, Inc.'
257
238
  CVRA = "Crime Victims' Rights Act [CVRA]"
258
239
  DAVID_BLAINE_VISA_LETTER = f"letter of recommendation for visa for a model"
259
240
  DAVID_SCHOEN_CVRA_LEXIS_SEARCH = f"Lexis Nexis search for case law around the {CVRA} by {DAVID_SCHOEN}"
260
- DEEP_THINKING_HINT = f'{BOOK} "Deep Thinking: Twenty-Five Ways of Looking at AI" by John Brockman'
261
241
  DERSH_GIUFFRE_TWEET = f"{TWEET} by {ALAN_DERSHOWITZ} about {VIRGINIA_GIUFFRE}"
262
242
  DEUTSCHE_BANK_TAX_TOPICS = f'{DEUTSCHE_BANK} Wealth Management Tax Topics'
263
243
  DIANA_DEGETTES_CAMPAIGN = "Colorado legislator Diana DeGette's campaign"
@@ -265,34 +245,32 @@ EPSTEIN_FOUNDATION = 'Jeffrey Epstein VI Foundation'
265
245
  FBI_REPORT = f"{FBI} report on Epstein investigation (redacted)"
266
246
  FBI_SEIZED_PROPERTY = f"{FBI} seized property inventory (redacted)"
267
247
  FEMALE_HEALTH_COMPANY = 'Female Health Company (FHX)'
268
- FIRE_AND_FURY = f"'Fire And Fury' by {MICHAEL_WOLFF}"
269
- GOLDMAN_REPORT = f'{GOLDMAN_SACHS} Investment Management Division report'
248
+ FIRE_AND_FURY = f"Fire And Fury"
270
249
  HARVARD_POETRY = f'{HARVARD} poetry stuff from {LISA_NEW}'
271
- HBS_APPLICATION_NERIO = f"{HARVARD} Business School application letter from Nerio Alessandri (Founder and Chairman Technogym SPA Italy)"
250
+ HBS_APPLICATION_NERIO = f"{HARVARD} Business School application letter from Nerio Alessandri (Founder and Chairman of Technogym S.p.A. Italy)"
272
251
  JASTA = 'JASTA'
273
252
  JASTA_SAUDI_LAWSUIT = f"{JASTA} lawsuit against Saudi Arabia by 9/11 victims"
274
253
  JOHN_BOLTON_PRESS_CLIPPING = 'John Bolton press clipping'
275
- JP_MORGAN_EYE_ON_THE_MARKET = f"{JP_MORGAN} Eye On The Market report"
254
+ JP_MORGAN_EYE_ON_THE_MARKET = f"Eye On The Market"
276
255
  KEN_STARR_LETTER = f"letter to judge overseeing Epstein's criminal prosecution, mentions Alex Acosta"
277
256
  MICHAEL_WOLFF_ARTICLE_HINT = f"draft of an unpublished article about Epstein by {MICHAEL_WOLFF} written ca. 2014/2015"
278
- NIGHT_FLIGHT_HINT = f'draft of book named "Night Flight"'
257
+ NIGHT_FLIGHT_BOOK = f'"Night Flight" (draft)'
279
258
  NOBEL_CHARITABLE_TRUST = 'Nobel Charitable Trust'
280
259
  OBAMA_JOKE = 'joke about Obama'
281
- PALM_BEACH = 'Palm Beach'
282
260
  PALM_BEACH_CODE_ENFORCEMENT = f'{PALM_BEACH} code enforcement board minutes'
283
261
  PALM_BEACH_DAILY_ARTICLE = f'{PALM_BEACH} Daily News article about'
284
262
  PALM_BEACH_POST_ARTICLE = f'{PALM_BEACH} Post article about'
285
263
  PALM_BEACH_TSV = f"TSV of {PALM_BEACH} property"
286
264
  PALM_BEACH_WATER_COMMITTEE = f'{PALM_BEACH} Water Committee'
287
- PATTERSON_BOOK_SCANS = f"pages of 'Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}'"
265
+ PATTERSON_BOOK_SCANS = f'pages of "Filthy Rich: The Shocking True Story of {JEFFREY_EPSTEIN}"'
288
266
  SHIMON_POST = 'The Shimon Post'
289
267
  SHIMON_POST_ARTICLE = f'{SHIMON_POST} selection of articles about the mideast'
290
268
  SINGLE_PAGE = 'single page of'
291
269
  SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit"
292
270
  THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
293
271
  TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
294
- UBS = 'UBS'
295
272
  UBS_CIO_REPORT = 'CIO Monthly Extended report'
273
+ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
296
274
  VI_DAILY_NEWS_ARTICLE = f'{VIRGIN_ISLANDS} Daily News article'
297
275
  WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference run by {SVETLANA_POZHIDAEVA}"
298
276
  ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
@@ -301,6 +279,7 @@ ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
301
279
  BOLOTOVA_REASON = 'Same signature style as 029020 ("--" followed by "Sincerely Renata Bolotova")'
302
280
  KATHY_REASON = 'from "Kathy" about dems, sent from iPad'
303
281
  LARRY_REASON = 'Planes discussion signed "Larry"'
282
+ PARTICIPANTS_FIELD = 'Participants: field'
304
283
  PAULA_REASON = 'Signature of "Sent via BlackBerry from T-Mobile"'
305
284
 
306
285
 
@@ -308,506 +287,494 @@ PAULA_REASON = 'Signature of "Sent via BlackBerry from T-Mobile"'
308
287
  ############################################ TEXTS #############################################
309
288
  ################################################################################################
310
289
 
311
- TEXTS_CONFIG = [
312
- # Confirmed text attributions
313
- MessageCfg(id='031042', author=ANIL_AMBANI, attribution_reason='Participants: field'),
314
- MessageCfg(id='027225', author=ANIL_AMBANI, attribution_reason='Birthday mentioned and confirmed as Ambani\'s'),
315
- MessageCfg(id='031173', author=ARDA_BESKARDES, attribution_reason='Participants: field'),
316
- MessageCfg(id='027401', author='Eva (Dubin?)', attribution_reason='Participants: field'),
317
- MessageCfg(id='027650', author=JOI_ITO, attribution_reason='Participants: field'),
318
- MessageCfg(id='027777', author=LARRY_SUMMERS, attribution_reason='Participants: field'),
319
- MessageCfg(id='027515', author=MIROSLAV_LAJCAK, attribution_reason='https://x.com/ImDrinknWyn/status/1990210266114789713'),
320
- MessageCfg(id='027165', author=MELANIE_WALKER, attribution_reason='https://www.wired.com/story/jeffrey-epstein-claimed-intimate-knowledge-of-donald-trumps-views-in-texts-with-bill-gates-adviser/'),
321
- MessageCfg(id='027248', author=MELANIE_WALKER, attribution_reason='Says "we met through trump" which is confirmed by Melanie in 032803'),
322
- MessageCfg(id='025429', author=STACEY_PLASKETT),
323
- MessageCfg(id='027333', author=ANTHONY_SCARAMUCCI, attribution_reason='unredacted phone number in one of the messages belongs to Scaramucci'),
324
- MessageCfg(id='027128', author=SOON_YI_PREVIN, attribution_reason='https://x.com/ImDrinknWyn/status/1990227281101434923'),
325
- MessageCfg(id='027217', author=SOON_YI_PREVIN, attribution_reason='refs marriage to woody allen'),
326
- MessageCfg(id='027244', author=SOON_YI_PREVIN, attribution_reason='refs Woody'),
327
- MessageCfg(id='027257', author=SOON_YI_PREVIN, attribution_reason="'Woody Allen' in Participants: field"),
328
- MessageCfg(id='027460', author=STEVE_BANNON, attribution_reason='Discusses leaving scotland when Bannon was confirmed in Scotland, also NYT'),
329
- MessageCfg(id='027307', author=STEVE_BANNON),
330
- MessageCfg(id='027278', author=TERJE_ROD_LARSEN),
331
- MessageCfg(id='027255', author=TERJE_ROD_LARSEN),
332
- # Uncertain text attributions
333
- MessageCfg(id='027762', author=ANDRZEJ_DUDA, is_attribution_uncertain=True),
334
- MessageCfg(id='027774', author=ANDRZEJ_DUDA, is_attribution_uncertain=True),
335
- MessageCfg(id='027221', author=ANIL_AMBANI, is_attribution_uncertain=True),
336
- MessageCfg(id='025436', author=CELINA_DUBIN, is_attribution_uncertain=True),
337
- MessageCfg(id='027576', author=MELANIE_WALKER, is_attribution_uncertain=True, attribution_reason='https://www.ahajournals.org/doi/full/10.1161/STROKEAHA.118.023700'),
338
- MessageCfg(id='027141', author=MELANIE_WALKER, is_attribution_uncertain=True),
339
- MessageCfg(id='027232', author=MELANIE_WALKER, is_attribution_uncertain=True),
340
- MessageCfg(id='027133', author=MELANIE_WALKER, is_attribution_uncertain=True),
341
- MessageCfg(id='027184', author=MELANIE_WALKER, is_attribution_uncertain=True),
342
- MessageCfg(id='027214', author=MELANIE_WALKER, is_attribution_uncertain=True),
343
- MessageCfg(id='027148', author=MELANIE_WALKER, is_attribution_uncertain=True),
344
- MessageCfg(id='027440', author=MICHAEL_WOLFF, is_attribution_uncertain=True, attribution_reason='AI says Trump book/journalism project'),
345
- MessageCfg(id='027396', author=ANTHONY_SCARAMUCCI, is_attribution_uncertain=True),
346
- MessageCfg(id='031054', author=ANTHONY_SCARAMUCCI, is_attribution_uncertain=True),
347
- MessageCfg(id='025363', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='Trump and New York Times coverage'),
348
- MessageCfg(id='025368', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='Trump and New York Times coverage'),
349
- MessageCfg(id='027585', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='Tokyo trip'),
350
- MessageCfg(id='027568', author=STEVE_BANNON, is_attribution_uncertain=True),
351
- MessageCfg(id='027695', author=STEVE_BANNON, is_attribution_uncertain=True),
352
- MessageCfg(id='027594', author=STEVE_BANNON, is_attribution_uncertain=True),
353
- MessageCfg(id='027720', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='first 3 lines of 027722'),
354
- MessageCfg(id='027549', author=STEVE_BANNON, is_attribution_uncertain=True),
355
- MessageCfg(id='027434', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='References Maher appearance'),
356
- MessageCfg(id='027764', author=STEVE_BANNON, is_attribution_uncertain=True),
357
- MessageCfg(id='027428', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='References HBJ meeting on 9/28 from other Bannon/Epstein convo'),
358
- MessageCfg(id='025400', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump NYT article criticism; Hannity media strategy'),
359
- MessageCfg(id='025408', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
360
- MessageCfg(id='025452', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
361
- MessageCfg(id='025479', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
362
- MessageCfg(id='025707', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
363
- MessageCfg(id='025734', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
364
- MessageCfg(id='027260', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
365
- MessageCfg(id='027281', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
366
- MessageCfg(id='027346', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
367
- MessageCfg(id='027365', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
368
- MessageCfg(id='027374', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics'),
369
- MessageCfg(id='027406', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
370
- MessageCfg(id='027445', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
371
- MessageCfg(id='027455', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
372
- MessageCfg(id='027536', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
373
- MessageCfg(id='027655', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
374
- MessageCfg(id='027707', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Italian politics; Trump discussions'),
375
- MessageCfg(id='027722', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
376
- MessageCfg(id='027735', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
377
- MessageCfg(id='027794', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
378
- MessageCfg(id='029744', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
379
- MessageCfg(id='031045', author=STEVE_BANNON, is_attribution_uncertain=True, attribution_reason='AI says Trump and New York Times coverage'),
290
+ CONFIRMED_TEXTS_CONFIG = [
291
+ TextCfg(id='031042', author=ANIL_AMBANI, attribution_reason=PARTICIPANTS_FIELD),
292
+ TextCfg(id='027225', author=ANIL_AMBANI, attribution_reason="birthday mentioned in texts is confirmed as Ambani's"),
293
+ TextCfg(id='031054', author=ANTHONY_SCARAMUCCI, attribution_reason="Scaramucci's phone number is at top of raw file"),
294
+ TextCfg(id='027333', author=ANTHONY_SCARAMUCCI, attribution_reason="Scaramucci's phone number is in one of the messages"),
295
+ TextCfg(id='031173', author=ARDA_BESKARDES, attribution_reason=PARTICIPANTS_FIELD),
296
+ TextCfg(id='027401', author='Eva (Dubin?)', attribution_reason=PARTICIPANTS_FIELD),
297
+ TextCfg(id='027650', author=JOI_ITO, attribution_reason=PARTICIPANTS_FIELD),
298
+ TextCfg(id='027777', author=LARRY_SUMMERS, attribution_reason=PARTICIPANTS_FIELD),
299
+ TextCfg(id='027515', author=MIROSLAV_LAJCAK, attribution_reason='https://x.com/ImDrinknWyn/status/1990210266114789713'),
300
+ TextCfg(id='027165', author=MELANIE_WALKER, attribution_reason='https://www.wired.com/story/jeffrey-epstein-claimed-intimate-knowledge-of-donald-trumps-views-in-texts-with-bill-gates-adviser/'),
301
+ TextCfg(id='027248', author=MELANIE_WALKER, attribution_reason='says "we met through Trump" which is confirmed by Melanie in 032803'),
302
+ TextCfg(id='025429', author=STACEY_PLASKETT, attribution_reason='widely reported'),
303
+ TextCfg(id='027128', author=SOON_YI_PREVIN, attribution_reason='https://x.com/ImDrinknWyn/status/1990227281101434923'),
304
+ TextCfg(id='027217', author=SOON_YI_PREVIN, attribution_reason='refs marriage to Woody Allen'),
305
+ TextCfg(id='027244', author=SOON_YI_PREVIN, attribution_reason='refs Woody Allen'),
306
+ TextCfg(id='027257', author=SOON_YI_PREVIN, attribution_reason=f"Woody Allen in {PARTICIPANTS_FIELD}"),
307
+ TextCfg(id='027460', author=STEVE_BANNON, attribution_reason='Discusses leaving scotland when Bannon was confirmed in Scotland, also NYT'),
308
+ TextCfg(id='027307', author=STEVE_BANNON, attribution_reason='texts mention "Epstein Bannon Kurz"'),
309
+ TextCfg(id='027278', author=TERJE_ROD_LARSEN, attribution_reason=PARTICIPANTS_FIELD),
310
+ TextCfg(id='027255', author=TERJE_ROD_LARSEN, attribution_reason=PARTICIPANTS_FIELD),
311
+ ]
312
+
313
+ UNCONFIRMED_TEXTS_CONFIG = [
314
+ TextCfg(id='027762', author=ANDRZEJ_DUDA, attribution_reason=f"Duda in NY at that time, took train"),
315
+ TextCfg(id='027774', author=ANDRZEJ_DUDA, attribution_reason=f"Duda in NY at that time, took train"),
316
+ TextCfg(id='027221', author=ANIL_AMBANI),
317
+ TextCfg(id='027396', author=ANTHONY_SCARAMUCCI, attribution_reason='says "I need to make peace with Bannon"'),
318
+ TextCfg(id='025436', author=CELINA_DUBIN),
319
+ TextCfg(id='027576', author=MELANIE_WALKER, attribution_reason='https://www.ahajournals.org/doi/full/10.1161/STROKEAHA.118.023700'),
320
+ TextCfg(id='027141', author=MELANIE_WALKER),
321
+ TextCfg(id='027232', author=MELANIE_WALKER),
322
+ TextCfg(id='027133', author=MELANIE_WALKER),
323
+ TextCfg(id='027184', author=MELANIE_WALKER),
324
+ TextCfg(id='027214', author=MELANIE_WALKER),
325
+ TextCfg(id='027148', author=MELANIE_WALKER),
326
+ TextCfg(id='027440', author=MICHAEL_WOLFF, attribution_reason='AI says Trump book/journalism project'),
327
+ TextCfg(id='025363', author=STEVE_BANNON, attribution_reason='Trump and New York Times coverage'),
328
+ TextCfg(id='025368', author=STEVE_BANNON, attribution_reason='Trump and New York Times coverage'),
329
+ TextCfg(id='027585', author=STEVE_BANNON, attribution_reason='references Tokyo trip'),
330
+ TextCfg(id='027568', author=STEVE_BANNON),
331
+ TextCfg(id='027695', author=STEVE_BANNON),
332
+ TextCfg(id='027594', author=STEVE_BANNON),
333
+ TextCfg(id='027720', author=STEVE_BANNON, attribution_reason='first 3 lines of 027722'),
334
+ TextCfg(id='027549', author=STEVE_BANNON),
335
+ TextCfg(id='027434', author=STEVE_BANNON, attribution_reason='references Maher appearance'),
336
+ TextCfg(id='027764', author=STEVE_BANNON),
337
+ TextCfg(id='027428', author=STEVE_BANNON, attribution_reason='references HBJ meeting on 9/28 from other Bannon/Epstein convo'),
338
+ TextCfg(id='025400', author=STEVE_BANNON, attribution_reason='AI says Trump NYT article criticism; Hannity media strategy'),
339
+ TextCfg(id='025408', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
340
+ TextCfg(id='025452', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
341
+ TextCfg(id='025479', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
342
+ TextCfg(id='025707', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
343
+ TextCfg(id='025734', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
344
+ TextCfg(id='027260', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
345
+ TextCfg(id='027281', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
346
+ TextCfg(id='027346', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
347
+ TextCfg(id='027365', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
348
+ TextCfg(id='027374', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics'),
349
+ TextCfg(id='027406', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
350
+ TextCfg(id='027445', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
351
+ TextCfg(id='027455', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
352
+ TextCfg(id='027536', author=STEVE_BANNON, attribution_reason='AI says China strategy and geopolitics; Trump discussions'),
353
+ TextCfg(id='027655', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
354
+ TextCfg(id='027707', author=STEVE_BANNON, attribution_reason='AI says Italian politics; Trump discussions'),
355
+ TextCfg(id='027722', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
356
+ TextCfg(id='027735', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
357
+ TextCfg(id='027794', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
358
+ TextCfg(id='029744', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
359
+ TextCfg(id='031045', author=STEVE_BANNON, attribution_reason='AI says Trump and New York Times coverage'),
380
360
  ]
381
361
 
362
+ for cfg in UNCONFIRMED_TEXTS_CONFIG:
363
+ cfg.is_attribution_uncertain = True
364
+
365
+ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
366
+
382
367
 
383
- ############################################################################################################
384
- ################################################ EMAIL_INFO ################################################
385
- ############################################################################################################
368
+ ########################################################################################################
369
+ ################################################ EMAILS ################################################
370
+ ########################################################################################################
371
+
372
+ # Some emails have a lot of uninteresting CCs
373
+ IRAN_NUCLEAR_DEAL_SPAM_EMAIL_RECIPIENTS: list[str | None] = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
374
+ FLIGHT_IN_2012_PEOPLE: list[str | None] = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
386
375
 
387
376
  EMAILS_CONFIG = [
388
- MessageCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
389
- MessageCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
390
- MessageCfg(id='026064', author=ARIANE_DE_ROTHSCHILD),
391
- MessageCfg(id='026069', author=ARIANE_DE_ROTHSCHILD),
392
- MessageCfg(id='030741', author=ARIANE_DE_ROTHSCHILD),
393
- MessageCfg(id='026018', author=ARIANE_DE_ROTHSCHILD),
394
- MessageCfg(
377
+ EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
378
+ EmailCfg(id='032543', author=ANAS_ALRASHEED, attribution_reason='Later reply 033000 has quote'),
379
+ EmailCfg(id='026064', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
380
+ EmailCfg(id='026069', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
381
+ EmailCfg(id='030741', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
382
+ EmailCfg(id='026018', author=ARIANE_DE_ROTHSCHILD, attribution_reason='signature'),
383
+ EmailCfg(
395
384
  id='029504',
396
385
  author='Audrey/Aubrey Raimbault (???)',
397
- attribution_reason='based on "GMI" in signature, a company registered by "aubrey raimbault"',
386
+ attribution_reason='based on "GMI" in signature, a company registered by "Aubrey Raimbault"',
398
387
  ),
399
- MessageCfg(id='033316', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
400
- MessageCfg(id='033328', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
401
- MessageCfg(id='026659', author=BARBRO_C_EHNBOM, attribution_reason='Reply'),
402
- MessageCfg(id='031215', author=BARBRO_C_EHNBOM, duplicate_ids=['026745'], dupe_type='redacted'), # the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config
403
- MessageCfg(id='026764', author=BARRY_J_COHEN),
404
- MessageCfg(id='031206', author=BENNET_MOSKOWITZ, duplicate_ids=['031227']),
405
- MessageCfg(id='031442', author=CHRISTINA_GALBRAITH, duplicate_ids=['031996']),
406
- MessageCfg(
388
+ EmailCfg(id='033316', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
389
+ EmailCfg(id='033328', author=AZIZA_ALAHMADI, attribution_reason='"Regards, Aziza" at bottom'),
390
+ EmailCfg(id='026659', author=BARBRO_C_EHNBOM, attribution_reason='Reply'),
391
+ EmailCfg(id='031215', author=BARBRO_C_EHNBOM, duplicate_ids=['026745'], dupe_type='redacted'), # the same except for 'your Anna!'. author must be specified because email address is redacted in 026745 so it needs the config
392
+ EmailCfg(id='026764', author=BARRY_J_COHEN), # Bad OCR (nofix)
393
+ EmailCfg(id='031206', author=BENNET_MOSKOWITZ, duplicate_ids=['031227']),
394
+ EmailCfg(id='031442', author=CHRISTINA_GALBRAITH, duplicate_ids=['031996']),
395
+ EmailCfg(
407
396
  id='019446',
408
397
  author=CHRISTINA_GALBRAITH,
409
398
  attribution_reason='shows from "Christina media/PR" which fits',
410
399
  is_attribution_uncertain=True,
411
400
  ),
412
- MessageCfg(id='026625', author=DARREN_INDYKE, actual_text='Hysterical.'),
413
- MessageCfg(
401
+ EmailCfg(id='026625', author=DARREN_INDYKE, actual_text='Hysterical.'),
402
+ EmailCfg(
414
403
  id='026624',
415
404
  author=DARREN_INDYKE,
416
405
  recipients=[JEFFREY_EPSTEIN],
417
406
  timestamp=parse('2016-10-01 16:40:00'),
418
407
  duplicate_ids=['031708'],
419
408
  ),
420
- MessageCfg(
409
+ EmailCfg(
421
410
  id='031278',
422
411
  author=DARREN_INDYKE,
423
- description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book", # Quoted replies are in 019109,
412
+ description=f"heavily redacted email, quoted replies are from {STEVEN_HOFFENBERG} about James Patterson's book",
424
413
  timestamp=parse('2016-08-17 11:26:00'),
425
414
  attribution_reason='Quoted replies are in 019109',
426
415
  ),
427
- MessageCfg(id='026290', author=DAVID_SCHOEN, attribution_reason='Signature'),
428
- MessageCfg(id='031339', author=DAVID_SCHOEN, attribution_reason='Signature'),
429
- MessageCfg(id='031492', author=DAVID_SCHOEN, attribution_reason='Signature'),
430
- MessageCfg(id='031560', author=DAVID_SCHOEN, attribution_reason='Signature'),
431
- MessageCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
432
- MessageCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
433
- MessageCfg(id='031460', author=EDWARD_JAY_EPSTEIN),
434
- MessageCfg(id='030414', author=FAITH_KATES, duplicate_ids=['030578'], dupe_type='redacted'),
435
- MessageCfg(
436
- id='031135',
416
+ EmailCfg(id='026290', author=DAVID_SCHOEN, attribution_reason='Signature'),
417
+ EmailCfg(id='031339', author=DAVID_SCHOEN, attribution_reason='Signature'),
418
+ EmailCfg(id='031492', author=DAVID_SCHOEN, attribution_reason='Signature'),
419
+ EmailCfg(id='031560', author=DAVID_SCHOEN, attribution_reason='Signature'),
420
+ EmailCfg(id='026287', author=DAVID_SCHOEN, attribution_reason='Signature'),
421
+ EmailCfg(id='033419', author=DAVID_SCHOEN, attribution_reason='Signature'),
422
+ EmailCfg(id='031460', author=EDWARD_JAY_EPSTEIN, attribution_reason=f"quoted reply has edwardjayepstein.com"),
423
+ EmailCfg(
424
+ id='030475',
437
425
  author=FAITH_KATES,
438
- duplicate_ids=['030634'],
439
- dupe_type='redacted',
440
- attribution_reason='Same as unredacted 031135, same legal signature',
441
- ),
442
- MessageCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR
443
- MessageCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='Signature'),
444
- MessageCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='Signature', duplicate_ids=['031120']),
445
- MessageCfg(id='029970', author=GWENDOLYN_BECK),
446
- MessageCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='Reply'),
447
- MessageCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
448
- MessageCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
449
- MessageCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?'),
450
- MessageCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
451
- MessageCfg(id='021823', author=JEAN_LUC_BRUNEL, attribution_reason='Reply'),
452
- MessageCfg(id='022949', author=JEFFREY_EPSTEIN),
453
- MessageCfg(id='031624', author=JEFFREY_EPSTEIN),
454
- MessageCfg(id='031996', author=JEFFREY_EPSTEIN, recipients=[CHRISTINA_GALBRAITH], attribution_reason='bounced', duplicate_ids=['031442']),
455
- MessageCfg(id='025041', author=JEFFREY_EPSTEIN, recipients=[LARRY_SUMMERS], duplicate_ids=['028675']), # Bad OCR
456
- MessageCfg(
457
- id='029692',
458
- author=JEFFREY_EPSTEIN,
459
- is_fwded_article=True, # Bad OCR, WaPo article
460
- recipients=[LARRY_SUMMERS],
461
- duplicate_ids=['029779'],
426
+ attribution_reason=f'{NEXT_MANAGEMENT} legal signature',
427
+ duplicate_ids=['030575'],
428
+ dupe_type='redacted'
462
429
  ),
463
- MessageCfg(id='018726', author=JEFFREY_EPSTEIN, timestamp=parse('2018-06-08 08:36:00')),
464
- MessageCfg(id='032283', author=JEFFREY_EPSTEIN, timestamp=parse('2016-09-14 08:04:00')),
465
- MessageCfg(id='026943', author=JEFFREY_EPSTEIN, timestamp=parse('2019-05-22 05:47:00')),
466
- MessageCfg(id='023208', author=JEFFREY_EPSTEIN, recipients=[BRAD_WECHSLER, MELANIE_SPINELLA], duplicate_ids=['023291']),
467
- MessageCfg(
430
+ EmailCfg(id='026547', author=GERALD_BARTON, recipients=[JEFFREY_EPSTEIN]), # Bad OCR # TODO: email header is really jacked up
431
+ EmailCfg(id='029969', author=GWENDOLYN_BECK, attribution_reason='Signature'),
432
+ EmailCfg(id='029968', author=GWENDOLYN_BECK, attribution_reason='Signature', duplicate_ids=['031120']),
433
+ EmailCfg(id='029970', author=GWENDOLYN_BECK, attribution_reason='signed "Longevity & Successful Agin"'),
434
+ EmailCfg(id='029960', author=GWENDOLYN_BECK, attribution_reason='Reply'),
435
+ EmailCfg(id='029959', author=GWENDOLYN_BECK, attribution_reason='"Longevity & Aging"'),
436
+ EmailCfg(id='033360', author=HENRY_HOLT, attribution_reason='in signature'), # Henry Holt is a company not a person
437
+ EmailCfg(id='033384', author=JACK_GOLDBERGER, attribution_reason='Might be Paul Prosperi?', is_attribution_uncertain=True),
438
+ EmailCfg(id='026024', author=JEAN_HUGUEN, attribution_reason='Signature'),
439
+ EmailCfg(id='021823', author=JEAN_LUC_BRUNEL, attribution_reason='Reply'),
440
+ EmailCfg(id='022949', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
441
+ EmailCfg(id='031624', author=JEFFREY_EPSTEIN), # Bad OCR (nofix)
442
+ EmailCfg(id='031996', author=JEFFREY_EPSTEIN, recipients=[CHRISTINA_GALBRAITH], attribution_reason='bounced', duplicate_ids=['031442']),
443
+ EmailCfg(id='018726', author=JEFFREY_EPSTEIN, timestamp=parse('2018-06-08 08:36:00')), # nofix
444
+ EmailCfg(id='032283', author=JEFFREY_EPSTEIN, timestamp=parse('2016-09-14 08:04:00')), # nofix
445
+ EmailCfg(id='026943', author=JEFFREY_EPSTEIN, timestamp=parse('2019-05-22 05:47:00')), # nofix
446
+ EmailCfg(id='023208', author=JEFFREY_EPSTEIN, recipients=[BRAD_WECHSLER, MELANIE_SPINELLA], duplicate_ids=['023291']),
447
+ EmailCfg(
468
448
  id='032214',
469
449
  author=JEFFREY_EPSTEIN,
470
450
  actual_text='Agreed',
471
451
  recipients=[MIROSLAV_LAJCAK],
472
452
  attribution_reason='Quoted reply has signature',
473
453
  ),
474
- MessageCfg(id='029582', author=JEFFREY_EPSTEIN, recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
475
- MessageCfg(id='030997', author=JEFFREY_EPSTEIN, actual_text='call back'),
476
- MessageCfg(id='028770', author=JEFFREY_EPSTEIN, actual_text='call me now'),
477
- MessageCfg(id='031826', author=JEFFREY_EPSTEIN, actual_text='I have'),
478
- MessageCfg(id='030768', author=JEFFREY_EPSTEIN, actual_text='ok'),
479
- MessageCfg(id='022938', author=JEFFREY_EPSTEIN, actual_text='what do you suggest?'), # TODO: this email's header rewrite sucks
480
- MessageCfg(id='031791', author=JESSICA_CADWELL),
481
- MessageCfg(id='028851', author=JOI_ITO, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2014-04-27 06:00:00')),
482
- MessageCfg(
454
+ EmailCfg(id='029582', author=JEFFREY_EPSTEIN, recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
455
+ EmailCfg(id='030997', author=JEFFREY_EPSTEIN, actual_text='call back'),
456
+ EmailCfg(id='028770', author=JEFFREY_EPSTEIN, actual_text='call me now'),
457
+ EmailCfg(id='031826', author=JEFFREY_EPSTEIN, actual_text='I have'),
458
+ EmailCfg(id='030768', author=JEFFREY_EPSTEIN, actual_text='ok'),
459
+ EmailCfg(id='022938', author=JEFFREY_EPSTEIN, actual_text='what do you suggest?'),
460
+ EmailCfg(id='031791', author=JESSICA_CADWELL, attribution_reason='signature'),
461
+ EmailCfg(id='028851', author=JOI_ITO, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2014-04-27 06:00:00')),
462
+ EmailCfg(
483
463
  id='028849',
464
+ attribution_reason='Conversation with Joi Ito',
484
465
  author=JOI_ITO,
466
+ description=f"{JOI_ITO} reaching out to Epstein for an immediate phone call after news about illicit Russian money",
485
467
  recipients=[JEFFREY_EPSTEIN],
486
- timestamp=parse('2014-04-27 06:30:00'),
487
- attribution_reason='Conversation with Joi Ito',
468
+ timestamp=parse('2014-04-27 07:41:00'), # Filled in from 028847
488
469
  ),
489
- MessageCfg(id='016692', author=JOHN_PAGE),
490
- MessageCfg(id='016693', author=JOHN_PAGE),
491
- MessageCfg(id='028507', author=JONATHAN_FARKAS),
492
- MessageCfg(id='033282', author=JONATHAN_FARKAS, duplicate_ids=['033484']),
493
- MessageCfg(id='033582', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['032389']),
494
- MessageCfg(id='033203', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['033581']),
495
- MessageCfg(id='032052', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['031732']),
496
- MessageCfg(id='033490', author=JONATHAN_FARKAS, attribution_reason='Signature', duplicate_ids=['032531']),
497
- MessageCfg(id='026652', author=KATHRYN_RUEMMLER), # Bad OCR
498
- MessageCfg(id='032224', author=KATHRYN_RUEMMLER, recipients=[JEFFREY_EPSTEIN], attribution_reason='Reply'),
499
- MessageCfg(id='032386', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
500
- MessageCfg(id='032727', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
501
- MessageCfg(id='030478', author=LANDON_THOMAS),
502
- MessageCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]),
503
- MessageCfg(id='032206', author=LAWRENCE_KRAUSS), # More of a text convo?
504
- MessageCfg(id='032208', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
505
- MessageCfg(id='032209', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
506
- MessageCfg(id='029196', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN], actual_text='Talk in 40?'), # TODO: this email's header rewrite sucks
507
- MessageCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature'),
508
- MessageCfg(id='033370', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
509
- MessageCfg(id='033495', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
510
- MessageCfg(id='033487', author=LAWRANCE_VISOSKI, recipients=[JEFFREY_EPSTEIN]),
511
- MessageCfg(
470
+ EmailCfg(id='028507', author=JONATHAN_FARKAS, attribution_reason='reply signed "best Jonathan"'),
471
+ EmailCfg(id='033282', author=JONATHAN_FARKAS, attribution_reason='reply signed "thanks Jonathan"', duplicate_ids=['033484']),
472
+ EmailCfg(id='033582', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['032389']),
473
+ EmailCfg(id='033203', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['033581']),
474
+ EmailCfg(id='032052', author=JONATHAN_FARKAS, attribution_reason='Reply', duplicate_ids=['031732']),
475
+ EmailCfg(id='033490', author=JONATHAN_FARKAS, attribution_reason='Signature', duplicate_ids=['032531']),
476
+ EmailCfg(id='032224', author=KATHRYN_RUEMMLER, recipients=[JEFFREY_EPSTEIN], attribution_reason='Reply'),
477
+ EmailCfg(id='032386', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
478
+ EmailCfg(id='032727', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
479
+ EmailCfg(id='030478', author=LANDON_THOMAS),
480
+ EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
481
+ EmailCfg(id='032206', author=LAWRENCE_KRAUSS), # More of a text convo?
482
+ EmailCfg(id='032208', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
483
+ EmailCfg(id='032209', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
484
+ EmailCfg(id='029196', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN], actual_text='Talk in 40?'),
485
+ EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature'),
486
+ EmailCfg(id='033370', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
487
+ EmailCfg(id='033495', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
488
+ EmailCfg(id='033487', author=LAWRANCE_VISOSKI, recipients=[JEFFREY_EPSTEIN]),
489
+ EmailCfg(
512
490
  id='029977',
513
491
  author=LAWRANCE_VISOSKI,
514
492
  recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, LESLEY_GROFF, RICHARD_KAHN] + FLIGHT_IN_2012_PEOPLE,
515
493
  attribution_reason=LARRY_REASON,
516
494
  duplicate_ids=['031129'],
517
495
  ),
518
- MessageCfg(id='027046', author=LAWRANCE_VISOSKI, duplicate_ids=['028789']),
519
- MessageCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
520
- MessageCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
521
- MessageCfg(id='017581', author='Lisa Randall'),
522
- MessageCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
523
- MessageCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
524
- MessageCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
525
- MessageCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
526
- MessageCfg(id='032212', author=MIROSLAV_LAJCAK),
527
- MessageCfg(id='022193', author=NADIA_MARCINKO),
528
- MessageCfg(id='021814', author=NADIA_MARCINKO),
529
- MessageCfg(id='021808', author=NADIA_MARCINKO),
530
- MessageCfg(id='022190', author=NADIA_MARCINKO),
531
- MessageCfg(id='021818', author=NADIA_MARCINKO),
532
- MessageCfg(id='022197', author=NADIA_MARCINKO),
533
- MessageCfg(id='022214', author=NADIA_MARCINKO, attribution_reason='Reply header'),
534
- MessageCfg(id='021811', author=NADIA_MARCINKO, attribution_reason='Signature and email address in the message'),
535
- MessageCfg(id='028487', author=NORMAN_D_RAU, attribution_reason='Fwded from "to" address', duplicate_ids=['026612']),
536
- MessageCfg(id='024923', author=PAUL_KRASSNER, recipients=KRASSNER_024923_RECIPIENTS, duplicate_ids=['031973']),
537
- MessageCfg(id='032457', author=PAUL_KRASSNER),
538
- MessageCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
539
- MessageCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
540
- MessageCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
541
- MessageCfg(
496
+ EmailCfg(id='027046', author=LAWRANCE_VISOSKI, duplicate_ids=['028789']),
497
+ EmailCfg(id='033488', author=LAWRANCE_VISOSKI, duplicate_ids=['033154']),
498
+ EmailCfg(id='033309', author=LINDA_STONE, attribution_reason='"Co-authored with iPhone autocorrect"'),
499
+ EmailCfg(id='017581', author='Lisa Randall', attribution_reason='reply header'),
500
+ EmailCfg(id='026609', author='Mark Green', attribution_reason='Actually a fwd'),
501
+ EmailCfg(id='030472', author=MARTIN_WEINBERG, attribution_reason='Maybe. in reply', is_attribution_uncertain=True),
502
+ EmailCfg(id='030235', author=MELANIE_WALKER, attribution_reason='In fwd'),
503
+ EmailCfg(id='032343', author=MELANIE_WALKER, attribution_reason='Name seen in later reply 032346'),
504
+ EmailCfg(id='032212', author=MIROSLAV_LAJCAK, attribution_reason='signature'),
505
+ EmailCfg(id='022193', author=NADIA_MARCINKO, attribution_reason='reply'),
506
+ EmailCfg(id='021814', author=NADIA_MARCINKO, attribution_reason='reply'),
507
+ EmailCfg(id='021808', author=NADIA_MARCINKO, attribution_reason='reply'),
508
+ EmailCfg(id='022190', author=NADIA_MARCINKO, attribution_reason='reply'),
509
+ EmailCfg(id='021818', author=NADIA_MARCINKO, attribution_reason='reply'),
510
+ EmailCfg(id='022197', author=NADIA_MARCINKO, attribution_reason='reply'),
511
+ EmailCfg(id='022214', author=NADIA_MARCINKO, attribution_reason='Reply header'),
512
+ EmailCfg(id='021811', author=NADIA_MARCINKO, attribution_reason='Signature and email address in the message'),
513
+ EmailCfg(id='028487', author=NORMAN_D_RAU, attribution_reason='Fwded from "to" address', duplicate_ids=['026612']),
514
+ EmailCfg(
515
+ id='024923',
516
+ author=PAUL_KRASSNER,
517
+ recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
518
+ duplicate_ids=['031973']
519
+ ),
520
+ EmailCfg(id='032457', author=PAUL_KRASSNER), # Bad OCR (nofix)
521
+ EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
522
+ EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
523
+ EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
524
+ EmailCfg(
542
525
  id='033561',
543
526
  author=PAUL_PROSPERI,
544
527
  attribution_reason='Fwded mail sent to Prosperi. Might be Subotnick Stuart?',
545
528
  duplicate_ids=['033157'],
546
529
  ),
547
- MessageCfg(id='031694', author=PEGGY_SIEGAL),
548
- MessageCfg(id='032219', author=PEGGY_SIEGAL, attribution_reason='Signed "Peggy"'),
549
- MessageCfg(id='029020', author=RENATA_BOLOTOVA, attribution_reason='Signature'),
550
- MessageCfg(id='029605', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
551
- MessageCfg(id='029606', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
552
- MessageCfg(id='029604', author=RENATA_BOLOTOVA, attribution_reason='Continued in 239606 etc'),
553
- MessageCfg(
530
+ EmailCfg(id='031694', author=PEGGY_SIEGAL, attribution_reason='quoted', is_attribution_uncertain=True),
531
+ EmailCfg(id='032219', author=PEGGY_SIEGAL, attribution_reason='Signed "Peggy"'),
532
+ EmailCfg(id='029020', author=RENATA_BOLOTOVA, attribution_reason='Signature'),
533
+ EmailCfg(id='029605', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
534
+ EmailCfg(id='029606', author=RENATA_BOLOTOVA, attribution_reason=BOLOTOVA_REASON),
535
+ EmailCfg(id='029604', author=RENATA_BOLOTOVA, attribution_reason='Continued in 239606 etc'),
536
+ EmailCfg(
554
537
  id='033584',
555
538
  author=ROBERT_TRIVERS,
556
539
  recipients=[JEFFREY_EPSTEIN],
557
540
  attribution_reason='Refs paper by Trivers',
558
541
  duplicate_ids=['033169'],
559
542
  ),
560
- MessageCfg(
543
+ EmailCfg(
561
544
  id='026320',
562
545
  author=SEAN_BANNON,
563
546
  attribution_reason="From protonmail, Bannon wrote 'just sent from my protonmail' in 027067",
564
547
  ),
565
- MessageCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
566
- MessageCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
567
- MessageCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
568
- MessageCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
569
- MessageCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
570
- MessageCfg(
548
+ EmailCfg(id='029003', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
549
+ EmailCfg(id='029005', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
550
+ EmailCfg(id='029007', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
551
+ EmailCfg(id='029010', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
552
+ EmailCfg(id='032296', author=SOON_YI_PREVIN, attribution_reason="\"Sent from Soon-Yi's iPhone\""),
553
+ EmailCfg(
571
554
  id='019109',
572
555
  author=STEVEN_HOFFENBERG,
573
556
  recipients=["Players2"],
574
557
  timestamp=parse('2016-08-11 09:36:01'),
575
558
  attribution_reason='Actually a fwd by Charles Michael but Hoffenberg email more interesting',
576
559
  ),
577
- MessageCfg(
560
+ EmailCfg(
578
561
  id='026620',
579
562
  author=TERRY_KAFKA,
580
563
  recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_NUCLEAR_DEAL_SPAM_EMAIL_RECIPIENTS,
581
- attribution_reason='"Respectfully, terry"',
564
+ attribution_reason='ends with "Respectfully, terry"',
582
565
  duplicate_ids=['028482'],
583
566
  ),
584
- MessageCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
585
- MessageCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
586
- MessageCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
587
- MessageCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
588
- MessageCfg(id='030626', recipients=[ALAN_DERSHOWITZ, DARREN_INDYKE, KATHRYN_RUEMMLER, KEN_STARR, MARTIN_WEINBERG]),
589
- MessageCfg(id='029835', recipients=[ALAN_DERSHOWITZ, JACK_GOLDBERGER, JEFFREY_EPSTEIN], duplicate_ids=['028968']),
590
- MessageCfg(id='027063', recipients=[ANTHONY_BARRETT]),
591
- MessageCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
592
- MessageCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
593
- MessageCfg(id='032876', recipients=[CECILIA_STEEN]),
594
- MessageCfg(id='033583', recipients=[DARREN_INDYKE, JACK_GOLDBERGER]), # Bad OCR
595
- MessageCfg(id='033144', recipients=[DARREN_INDYKE, RICHARD_KAHN]),
596
- MessageCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
597
- MessageCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN]),
598
- MessageCfg(
567
+ EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
568
+ EmailCfg(id='029985', author=TERRY_KAFKA, attribution_reason='Quoted reply in 029992'),
569
+ EmailCfg(id='020666', author=TERRY_KAFKA, attribution_reason="Ends with 'Terry'"),
570
+ EmailCfg(id='026014', author=ZUBAIR_KHAN, recipients=[JEFFREY_EPSTEIN], timestamp=parse('2016-11-04 17:46:00')),
571
+ EmailCfg(id='027063', recipients=[ANTHONY_BARRETT]),
572
+ EmailCfg(id='030764', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
573
+ EmailCfg(id='026431', recipients=[ARIANE_DE_ROTHSCHILD], attribution_reason='Reply'),
574
+ EmailCfg(id='032876', recipients=[CECILIA_STEEN], attribution_reason='unredacted in 032267'),
575
+ EmailCfg(id='026466', recipients=[DIANE_ZIMAN], attribution_reason='Quoted reply'),
576
+ EmailCfg(id='031607', recipients=[EDWARD_JAY_EPSTEIN], attribution_reason=f"quoted reply has edwardjayepstein.com"),
577
+ EmailCfg(
599
578
  id='030525',
600
579
  recipients=[FAITH_KATES],
601
- attribution_reason='Same as unredacted 030414, same legal signature',
580
+ attribution_reason=f'Reply in 030414 has {NEXT_MANAGEMENT} legal signature',
602
581
  duplicate_ids=['030581'],
603
582
  ),
604
- MessageCfg(
605
- id='030475',
606
- recipients=[FAITH_KATES],
607
- attribution_reason='Next Management LLC legal signature',
608
- duplicate_ids=['030575'],
609
- dupe_type='redacted'
610
- ),
611
- MessageCfg(id='030999', recipients=[JACK_GOLDBERGER, ROBERT_D_CRITTON]),
612
- MessageCfg(id='026426', recipients=[JEAN_HUGUEN], attribution_reason='Reply'),
613
- MessageCfg(id='022202', recipients=[JEAN_LUC_BRUNEL], attribution_reason='Follow up / reply', duplicate_ids=['029975']),
614
- MessageCfg(id='022187', recipients=[JEFFREY_EPSTEIN]),
615
- MessageCfg(id='031489', recipients=[JEFFREY_EPSTEIN]), # Bad OCR
616
- MessageCfg(id='032210', recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
617
- MessageCfg(id='030347', recipients=[JEFFREY_EPSTEIN]), # Bad OCR
618
- MessageCfg(id='030367', recipients=[JEFFREY_EPSTEIN]), # Bad OCR
619
- MessageCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
620
- MessageCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR
621
- MessageCfg(id='025233', recipients=[JEFFREY_EPSTEIN]), # Bad OCR
622
- MessageCfg(id='022344', recipients=[JEFFREY_EPSTEIN], duplicate_ids=['028529']), # Bad OCR
623
- MessageCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]),
624
- MessageCfg(id='033575', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, DEBBIE_FEIN], duplicate_ids=['012898']),
625
- MessageCfg(id='023067', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, DEBBIE_FEIN, TONJA_HADDAD_COLEMAN]), # Bad OCR
626
- MessageCfg(id='033228', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, FRED_HADDAD]), # Bad OCR
627
- MessageCfg(id='025790', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, JACK_GOLDBERGER], duplicate_ids=['031994']), # Bad OCR
628
- MessageCfg(
629
- id='031384',
630
- actual_text='',
631
- recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, JACK_GOLDBERGER, MARTIN_WEINBERG, SCOTT_J_LINK],
632
- ),
633
- MessageCfg(id='033512', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, JACKIE_PERCZEK, MARTIN_WEINBERG], duplicate_ids=['033361']),
634
- MessageCfg(id='032063', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, REID_WEINGARTEN]),
635
- MessageCfg(id='033486', recipients=[JEFFREY_EPSTEIN, DARREN_INDYKE, RICHARD_KAHN], duplicate_ids=['033156']), # Bad OCR
636
- MessageCfg(id='029154', recipients=[JEFFREY_EPSTEIN, DAVID_HAIG]), # Bad OCR
637
- MessageCfg(id='029498', recipients=[JEFFREY_EPSTEIN, DAVID_HAIG, GORDON_GETTY, "Norman Finkelstein"]), # Bad OCR
638
- MessageCfg(id='029889', recipients=[JEFFREY_EPSTEIN, "Connie Zaguirre", JACK_GOLDBERGER, ROBERT_D_CRITTON]), # Bad OCR
639
- MessageCfg(id='028931', recipients=[JEFFREY_EPSTEIN, LAWRENCE_KRAUSS]), # Bad OCR
640
- MessageCfg(id='019407', recipients=[JEFFREY_EPSTEIN, MICHAEL_SITRICK]), # Bad OCR
641
- MessageCfg(id='031980', recipients=[JEFFREY_EPSTEIN, MICHAEL_SITRICK], duplicate_ids=['019409']), # Bad OCR
642
- MessageCfg(id='029163', recipients=[JEFFREY_EPSTEIN, ROBERT_TRIVERS]), # Bad OCR
643
- MessageCfg(id='026228', recipients=[JEFFREY_EPSTEIN, STEVEN_PFEIFFER]), # Bad OCR
644
- MessageCfg(id='030299', recipients=[JESSICA_CADWELL, ROBERT_D_CRITTON], duplicate_ids=['021794']), # Bad OCR
645
- MessageCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
646
- MessageCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
647
- MessageCfg(
583
+ EmailCfg(id='026426', recipients=[JEAN_HUGUEN], attribution_reason='Reply'),
584
+ EmailCfg(id='022202', recipients=[JEAN_LUC_BRUNEL], attribution_reason='Follow up / reply', duplicate_ids=['029975']),
585
+ EmailCfg(id='022187', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
586
+ EmailCfg(id='031489', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (unfixable)
587
+ EmailCfg(id='032210', recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
588
+ EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
589
+ EmailCfg(id='030367', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
590
+ EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
591
+ EmailCfg(id='032780', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
592
+ EmailCfg(id='029324', recipients=[JEFFREY_EPSTEIN, "Jojo Fontanilla", "Lyn Fontanilla"]), # Bad OCR (nofix)
593
+ EmailCfg(id='033456', recipients=["Joel"], attribution_reason='Reply'),
594
+ EmailCfg(id='033458', recipients=["Joel"], attribution_reason='Reply'),
595
+ EmailCfg(id='033460', recipients=["Joel"], attribution_reason='Reply'),
596
+ EmailCfg(
648
597
  id='021090',
649
- is_fwded_article=True,
650
598
  recipients=[JONATHAN_FARKAS],
651
599
  attribution_reason='Reply to a message signed "jonathan" same as other Farkas emails',
600
+ is_fwded_article=True,
652
601
  ),
653
- MessageCfg(
602
+ EmailCfg(
654
603
  id='033073',
655
604
  recipients=[KATHRYN_RUEMMLER],
656
605
  attribution_reason='to "Kathy" about dems, sent from iPad',
657
- is_attribution_uncertain=True, # It's actually Kathy R. as t eh recipient that's the uncertain part
606
+ is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
658
607
  ),
659
- MessageCfg(
608
+ EmailCfg(
660
609
  id='032939',
661
610
  recipients=[KATHRYN_RUEMMLER],
662
611
  attribution_reason='to "Kathy" about dems, sent from iPad',
663
- is_attribution_uncertain=True, # It's actually Kathy R. as t eh recipient that's the uncertain part
612
+ is_attribution_uncertain=True, # It's actually Kathy R. as the recipient that's the uncertain part
664
613
  ),
665
- MessageCfg(id='031428', recipients=[KEN_STARR, LILLY_SANCHEZ, MARTIN_WEINBERG, REID_WEINGARTEN], duplicate_ids=['031388']), # Bad OCR
666
- MessageCfg(id='025329', recipients=KRASSNER_MANSON_RECIPIENTS),
667
- MessageCfg(id='033568', recipients=KRASSNER_033568_RECIPIENTS),
668
- MessageCfg(id='030522', recipients=[LANDON_THOMAS], is_fwded_article=True), # Vicky Ward article
669
- MessageCfg(id='031413', recipients=[LANDON_THOMAS]),
670
- MessageCfg(id='033591', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['033591']),
671
- MessageCfg(id='027097', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['028787']),
672
- MessageCfg(id='033466', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature'),
673
- MessageCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
674
- MessageCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
675
- MessageCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
676
- MessageCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
677
- MessageCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
678
- MessageCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
679
- MessageCfg(id='033097', recipients=[PAUL_BARRETT, RICHARD_KAHN]), # Bad OCR
680
- MessageCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
681
- MessageCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
682
- MessageCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
683
- MessageCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
684
- MessageCfg(id='030096', recipients=[PETER_MANDELSON]),
685
- MessageCfg(id='032951', recipients=[RAAFAT_ALSABBAGH, None], attribution_reason='Redacted'),
686
- MessageCfg(id='029581', recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
687
- MessageCfg(id='030384', recipients=[RICHARD_KAHN, "Alan Dlugash"]),
688
- MessageCfg(id='019334', recipients=[STEVE_BANNON]),
689
- MessageCfg(id='021106', recipients=[STEVE_BANNON], attribution_reason='Reply'),
690
- MessageCfg(id='033050', actual_text='schwartman'),
691
- MessageCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
692
- MessageCfg(id='026755', is_fwded_article=True), # HuffPo
693
- MessageCfg(id='030528', is_fwded_article=True), # Vicky Ward article
694
- MessageCfg(id='018197', is_fwded_article=True, duplicate_ids=['028648']), # Ray Takeyh article fwd
695
- MessageCfg(id='028728', is_fwded_article=True, duplicate_ids=['027102']), # WSJ forward to Larry Summers
696
- MessageCfg(id='028508', is_fwded_article=True), # nanosatellites article
697
- MessageCfg(id='028781', is_fwded_article=True, duplicate_ids=['013460']), # Atlantic on Jim Yong Kim, Obama's World Bank Pick
698
- MessageCfg(id='019845', is_fwded_article=True), # Pro Publica article on Preet Bharara
699
- MessageCfg(id='029021', is_fwded_article=True), # article about bannon sent by Alain Forget
700
- MessageCfg(id='031688', is_fwded_article=True), # Bill Siegel fwd of email about hamas
701
- MessageCfg(id='026551', is_fwded_article=True), # Sultan bin Sulayem "Ayatollah between the sheets"
702
- MessageCfg(id='031768', is_fwded_article=True), # Sultan bin Sulayem 'Horseface'
703
- MessageCfg(id='031569', is_fwded_article=True), # Article by Kathryn Alexeeff fwded to Peter Thiel
704
- MessageCfg(id='023627', is_fwded_article=True, description=MICHAEL_WOLFF_ARTICLE_HINT),
705
- MessageCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
706
- MessageCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
707
- MessageCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
708
- MessageCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
709
- MessageCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
710
- MessageCfg(id='030628', duplicate_ids=['023065'], dupe_type='redacted'),
711
- MessageCfg(id='017523', duplicate_ids=['031226'], dupe_type='redacted'),
712
- MessageCfg(id='031099', duplicate_ids=['031008'], dupe_type='redacted'),
713
- MessageCfg(id='033596', duplicate_ids=['033463'], dupe_type='redacted'),
714
- MessageCfg(id='030624', duplicate_ids=['023018'], dupe_type='redacted'),
715
- MessageCfg(id='030335', duplicate_ids=['030596'], dupe_type='redacted'),
716
- MessageCfg(id='029841', duplicate_ids=['012711'], dupe_type='redacted'),
717
- MessageCfg(id='033528', duplicate_ids=['033517']),
718
- MessageCfg(id='032023', duplicate_ids=['032012']),
719
- MessageCfg(id='019412', duplicate_ids=['028621']),
720
- MessageCfg(id='027053', duplicate_ids=['028765']),
721
- MessageCfg(id='027049', duplicate_ids=['028773']),
722
- MessageCfg(id='033580', duplicate_ids=['033207']),
723
- MessageCfg(id='028506', duplicate_ids=['025547']),
724
- MessageCfg(id='028784', duplicate_ids=['026549']),
725
- MessageCfg(id='033386', duplicate_ids=['033599']),
726
- MessageCfg(id='023024', duplicate_ids=['030622']),
727
- MessageCfg(id='030618', duplicate_ids=['023026']),
728
- MessageCfg(id='028780', duplicate_ids=['026834']),
729
- MessageCfg(id='028775', duplicate_ids=['026835']),
730
- MessageCfg(id='033251', duplicate_ids=['033489']),
731
- MessageCfg(id='031118', duplicate_ids=['019465']),
732
- MessageCfg(id='031912', duplicate_ids=['032158']),
733
- MessageCfg(id='030587', duplicate_ids=['030514']),
734
- MessageCfg(id='029773', duplicate_ids=['012685']),
735
- MessageCfg(id='029849', duplicate_ids=['033482']),
736
- MessageCfg(id='033297', duplicate_ids=['033586']),
737
- MessageCfg(id='031089', duplicate_ids=['018084']),
738
- MessageCfg(id='031088', duplicate_ids=['030885']),
739
- MessageCfg(id='030238', duplicate_ids=['031130']),
740
- MessageCfg(id='030859', duplicate_ids=['031067']),
741
- MessageCfg(id='031136', duplicate_ids=['028791']),
742
- MessageCfg(id='030635', duplicate_ids=['031134']),
743
- MessageCfg(id='028494', duplicate_ids=['026234']),
744
- MessageCfg(id='030311', duplicate_ids=['021790']),
745
- MessageCfg(id='033508', duplicate_ids=['029880']),
746
- MessageCfg(id='030493', duplicate_ids=['030612']),
747
- MessageCfg(id='032051', duplicate_ids=['031771']),
748
- MessageCfg(id='031217', duplicate_ids=['021761']),
749
- MessageCfg(id='031346', duplicate_ids=['031426']),
750
- MessageCfg(id='031345', duplicate_ids=['031427']),
751
- MessageCfg(id='031343', duplicate_ids=['031432']),
752
- MessageCfg(id='031020', duplicate_ids=['031084']),
753
- MessageCfg(id='033354', duplicate_ids=['033485']),
754
- MessageCfg(id='031999', duplicate_ids=['021241']),
755
- MessageCfg(id='030502', duplicate_ids=['030602']),
756
- MessageCfg(id='030574', duplicate_ids=['030617']),
757
- MessageCfg(id='031156', duplicate_ids=['025226']),
758
- MessageCfg(id='031018', duplicate_ids=['031086']),
759
- MessageCfg(id='031026', duplicate_ids=['031079']),
760
- MessageCfg(id='032011', duplicate_ids=['031787']),
761
- MessageCfg(id='030606', duplicate_ids=['030498']),
762
- MessageCfg(id='032005', duplicate_ids=['021235']),
763
- MessageCfg(id='028505', duplicate_ids=['026160']),
764
- MessageCfg(id='031126', duplicate_ids=['030837']),
765
- MessageCfg(id='029624', duplicate_ids=['029778']),
766
- MessageCfg(id='031338', duplicate_ids=['031422']),
767
- MessageCfg(id='033587', duplicate_ids=['033289']),
768
- MessageCfg(id='032107', duplicate_ids=['012722']),
769
- MessageCfg(id='030844', duplicate_ids=['031114']),
770
- MessageCfg(id='031031', duplicate_ids=['031074']),
771
- MessageCfg(id='027032', duplicate_ids=['028531']),
772
- MessageCfg(id='026777', duplicate_ids=['028493']),
773
- MessageCfg(id='029837', duplicate_ids=['029255']),
774
- MessageCfg(id='031423', duplicate_ids=['025361']),
775
- MessageCfg(id='029299', duplicate_ids=['033594']),
776
- MessageCfg(id='030904', duplicate_ids=['031069']),
777
- MessageCfg(id='030006', duplicate_ids=['031165']),
778
- MessageCfg(id='025215', duplicate_ids=['031159']),
779
- MessageCfg(id='031011', duplicate_ids=['031090']),
780
- MessageCfg(id='032068', duplicate_ids=['018158']),
781
- MessageCfg(id='031213', duplicate_ids=['031221']),
782
- MessageCfg(id='016595', duplicate_ids=['016690']),
783
- MessageCfg(id='029833', duplicate_ids=['028970']),
784
- MessageCfg(id='029839', duplicate_ids=['028958']),
785
- MessageCfg(id='029893', duplicate_ids=['033503']),
786
- MessageCfg(id='025878', duplicate_ids=['028486']),
787
- MessageCfg(id='032764', duplicate_ids=['033565']),
788
- MessageCfg(id='026618', duplicate_ids=['028485']),
789
- MessageCfg(id='030609', duplicate_ids=['030495']),
790
- MessageCfg(id='029831', duplicate_ids=['028972']),
791
- MessageCfg(id='021758', duplicate_ids=['030616']),
792
- MessageCfg(id='033498', duplicate_ids=['029884']),
793
- MessageCfg(id='028620', duplicate_ids=['027094']),
794
- MessageCfg(id='032456', duplicate_ids=['033579']),
795
- MessageCfg(id='030315', duplicate_ids=['030255']),
796
- MessageCfg(id='031112', duplicate_ids=['030876']),
797
- MessageCfg(id='030614', duplicate_ids=['030491']),
798
- MessageCfg(id='033585', duplicate_ids=['032279']),
799
- MessageCfg(id='031220', duplicate_ids=['031189']),
800
- MessageCfg(id='032779', duplicate_ids=['033563']),
801
- MessageCfg(id='033230', duplicate_ids=['033577']),
802
- MessageCfg(id='032125', duplicate_ids=['023971']),
803
- MessageCfg(id='031230', duplicate_ids=['031203']),
804
- MessageCfg(id='028752', duplicate_ids=['026569']),
805
- MessageCfg(id='031773', duplicate_ids=['032050']),
806
- MessageCfg(id='021400', duplicate_ids=['031983']),
807
- MessageCfg(id='026548', duplicate_ids=['033491']),
808
- MessageCfg(id='029752', duplicate_ids=['023550']),
809
- MessageCfg(id='030339', duplicate_ids=['030592']),
810
- MessageCfg(id='032250', duplicate_ids=['033589']),
614
+ EmailCfg(id='025329', recipients=['Nancy Cain', 'Tom', 'Marie Moneysmith', 'Steven Gaydos', 'George Krassner', 'Linda W. Grossman', 'Holly Krassner Dawson', 'Daniel Dawson', 'Danny Goldberg', 'Caryl Ratner', 'Kevin Bright', 'Michael Simmons', SAMUEL_LEFF, 'Bob Fass', 'Lynnie Tofte Fass', 'Barb Cowles', 'Lee Quarnstrom']),
615
+ EmailCfg(id='033568', recipients=['George Krassner', 'Daniel Dawson', 'Danny Goldberg', 'Tom', 'Kevin Bright', 'Walli Leff', 'Michael Simmons', 'Lee Quarnstrom', 'Lanny Swerdlow', 'Larry Sloman', 'W&K', 'Harry Shearer', 'Jay Levin']),
616
+ EmailCfg(id='030522', recipients=[LANDON_THOMAS], attribution_reason='reply header', is_fwded_article=True), # Vicky Ward article
617
+ EmailCfg(id='031413', recipients=[LANDON_THOMAS], attribution_reason='reply header'),
618
+ EmailCfg(id='033591', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['033591']),
619
+ EmailCfg(id='027097', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature', duplicate_ids=['028787']),
620
+ EmailCfg(id='033466', recipients=[LAWRANCE_VISOSKI], attribution_reason='Reply signature'),
621
+ EmailCfg(id='022250', recipients=[LESLEY_GROFF], attribution_reason='Reply'),
622
+ EmailCfg(id='030242', recipients=[MARIANA_IDZKOWSKA], duplicate_ids=['032048'], dupe_type='redacted'),
623
+ EmailCfg(id='030368', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
624
+ EmailCfg(id='030369', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
625
+ EmailCfg(id='030371', recipients=[MELANIE_SPINELLA], attribution_reason='Actually a self fwd from jeffrey to jeffrey'),
626
+ EmailCfg(id='022258', recipients=[NADIA_MARCINKO], attribution_reason='Reply header'),
627
+ EmailCfg(id='030506', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
628
+ EmailCfg(id='030507', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
629
+ EmailCfg(id='030508', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
630
+ EmailCfg(id='030509', recipients=[PAULA], attribution_reason=PAULA_REASON, is_attribution_uncertain=True),
631
+ EmailCfg(id='030096', recipients=[PETER_MANDELSON], attribution_reason='reply header'),
632
+ EmailCfg(id='032951', recipients=[RAAFAT_ALSABBAGH, None], attribution_reason='Redacted'),
633
+ EmailCfg(id='029581', recipients=[RENATA_BOLOTOVA], attribution_reason=BOLOTOVA_REASON),
634
+ EmailCfg(id='019334', recipients=[STEVE_BANNON], attribution_reason='quoted reply'),
635
+ EmailCfg(id='021106', recipients=[STEVE_BANNON], attribution_reason='Reply'),
636
+
637
+ # Misc configs
638
+ EmailCfg(id='033050', actual_text='schwartman'),
639
+ EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
640
+ EmailCfg(id='023627', description=MICHAEL_WOLFF_ARTICLE_HINT, is_fwded_article=True),
641
+ EmailCfg(id='026298', is_fwded_article=True, duplicate_ids=['026499']), # Written by someone else?
642
+ EmailCfg(id='029692', is_fwded_article=True, duplicate_ids=['029779']), # WaPo article
643
+ EmailCfg(id='022344', is_fwded_article=True, duplicate_ids=['028529']), # Bill Gates is most admired from Nikolic
644
+ EmailCfg(id='018197', is_fwded_article=True, duplicate_ids=['028648']), # Ray Takeyh article fwd
645
+ EmailCfg(id='028728', is_fwded_article=True, duplicate_ids=['027102']), # WSJ forward to Larry Summers
646
+ EmailCfg(id='028781', is_fwded_article=True, duplicate_ids=['013460']), # Atlantic on Jim Yong Kim, Obama's World Bank Pick
647
+ EmailCfg(id='026755', is_fwded_article=True), # HuffPo
648
+ EmailCfg(id='030528', is_fwded_article=True), # Vicky Ward article
649
+ EmailCfg(id='028508', is_fwded_article=True), # nanosatellites article
650
+ EmailCfg(id='019845', is_fwded_article=True), # Pro Publica article on Preet Bharara
651
+ EmailCfg(id='029021', is_fwded_article=True), # article about bannon sent by Alain Forget
652
+ EmailCfg(id='031688', is_fwded_article=True), # Bill Siegel fwd of email about hamas
653
+ EmailCfg(id='026551', is_fwded_article=True), # Sultan bin Sulayem "Ayatollah between the sheets"
654
+ EmailCfg(id='031768', is_fwded_article=True), # Sultan bin Sulayem 'Horseface'
655
+ EmailCfg(id='031569', is_fwded_article=True), # Article by Kathryn Alexeeff fwded to Peter Thiel
656
+ EmailCfg(id='029689', is_fwded_article=True), # Tunisia article to Larry Summers
657
+ EmailCfg(id='014525', is_fwded_article=True), # Really more of a mailing list from Paul Morris?
658
+ EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
659
+ EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
660
+
661
+ # Configure duplicates
662
+ EmailCfg(id='028768', duplicate_ids=['026563'], dupe_type='redacted'),
663
+ EmailCfg(id='027056', duplicate_ids=['028762'], dupe_type='redacted'),
664
+ EmailCfg(id='032248', duplicate_ids=['032246'], dupe_type='redacted'),
665
+ EmailCfg(id='030628', duplicate_ids=['023065'], dupe_type='redacted'),
666
+ EmailCfg(id='017523', duplicate_ids=['031226'], dupe_type='redacted'),
667
+ EmailCfg(id='031099', duplicate_ids=['031008'], dupe_type='redacted'),
668
+ EmailCfg(id='033596', duplicate_ids=['033463'], dupe_type='redacted'),
669
+ EmailCfg(id='030624', duplicate_ids=['023018'], dupe_type='redacted'),
670
+ EmailCfg(id='030335', duplicate_ids=['030596'], dupe_type='redacted'),
671
+ EmailCfg(id='029841', duplicate_ids=['012711'], dupe_type='redacted'),
672
+ EmailCfg(id='030414', duplicate_ids=['030578'], dupe_type='redacted'),
673
+ EmailCfg(id='031135', duplicate_ids=['030634'], dupe_type='redacted'),
674
+ EmailCfg(id='029835', duplicate_ids=['028968']),
675
+ EmailCfg(id='033512', duplicate_ids=['033361']),
676
+ EmailCfg(id='030299', duplicate_ids=['021794']),
677
+ EmailCfg(id='033575', duplicate_ids=['012898']),
678
+ EmailCfg(id='031428', duplicate_ids=['031388']),
679
+ EmailCfg(id='031980', duplicate_ids=['019409']),
680
+ EmailCfg(id='033486', duplicate_ids=['033156']),
681
+ EmailCfg(id='025790', duplicate_ids=['031994']),
682
+ EmailCfg(id='025041', duplicate_ids=['028675']),
683
+ EmailCfg(id='028497', duplicate_ids=['026228']),
684
+ EmailCfg(id='033528', duplicate_ids=['033517']),
685
+ EmailCfg(id='032023', duplicate_ids=['032012']),
686
+ EmailCfg(id='019412', duplicate_ids=['028621']),
687
+ EmailCfg(id='027053', duplicate_ids=['028765']),
688
+ EmailCfg(id='027049', duplicate_ids=['028773']),
689
+ EmailCfg(id='033580', duplicate_ids=['033207']),
690
+ EmailCfg(id='028506', duplicate_ids=['025547']),
691
+ EmailCfg(id='028784', duplicate_ids=['026549']),
692
+ EmailCfg(id='033386', duplicate_ids=['033599']),
693
+ EmailCfg(id='023024', duplicate_ids=['030622']),
694
+ EmailCfg(id='030618', duplicate_ids=['023026']),
695
+ EmailCfg(id='028780', duplicate_ids=['026834']),
696
+ EmailCfg(id='028775', duplicate_ids=['026835']),
697
+ EmailCfg(id='033251', duplicate_ids=['033489']),
698
+ EmailCfg(id='031118', duplicate_ids=['019465']),
699
+ EmailCfg(id='031912', duplicate_ids=['032158']),
700
+ EmailCfg(id='030587', duplicate_ids=['030514']),
701
+ EmailCfg(id='029773', duplicate_ids=['012685']),
702
+ EmailCfg(id='029849', duplicate_ids=['033482']),
703
+ EmailCfg(id='033297', duplicate_ids=['033586']),
704
+ EmailCfg(id='031089', duplicate_ids=['018084']),
705
+ EmailCfg(id='031088', duplicate_ids=['030885']),
706
+ EmailCfg(id='030238', duplicate_ids=['031130']),
707
+ EmailCfg(id='030859', duplicate_ids=['031067']),
708
+ EmailCfg(id='031136', duplicate_ids=['028791']),
709
+ EmailCfg(id='030635', duplicate_ids=['031134']),
710
+ EmailCfg(id='028494', duplicate_ids=['026234']),
711
+ EmailCfg(id='030311', duplicate_ids=['021790']),
712
+ EmailCfg(id='033508', duplicate_ids=['029880']),
713
+ EmailCfg(id='030493', duplicate_ids=['030612']),
714
+ EmailCfg(id='032051', duplicate_ids=['031771']),
715
+ EmailCfg(id='031217', duplicate_ids=['021761']),
716
+ EmailCfg(id='031346', duplicate_ids=['031426']),
717
+ EmailCfg(id='031345', duplicate_ids=['031427']),
718
+ EmailCfg(id='031343', duplicate_ids=['031432']),
719
+ EmailCfg(id='031020', duplicate_ids=['031084']),
720
+ EmailCfg(id='033354', duplicate_ids=['033485']),
721
+ EmailCfg(id='031999', duplicate_ids=['021241']),
722
+ EmailCfg(id='030502', duplicate_ids=['030602']),
723
+ EmailCfg(id='030574', duplicate_ids=['030617']),
724
+ EmailCfg(id='031156', duplicate_ids=['025226']),
725
+ EmailCfg(id='031018', duplicate_ids=['031086']),
726
+ EmailCfg(id='031026', duplicate_ids=['031079']),
727
+ EmailCfg(id='032011', duplicate_ids=['031787']),
728
+ EmailCfg(id='030606', duplicate_ids=['030498']),
729
+ EmailCfg(id='032005', duplicate_ids=['021235']),
730
+ EmailCfg(id='028505', duplicate_ids=['026160']),
731
+ EmailCfg(id='031126', duplicate_ids=['030837']),
732
+ EmailCfg(id='029624', duplicate_ids=['029778']),
733
+ EmailCfg(id='031338', duplicate_ids=['031422']),
734
+ EmailCfg(id='033587', duplicate_ids=['033289']),
735
+ EmailCfg(id='032107', duplicate_ids=['012722']),
736
+ EmailCfg(id='030844', duplicate_ids=['031114']),
737
+ EmailCfg(id='031031', duplicate_ids=['031074']),
738
+ EmailCfg(id='027032', duplicate_ids=['028531']),
739
+ EmailCfg(id='026777', duplicate_ids=['028493']),
740
+ EmailCfg(id='029837', duplicate_ids=['029255']),
741
+ EmailCfg(id='031423', duplicate_ids=['025361']),
742
+ EmailCfg(id='029299', duplicate_ids=['033594']),
743
+ EmailCfg(id='030904', duplicate_ids=['031069']),
744
+ EmailCfg(id='030006', duplicate_ids=['031165']),
745
+ EmailCfg(id='025215', duplicate_ids=['031159']),
746
+ EmailCfg(id='031011', duplicate_ids=['031090']),
747
+ EmailCfg(id='032068', duplicate_ids=['018158']),
748
+ EmailCfg(id='031213', duplicate_ids=['031221']),
749
+ EmailCfg(id='016595', duplicate_ids=['016690']),
750
+ EmailCfg(id='029833', duplicate_ids=['028970']),
751
+ EmailCfg(id='029839', duplicate_ids=['028958']),
752
+ EmailCfg(id='029893', duplicate_ids=['033503']),
753
+ EmailCfg(id='025878', duplicate_ids=['028486']),
754
+ EmailCfg(id='032764', duplicate_ids=['033565']),
755
+ EmailCfg(id='026618', duplicate_ids=['028485']),
756
+ EmailCfg(id='030609', duplicate_ids=['030495']),
757
+ EmailCfg(id='029831', duplicate_ids=['028972']),
758
+ EmailCfg(id='021758', duplicate_ids=['030616']),
759
+ EmailCfg(id='033498', duplicate_ids=['029884']),
760
+ EmailCfg(id='028620', duplicate_ids=['027094']),
761
+ EmailCfg(id='032456', duplicate_ids=['033579']),
762
+ EmailCfg(id='030315', duplicate_ids=['030255']),
763
+ EmailCfg(id='031112', duplicate_ids=['030876']),
764
+ EmailCfg(id='030614', duplicate_ids=['030491']),
765
+ EmailCfg(id='033585', duplicate_ids=['032279']),
766
+ EmailCfg(id='031220', duplicate_ids=['031189']),
767
+ EmailCfg(id='032779', duplicate_ids=['033563']),
768
+ EmailCfg(id='033230', duplicate_ids=['033577']),
769
+ EmailCfg(id='032125', duplicate_ids=['023971']),
770
+ EmailCfg(id='031230', duplicate_ids=['031203']),
771
+ EmailCfg(id='028752', duplicate_ids=['026569']),
772
+ EmailCfg(id='031773', duplicate_ids=['032050']),
773
+ EmailCfg(id='021400', duplicate_ids=['031983']),
774
+ EmailCfg(id='026548', duplicate_ids=['033491']),
775
+ EmailCfg(id='029752', duplicate_ids=['023550']),
776
+ EmailCfg(id='030339', duplicate_ids=['030592']),
777
+ EmailCfg(id='032250', duplicate_ids=['033589']),
811
778
  ]
812
779
 
813
780
 
@@ -815,732 +782,718 @@ EMAILS_CONFIG = [
815
782
  ####################################### OTHER FILES ############################################
816
783
  ################################################################################################
817
784
 
818
- OTHER_FILES_CONFIG = [
819
- # books
820
- FileCfg(id='015032', description=f"{BOOK} '60 Years of Investigative Satire: The Best of {PAUL_KRASSNER}'"),
821
- FileCfg(id='015675', description=f'{BOOK} "Are the Androids Dreaming Yet? Amazing Brain Human Communication, Creativity & Free Will" by James Tagg'),
822
- FileCfg(id='012899', description=f'{BOOK} "Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy" by Ben Goertzel'),
823
- FileCfg(id='012747', description=f'{BOOK} "Evilicious: Explaining Our Taste For Excessive Harm" by Marc D. Hauser'),
824
- FileCfg(id='019874', description=f'{BOOK} {FIRE_AND_FURY}', date='2018-01-05'),
825
- FileCfg(id='032724', description=f'{BOOK} cover of {FIRE_AND_FURY}', date='2018-01-05'),
826
- FileCfg(id='010912', description=f"{BOOK} 'Free Growth and Other Surprises' by Gordon Getty (draft)", date='2018-10-18'),
827
- FileCfg(
828
- id='021247',
829
- description=f'{BOOK} "Invisible Forces And Powerful Beliefs: Gravity, Gods, And Minds" by The Chicago Social Brain Network',
830
- date='2010-10-04',
831
- ),
832
- FileCfg(id='019477', description=f'{BOOK} "How America Lost Its Secrets: Edward Snowden, the Man, and the Theft" by {EDWARD_JAY_EPSTEIN}'),
833
- FileCfg(id='017088', description=f'{BOOK} "Taking the Stand: My Life in the Law" by {ALAN_DERSHOWITZ} (draft)'),
834
- FileCfg(id='023731', description=f'{BOOK} "Teaching Minds How Cognitive Science Can Save Our Schools" by {ROGER_SCHANK}'),
835
- FileCfg(id='013796', description=f'{BOOK} "The 4-Hour Workweek" by Tim Ferriss'),
836
- FileCfg(id='021145', description=f'{BOOK} "The Billionaire\'s Playboy Club" by {VIRGINIA_GIUFFRE} (draft?)'),
837
- FileCfg(id='013501', description=f'{BOOK} "The Nearness Of Grace: A Personal Science Of Spiritual Transformation" by Arnold J. Mandell', date='2005-01-01'),
838
- FileCfg(id='018438', description=f'{BOOK} "The S&M Feminist" by Clarisse Thorn'),
839
- FileCfg(id='018232', description=f'{BOOK} "The Seventh Sense: Power, Fortune & Survival in the Age of Networks" by Joshua Cooper Ramo'),
840
- FileCfg(id='020153', description=f'{BOOK} "The Snowden Affair: A Spy Story In Six Parts" by {EDWARD_JAY_EPSTEIN}'),
841
- FileCfg(id='021120', description=f'{BOOK} chapter of "Siege: Trump Under Fire" by {MICHAEL_WOLFF}'),
842
- FileCfg(id='016804', description=DEEP_THINKING_HINT, date='2019-02-19', duplicate_ids=['016221']),
843
- FileCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_HINT,),
844
- FileCfg(id='027849', author=EHUD_BARAK, description=NIGHT_FLIGHT_HINT,),
845
- FileCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
846
- FileCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
847
- FileCfg(id='021958', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
848
- FileCfg(id='022058', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
849
- FileCfg(id='022118', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
850
- FileCfg(id='019111', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
851
- FileCfg(id='031533', description=f'pages from a book about the Baylor University sexual assault scandal and Sam Ukwuachu'),
785
+ OTHER_FILES_BOOKS = [
786
+ DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
787
+ DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
788
+ DocCfg(id='012899', author='Ben Goertzel', description=f'Engineering General Intelligence: A Path to Advanced AGI Via Embodied Learning and Cognitive Synergy'),
789
+ DocCfg(id='018438', author='Clarisse Thorn', description=f'The S&M Feminist'),
790
+ DocCfg(id='019477', author=EDWARD_JAY_EPSTEIN, description=f'How America Lost Its Secrets: Edward Snowden, the Man, and the Theft'),
791
+ DocCfg(id='020153', author=EDWARD_JAY_EPSTEIN, description=f'The Snowden Affair: A Spy Story In Six Parts'),
792
+ DocCfg(id='011472', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK),
793
+ DocCfg(id='027849', author=EHUD_BARAK, description=NIGHT_FLIGHT_BOOK),
794
+ DocCfg(id='010912', author=GORDON_GETTY, description=f'"Free Growth and Other Surprises" (draft)', date='2018-10-18'),
795
+ DocCfg(id='010477', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
796
+ DocCfg(id='010486', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
797
+ DocCfg(id='021958', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
798
+ DocCfg(id='022058', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
799
+ DocCfg(id='022118', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
800
+ DocCfg(id='019111', author=JAMES_PATTERSON, description=PATTERSON_BOOK_SCANS, date='2016-10-10'),
801
+ DocCfg(id='015675', author='James Tagg', description=f'Are the Androids Dreaming Yet? Amazing Brain Human Communication, Creativity & Free Will'),
802
+ DocCfg(id='016804', author=JOHN_BROCKMAN, description='Deep Thinking: Twenty-Five Ways of Looking at AI', date='2019-02-19', duplicate_ids=['016221']),
803
+ DocCfg(id='018232', author='Joshua Cooper Ramo', description=f'The Seventh Sense: Power, Fortune & Survival in the Age of Networks'),
804
+ DocCfg(id='012747', author='Marc D. Hauser', description=f'Evilicious: Explaining Our Taste For Excessive Harm'),
805
+ DocCfg(id='032724', author=MICHAEL_WOLFF, description=f'cover of "{FIRE_AND_FURY}"', date='2018-01-05'),
806
+ DocCfg(id='021120', author=MICHAEL_WOLFF, description=f'chapter of "Siege: Trump Under Fire"'),
807
+ DocCfg(id='019874', author=MICHAEL_WOLFF, description=FIRE_AND_FURY, date='2018-01-05'),
808
+ DocCfg(id='015032', author=PAUL_KRASSNER, description=f"60 Years of Investigative Satire: The Best of {PAUL_KRASSNER}"),
809
+ DocCfg(id='023731', author=ROGER_SCHANK, description=f'Teaching Minds How Cognitive Science Can Save Our Schools'),
810
+ DocCfg(id='021247', author='The Chicago Social Brain Network', description=f'Invisible Forces And Powerful Beliefs: Gravity, Gods, And Minds', date='2010-10-04'),
811
+ DocCfg(id='013796', author='Tim Ferriss', description=f'The 4-Hour Workweek'),
812
+ DocCfg(id='021145', author=VIRGINIA_GIUFFRE, description=f'"The Billionaire\'s Playboy Club" (draft?)'),
813
+ DocCfg(id='031533', description=f'pages from a book about the Baylor University sexual assault scandal and Sam Ukwuachu'),
814
+ ]
852
815
 
853
- # articles
854
- FileCfg(id='013275', author=BLOOMBERG, description=f"article on notable 2013 obituaries", date='2013-12-26'),
855
- FileCfg(id='026543', author=BLOOMBERG, description=f"BNA article about taxes"),
856
- FileCfg(id='023572', author=CHINA_DAILY, description=f"article on China's Belt & Road Initiative"),
857
- FileCfg(id='023571', author=CHINA_DAILY, description=f"article on terrorism, Macau, trade initiatives", date='2016-09-18'),
858
- FileCfg(id='023570', author=CHINA_DAILY, description=f"article on Belt & Road in Central/South America, Xi philosophy", date='2017-05-14'),
859
- FileCfg(id='025115', author=CHINA_DAILY, description=f"article on China and the US working together", date='2017-05-14'),
860
- FileCfg(id='025292', author=DAILY_MAIL, description=f"article on Bill Clinton being named in a lawsuit"),
861
- FileCfg(id='019468', author=DAILY_MAIL, description=f"article on Epstein and Clinton"),
862
- FileCfg(id='022970', author=DAILY_MAIL, description=f"article on Epstein and Prince Andrew"),
863
- FileCfg(id='031186', author=DAILY_MAIL, description=f'article on allegations of rape of 13 year old against Trump', date='2016-11-02'),
864
- FileCfg(id='013437', author=DAILY_TELEGRAPH, description=f"article about Epstein's diary", date='2011-03-05'),
865
- FileCfg(id='023287', author=DAILY_TELEGRAPH, description=f"article about a play based on the Oslo Accords", date='2017-09-15'),
866
- FileCfg(id='019206', author=EDWARD_JAY_EPSTEIN, description=f"WSJ article about Edward Snowden", date='2016-12-30'),
867
- FileCfg(id='029865', author=LA_TIMES, description=f"front page article about {DEEPAK_CHOPRA} and young Iranians", date='2016-11-05'),
868
- FileCfg(id='026598', author=LA_TIMES, description=f"op-ed about why America needs a Ministry of Culture"),
869
- FileCfg(id='022707', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
870
- FileCfg(id='022727', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
871
- FileCfg(id='022746', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
872
- FileCfg(id='022844', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
873
- FileCfg(id='022863', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
874
- FileCfg(id='022894', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
875
- FileCfg(id='022952', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
876
- FileCfg(id='024229', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
877
- FileCfg(id='031753', author=PAUL_KRASSNER, description=f'essay for Playboy in the 1980s', date='1985-01-01'),
878
- FileCfg(id='023638', author=PAUL_KRASSNER, description=f'magazine interview'),
879
- FileCfg(id='024374', author=PAUL_KRASSNER, description=f"'Remembering Cavalier Magazine'"),
880
- FileCfg(id='030187', author=PAUL_KRASSNER, description=f'"Remembering Lenny Bruce While We\'re Thinking About Trump" (draft?)'),
881
- FileCfg(id='019088', author=PAUL_KRASSNER, description=f'"Are Rape Jokes Funny? (draft)', date='2012-07-28'),
882
- FileCfg(id='012740', author=PEGGY_SIEGAL, description=f"article about Venice Film Festival"),
883
- FileCfg(id='013442', author=PEGGY_SIEGAL, description=f"draft about Oscars", date='2011-02-27'),
884
- FileCfg(id='012700', author=PEGGY_SIEGAL, description=f"film events diary", date='2011-02-27'),
885
- FileCfg(id='012690', author=PEGGY_SIEGAL, description=f"film events diary early draft of 012700", date='2011-02-27'),
886
- FileCfg(id='013450', author=PEGGY_SIEGAL, description=f"Oscar Diary in Avenue Magazine", date='2011-02-27'),
887
- FileCfg(id='010715', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2012-02-27'),
888
- FileCfg(id='019849', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2017-02-27', duplicate_ids=['019864']),
889
- FileCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar'),
890
- FileCfg(id='026887', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} "New Tariffs - Trade War"'),
891
- FileCfg(id='026877', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "New Tariffs - Trade War"'),
892
- FileCfg(id='026868', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business New China Tariffs Trade War"', date='2018-09-18'),
893
- FileCfg(id='023707', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business U.S. and China Agree to Pause Trade War"', date='2018-12-03'),
894
- FileCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"'),
895
- FileCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"'),
896
- FileCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances"),
897
- FileCfg(id='025104', author='SCMP', description=f"article about China and globalisation"),
898
- FileCfg(id='033379', author=WAPO, description=f'How Washington Pivoted From Finger-Wagging to Appeasement (about Viktor Orban)', date='2018-05-25'),
899
- FileCfg(
816
+ OTHER_FILES_ARTICLES = [
817
+ DocCfg(id='013275', author=BLOOMBERG, description=f"article on notable 2013 obituaries", date='2013-12-26'),
818
+ DocCfg(id='026543', author=BLOOMBERG, description=f"BNA article about taxes"),
819
+ DocCfg(id='023572', author=CHINA_DAILY, description=f"article on China's Belt & Road Initiative"),
820
+ DocCfg(id='023571', author=CHINA_DAILY, description=f"article on terrorism, Macau, trade initiatives", date='2016-09-18'),
821
+ DocCfg(id='023570', author=CHINA_DAILY, description=f"article on Belt & Road in Central/South America, Xi philosophy", date='2017-05-14'),
822
+ DocCfg(id='025115', author=CHINA_DAILY, description=f"article on China and the US working together", date='2017-05-14'),
823
+ DocCfg(id='025292', author=DAILY_MAIL, description=f"article on Bill Clinton being named in a lawsuit"),
824
+ DocCfg(id='019468', author=DAILY_MAIL, description=f"article on Epstein and Clinton"),
825
+ DocCfg(id='022970', author=DAILY_MAIL, description=f"article on Epstein and Prince Andrew"),
826
+ DocCfg(id='031186', author=DAILY_MAIL, description=f'article on allegations of rape of 13 year old against Trump', date='2016-11-02'),
827
+ DocCfg(id='013437', author=DAILY_TELEGRAPH, description=f"article about Epstein's diary", date='2011-03-05'),
828
+ DocCfg(id='023287', author=DAILY_TELEGRAPH, description=f"article about a play based on the Oslo Accords", date='2017-09-15'),
829
+ DocCfg(id='019206', author=EDWARD_JAY_EPSTEIN, description=f"WSJ article about Edward Snowden", date='2016-12-30'),
830
+ DocCfg(id='029539', author=LA_TIMES, description=f"Alan Trounson interview on California stem cell research and CIRM"),
831
+ DocCfg(id='029865', author=LA_TIMES, description=f"front page article about {DEEPAK_CHOPRA} and young Iranians", date='2016-11-05'),
832
+ DocCfg(id='026598', author=LA_TIMES, description=f"op-ed about why America needs a Ministry of Culture"),
833
+ DocCfg(id='027024', author=LA_TIMES, description=f"Scientists Create Human Embryos to Make Stem Cells", date='2013-05-15'),
834
+ DocCfg(id='022707', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
835
+ DocCfg(id='022727', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
836
+ DocCfg(id='022746', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
837
+ DocCfg(id='022844', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
838
+ DocCfg(id='022863', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
839
+ DocCfg(id='022894', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
840
+ DocCfg(id='022952', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
841
+ DocCfg(id='024229', author=MICHAEL_WOLFF, description=MICHAEL_WOLFF_ARTICLE_HINT),
842
+ DocCfg(id='031753', author=PAUL_KRASSNER, description=f'essay for Playboy in the 1980s', date='1985-01-01'),
843
+ DocCfg(id='023638', author=PAUL_KRASSNER, description=f'magazine interview'),
844
+ DocCfg(id='024374', author=PAUL_KRASSNER, description=f"Remembering Cavalier Magazine"),
845
+ DocCfg(id='030187', author=PAUL_KRASSNER, description=f'"Remembering Lenny Bruce While We\'re Thinking About Trump" (draft?)'),
846
+ DocCfg(id='019088', author=PAUL_KRASSNER, description=f'"Are Rape Jokes Funny?" (draft)', date='2012-07-28'),
847
+ DocCfg(id='012740', author=PEGGY_SIEGAL, description=f"article about Venice Film Festival"),
848
+ DocCfg(id='013442', author=PEGGY_SIEGAL, description=f"draft about Oscars", date='2011-02-27'),
849
+ DocCfg(id='012700', author=PEGGY_SIEGAL, description=f"film events diary", date='2011-02-27'),
850
+ DocCfg(id='012690', author=PEGGY_SIEGAL, description=f"film events diary early draft of 012700", date='2011-02-27'),
851
+ DocCfg(id='013450', author=PEGGY_SIEGAL, description=f"Oscar Diary in Avenue Magazine", date='2011-02-27'),
852
+ DocCfg(id='010715', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2012-02-27'),
853
+ DocCfg(id='019849', author=PEGGY_SIEGAL, description=f"Oscar Diary April", date='2017-02-27', duplicate_ids=['019864']),
854
+ DocCfg(id='033253', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} article about Rohingya in Myanmar'),
855
+ DocCfg(id='026887', author=ROBERT_LAWRENCE_KUHN, description=f'{BBC} "New Tariffs - Trade War"'),
856
+ DocCfg(id='026877', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "New Tariffs - Trade War"'),
857
+ DocCfg(id='026868', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business New China Tariffs Trade War"', date='2018-09-18'),
858
+ DocCfg(id='023707', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Quest Means Business U.S. and China Agree to Pause Trade War"', date='2018-12-03'),
859
+ DocCfg(id='029176', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "U.S. China Tariffs - Trade War"'),
860
+ DocCfg(id='032638', author=ROBERT_LAWRENCE_KUHN, description=f'{CNN} "Xi Jinping and the New Politburo Committee"'),
861
+ DocCfg(id='023666', author=ROBERT_LAWRENCE_KUHN, description=f"sizzle reel / television appearances"),
862
+ DocCfg(id='025104', author='SCMP', description=f"article about China and globalisation"),
863
+ DocCfg(id='033379', author=WAPO, description=f'How Washington Pivoted From Finger-Wagging to Appeasement (about Viktor Orban)', date='2018-05-25'),
864
+ DocCfg(
900
865
  id='031396',
901
866
  author=WAPO,
902
867
  description=f"DOJ discipline office with limited reach to probe handling of controversial sex abuse case",
903
868
  date='2019-02-06',
904
869
  duplicate_ids=['031415'],
905
870
  ),
906
- FileCfg(
871
+ DocCfg(
907
872
  id='030199',
908
873
  description=f'article about allegations Trump raped a 13 year old girl {JANE_DOE_V_EPSTEIN_TRUMP}',
909
874
  date='2017-11-16',
910
875
  ),
911
- FileCfg(id='031725', description=f"article about Gloria Allred and Trump allegations", date='2016-10-10'),
912
- FileCfg(id='031198', description=f"article about identify of Jane Doe in {JANE_DOE_V_EPSTEIN_TRUMP}"),
913
- FileCfg(id='012704', description=f"article about {JANE_DOE_V_USA} and {CVRA}", date='2011-04-21'),
914
- FileCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
915
- FileCfg(id='031776', description=f"article about Michael Avenatti by Andrew Strickler"),
916
- FileCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
917
- FileCfg(id='026584', description=f"article about tax implications of 'disregarded entities'", date='2009-07-01'),
918
- FileCfg(id='030258', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030248'),
919
- FileCfg(id='030248', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
920
- FileCfg(id='029165', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
921
- FileCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
922
- FileCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
923
- FileCfg(id='030013', description=f'Aviation International News article', date='2012-07-01'),
924
- FileCfg(id='014865', description=f"Boston Globe article about {ALAN_DERSHOWITZ}"),
925
- FileCfg(id='033231', description=f"Business Standard article about Trump's visit with India's Modi"),
926
- FileCfg(id='023567', description=f"Financial Times article about quantitative easing"),
927
- FileCfg(id='026761', description=f"Forbes article about {BARBRO_C_EHNBOM} 'Swedish American Group Focuses On Cancer'"),
928
- FileCfg(id='031716', description=f'Fortune Magazine article by {TOM_BARRACK}', date='2016-10-22'),
929
- FileCfg(
876
+ DocCfg(id='031725', description=f"article about Gloria Allred and Trump allegations", date='2016-10-10'),
877
+ DocCfg(id='031198', description=f"article about identify of Jane Doe in {JANE_DOE_V_EPSTEIN_TRUMP}"),
878
+ DocCfg(id='012704', description=f"article about {JANE_DOE_V_USA} and {CVRA}", date='2011-04-21'),
879
+ DocCfg(id='026648', description=f'article about {JASTA} lawsuit against Saudi Arabia by 9/11 victims (Russian propaganda?)', date='2017-05-13'),
880
+ DocCfg(id='031776', description=f"article about Michael Avenatti by Andrew Strickler"),
881
+ DocCfg(id='032159', description=f"article about microfinance and cell phones in Zimbabwe, Strive Masiyiwa (Econet Wireless)"),
882
+ DocCfg(id='026584', description=f"article about tax implications of 'disregarded entities'", date='2009-07-01'),
883
+ DocCfg(id='030258', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030248'),
884
+ DocCfg(id='030248', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
885
+ DocCfg(id='029165', description=f'{ARTICLE_DRAFT} Mueller probe, almost same as 030258'),
886
+ DocCfg(id='033468', description=f'{ARTICLE_DRAFT} Rod Rosenstein', date='2018-09-24'),
887
+ DocCfg(id='030825', description=f'{ARTICLE_DRAFT} Syria'),
888
+ DocCfg(id='030013', description=f'Aviation International News article', date='2012-07-01'),
889
+ DocCfg(id='014865', description=f"Boston Globe article about {ALAN_DERSHOWITZ}"),
890
+ DocCfg(id='033231', description=f"Business Standard article about Trump's visit with India's Modi"),
891
+ DocCfg(id='023567', description=f"Financial Times article about quantitative easing"),
892
+ DocCfg(id='026761', description=f"Forbes article about {BARBRO_C_EHNBOM} 'Swedish American Group Focuses On Cancer'"),
893
+ DocCfg(id='031716', description=f'Fortune Magazine article by {TOM_BARRACK}', date='2016-10-22'),
894
+ DocCfg(
930
895
  id='019233',
931
896
  description=f"Freedom House: 'Breaking Down Democracy: Goals, Strategies, and Methods of Modern Authoritarians'",
932
897
  date='2017-06-02',
933
898
  ),
934
- FileCfg(id='019444', description=f"Frontlines magazine article 'Biologists Dig Deeper'", date='2008-01-01'),
935
- FileCfg(id='023720', description=f'Future Science article: "Is Shame Necessary?" by {JENNIFER_JACQUET}'),
936
- FileCfg(id='027051', description=f"German language article about the 2013 Lifeball / AIDS Gala", date='2013-01-01'),
937
- FileCfg(id='021094', description=f"Globe and Mail article about Gerd Heinrich"),
938
- FileCfg(id='013268', description=f"JetGala article about airplane interior designer {ERIC_ROTH}"),
939
- FileCfg(id='033480', description=f"{JOHN_BOLTON_PRESS_CLIPPING}", date='2018-04-06', duplicate_ids=['033481']),
940
- FileCfg(id='013403', description=f"Lexis Nexis result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
941
- FileCfg(id='023102', description=f"Litigation Daily article about {REID_WEINGARTEN}", date='2015-09-04'),
942
- FileCfg(id='029340', description=f'MarketWatch article about estate taxes, particularly Epstein\'s favoured GRATs'),
943
- FileCfg(
899
+ DocCfg(id='019444', description=f"Frontlines magazine article 'Biologists Dig Deeper'", date='2008-01-01'),
900
+ DocCfg(id='023720', description=f'Future Science article: "Is Shame Necessary?" by {JENNIFER_JACQUET}'),
901
+ DocCfg(id='027051', description=f"German language article about the 2013 Lifeball / AIDS Gala", date='2013-01-01'),
902
+ DocCfg(id='021094', description=f"Globe and Mail article about Gerd Heinrich"),
903
+ DocCfg(id='013268', description=f"JetGala article about airplane interior designer {ERIC_ROTH}"),
904
+ DocCfg(id='033480', description=f"{JOHN_BOLTON_PRESS_CLIPPING}", date='2018-04-06', duplicate_ids=['033481']),
905
+ DocCfg(id='013403', description=f"Lexis Nexis result from The Evening Standard about Bernie Madoff", date='2009-12-24'),
906
+ DocCfg(id='023102', description=f"Litigation Daily article about {REID_WEINGARTEN}", date='2015-09-04'),
907
+ DocCfg(id='029340', description=f'MarketWatch article about estate taxes, particularly Epstein\'s favoured GRATs'),
908
+ DocCfg(
944
909
  id='029416',
945
910
  description=f"National Enquirer / Radar Online v. FBI FOIA lawsuit court filing",
946
911
  date='2017-05-25',
947
912
  duplicate_ids=['029405']
948
913
  ),
949
- FileCfg(id='015462', description=f'Nautilus Education magazine (?) issue'),
950
- FileCfg(id='029925', description=f"New Yorker article about the placebo effect by Michael Specter"),
951
- FileCfg(id='031972', description=f"{NYT_ARTICLE} #MeToo allegations against {LAWRENCE_KRAUSS}", date='2018-03-07'),
952
- FileCfg(id='032435', description=f'{NYT_ARTICLE} Chinese butlers'),
953
- FileCfg(id='029452', description=f"{NYT_ARTICLE} {PETER_THIEL}"),
954
- FileCfg(id='025328', description=f"{NYT_ARTICLE} radio host Bob Fass and Robert Durst"),
955
- FileCfg(id='033479', description=f"{NYT_ARTICLE} Rex Tillerson", date='2010-03-14'),
956
- FileCfg(id='028481', description=f'{NYT_ARTICLE} {STEVE_BANNON}', date='2018-03-09'),
957
- FileCfg(id='033181', description=f'{NYT_ARTICLE} Trump\'s tax avoidance', date='2016-10-31'),
958
- FileCfg(id='023097', description=f"{NYT_COLUMN} The Aristocrats by Frank Rich 'The Greatest Dirty Joke Ever Told'"),
959
- FileCfg(id='033365', description=f'{NYT_COLUMN} trade war with China by Kevin Rudd'),
960
- FileCfg(id='019439', description=f"{NYT_COLUMN} the Clintons and money by Maureen Dowd", date='2013-08-17'),
961
- FileCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
962
- FileCfg(id='013435', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's address book", date='2011-03-11'),
963
- FileCfg(id='013440', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's gag order", date='2011-07-13'),
964
- FileCfg(id='029238', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's plea deal"),
965
- FileCfg(id='021775', description=f"{PALM_BEACH_POST_ARTICLE} 'He Was 50. And They Were Girls'"),
966
- FileCfg(id='022989', description=f"{PALM_BEACH_POST_ARTICLE} alleged rape of 13 year old by Trump"),
967
- FileCfg(id='022987', description=f"{PALM_BEACH_POST_ARTICLE} just a headline on Trump and Epstein"),
968
- FileCfg(id='015028', description=f"{PALM_BEACH_POST_ARTICLE} reopening Epstein's criminal case"),
969
- FileCfg(id='022990', description=f"{PALM_BEACH_POST_ARTICLE} Trump and Epstein"),
970
- FileCfg(id='016996', description=f'SciencExpress article "Quantitative Analysis of Culture Using Millions of Digitized Books" by Jean-Baptiste Michel'),
971
- FileCfg(id='030030', description=SHIMON_POST_ARTICLE, date='2011-03-29'),
972
- FileCfg(id='025610', description=SHIMON_POST_ARTICLE, date='2011-04-03'),
973
- FileCfg(id='023458', description=SHIMON_POST_ARTICLE, date='2011-04-17'),
974
- FileCfg(id='023487', description=SHIMON_POST_ARTICLE, date='2011-04-18'),
975
- FileCfg(id='030531', description=SHIMON_POST_ARTICLE, date='2011-05-16'),
976
- FileCfg(id='024958', description=SHIMON_POST_ARTICLE, date='2011-05-08'),
977
- FileCfg(id='030060', description=SHIMON_POST_ARTICLE, date='2011-05-13'),
978
- FileCfg(id='031834', description=SHIMON_POST_ARTICLE, date='2011-05-16'),
979
- FileCfg(id='023517', description=SHIMON_POST_ARTICLE, date='2011-05-26'),
980
- FileCfg(id='030268', description=SHIMON_POST_ARTICLE, date='2011-05-29'),
981
- FileCfg(id='029628', description=SHIMON_POST_ARTICLE, date='2011-06-04'),
982
- FileCfg(id='018085', description=SHIMON_POST_ARTICLE, date='2011-06-07'),
983
- FileCfg(id='030156', description=SHIMON_POST_ARTICLE, date='2011-06-22'),
984
- FileCfg(id='031876', description=SHIMON_POST_ARTICLE, date='2011-06-14'),
985
- FileCfg(id='032171', description=SHIMON_POST_ARTICLE, date='2011-06-26'),
986
- FileCfg(id='029932', description=SHIMON_POST_ARTICLE, date='2011-07-03'),
987
- FileCfg(id='031913', description=SHIMON_POST_ARTICLE, date='2011-08-24'),
988
- FileCfg(id='024592', description=SHIMON_POST_ARTICLE, date='2011-08-25'),
989
- FileCfg(id='024997', description=SHIMON_POST_ARTICLE, date='2011-09-08'),
990
- FileCfg(id='031941', description=SHIMON_POST_ARTICLE, date='2011-11-17'),
991
- FileCfg(id='021092', description=f'{SINGLE_PAGE} Tatler article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
992
- FileCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
993
- FileCfg(id='030829', description=f'South Florida Sun Sentinel article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
994
- FileCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
995
- FileCfg(id='030333', description=f'The Independent article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
996
- FileCfg(
914
+ DocCfg(id='015462', description=f'Nautilus Education magazine (?) issue'),
915
+ DocCfg(id='029925', description=f"New Yorker article about the placebo effect by Michael Specter"),
916
+ DocCfg(id='031972', description=f"{NYT_ARTICLE} #MeToo allegations against {LAWRENCE_KRAUSS}", date='2018-03-07'),
917
+ DocCfg(id='032435', description=f'{NYT_ARTICLE} Chinese butlers'),
918
+ DocCfg(id='029452', description=f"{NYT_ARTICLE} {PETER_THIEL}"),
919
+ DocCfg(id='025328', description=f"{NYT_ARTICLE} radio host Bob Fass and Robert Durst"),
920
+ DocCfg(id='033479', description=f"{NYT_ARTICLE} Rex Tillerson", date='2010-03-14'),
921
+ DocCfg(id='028481', description=f'{NYT_ARTICLE} {STEVE_BANNON}', date='2018-03-09'),
922
+ DocCfg(id='033181', description=f'{NYT_ARTICLE} Trump\'s tax avoidance', date='2016-10-31'),
923
+ DocCfg(id='023097', description=f"{NYT_COLUMN} The Aristocrats by Frank Rich 'The Greatest Dirty Joke Ever Told'"),
924
+ DocCfg(id='033365', description=f'{NYT_COLUMN} trade war with China by Kevin Rudd'),
925
+ DocCfg(id='019439', description=f"{NYT_COLUMN} the Clintons and money by Maureen Dowd", date='2013-08-17'),
926
+ DocCfg(id='021093', description=f"page of unknown article about Epstein and Maxwell"),
927
+ DocCfg(id='013435', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's address book", date='2011-03-11'),
928
+ DocCfg(id='013440', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's gag order", date='2011-07-13'),
929
+ DocCfg(id='029238', description=f"{PALM_BEACH_DAILY_ARTICLE} Epstein's plea deal"),
930
+ DocCfg(id='021775', description=f"{PALM_BEACH_POST_ARTICLE} 'He Was 50. And They Were Girls'"),
931
+ DocCfg(id='022989', description=f"{PALM_BEACH_POST_ARTICLE} alleged rape of 13 year old by Trump"),
932
+ DocCfg(id='022987', description=f"{PALM_BEACH_POST_ARTICLE} just a headline on Trump and Epstein"),
933
+ DocCfg(id='015028', description=f"{PALM_BEACH_POST_ARTICLE} reopening Epstein's criminal case"),
934
+ DocCfg(id='022990', description=f"{PALM_BEACH_POST_ARTICLE} Trump and Epstein"),
935
+ DocCfg(id='016996', description=f'SciencExpress article "Quantitative Analysis of Culture Using Millions of Digitized Books" by Jean-Baptiste Michel'),
936
+ DocCfg(id='030030', description=SHIMON_POST_ARTICLE, date='2011-03-29'),
937
+ DocCfg(id='025610', description=SHIMON_POST_ARTICLE, date='2011-04-03'),
938
+ DocCfg(id='023458', description=SHIMON_POST_ARTICLE, date='2011-04-17'),
939
+ DocCfg(id='023487', description=SHIMON_POST_ARTICLE, date='2011-04-18'),
940
+ DocCfg(id='030531', description=SHIMON_POST_ARTICLE, date='2011-05-16'),
941
+ DocCfg(id='024958', description=SHIMON_POST_ARTICLE, date='2011-05-08'),
942
+ DocCfg(id='030060', description=SHIMON_POST_ARTICLE, date='2011-05-13'),
943
+ DocCfg(id='031834', description=SHIMON_POST_ARTICLE, date='2011-05-16'),
944
+ DocCfg(id='023517', description=SHIMON_POST_ARTICLE, date='2011-05-26'),
945
+ DocCfg(id='030268', description=SHIMON_POST_ARTICLE, date='2011-05-29'),
946
+ DocCfg(id='029628', description=SHIMON_POST_ARTICLE, date='2011-06-04'),
947
+ DocCfg(id='018085', description=SHIMON_POST_ARTICLE, date='2011-06-07'),
948
+ DocCfg(id='030156', description=SHIMON_POST_ARTICLE, date='2011-06-22'),
949
+ DocCfg(id='031876', description=SHIMON_POST_ARTICLE, date='2011-06-14'),
950
+ DocCfg(id='032171', description=SHIMON_POST_ARTICLE, date='2011-06-26'),
951
+ DocCfg(id='029932', description=SHIMON_POST_ARTICLE, date='2011-07-03'),
952
+ DocCfg(id='031913', description=SHIMON_POST_ARTICLE, date='2011-08-24'),
953
+ DocCfg(id='024592', description=SHIMON_POST_ARTICLE, date='2011-08-25'),
954
+ DocCfg(id='024997', description=SHIMON_POST_ARTICLE, date='2011-09-08'),
955
+ DocCfg(id='031941', description=SHIMON_POST_ARTICLE, date='2011-11-17'),
956
+ DocCfg(id='021092', description=f'{SINGLE_PAGE} Tatler article about {GHISLAINE_MAXWELL} shredding documents', date='2019-08-15'),
957
+ DocCfg(id='031191', description=f"{SINGLE_PAGE} unknown article about Epstein and Trump's relationship in 1997"),
958
+ DocCfg(id='030829', description=f'South Florida Sun Sentinel article about {BRAD_EDWARDS} and {JEFFREY_EPSTEIN}'),
959
+ DocCfg(id='026520', description=f'Spanish language article about {SULTAN_BIN_SULAYEM}', date='2013-09-27'),
960
+ DocCfg(id='030333', description=f'The Independent article about Prince Andrew, Epstein, and Epstein\'s butler who stole his address book'),
961
+ DocCfg(
997
962
  id='031736',
998
963
  description=f"{TRANSLATION} Arabic article by Abdulnaser Salamah 'Trump; Prince of Believers (Caliph)!'",
999
964
  date='2017-05-13',
1000
965
  ),
1001
- FileCfg(id='025094', description=f'{TRANSLATION} Spanish article about Cuba', date='2015-11-08'),
1002
- FileCfg(id='010754', description=f"U.S. News article about Yitzhak Rabin"),
1003
- FileCfg(id='031794', description=f"very short French magazine clipping"),
1004
- FileCfg(id='014498', description=VI_DAILY_NEWS_ARTICLE, date='2016-12-13'),
1005
- FileCfg(id='031171', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-06'),
1006
- FileCfg(id='023048', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-27'),
1007
- FileCfg(id='023046', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-27'),
1008
- FileCfg(id='031170', description=VI_DAILY_NEWS_ARTICLE, date='2019-03-06'),
1009
- FileCfg(id='016506', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-28'),
1010
- FileCfg(id='016507', description=f"{VI_DAILY_NEWS_ARTICLE} 'Perversion of Justice' by {JULIE_K_BROWN}", date='2018-12-19'),
1011
- FileCfg(id='019212', description=f'{WAPO} and Times Tribune articles about Bannon, Trump, and healthcare execs'),
966
+ DocCfg(id='025094', description=f'{TRANSLATION} Spanish article about Cuba', date='2015-11-08'),
967
+ DocCfg(id='010754', description=f"U.S. News article about Yitzhak Rabin"),
968
+ DocCfg(id='031794', description=f"very short French magazine clipping"),
969
+ DocCfg(id='014498', description=VI_DAILY_NEWS_ARTICLE, date='2016-12-13'),
970
+ DocCfg(id='031171', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-06'),
971
+ DocCfg(id='023048', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-27'),
972
+ DocCfg(id='023046', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-27'),
973
+ DocCfg(id='031170', description=VI_DAILY_NEWS_ARTICLE, date='2019-03-06'),
974
+ DocCfg(id='016506', description=VI_DAILY_NEWS_ARTICLE, date='2019-02-28'),
975
+ DocCfg(id='016507', description=f"{VI_DAILY_NEWS_ARTICLE} 'Perversion of Justice' by {JULIE_K_BROWN}", date='2018-12-19'),
976
+ DocCfg(id='019212', description=f'{WAPO} and Times Tribune articles about Bannon, Trump, and healthcare execs'),
977
+ ]
1012
978
 
1013
- # court docs
1014
- FileCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
1015
- FileCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
1016
- FileCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
1017
- FileCfg(id='011908', description=f"{BRUNEL_V_EPSTEIN} and Tyler McDonald d/b/a YI.org court filing"),
1018
- FileCfg(id='017603', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
1019
- FileCfg(id='017635', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
1020
- FileCfg(id='016509', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
1021
- FileCfg(id='017714', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
1022
- FileCfg(id='021824', description=f"{EDWARDS_V_DERSHOWITZ} deposition of {PAUL_G_CASSELL}"),
1023
- FileCfg(
979
+ OTHER_FILES_LEGAL = [
980
+ DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
981
+ DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
982
+ DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
983
+ DocCfg(id='011908', description=f"{BRUNEL_V_EPSTEIN} and Tyler McDonald d/b/a YI.org court filing"),
984
+ DocCfg(id='017603', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
985
+ DocCfg(id='017635', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
986
+ DocCfg(id='016509', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
987
+ DocCfg(id='017714', description=DAVID_SCHOEN_CVRA_LEXIS_SEARCH, date='2019-02-28'),
988
+ DocCfg(id='021824', description=f"{EDWARDS_V_DERSHOWITZ} deposition of {PAUL_G_CASSELL}"),
989
+ DocCfg(
1024
990
  id='010757',
1025
991
  description=f"{EDWARDS_V_DERSHOWITZ} plaintiff response to Dershowitz Motion to Determine Confidentiality of Court Records",
1026
992
  date='2015-11-23',
1027
993
  ),
1028
- FileCfg(
994
+ DocCfg(
1029
995
  id='010887',
1030
996
  description=f"{EDWARDS_V_DERSHOWITZ} Dershowitz Motion for Clarification of Confidentiality Order",
1031
997
  date='2016-01-29',
1032
998
  ),
1033
- FileCfg(
999
+ DocCfg(
1034
1000
  id='015590',
1035
1001
  description=f"{EDWARDS_V_DERSHOWITZ} Dershowitz Redacted Motion to Modify Confidentiality Order",
1036
1002
  date='2016-02-03',
1037
1003
  ),
1038
- FileCfg(
1004
+ DocCfg(
1039
1005
  id='015650',
1040
1006
  description=f"{EDWARDS_V_DERSHOWITZ} Giuffre Response to Dershowitz Motion for Clarification of Confidentiality Order",
1041
1007
  date='2016-02-08',
1042
1008
  ),
1043
- FileCfg(id='010566', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Statement of Undisputed Facts", date='2010-11-04'),
1044
- FileCfg(id='012707', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Master Contact List - Privilege Log", date='2011-03-22'),
1045
- FileCfg(id='012103', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Telephone Interview with {VIRGINIA_GIUFFRE}", date='2011-05-17'),
1046
- FileCfg(id='017488', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Deposition of Scott Rothstein", date='2012-06-22'),
1047
- FileCfg(id='029315', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Plaintiff Motion for Summary Judgment by {JACK_SCAROLA}", date='2013-09-13'),
1048
- FileCfg(id='013304', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Plaintiff Response to Epstein's Motion for Summary Judgment", date='2014-04-17'),
1049
- FileCfg(id='019352', description=FBI_REPORT,),
1050
- FileCfg(id='021434', description=FBI_REPORT,),
1051
- FileCfg(id='018872', description=FBI_SEIZED_PROPERTY,),
1052
- FileCfg(id='021569', description=FBI_SEIZED_PROPERTY,),
1053
- FileCfg(id='017792', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ}'s appearance on Wolf Blitzer"),
1054
- FileCfg(id='017767', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ} working with {JEFFREY_EPSTEIN}"),
1055
- FileCfg(id='017796', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ}"),
1056
- FileCfg(id='017935', description=f"{GIUFFRE_V_DERSHOWITZ} defamation complaint", date='2019-04-16'),
1057
- FileCfg(id='017824', description=f"{GIUFFRE_V_DERSHOWITZ} {MIAMI_HERALD} article by {JULIE_K_BROWN}"),
1058
- FileCfg(
1009
+ DocCfg(id='010566', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Statement of Undisputed Facts", date='2010-11-04'),
1010
+ DocCfg(id='012707', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Master Contact List - Privilege Log", date='2011-03-22'),
1011
+ DocCfg(id='012103', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Telephone Interview with {VIRGINIA_GIUFFRE}", date='2011-05-17'),
1012
+ DocCfg(id='017488', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Deposition of Scott Rothstein", date='2012-06-22'),
1013
+ DocCfg(id='029315', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Plaintiff Motion for Summary Judgment by {JACK_SCAROLA}", date='2013-09-13'),
1014
+ DocCfg(id='013304', description=f"{EPSTEIN_V_ROTHSTEIN_EDWARDS} Plaintiff Response to Epstein's Motion for Summary Judgment", date='2014-04-17'),
1015
+ DocCfg(id='019352', description=FBI_REPORT,),
1016
+ DocCfg(id='021434', description=FBI_REPORT,),
1017
+ DocCfg(id='018872', description=FBI_SEIZED_PROPERTY,),
1018
+ DocCfg(id='021569', description=FBI_SEIZED_PROPERTY,),
1019
+ DocCfg(id='017792', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ}'s appearance on Wolf Blitzer"),
1020
+ DocCfg(id='017767', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ} working with {JEFFREY_EPSTEIN}"),
1021
+ DocCfg(id='017796', description=f"{GIUFFRE_V_DERSHOWITZ} article about {ALAN_DERSHOWITZ}"),
1022
+ DocCfg(id='017935', description=f"{GIUFFRE_V_DERSHOWITZ} defamation complaint", date='2019-04-16'),
1023
+ DocCfg(id='017824', description=f"{GIUFFRE_V_DERSHOWITZ} {MIAMI_HERALD} article by {JULIE_K_BROWN}"),
1024
+ DocCfg(
1059
1025
  id='017818',
1060
1026
  description=f"{GIUFFRE_V_DERSHOWITZ} {MIAMI_HERALD} article about accusations against {ALAN_DERSHOWITZ} by {JULIE_K_BROWN}",
1061
1027
  date='2018-12-27',
1062
1028
  ),
1063
- FileCfg(id='017800', description=f'{GIUFFRE_V_DERSHOWITZ} {MIAMI_HERALD} "Perversion of Justice" by {JULIE_K_BROWN}'),
1064
- FileCfg(id='022237', description=f"{GIUFFRE_V_DERSHOWITZ} partial court filing with fact checking questions?"),
1065
- FileCfg(id='016197', description=f"{GIUFFRE_V_DERSHOWITZ} response to Florida Bar complaint by {ALAN_DERSHOWITZ} about David Boies from {PAUL_G_CASSELL}"),
1066
- FileCfg(id='017771', description=f'{GIUFFRE_V_DERSHOWITZ} Vanity Fair article "The Talented Mr. Epstein" by Vicky Ward', date='2011-06-27'),
1067
- FileCfg(id='014118', description=f"{GIUFFRE_V_EPSTEIN} Declaration in Support of Motion to Compel Production of Documents", date='2016-10-21'),
1068
- FileCfg(id='014652', description=f"{GIUFFRE_V_MAXWELL} Complaint", date='2015-04-22'),
1069
- FileCfg(id='015529', description=f"{GIUFFRE_V_MAXWELL} Defamation Complaint", date='2015-09-21'),
1070
- FileCfg(id='014797', description=f"{GIUFFRE_V_MAXWELL} Declaration of Laura A. Menninger in Opposition to Plaintiff's Motion", date='2017-03-17'),
1071
- FileCfg(id='011304', description=f"{GIUFFRE_V_MAXWELL} Oral Argument Transcript", date='2017-03-17'),
1072
- FileCfg(
1029
+ DocCfg(id='017800', description=f'{GIUFFRE_V_DERSHOWITZ} {MIAMI_HERALD} "Perversion of Justice" by {JULIE_K_BROWN}'),
1030
+ DocCfg(id='022237', description=f"{GIUFFRE_V_DERSHOWITZ} partial court filing with fact checking questions?"),
1031
+ DocCfg(id='016197', description=f"{GIUFFRE_V_DERSHOWITZ} response to Florida Bar complaint by {ALAN_DERSHOWITZ} about David Boies from {PAUL_G_CASSELL}"),
1032
+ DocCfg(id='017771', description=f'{GIUFFRE_V_DERSHOWITZ} Vanity Fair article "The Talented Mr. Epstein" by Vicky Ward', date='2011-06-27'),
1033
+ DocCfg(id='014118', description=f"{GIUFFRE_V_EPSTEIN} Declaration in Support of Motion to Compel Production of Documents", date='2016-10-21'),
1034
+ DocCfg(id='014652', description=f"{GIUFFRE_V_MAXWELL} Complaint", date='2015-04-22'),
1035
+ DocCfg(id='015529', description=f"{GIUFFRE_V_MAXWELL} Defamation Complaint", date='2015-09-21'),
1036
+ DocCfg(id='014797', description=f"{GIUFFRE_V_MAXWELL} Declaration of Laura A. Menninger in Opposition to Plaintiff's Motion", date='2017-03-17'),
1037
+ DocCfg(id='011304', description=f"{GIUFFRE_V_MAXWELL} Oral Argument Transcript", date='2017-03-17'),
1038
+ DocCfg(
1073
1039
  id='014788',
1074
1040
  description=f"{GIUFFRE_V_MAXWELL} Maxwell Response to Plaintiff's Omnibus Motion in Limine",
1075
1041
  date='2017-03-17',
1076
1042
  duplicate_ids=['011463'],
1077
1043
  ),
1078
- FileCfg(
1044
+ DocCfg(
1079
1045
  id='019297',
1080
1046
  description=f'{GIUFFRE_V_MAXWELL} letter from {ALAN_DERSHOWITZ} lawyer Andrew G. Celli',
1081
1047
  date='2018-02-07'
1082
1048
  ),
1083
- FileCfg(
1049
+ DocCfg(
1084
1050
  id='025937',
1085
1051
  description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Affidavit of Tiffany Doe describing Jane Doe being raped by Epstein and Trump',
1086
1052
  date='2016-06-20',
1087
1053
  ),
1088
- FileCfg(id='025939', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Affidavit of Jane Doe describing being raped by Epstein', date='2016-06-20'),
1089
- FileCfg(id='013489', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Affidavit of {BRAD_EDWARDS}', date='2010-07-20'),
1090
- FileCfg(id='029398', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} article in Law.com'),
1091
- FileCfg(id='026854', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Civil Docket"),
1092
- FileCfg(id='026384', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Complaint for rape and sexual abuse", date='2016-06-20'),
1093
- FileCfg(id='013463', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Deposition of Scott Rothstein', date='2010-03-23'),
1094
- FileCfg(id='029257', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} allegations and identity of plaintiff Katie Johnson', date='2016-04-26'),
1095
- FileCfg(id='032321', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Notice of Initial Conference", date='2016-10-04'),
1096
- FileCfg(id='010735', description=f"{JANE_DOE_V_USA} Dershowitz Reply in Support of Motion for Limited Intervention", date='2015-02-02'),
1097
- FileCfg(id='014084', description=f"{JANE_DOE_V_USA} Jane Doe Response to Dershowitz's Motion for Limited Intervention", date='2015-03-24'),
1098
- FileCfg(id='023361', description=f"{JASTA_SAUDI_LAWSUIT} legal text and court documents", date='2012-01-20'),
1099
- FileCfg(id='017830', description=f"{JASTA_SAUDI_LAWSUIT} legal text and court documents"),
1100
- FileCfg(id='017904', description=f"{JASTA_SAUDI_LAWSUIT} Westlaw search results", date='2019-01-01'),
1101
- FileCfg(id='014037', description=f"Journal of Criminal Law and Criminology article on {CVRA}"),
1102
- FileCfg(id='020662', description=f"letter from {ALAN_DERSHOWITZ}'s British lawyers Mishcon de Reya to Daily Mail threatening libel suit"),
1103
- FileCfg(
1054
+ DocCfg(id='025939', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Affidavit of Jane Doe describing being raped by Epstein', date='2016-06-20'),
1055
+ DocCfg(id='013489', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Affidavit of {BRAD_EDWARDS}', date='2010-07-20'),
1056
+ DocCfg(id='029398', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} article in Law.com'),
1057
+ DocCfg(id='026854', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Civil Docket"),
1058
+ DocCfg(id='026384', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Complaint for rape and sexual abuse", date='2016-06-20'),
1059
+ DocCfg(id='013463', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} Deposition of Scott Rothstein', date='2010-03-23'),
1060
+ DocCfg(id='029257', description=f'{JANE_DOE_V_EPSTEIN_TRUMP} allegations and identity of plaintiff Katie Johnson', date='2016-04-26'),
1061
+ DocCfg(id='032321', description=f"{JANE_DOE_V_EPSTEIN_TRUMP} Notice of Initial Conference", date='2016-10-04'),
1062
+ DocCfg(id='010735', description=f"{JANE_DOE_V_USA} Dershowitz Reply in Support of Motion for Limited Intervention", date='2015-02-02'),
1063
+ DocCfg(id='014084', description=f"{JANE_DOE_V_USA} Jane Doe Response to Dershowitz's Motion for Limited Intervention", date='2015-03-24'),
1064
+ DocCfg(id='023361', description=f"{JASTA_SAUDI_LAWSUIT} legal text and court documents", date='2012-01-20'),
1065
+ DocCfg(id='017830', description=f"{JASTA_SAUDI_LAWSUIT} legal text and court documents"),
1066
+ DocCfg(id='017904', description=f"{JASTA_SAUDI_LAWSUIT} Westlaw search results", date='2019-01-01'),
1067
+ DocCfg(id='014037', description=f"Journal of Criminal Law and Criminology article on {CVRA}"),
1068
+ DocCfg(id='020662', description=f"letter from {ALAN_DERSHOWITZ}'s British lawyers Mishcon de Reya to Daily Mail threatening libel suit"),
1069
+ DocCfg(
1104
1070
  id='010560',
1105
1071
  description=f"letter from Gloria Allred to {SCOTT_J_LINK} alleging abuse of a girl from Kansas",
1106
1072
  date='2019-06-19',
1107
1073
  ),
1108
- FileCfg(
1074
+ DocCfg(
1109
1075
  id='031447',
1110
1076
  description=f"letter from {MARTIN_WEINBERG} to Melanie Ann Pustay and Sean O'Neill re: an Epstein FOIA request"
1111
1077
  ),
1112
- FileCfg(
1078
+ DocCfg(
1113
1079
  id='028965',
1114
1080
  description=f"letter from {MARTIN_WEINBERG} to ABC / Good Morning America threatening libel lawsuit",
1115
1081
  duplicate_ids=['028928']
1116
1082
  ),
1117
- FileCfg(
1083
+ DocCfg(
1118
1084
  id='026793',
1119
1085
  description=f"letter from {STEVEN_HOFFENBERG}'s lawyers at Mintz Fraade offering to take over Epstein's business and resolve his legal issues",
1120
1086
  date='2018-03-23',
1121
1087
  ),
1122
- FileCfg(
1088
+ DocCfg(
1123
1089
  id='016420',
1124
1090
  description=f"{NEW_YORK_V_EPSTEIN} New York Post Motion to Unseal Appellate Briefs",
1125
1091
  date='2019-01-11',
1126
1092
  ),
1127
- FileCfg(id='028540', description=f"SCOTUS decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
1128
- FileCfg(id='012197', description=f"SDFL Response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
1129
- FileCfg(id='022277', description=f"{TEXT_OF_US_LAW} National Labour Relations Board (NLRB)"),
1093
+ DocCfg(id='028540', description=f"SCOTUS decision in Budha Ismail Jam et al. v. INTERNATIONAL FINANCE CORP"),
1094
+ DocCfg(id='012197', description=f"SDFL Response to {JAY_LEFKOWITZ} on Epstein Plea Agreement Compliance"),
1095
+ DocCfg(id='022277', description=f"{TEXT_OF_US_LAW} National Labour Relations Board (NLRB)"),
1096
+ ]
1130
1097
 
1131
- # conferences
1132
- FileCfg(id='030769', description=f"2017 Independent Filmmaker Project (IFP) Gotham Awards invitation"),
1133
- FileCfg(id='014951', description=f"2017 TED Talks program", date='2017-04-20'),
1134
- FileCfg(id='014315', description=f'{BOFA_MERRILL} 2016 Future of Financials Conference'),
1135
- FileCfg(id='026825', description=f"{DEUTSCHE_BANK} Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1136
- FileCfg(id='017526', description=f'Intellectual Jazz conference brochure f. {DAVID_BLAINE}'),
1137
- FileCfg(id='023120', description=f"{LAWRENCE_KRAUSS} 'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more (old draft)"),
1138
- FileCfg(id='023123', description=f"{LAWRENCE_KRAUSS} 'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more", duplicate_ids=['023121'], dupe_type='earlier'),
1139
- FileCfg(id='031359', description=f"{NOBEL_CHARITABLE_TRUST} Earth Environment Convention about ESG investing"),
1140
- FileCfg(id='031354', description=f'{NOBEL_CHARITABLE_TRUST} "Thinking About the Environment and Technology" report 2011'),
1141
- FileCfg(id='024179', description=f'president and first lady schedule at 67th U.N. General Assembly', date='2012-09-21'),
1142
- FileCfg(id='029427', description=f"seems related to an IRL meeting about concerns China will attempt to absorb Mongolia"),
1143
- FileCfg(
1098
+ OTHER_FILES_CONFERENCES = [
1099
+ DocCfg(id='014315', author=BOFA_MERRILL, description=f'2016 Future of Financials Conference'),
1100
+ DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
1101
+ DocCfg(
1144
1102
  id='024185',
1145
- description=f'schedule of 67th U.N. General Assembly w/"Presidents Private Dinner - Jeffrey Epstine (sic)"',
1103
+ author=UN_GENERAL_ASSEMBLY,
1104
+ description=f'schedule including "Presidents Private Dinner - Jeffrey Epstine (sic)"',
1146
1105
  date='2012-09-21',
1147
1106
  ),
1148
- FileCfg(id='025797', description=f'someone\'s notes from Aspen Strategy Group', date='2013-05-29'),
1149
- FileCfg(id='017524', description=f"{SWEDISH_LIFE_SCIENCES_SUMMIT} 2012 program"),
1150
- FileCfg(id='026747', description=f"{SWEDISH_LIFE_SCIENCES_SUMMIT} 2017 program", date='2017-08-23'),
1151
- FileCfg(id='019300', description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
1152
- FileCfg(id='022267', description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1153
- FileCfg(id='022407', description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1154
- FileCfg(
1107
+ DocCfg(id='014951', description=f"2017 TED Talks program", date='2017-04-20'),
1108
+ DocCfg(id='026825', description=f"{DEUTSCHE_BANK} Asset & Wealth Management featured speaker bios"), # Really "Deutsche Asset" which may not be Deutsche Bank?
1109
+ DocCfg(id='017526', description=f'Intellectual Jazz conference brochure f. {DAVID_BLAINE}'),
1110
+ DocCfg(id='023120', description=f"{LAWRENCE_KRAUSS} 'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more (old draft)"),
1111
+ DocCfg(id='023123', description=f"{LAWRENCE_KRAUSS} 'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more", duplicate_ids=['023121'], dupe_type='earlier'),
1112
+ DocCfg(id='031359', description=f"{NOBEL_CHARITABLE_TRUST} Earth Environment Convention about ESG investing"),
1113
+ DocCfg(id='031354', description=f'{NOBEL_CHARITABLE_TRUST} "Thinking About the Environment and Technology" report 2011'),
1114
+ DocCfg(id='029427', description=f"seems related to an IRL meeting about concerns China will attempt to absorb Mongolia"),
1115
+ DocCfg(id='025797', description=f'someone\'s notes from Aspen Strategy Group', date='2013-05-29'),
1116
+ DocCfg(id='017524', description=f"{SWEDISH_LIFE_SCIENCES_SUMMIT} 2012 program"),
1117
+ DocCfg(id='026747', description=f"{SWEDISH_LIFE_SCIENCES_SUMMIT} 2017 program", date='2017-08-23'),
1118
+ DocCfg(id='019300', description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
1119
+ DocCfg(id='022267', description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
1120
+ DocCfg(id='022407', description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
1121
+ DocCfg(
1155
1122
  id='017060',
1156
1123
  description=f'World Economic Forum (WEF) Annual Meeting 2011 List of Participants',
1157
1124
  date='2011-01-18',
1158
1125
  ),
1126
+ ]
1159
1127
 
1160
- # press releases, reports, etc.
1161
- FileCfg(id='024631', description=f"Ackrell Capital Cannabis Investment Report 2018"),
1162
- FileCfg(id='016111', description=f"{BOFA_MERRILL} 'GEMs Paper #26 Saudi Arabia: beyond oil but not so fast'", date='2016-06-30'),
1163
- FileCfg(id='010609', description=f"{BOFA_MERRILL} 'Liquid Insight Trump\'s effect on MXN'", date='2016-09-22'),
1164
- FileCfg(id='025978', description=f"{BOFA_MERRILL} 'Understanding when risk parity risk Increases'", date='2016-08-09'),
1165
- FileCfg(id='014404', description=f'{BOFA_MERRILL} Japan Investment Strategy Report', date='2016-11-18'),
1166
- FileCfg(id='014410', description=f'{BOFA_MERRILL} Japan Investment Strategy Report', date='2016-11-18'),
1167
- FileCfg(id='014424', description=f"{BOFA_MERRILL} 'Japan Macro Watch'", date='2016-11-14'),
1168
- FileCfg(id='014731', description=f"{BOFA_MERRILL} 'Global Rates, FX & EM 2017 Year Ahead'", date='2016-11-16'),
1169
- FileCfg(id='014432', description=f"{BOFA_MERRILL} 'Global Cross Asset Strategy - Year Ahead The Trump inflection'", date='2016-11-30'),
1170
- FileCfg(id='014460', description=f"{BOFA_MERRILL} 'European Equity Strategy 2017'", date='2016-12-01'),
1171
- FileCfg(id='014972', description=f"{BOFA_MERRILL} 'Global Equity Volatility Insights'", date='2017-06-20'),
1172
- FileCfg(id='014622', description=f"{BOFA_MERRILL} 'Top 10 US Ideas Quarterly'", date='2017-01-03'),
1173
- FileCfg(id='023069', description=f"{BOFA_MERRILL} 'Equity Strategy Focus Point Death and Taxes'", date='2017-01-29'),
1174
- FileCfg(id='014721', description=f"{BOFA_MERRILL} 'Cause and Effect Fade the Trump Risk Premium'", date='2017-02-13'),
1175
- FileCfg(id='014887', description=f"{BOFA_MERRILL} 'Internet / e-Commerce'", date='2017-04-06'),
1176
- FileCfg(id='014873', description=f"{BOFA_MERRILL} 'Hess Corp'", date='2017-04-11'),
1177
- FileCfg(id='023575', description=f"{BOFA_MERRILL} 'Global Equity Volatility Insights'", date='2017-06-01'),
1178
- FileCfg(id='014518', description=f'{BOFA_WEALTH_MGMT} tax alert', date='2016-05-02'),
1179
- FileCfg(id='029438', description=f'{BOFA_WEALTH_MGMT} tax report', date='2018-01-02'),
1180
- FileCfg(id='024271', description=f"Blockchain Capital and Brock Pierce pitch deck", date='2015-10-01'),
1181
- FileCfg(id='024302', description=f"Carvana form 14A SEC filing proxy statement", date='2019-04-23'),
1182
- FileCfg(id='029305', description=f"CCH Tax Briefing on end of Defense of Marriage Act", date='2013-06-27'),
1183
- FileCfg(id='024817', description=f"Cowen's Collective View of CBD / Cannabis report"),
1184
- FileCfg(id='026794', description=f"{DEUTSCHE_BANK} Global Public Affairs report: 'Global Political and Regulatory Risk in 2015/2016'"),
1185
- FileCfg(id='022361', description=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01'),
1186
- FileCfg(id='022325', description=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20'),
1187
- FileCfg(id='022330', description=f'{DEUTSCHE_BANK_TAX_TOPICS} table of contents', date='2013-12-20'),
1188
- FileCfg(id='019440', description=DEUTSCHE_BANK_TAX_TOPICS, date='2014-01-29'),
1189
- FileCfg(id='024202', description=f"Electron Capital Partners LLC 'Global Utility White Paper'", date='2013-03-08'),
1190
- FileCfg(id='022372', description=f'Ernst & Young 2016 election report'),
1191
- FileCfg(
1128
+ # All authors of documents in this category will be marked uninteresting
1129
+ OTHER_FILES_FINANCE = [
1130
+ DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
1131
+ DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
1132
+ DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
1133
+ DocCfg(id='014404', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
1134
+ DocCfg(id='014410', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
1135
+ DocCfg(id='014424', author=BOFA_MERRILL, description=f"Japan Macro Watch", date='2016-11-14'),
1136
+ DocCfg(id='014731', author=BOFA_MERRILL, description=f"Global Rates, FX & EM 2017 Year Ahead", date='2016-11-16'),
1137
+ DocCfg(id='014432', author=BOFA_MERRILL, description=f"Global Cross Asset Strategy - Year Ahead The Trump inflection", date='2016-11-30'),
1138
+ DocCfg(id='014460', author=BOFA_MERRILL, description=f"European Equity Strategy 2017", date='2016-12-01'),
1139
+ DocCfg(id='014972', author=BOFA_MERRILL, description=f"Global Equity Volatility Insights", date='2017-06-20'),
1140
+ DocCfg(id='014622', author=BOFA_MERRILL, description=f"Top 10 US Ideas Quarterly", date='2017-01-03'),
1141
+ DocCfg(id='023069', author=BOFA_MERRILL, description=f"Equity Strategy Focus Point Death and Taxes", date='2017-01-29'),
1142
+ DocCfg(id='014721', author=BOFA_MERRILL, description=f"Cause and Effect Fade the Trump Risk Premium", date='2017-02-13'),
1143
+ DocCfg(id='014887', author=BOFA_MERRILL, description=f"Internet / e-Commerce", date='2017-04-06'),
1144
+ DocCfg(id='014873', author=BOFA_MERRILL, description=f"Hess Corp", date='2017-04-11'),
1145
+ DocCfg(id='023575', author=BOFA_MERRILL, description=f"Global Equity Volatility Insights", date='2017-06-01'),
1146
+ DocCfg(id='014518', author=BOFA_WEALTH_MGMT, description=f'tax alert', date='2016-05-02'),
1147
+ DocCfg(id='029438', author=BOFA_WEALTH_MGMT, description=f'tax report', date='2018-01-02'),
1148
+ DocCfg(id='024302', author='Carvana', description=f"form 14A SEC filing proxy statement", date='2019-04-23'),
1149
+ DocCfg(id='029305', author='CCH Tax', description=f"Briefing on end of Defense of Marriage Act", date='2013-06-27'),
1150
+ DocCfg(id='026794', author=DEUTSCHE_BANK, description=f"Global Political and Regulatory Risk in 2015/2016"),
1151
+ DocCfg(id='022361', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-05-01'),
1152
+ DocCfg(id='022325', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20'),
1153
+ DocCfg(id='022330', author=DEUTSCHE_BANK_TAX_TOPICS, date='2013-12-20', description='table of contents'),
1154
+ DocCfg(id='019440', author=DEUTSCHE_BANK_TAX_TOPICS, date='2014-01-29'),
1155
+ DocCfg(id='024202', author=ELECTRON_CAPITAL_PARTNERS, description=f"Global Utility White Paper", date='2013-03-08'),
1156
+ DocCfg(id='022372', author='Ernst & Young', description=f'2016 election report'),
1157
+ DocCfg(
1192
1158
  id='025663',
1193
- description=f"{GOLDMAN_REPORT} 'An Overview of the Current State of Cryptocurrencies and Blockchain'",
1159
+ author=GOLDMAN_INVESTMENT_MGMT,
1160
+ description=f"An Overview of the Current State of Cryptocurrencies and Blockchain",
1194
1161
  date='2017-11-15',
1162
+ is_interesting=True,
1195
1163
  ),
1196
- FileCfg(id='014532', description=f"{GOLDMAN_REPORT} 'Outlook - Half Full'", date='2017-01-01'),
1197
- FileCfg(id='026909', description=f"{GOLDMAN_REPORT} 'The Unsteady Undertow Commands the Seas (Temporarily)'", date='2018-10-14'),
1198
- FileCfg(id='026944', description=f"{GOLDMAN_REPORT} 'Risk of a US-Iran Military Conflict'", date='2019-05-23'),
1199
- FileCfg(id='026679', description=f"Invesco report: 'Global Sovereign Asset Management Study 2017'"),
1200
- FileCfg(id='023096', description=f'{EPSTEIN_FOUNDATION} blog', date='2012-11-15'),
1201
- FileCfg(id='029326', description=f'{EPSTEIN_FOUNDATION} {PRESS_RELEASE}', date='2013-02-15'),
1202
- FileCfg(id='026565', description=f'{EPSTEIN_FOUNDATION} {PRESS_RELEASE}, maybe a draft of 029326', date='2013-02-15'),
1203
- FileCfg(id='026572', description=f"{JP_MORGAN} Global Asset Allocation report", date='2012-11-09'),
1204
- FileCfg(id='030848', description=f"{JP_MORGAN} Global Asset Allocation report", date='2013-03-28'),
1205
- FileCfg(id='030840', description=f"{JP_MORGAN} Market Thoughts"),
1206
- FileCfg(id='022350', description=f"{JP_MORGAN} report on tax efficiency of Intentionally Defective Grantor Trusts (IDGT)"),
1207
- FileCfg(id='025242', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-04-09'),
1208
- FileCfg(id='030010', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-06-14'),
1209
- FileCfg(id='030808', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-11'),
1210
- FileCfg(id='025221', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-25'),
1211
- FileCfg(id='025229', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-08-04'),
1212
- FileCfg(id='030814', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-11-21'),
1213
- FileCfg(id='024132', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-03-15'),
1214
- FileCfg(id='024194', description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-10-22'),
1215
- FileCfg(id='025296', description=f'Laffer Associates report predicting Trump win', date='2016-07-06'),
1216
- FileCfg(id='025551', description=f'Morgan Stanley report about alternative asset managers', date='2018-01-30'),
1217
- FileCfg(id='026759', description=f'{PRESS_RELEASE} by Ritz-Carlton club about damage from Hurricane Irma', date='2017-09-13'),
1218
- FileCfg(
1219
- id='033338',
1220
- description=f"{PRESS_RELEASE} announcing Donald Trump & {NICHOLAS_RIBIS} ended their working relationship at Trump's casino",
1221
- date='2000-06-07',
1222
- ),
1223
- FileCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
1224
- FileCfg(id='020447', description=f'Promoting Constructive Vigilance: Report of the Working Group on Chinese Influence Activities in the U.S. (Hoover Group/Stanford 2018)'),
1225
- FileCfg(id='025763', description=f"S&P Economic Research: 'How Increasing Income Inequality Is Dampening U.S. Growth'", date='2014-08-05'),
1226
- FileCfg(id='019856', description=f"Sadis Goldberg LLP report on SCOTUS ruling about insider trading"),
1227
- FileCfg(id='026827', description=f'Scowcroft Group report on ISIS', date='2015-11-14'),
1228
- FileCfg(id='033220', description=f"short economic report on defense spending under Trump by Joseph G. Carson"),
1229
- FileCfg(id='026856', author='Kevin Rudd', description=f"speech 'Xi Jinping, China And The Global Order'", date='2018-06-26'),
1230
- FileCfg(id='024135', author=UBS, description=UBS_CIO_REPORT, date='2012-06-29'),
1231
- FileCfg(id='025247', author=UBS, description=UBS_CIO_REPORT, date='2012-10-25'),
1232
- FileCfg(id='025849', description=f"US Office of Government Information Services report: 'Building a Bridge Between FOIA Requesters & Agencies'"),
1233
- FileCfg(id='020824', description=f"USA Inc: A Basic Summary of America's Financial Statements compiled by Mary Meeker", date='2011-02-01'),
1164
+ DocCfg(id='014532', author=GOLDMAN_INVESTMENT_MGMT, description=f"Outlook - Half Full", date='2017-01-01'),
1165
+ DocCfg(id='026909', author=GOLDMAN_INVESTMENT_MGMT, description=f"The Unsteady Undertow Commands the Seas (Temporarily)", date='2018-10-14'),
1166
+ DocCfg(id='026944', author=GOLDMAN_INVESTMENT_MGMT, description=f"Risk of a US-Iran Military Conflict", date='2019-05-23'),
1167
+ DocCfg(id='018804', author='Integra Realty Resources', description=f"appraisal of going concern for IGY American Yacht Harbor Marina in {VIRGIN_ISLANDS}"),
1168
+ DocCfg(id='026679', author='Invesco', description=f"Global Sovereign Asset Management Study 2017"),
1169
+ DocCfg(id='033220', author='Joseph G. Carson', description=f"short economic report on defense spending under Trump"),
1170
+ DocCfg(id='026572', author=JP_MORGAN, description=f"Global Asset Allocation report", date='2012-11-09'),
1171
+ DocCfg(id='030848', author=JP_MORGAN, description=f"Global Asset Allocation report", date='2013-03-28'),
1172
+ DocCfg(id='030840', author=JP_MORGAN, description=f"Market Thoughts"),
1173
+ DocCfg(id='022350', author=JP_MORGAN, description=f"tax efficiency of Intentionally Defective Grantor Trusts (IDGT)"),
1174
+ DocCfg(id='025242', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-04-09'),
1175
+ DocCfg(id='030010', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-06-14'),
1176
+ DocCfg(id='030808', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-11'),
1177
+ DocCfg(id='025221', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-07-25'),
1178
+ DocCfg(id='025229', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-08-04'),
1179
+ DocCfg(id='030814', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2011-11-21'),
1180
+ DocCfg(id='024132', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-03-15'),
1181
+ DocCfg(id='024194', author=JP_MORGAN, description=JP_MORGAN_EYE_ON_THE_MARKET, date='2012-10-22'),
1182
+ DocCfg(id='025296', author='Laffer Associates', description=f'report predicting Trump win', date='2016-07-06'),
1183
+ DocCfg(id='020824', author='Mary Meeker', description=f"USA Inc: A Basic Summary of America's Financial Statements compiled", date='2011-02-01'),
1184
+ DocCfg(id='025551', author='Morgan Stanley', description=f'report about alternative asset managers', date='2018-01-30'),
1185
+ DocCfg(id='025763', author='S&P', description=f"Economic Research: How Increasing Income Inequality Is Dampening U.S. Growth", date='2014-08-05'),
1186
+ DocCfg(id='024135', author=UBS, description=UBS_CIO_REPORT, date='2012-06-29'),
1187
+ DocCfg(id='025247', author=UBS, description=UBS_CIO_REPORT, date='2012-10-25'),
1188
+ DocCfg(id='024631', description=f"Ackrell Capital report: Cannabis Investment Report 2018"),
1189
+ DocCfg(id='024271', description=f"Blockchain Capital and Brock Pierce pitch deck", date='2015-10-01'),
1190
+ DocCfg(id='024817', description=f"Cowen's Collective View of CBD / Cannabis report"),
1191
+ DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
1192
+ DocCfg(id='019856', description=f"Sadis Goldberg LLP report on SCOTUS ruling about insider trading"),
1193
+
1194
+ # private placement memoranda
1195
+ DocCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
1196
+ DocCfg(id='024003', description=f"New Leaf Ventures private placement memorandum"),
1197
+ ]
1234
1198
 
1235
- # letters
1236
- FileCfg(id='017789', author=ALAN_DERSHOWITZ, description=f'letter to {HARVARD} Crimson complaining he was defamed'),
1237
- FileCfg(
1199
+ OTHER_FILES_LETTERS = [
1200
+ DocCfg(id='017789', author=ALAN_DERSHOWITZ, description=f'letter to {HARVARD} Crimson complaining he was defamed'),
1201
+ DocCfg(
1238
1202
  id='019086',
1239
1203
  author=DAVID_BLAINE,
1240
1204
  description=f"{DAVID_BLAINE_VISA_LETTER} from Russia 'Svet' ({SVETLANA_POZHIDAEVA}?), names Putin puppet regimes",
1241
1205
  date='2015-05-27', # Date is a guess based on other drafts,
1242
1206
  ),
1243
- FileCfg(
1207
+ DocCfg(
1244
1208
  id='019474',
1245
1209
  author=DAVID_BLAINE,
1246
1210
  description=f"{DAVID_BLAINE_VISA_LETTER} from Russia 'Svetlana' ({SVETLANA_POZHIDAEVA}?)",
1247
1211
  date='2015-05-29',
1248
1212
  ),
1249
- FileCfg(
1213
+ DocCfg(
1250
1214
  id='019476',
1251
1215
  author=DAVID_BLAINE,
1252
1216
  description=f"{DAVID_BLAINE_VISA_LETTER} (probably {SVETLANA_POZHIDAEVA}?)",
1253
1217
  date='2015-06-01',
1254
1218
  ),
1255
- FileCfg(id='031670', description=f"letter from General Mike Flynn's lawyers to senators Mark Warner & Richard Burr about subpoena"),
1256
- FileCfg(
1219
+ DocCfg(id='031670', description=f"letter from General Mike Flynn's lawyers to senators Mark Warner & Richard Burr about subpoena"),
1220
+ DocCfg(
1257
1221
  id='026011',
1258
1222
  author='Gennady Mashtalyar',
1259
1223
  description=f"letter about algorithmic trading",
1260
1224
  date='2016-06-24', # date is based on Brexit reference but he could be backtesting,
1261
1225
  ),
1262
- FileCfg(id='026248', author='Don McGahn', description=f'letter from Trump lawyer to Devin Nunes (R-CA) about FISA courts and Trump'),
1263
- FileCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the Trump Organization", date='2011-08-07'),
1264
- FileCfg(id='022405', author=NOAM_CHOMSKY, description=f"letter attesting to Epstein's good character"),
1265
- FileCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
1266
- FileCfg(id='029304', description=f"Trump recommendation letter for recently departed Trump Organization lawyer {MICHAEL_J_BOCCIO}"),
1267
- FileCfg(id='026668', description=f"Boothbay Fund Management 2016-Q4 earnings report signed by Ari Glass"),
1268
-
1269
- # private placement memoranda
1270
- FileCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
1271
- FileCfg(id='024003', description=f"New Leaf Ventures private placement memorandum"),
1272
- FileCfg(id='018804', description=f"appraisal of going concern for IGY American Yacht Harbor Marina in {VIRGIN_ISLANDS}"),
1226
+ DocCfg(id='026248', author='Don McGahn', description=f'letter from Trump lawyer to Devin Nunes (R-CA) about FISA courts and Trump'),
1227
+ DocCfg(id='029301', author=MICHAEL_J_BOCCIO, description=f"letter from former lawyer at the Trump Organization", date='2011-08-07'),
1228
+ DocCfg(id='022405', author=NOAM_CHOMSKY, description=f"letter attesting to Epstein's good character"),
1229
+ DocCfg(id='026134', description=f'letter to someone named George about investment opportunities in the Ukraine banking sector'),
1230
+ DocCfg(id='029304', description=f"Trump recommendation letter for recently departed Trump Organization lawyer {MICHAEL_J_BOCCIO}"),
1231
+ DocCfg(id='026668', description=f"Boothbay Fund Management 2016-Q4 earnings report signed by Ari Glass"),
1232
+ ]
1273
1233
 
1274
- # property
1275
- FileCfg(id='018743', description=f"Las Vegas property listing"),
1276
- FileCfg(id='016597', description=f'letter from Trump Properties LLC appealing some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
1277
- FileCfg(id='016602', description=PALM_BEACH_CODE_ENFORCEMENT, date='2008-04-17'),
1278
- FileCfg(id='016554', description=PALM_BEACH_CODE_ENFORCEMENT, date='2008-07-17', duplicate_ids=['016616', '016574']),
1279
- FileCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
1280
- FileCfg(id='016697', description=f"{PALM_BEACH} property tax info (?) that mentions Trump"),
1281
- FileCfg(id='016599', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1282
- FileCfg(id='016600', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1283
- FileCfg(id='016601', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1284
- FileCfg(id='016694', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1285
- FileCfg(id='016552', description=f"{PALM_BEACH_TSV} info"),
1286
- FileCfg(id='016698', description=f"{PALM_BEACH_TSV} info (broken?)"),
1287
- FileCfg(id='016696', description=f"{PALM_BEACH_TSV} info (water quality?"),
1288
- FileCfg(id='016636', description=f"{PALM_BEACH_WATER_COMMITTEE} Meeting on January 29, 2009"),
1289
- FileCfg(id='022417', description=f"Park Partners NYC letter to partners in real estate project with architectural plans"),
1290
- FileCfg(id='027068', author=THE_REAL_DEAL, description=THE_REAL_DEAL_ARTICLE),
1291
- FileCfg(id='029520', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
1292
- FileCfg(
1234
+ OTHER_FILES_PROPERTY = [
1235
+ DocCfg(id='026759', author='Great Bay Condominium Owners Association', description=f'{PRESS_RELEASE} by about Hurricane Irma damage', date='2017-09-13'),
1236
+ DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
1237
+ DocCfg(id='029520', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} 'Lost Paradise at the Palm House'", date='2019-06-17'),
1238
+ DocCfg(id='018743', description=f"Las Vegas property listing"),
1239
+ DocCfg(id='016597', description=f'letter from Trump Properties LLC appealing some decision about Mar-a-Lago by {PALM_BEACH} authorities'),
1240
+ DocCfg(id='016602', description=PALM_BEACH_CODE_ENFORCEMENT, date='2008-04-17'),
1241
+ DocCfg(id='016554', description=PALM_BEACH_CODE_ENFORCEMENT, date='2008-07-17', duplicate_ids=['016616', '016574']),
1242
+ DocCfg(id='016695', description=f"{PALM_BEACH} property info (?)"),
1243
+ DocCfg(id='016697', description=f"{PALM_BEACH} property tax info (?) that mentions Trump"),
1244
+ DocCfg(id='016599', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1245
+ DocCfg(id='016600', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1246
+ DocCfg(id='016601', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1247
+ DocCfg(id='016694', description=f"{PALM_BEACH_TSV} consumption (water?)"),
1248
+ DocCfg(id='016552', description=f"{PALM_BEACH_TSV} info"),
1249
+ DocCfg(id='016698', description=f"{PALM_BEACH_TSV} info (broken?)"),
1250
+ DocCfg(id='016696', description=f"{PALM_BEACH_TSV} info (water quality?"),
1251
+ DocCfg(id='016636', description=f"{PALM_BEACH_WATER_COMMITTEE} Meeting on January 29, 2009"),
1252
+ DocCfg(id='022417', description=f"Park Partners NYC letter to partners in real estate project with architectural plans"),
1253
+ DocCfg(
1293
1254
  id='018727',
1294
1255
  description=f"{VIRGIN_ISLANDS} property deal pitch deck, building will be leased to the U.S. govt GSA",
1295
1256
  date='2014-06-01',
1296
1257
  ),
1258
+ ]
1297
1259
 
1298
- # reputation management
1299
- FileCfg(id='026582', description=f"{REPUTATION_MGMT} Epstein's internet search results at start of reputation repair campaign, maybe from {OSBORNE_LLP}"),
1300
- FileCfg(id='030573', description=f"{REPUTATION_MGMT} Epstein's unflattering Google search results, maybe screenshot by {AL_SECKEL} or {OSBORNE_LLP}"),
1301
- FileCfg(id='030875', description=f"{REPUTATION_MGMT} Epstein's Wikipedia page"),
1302
- FileCfg(id='026583', description=f"{REPUTATION_MGMT} Google search results for '{JEFFREY_EPSTEIN}' with analysis ({OSBORNE_LLP}?)"),
1303
- FileCfg(id='029350', description=f"{REPUTATION_MGMT} Microsoft Bing search results for Epstein with sex offender at top, maybe from {TYLER_SHEARS}?"),
1304
- FileCfg(
1305
- id='030426',
1306
- description=f'{REPUTATION_MGMT} {OSBORNE_LLP} reputation repair proposal (cites Michael Milken)',
1307
- date='2011-06-14',
1308
- ),
1260
+ OTHER_FILES_REPUTATION = [
1261
+ DocCfg(id='026582', description=f"Epstein's internet search results at start of reputation repair campaign, maybe from {OSBORNE_LLP}"),
1262
+ DocCfg(id='030573', description=f"Epstein's unflattering Google search results, maybe screenshot by {AL_SECKEL} or {OSBORNE_LLP}"),
1263
+ DocCfg(id='030875', description=f"Epstein's Wikipedia page"),
1264
+ DocCfg(id='026583', description=f"Google search results for '{JEFFREY_EPSTEIN}' with analysis ({OSBORNE_LLP}?)"),
1265
+ DocCfg(id='029350', description=f"Microsoft Bing search results for Epstein with sex offender at top, maybe from {TYLER_SHEARS}?"),
1266
+ DocCfg(id='030426', description=f"{OSBORNE_LLP} reputation repair proposal (cites Michael Milken)", date='2011-06-14'),
1267
+ ]
1309
1268
 
1310
- # social media / InsightsPod
1311
- FileCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20'),
1312
- FileCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets about #Brexit', date='2016-06-23'),
1313
- FileCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05'),
1314
- FileCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} forecasting election for Trump", date='2016-10-25'),
1315
- FileCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20'),
1316
- FileCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate"),
1317
- FileCfg(id='023050', description=f"{DERSH_GIUFFRE_TWEET}"),
1318
- FileCfg(id='017787', description=f"{DERSH_GIUFFRE_TWEET}"),
1319
- FileCfg(id='033433', description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-03-02'),
1320
- FileCfg(id='033432', description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-05-02'),
1321
- FileCfg(id='022213', description=f"{SCREENSHOT} Facebook group called 'Shit Pilots Say' disparaging a 'global girl'"),
1322
- FileCfg(id='030884', description=f"{TWEET} by Ed Krassenstein"),
1323
- FileCfg(id='031546', description=f"{TWEET}s by Donald Trump about Russian collusion", date='2018-01-06'),
1324
- FileCfg(id='033236', description=f'{TWEET}s about Ivanka Trump in Arabic', date='2017-05-20'),
1269
+ # social media / InsightsPod
1270
+ OTHER_FILES_SOCIAL = [
1271
+ DocCfg(id='028815', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} business plan", date='2016-08-20'),
1272
+ DocCfg(id='011170', author=ZUBAIR_AND_ANYA, description=f'{INSIGHTS_POD} collected tweets about #Brexit', date='2016-06-23'),
1273
+ DocCfg(id='032324', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} election social media trend analysis", date='2016-11-05'),
1274
+ DocCfg(id='032281', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} forecasting election for Trump", date='2016-10-25'),
1275
+ DocCfg(id='028988', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} pitch deck", date='2016-08-20'),
1276
+ DocCfg(id='026627', author=ZUBAIR_AND_ANYA, description=f"{INSIGHTS_POD} report on the presidential debate"),
1277
+ DocCfg(id='023050', description=f"{DERSH_GIUFFRE_TWEET}"),
1278
+ DocCfg(id='017787', description=f"{DERSH_GIUFFRE_TWEET}"),
1279
+ DocCfg(id='033433', description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-03-02'),
1280
+ DocCfg(id='033432', description=f"{DERSH_GIUFFRE_TWEET} / David Boies", date='2019-05-02'),
1281
+ DocCfg(id='022213', description=f"{SCREENSHOT} Facebook group called 'Shit Pilots Say' disparaging a 'global girl'"),
1282
+ DocCfg(id='030884', description=f"{TWEET} by Ed Krassenstein"),
1283
+ DocCfg(id='031546', description=f"{TWEET}s by Donald Trump about Russian collusion", date='2018-01-06'),
1284
+ DocCfg(id='033236', description=f'{TWEET}s about Ivanka Trump in Arabic', date='2017-05-20'),
1285
+ ]
1325
1286
 
1326
- # politics
1327
- FileCfg(id='029918', description=f"{DIANA_DEGETTES_CAMPAIGN} campaign bio", date='2012-01-01'),
1328
- FileCfg(id='031184', description=f"{DIANA_DEGETTES_CAMPAIGN} fundraiser invitation"),
1329
- FileCfg(id='027009', description=f"{EHUD_BARAK} speech to AIPAC", date='2013-03-03'),
1330
- FileCfg(id='026851', description=f"Politifact lying politicians chart", date='2016-07-26'),
1331
- FileCfg(
1287
+ OTHER_FILES_POLITICS = [
1288
+ DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
1289
+ DocCfg(id='026851', author='Politifact', description=f"lying politicians chart", date='2016-07-26'),
1290
+ DocCfg(
1332
1291
  id='023133',
1333
- description=f'"The Search for Peace in the Arab-Israeli Conflict" edited by {TERJE_ROD_LARSEN}, Nur Laiq, Fabrice Aidan',
1292
+ author=f"{TERJE_ROD_LARSEN}, Nur Laiq, Fabrice Aidan",
1293
+ description=f'The Search for Peace in the Arab-Israeli Conflict',
1334
1294
  date='2019-12-09',
1335
1295
  ),
1336
- FileCfg(id='024294', description=f"{STACEY_PLASKETT} campaign flier", date='2016-10-01'),
1337
- FileCfg(
1296
+ DocCfg(
1297
+ id='025849',
1298
+ author='US Office of Government Information Services',
1299
+ description=f"Building a Bridge Between FOIA Requesters & Agencies",
1300
+ ),
1301
+ DocCfg(id='029918', description=f"{DIANA_DEGETTES_CAMPAIGN} bio", date='2012-01-01'),
1302
+ DocCfg(id='031184', description=f"{DIANA_DEGETTES_CAMPAIGN} fundraiser invitation"),
1303
+ DocCfg(id='024294', description=f"{STACEY_PLASKETT} campaign flier", date='2016-10-01'),
1304
+ DocCfg(
1338
1305
  id='029357',
1339
1306
  description=f"text about Israel's challenges going into 2015, feels like it was extracted from a book",
1340
1307
  date='2015-01-15', # TODO: this is just a guess
1341
1308
  duplicate_ids=['028887'],
1342
1309
  ),
1343
- FileCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1344
- FileCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1310
+ DocCfg(id='010617', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1311
+ DocCfg(id='016699', description=TRUMP_DISCLOSURES, date='2017-01-20'),
1312
+ ]
1345
1313
 
1346
- # academia
1347
- FileCfg(id='024256', author=JOI_ITO, description=f"article 'Internet & Society: The Technologies and Politics of Control'"),
1348
- FileCfg(id='027004', author=JOSCHA_BACH, description=f"article 'The Computational Structure of Mental Representation'", date='2013-02-26'),
1349
- FileCfg(id='029539', author=LA_TIMES, description=f"Alan Trounson interview on California stem cell research and CIRM"),
1350
- FileCfg(id='027024', author=LA_TIMES, description=f"'Scientists Create Human Embryos to Make Stem Cells'", date='2013-05-15'),
1351
- FileCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
1352
- FileCfg(id='015501', author=f"{MOSHE_HOFFMAN}, Erez Yoeli, and Carlos David Navarrete", description=f"'Game Theory and Morality'"),
1353
- FileCfg(id='025143', author=ROBERT_TRIVERS, description=f"'Africa, Parasites, Intelligence'", date='2018-06-25'),
1354
- FileCfg(id='029155', author=ROBERT_TRIVERS, description=f'response sent to the Gruterites ({GORDON_GETTY} fans)', date='2018-03-19'),
1355
- FileCfg(
1314
+ OTHER_FILES_ACADEMIA = [
1315
+ DocCfg(
1316
+ id='014697',
1317
+ author=f"{LAWRENCE_KRAUSS}'s ASU Origins Project",
1318
+ description=f'report: "Challenges of AI: Envisioning and Addressing Adverse Outcomes"',
1319
+ duplicate_ids=['011284']
1320
+ ),
1321
+ DocCfg(id='024256', author=JOI_ITO, description=f"Internet & Society: The Technologies and Politics of Control"),
1322
+ DocCfg(id='027004', author=JOSCHA_BACH, description=f"The Computational Structure of Mental Representation", date='2013-02-26'),
1323
+ DocCfg(id='015501', author=f"{MOSHE_HOFFMAN}, Erez Yoeli, and Carlos David Navarrete", description=f"Game Theory and Morality"),
1324
+ DocCfg(id='025143', author=ROBERT_TRIVERS, description=f"Africa, Parasites, Intelligence", date='2018-06-25'),
1325
+ DocCfg(id='029155', author=ROBERT_TRIVERS, description=f'response sent to the Gruterites ({GORDON_GETTY} fans)', date='2018-03-19'),
1326
+ DocCfg(
1356
1327
  id='033323',
1357
1328
  author=f"{ROBERT_TRIVERS} and Nathan H. Lents",
1358
- description=f"draft of 'Does Trump Fit the Evolutionary Role of Narcissistic Sociopath?",
1329
+ description=f"'Does Trump Fit the Evolutionary Role of Narcissistic Sociopath?' (draft)",
1359
1330
  date='2018-12-07',
1360
1331
  ),
1361
- FileCfg(id='014697', description=CHALLENGES_OF_AI, duplicate_ids=['011284']),
1362
- FileCfg(id='026521', description=f"game theory paper by {MARTIN_NOWAK}, Erez Yoeli, and Moshe Hoffman"),
1363
- FileCfg(id='023416', description=HARVARD_POETRY),
1364
- FileCfg(id='023435', description=HARVARD_POETRY),
1365
- FileCfg(id='023450', description=HARVARD_POETRY),
1366
- FileCfg(id='023452', description=HARVARD_POETRY),
1367
- FileCfg(id='029517', description=HARVARD_POETRY),
1368
- FileCfg(id='029543', description=HARVARD_POETRY),
1369
- FileCfg(id='029589', description=HARVARD_POETRY),
1370
- FileCfg(id='029603', description=HARVARD_POETRY),
1371
- FileCfg(id='029298', description=HARVARD_POETRY),
1372
- FileCfg(id='029592', description=HARVARD_POETRY),
1373
- FileCfg(id='019396', description=f'{HARVARD} Economics 1545 Professor Kenneth Rogoff syllabus'),
1374
- FileCfg(id='022445', description=f"Inference: International Review of Science Feedback & Comments", date='2018-11-01'),
1375
- FileCfg(id='029355', description=f'{SCREENSHOT} quote in book about {LARRY_SUMMERS}', duplicate_ids=['029356'], dupe_type='quoted'), # 029356 is zoomed in corner
1376
- FileCfg(id='026731', description=f"text of speech by Lord Martin Rees at first inaugural Carl Sagan Lecture at Cornell"),
1332
+ DocCfg(
1333
+ id='026521',
1334
+ author=f"{MARTIN_NOWAK}, Erez Yoeli, and {MOSHE_HOFFMAN}",
1335
+ description=f"Cooperating Without Looking: Game Theory Model of Trust and Reciprocal Cooperation"
1336
+ ),
1337
+ DocCfg(id='023416', description=HARVARD_POETRY),
1338
+ DocCfg(id='023435', description=HARVARD_POETRY),
1339
+ DocCfg(id='023450', description=HARVARD_POETRY),
1340
+ DocCfg(id='023452', description=HARVARD_POETRY),
1341
+ DocCfg(id='029517', description=HARVARD_POETRY),
1342
+ DocCfg(id='029543', description=HARVARD_POETRY),
1343
+ DocCfg(id='029589', description=HARVARD_POETRY),
1344
+ DocCfg(id='029603', description=HARVARD_POETRY),
1345
+ DocCfg(id='029298', description=HARVARD_POETRY),
1346
+ DocCfg(id='029592', description=HARVARD_POETRY),
1347
+ DocCfg(id='019396', description=f'{HARVARD} Economics 1545 Professor Kenneth Rogoff syllabus'),
1348
+ DocCfg(id='022445', description=f"Inference: International Review of Science Feedback & Comments", date='2018-11-01'),
1349
+ DocCfg(id='029355', description=f'{SCREENSHOT} quote in book about {LARRY_SUMMERS}', duplicate_ids=['029356'], dupe_type='quoted'), # 029356 is zoomed in corner
1350
+ ]
1377
1351
 
1378
- # resumes and application letters
1379
- FileCfg(id='029102', description=HBS_APPLICATION_NERIO),
1380
- FileCfg(id='029104', description=HBS_APPLICATION_NERIO),
1381
- FileCfg(id='022367', description=f"{RESUME_OF} Jack J Grynberg", date='2014-07-01'),
1382
- FileCfg(
1352
# Speeches. Entries without an explicit `category` are tagged by the
# OTHER_FILES_CATEGORIES collection loop further down in this module.
OTHER_FILES_SPEECH = [
    DocCfg(
        id='027009',
        author=EHUD_BARAK,
        description="speech to AIPAC",
        date='2013-03-03',
    ),
    DocCfg(
        id='026856',
        author='Kevin Rudd',
        description="speech 'Xi Jinping, China And The Global Order'",
        date='2018-06-26',
    ),
    DocCfg(
        id='026731',
        description="speech by Lord Martin Rees at first inaugural Carl Sagan Lecture at Cornell",
    ),
]
1357
+
1358
+ # resumes and application letters
1359
+ OTHER_FILES_RESUMES = [
1360
+ DocCfg(id='029102', description=HBS_APPLICATION_NERIO),
1361
+ DocCfg(id='029104', description=HBS_APPLICATION_NERIO),
1362
+ DocCfg(id='022367', description=f"{RESUME_OF} Jack J Grynberg", date='2014-07-01'),
1363
+ DocCfg(
1383
1364
  id='029302',
1384
1365
  description=f"{RESUME_OF} {MICHAEL_J_BOCCIO}, former lawyer at the Trump Organization",
1385
1366
  date='2011-08-07',
1386
1367
  ),
1387
- FileCfg(id='015671', description=f"{RESUME_OF} Robin Solomon", date='2015-06-02'), # She left Mount Sinai at some point in 2015,
1388
- FileCfg(id='015672', description=f"{RESUME_OF} Robin Solomon", date='2015-06-02'), # She left Mount Sinai at some point in 2015,
1389
- FileCfg(id='029623', description=f'short bio of Kathleen Harrington, Founding Partner, C/H Global Strategies'),
1368
+ DocCfg(id='015671', description=f"{RESUME_OF} Robin Solomon", date='2015-06-02'), # She left Mount Sinai at some point in 2015,
1369
+ DocCfg(id='015672', description=f"{RESUME_OF} Robin Solomon", date='2015-06-02'), # She left Mount Sinai at some point in 2015,
1370
+ DocCfg(id='029623', description=f'short bio of Kathleen Harrington, Founding Partner, C/H Global Strategies'),
1371
+ ]
1390
1372
 
1391
- # arts
1392
- FileCfg(id='018703', author=ANDRES_SERRANO, description=f"artist statement about Trump objects"),
1393
- FileCfg(id='028281', description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya'),
1394
- FileCfg(
1373
+ OTHER_FILES_ARTS = [
1374
+ DocCfg(id='018703', author=ANDRES_SERRANO, description=f"artist statement about Trump objects"),
1375
+ DocCfg(id='023438', author=BROCKMAN_INC, description=f"announcement of auction of 'Noise' by Daniel Kahneman, Olivier Sibony, and Cass Sunstein"),
1376
+ DocCfg(id='030769', description=f"2017 Independent Filmmaker Project (IFP) Gotham Awards invitation"),
1377
+ DocCfg(id='028281', description=f'art show flier for "The House Of The Nobleman" curated by Wolfe Von Lenkiewicz & Victoria Golembiovskaya'),
1378
+ DocCfg(
1395
1379
  id='025205',
1396
1380
  description=f'Mercury Films partner profiles of Jennifer Baichwal, Nicholas de Pencier, Kermit Blackwood, Travis Rummel',
1397
1381
  date='2010-02-01',
1398
1382
  duplicate_ids=['025210']
1399
1383
  ),
1384
+ ]
1400
1385
 
1401
- # misc
1402
- FileCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1403
- FileCfg(id='023438', description=f"Brockman announcemeent of auction of 'Noise' by Daniel Kahneman, Olivier Sibony, and Cass Sunstein"),
1404
- FileCfg(
1386
+ OTHER_FILES_MISC = [
1387
+ DocCfg(id='022780', category=FLIGHT_LOGS),
1388
+ DocCfg(id='022816', category=FLIGHT_LOGS),
1389
+ DocCfg(
1405
1390
  id='025147',
1406
- description=f'Brockman hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
1391
+ author=BROCKMAN_INC,
1392
+ description=f'hot list Frankfurt Book Fair (includes article about Silk Road/Ross Ulbricht)',
1407
1393
  date='2016-10-23',
1408
1394
  ),
1409
- FileCfg(id='031425', description=f'completely redacted email from {SCOTT_J_LINK}'),
1410
- FileCfg(id='018224', description=f"conversation with {LAWRENCE_KRAUSS}?"),
1411
- FileCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1412
- FileCfg(id='025540', description=f"Epstein's rough draft of his side of the story?"),
1413
- FileCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) laws in the U.S."),
1414
- FileCfg(id='027071', description=f"{FEMALE_HEALTH_COMPANY} brochure request donations for female condoms in Uganda"),
1415
- FileCfg(id='027074', description=f"{FEMALE_HEALTH_COMPANY} pitch deck (USAID was a customer)"),
1416
- FileCfg(id='022780', description=FLIGHT_LOGS,),
1417
- FileCfg(id='022816', description=FLIGHT_LOGS,),
1418
- FileCfg(id='022494', description=f'Foreign Corrupt Practices Act (FCPA) DOJ Resource Guide'),
1419
- FileCfg(id='032735', description=f"{GORDON_GETTY} on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
1420
- FileCfg(
1395
+ DocCfg(id='023096', author=EPSTEIN_FOUNDATION, description=f'blog post', date='2012-11-15'),
1396
+ DocCfg(id='029326', author=EPSTEIN_FOUNDATION, description=f'{PRESS_RELEASE}', date='2013-02-15'),
1397
+ DocCfg(id='026565', author=EPSTEIN_FOUNDATION, description=f'{PRESS_RELEASE}, maybe a draft of 029326', date='2013-02-15'),
1398
+ DocCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
1399
+ DocCfg(id='020447', author='Working Group on Chinese Influence Activities in the U.S.', description=f'Promoting Constructive Vigilance'),
1400
+ DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1401
+ DocCfg(id='031425', description=f'completely redacted email from {SCOTT_J_LINK}'),
1402
+ DocCfg(id='018224', description=f"conversation with {LAWRENCE_KRAUSS}?"),
1403
+ DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1404
+ DocCfg(id='025540', description=f"Epstein's rough draft of his side of the story?"),
1405
+ DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
1406
+ DocCfg(id='027071', description=f"{FEMALE_HEALTH_COMPANY} brochure requesting donations for female condoms in Uganda"),
1407
+ DocCfg(id='027074', description=f"{FEMALE_HEALTH_COMPANY} pitch deck (USAID was a customer)"),
1408
+ DocCfg(id='022494', description=f'Foreign Corrupt Practices Act (FCPA) DOJ Resource Guide'),
1409
+ DocCfg(id='032735', description=f"{GORDON_GETTY} on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
1410
+ DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
1411
+ DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
1412
+ DocCfg(
1421
1413
  id='030142',
1422
1414
  description=f"{JASTA} (Justice Against Sponsors of Terrorism Act) doc that's mostly empty, references suit against Saudi f. {KATHRYN_RUEMMLER} & {KEN_STARR}",
1423
1415
  date='2016-09-01',
1424
1416
  ),
1425
- FileCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
1426
- FileCfg(id='029328', description=f"Rafanelli Events promotional deck"),
1427
- FileCfg(id='033434', description=f"{SCREENSHOT} iPhone chat labeled 'Edwards' at the top"),
1428
- FileCfg(id='023644', description=f"transcription of an interview with MBS from Saudi", date='2016-04-25'),
1429
- FileCfg(id='029475', description=f'{VIRGIN_ISLANDS} Twin City Mobile Integrated Health Services (TCMIH) proposal/request for donation'),
1430
- FileCfg(id='029448', description=f"weird short essay titled 'President Obama and Self-Deception'"),
1417
+ DocCfg(
1418
+ id='033338',
1419
+ description=f"{PRESS_RELEASE} announcing Donald Trump & {NICHOLAS_RIBIS} ended their working relationship at Trump's casino",
1420
+ date='2000-06-07',
1421
+ ),
1422
+ DocCfg(id='029328', description=f"Rafanelli Events promotional deck"),
1423
+ DocCfg(id='033434', description=f"{SCREENSHOT} iPhone chat labeled 'Edwards' at the top"),
1424
+ DocCfg(id='029475', description=f'{VIRGIN_ISLANDS} Twin City Mobile Integrated Health Services (TCMIH) proposal/request for donation'),
1425
+ DocCfg(id='029448', description=f"weird short essay titled 'President Obama and Self-Deception'"),
1426
+ ]
1431
1427
 
1432
- # junk
1433
- FileCfg(id='026678', description=f"fragment of image metadata {QUESTION_MARKS}", date='2017-06-29'),
1434
- FileCfg(id='022986', description=f"fragment of a screenshot {QUESTION_MARKS}"),
1435
- FileCfg(id='033478', description=f'{MEME} Kim Jong Un reading {FIRE_AND_FURY}', date='2018-01-05', duplicate_ids=['032713']),
1436
- FileCfg(id='033177', description=f"{MEME} Trump with text 'WOULD YOU TRUST THIS MAN WITH YOUR DAUGHTER?'"),
1437
- FileCfg(id='029564', description=OBAMA_JOKE, date='2013-07-26'),
1438
- FileCfg(id='029353', description=OBAMA_JOKE, date='2013-07-26'),
1439
- FileCfg(id='029352', description=OBAMA_JOKE, date='2013-07-26'),
1440
- FileCfg(id='029351', description=OBAMA_JOKE, date='2013-07-26'),
1441
- FileCfg(id='029354', description=OBAMA_JOKE, date='2013-07-26'),
1428
+ OTHER_FILES_JUNK = [
1429
+ DocCfg(id='026678', description=f"fragment of image metadata {QUESTION_MARKS}", date='2017-06-29'),
1430
+ DocCfg(id='022986', description=f"fragment of a screenshot {QUESTION_MARKS}"),
1431
+ DocCfg(id='033478', description=f'{MEME} Kim Jong Un reading {FIRE_AND_FURY}', date='2018-01-05', duplicate_ids=['032713']),
1432
+ DocCfg(id='033177', description=f"{MEME} Trump with text 'WOULD YOU TRUST THIS MAN WITH YOUR DAUGHTER?'"),
1433
+ DocCfg(id='029564', description=OBAMA_JOKE, date='2013-07-26'),
1434
+ DocCfg(id='029353', description=OBAMA_JOKE, date='2013-07-26'),
1435
+ DocCfg(id='029352', description=OBAMA_JOKE, date='2013-07-26'),
1436
+ DocCfg(id='029351', description=OBAMA_JOKE, date='2013-07-26'),
1437
+ DocCfg(id='029354', description=OBAMA_JOKE, date='2013-07-26'),
1442
1438
  ]
1443
1439
 
1444
- # Create a dict keyed by file_id
1440
# Names of the "other files" categories. The collection loop below upper-cases each
# entry and looks up the module-level list called OTHER_FILES_<ENTRY> (for example
# 'RESUMES' -> OTHER_FILES_RESUMES), so every entry here needs a matching list.
# The same entry, lower-cased and with a trailing 's' removed (except for ARTS and
# POLITICS), becomes the default `category` of the configs in that list.
OTHER_FILES_CATEGORIES = [
    ACADEMIA,
    f"{ARTICLE}s",  # plural form; presumably matches an OTHER_FILES_ARTICLES list -- verify
    ARTS,
    f"{BOOK}s",
    f"{CONFERENCE}s",
    FINANCE,
    JUNK,
    LEGAL,
    'LETTERS',
    'MISC',
    POLITICS,
    PROPERTY,
    REPUTATION,
    'RESUMES',
    SOCIAL,
    SPEECH,
]
1458
+
1459
OTHER_FILES_CONFIG = []

# Collect all OTHER_FILES_<CATEGORY> lists into OTHER_FILES_CONFIG and give every
# config a default category derived from the name of the list it came from.
for category in OTHER_FILES_CATEGORIES:
    _list_name = f"OTHER_FILES_{category.upper()}"

    # Look the list up in globals() explicitly: locals() only happens to work here
    # because this code runs at module scope. Fail with a message that names the
    # missing list rather than a bare KeyError.
    if _list_name not in globals():
        raise KeyError(
            f"'{category}' is in OTHER_FILES_CATEGORIES but no list named {_list_name} is defined"
        )

    configs = globals()[_list_name]
    OTHER_FILES_CONFIG.extend(configs)

    # The category label is the lowercase, singular form of the list name
    # ('RESUMES' -> 'resume'); ARTS and POLITICS keep their trailing 's'.
    # NOTE(review): this comparison assumes ARTS and POLITICS are lowercase strings -- confirm.
    category = category.lower()
    category = category if category in [ARTS, POLITICS] else category.removesuffix('s')

    # Inject category field, keeping any category set explicitly on the config
    # (e.g. the FLIGHT_LOGS entries in OTHER_FILES_MISC).
    for cfg in configs:
        cfg.category = cfg.category or category
1471
+
1445
1472
  ALL_CONFIGS = TEXTS_CONFIG + EMAILS_CONFIG + OTHER_FILES_CONFIG
1446
- ALL_FILE_CONFIGS: dict[str, FileCfg] = {}
1473
+ ALL_FILE_CONFIGS: dict[str, DocCfg] = {}
1447
1474
 
1448
- # Add extra config objects for duplicate files that match the config of file they are duplicating
1475
+ # Create a dict keyed by file_id
1449
1476
  for cfg in ALL_CONFIGS:
1450
1477
  ALL_FILE_CONFIGS[cfg.id] = cfg
1451
1478
 
1479
+ # Add extra config objects for duplicate files that match the config of the file they are duplicating
1452
1480
  for dupe_cfg in cfg.duplicate_cfgs():
1453
1481
  ALL_FILE_CONFIGS[dupe_cfg.id] = dupe_cfg
1454
1482
 
1455
1483
 
1456
- # OtherFiles whose description/hints match these prefixes are not displayed unless --all-other-files is used
1457
- UNINTERESTING_PREFIXES = [
1458
- 'article about',
1459
- ARTICLE_DRAFT,
1460
- 'Aviation International',
1461
- BBC,
1462
- BLOOMBERG,
1463
- BOFA,
1464
- BOFA_MERRILL,
1465
- BOOK,
1466
- 'Boston Globe',
1467
- 'Brockman',
1468
- CHALLENGES_OF_AI,
1469
- CHINA_DAILY,
1470
- CNN,
1471
- 'completely redacted',
1472
- CVRA,
1473
- DAILY_MAIL,
1474
- DAILY_TELEGRAPH,
1475
- DAVID_SCHOEN_CVRA_LEXIS_SEARCH[0:-12], # Because date at end :(
1476
- DEEP_THINKING_HINT,
1477
- DERSH_GIUFFRE_TWEET,
1478
- DEUTSCHE_BANK,
1479
- 'Forbes',
1480
- 'fragment',
1481
- 'Frontlines',
1482
- 'Future Science',
1483
- 'Globe and Mail',
1484
- GOLDMAN_REPORT,
1485
- GORDON_GETTY,
1486
- f"{HARVARD} Econ",
1487
- HARVARD_POETRY,
1488
- 'Inference',
1489
- 'Invesco',
1490
- JAMES_PATTERSON,
1491
- JASTA,
1492
- 'JetGala',
1493
- JOHN_BOLTON_PRESS_CLIPPING,
1494
- 'Journal of Criminal',
1495
- JP_MORGAN,
1496
- LA_TIMES,
1497
- 'Litigation Daily',
1498
- 'MarketWatch',
1499
- MEME,
1500
- 'Morgan Stanley',
1501
- NOBEL_CHARITABLE_TRUST,
1502
- 'Nautilus',
1503
- 'New Yorker',
1504
- NIGHT_FLIGHT_HINT,
1505
- NYT_ARTICLE,
1506
- NYT_COLUMN,
1507
- OBAMA_JOKE,
1508
- PALM_BEACH_CODE_ENFORCEMENT,
1509
- PALM_BEACH_DAILY_ARTICLE,
1510
- PALM_BEACH_POST_ARTICLE,
1511
- PALM_BEACH_TSV,
1512
- PALM_BEACH_WATER_COMMITTEE,
1513
- PAUL_KRASSNER,
1514
- PEGGY_SIEGAL,
1515
- 'Politifact',
1516
- 'Rafanelli',
1517
- REDACTED,
1518
- ROBERT_LAWRENCE_KUHN,
1519
- ROBERT_TRIVERS,
1520
- 'S&P',
1521
- 'Sadis',
1522
- 'SCMP',
1523
- 'SciencExpress',
1524
- 'Scowcroft',
1525
- SHIMON_POST_ARTICLE,
1526
- SINGLE_PAGE,
1527
- STACEY_PLASKETT,
1528
- TEXT_OF_US_LAW,
1529
- TRANSLATION,
1530
- TWEET,
1531
- THE_REAL_DEAL_ARTICLE,
1532
- TRUMP_DISCLOSURES,
1533
- UBS_CIO_REPORT,
1534
- 'U.S. News',
1535
- 'US Office',
1536
- 'USA Inc',
1537
- 'Vanity Fair',
1538
- VI_DAILY_NEWS_ARTICLE,
1539
- WAPO,
1540
- ]
1484
# Email-related regexes (they have to live here for circular dependency reasons).

# "---- Forwarded Message ----" / "---- Original Message ----" / "Begin forwarded message:"
FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
# "In a message dated 1/2/2015 ... writes:"
REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
# Shared tail of the "On <date> ..." reply lines: AM/PM, '<', '_' or "wrote:".
REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
# "On 1/2/2015, ... wrote:"
REPLY_LINE_ON_NUMERIC_DATE_PATTERN = r"On \d+/\d+/\d+[, ].*" + REPLY_LINE_ENDING_PATTERN
# "On Mon, Jan 5 ... wrote:" (optional leading day number, then a weekday or month name)
REPLY_LINE_ON_DATE_PATTERN = (
    r"^On (\d+ )?("
    r"(Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?"
    r"|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*"
    r")[, ].*"
    + REPLY_LINE_ENDING_PATTERN
)
# Any of the above, wrapped in a single capturing group.
REPLY_LINE_PATTERN = "(" + "|".join((
    REPLY_LINE_IN_A_MSG_PATTERN,
    REPLY_LINE_ON_NUMERIC_DATE_PATTERN,
    REPLY_LINE_ON_DATE_PATTERN,
    FORWARDED_LINE_PATTERN,
)) + ")"
REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.MULTILINE | re.IGNORECASE)

# "Sent from my iPhone"-style signatures, optionally preceded by a typo apology.
SENT_FROM_REGEX = re.compile(
    r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?'
    r'(Sent (from|via).*'
    r'(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)'
    r'\.?)',
    re.I | re.M,
)
1541
1493
 
1542
1494
 
1543
1495
  # Error checking.
1496
+ assert len(OTHER_FILES_CONFIG) == 438
1544
1497
  encountered_file_ids = set()
1545
1498
 
1546
1499
  for cfg in ALL_CONFIGS: