epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@ HEADER_ABBREVIATIONS = {
25
25
  'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
26
26
  'JEGE': "Epstein's airplane holding company",
27
27
  'Jeffrey Wernick': 'right wing crypto bro, former COO of Parler',
28
- 'Joi': 'Joi Ito (MIT Media Lab, MIT Digital Currency Initiative)',
29
- "Hoffenberg": "Steven Hoffenberg (Epstein's ponzi scheme partner)",
28
+ 'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
29
+ "Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
30
30
  'KSA': "Kingdom of Saudi Arabia",
31
31
  'Kurz': 'Sebastian Kurz (former Austrian Chancellor)',
32
32
  'Kwok': "Chinese criminal Miles Kwok AKA Miles Guo AKA Guo Wengui",
@@ -91,17 +91,17 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
91
91
  JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
92
92
  JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
93
93
  KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
94
- KEN_STARR: re.compile(r'starr, ken|Ken(neth W.)?\s+starr?|starr', re.IGNORECASE),
94
+ KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
95
95
  LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
96
96
  LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|Ihsofficel', re.IGNORECASE),
97
97
  LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
98
- LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|[jl]awkrauss|kruase', re.IGNORECASE),
99
- LEON_BLACK: re.compile(r'Leon Black?', re.IGNORECASE),
98
+ LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
99
+ LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
100
+ LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
101
+ LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
100
102
  MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
101
103
  MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
102
104
  MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
103
- LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
104
- LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
105
105
  MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
106
106
  MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
107
107
  MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -128,10 +128,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
128
128
  PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
129
129
  REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
130
130
  RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
131
- ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
131
+ ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
132
132
  ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
133
133
  ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
134
- ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
134
+ ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
135
135
  SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
136
136
  SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
137
137
  SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
@@ -145,7 +145,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
145
145
  TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
146
146
  THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
147
147
  THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
148
- TONJA_HADDAD_COLEMAN: re.compile(fr"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE)
148
+ TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
149
+ VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
149
150
  }
150
151
 
151
152
  # If found as substring consider them the author
@@ -194,7 +195,6 @@ EMAILERS = [
194
195
  'Steven Victor MD',
195
196
  'Susan Edelman',
196
197
  TOM_BARRACK,
197
- 'Vincenzo Lozzo',
198
198
  'Vladimir Yudashkin',
199
199
  ]
200
200
 
@@ -387,6 +387,7 @@ EMAILS_CONFIG = [
387
387
  EmailCfg(
388
388
  id='023208',
389
389
  author=JEFFREY_EPSTEIN,
390
+ description=f"very long email chain about Leon Black's finances and things like Gratitude America",
390
391
  fwded_text_after='Date: Tue, Oct 27',
391
392
  recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
392
393
  duplicate_ids=['023291'],
@@ -499,7 +500,7 @@ EMAILS_CONFIG = [
499
500
  author=STEVEN_HOFFENBERG,
500
501
  recipients=["Players2"],
501
502
  timestamp=parse('2016-08-11 09:36:01'),
502
- attribution_reason='Actually a fwd by Charles Michael but Hoffenberg email more interesting',
503
+ attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
503
504
  ),
504
505
  EmailCfg(
505
506
  id='026620',
@@ -859,7 +860,6 @@ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
859
860
  WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
860
861
  ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
861
862
 
862
-
863
863
  OTHER_FILES_BOOKS = [
864
864
  DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
865
865
  DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -1139,11 +1139,7 @@ OTHER_FILES_LEGAL = [
1139
1139
  DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
1140
1140
  DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
1141
1141
  DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
1142
- DocCfg(
1143
- id='031447',
1144
- author=MARTIN_WEINBERG,
1145
- description=f"letter from to Melanie Ann Pustay and Sean O'Neill re: an Epstein FOIA request"
1146
- ),
1142
+ DocCfg(id='031447', author=MARTIN_WEINBERG, description=f"letter from to Melanie Ann Pustay & Sean O'Neill re: Epstein FOIA request"),
1147
1143
  DocCfg(
1148
1144
  id='028965',
1149
1145
  author=MARTIN_WEINBERG,
@@ -1223,7 +1219,7 @@ OTHER_FILES_CONFERENCES = [
1223
1219
  OTHER_FILES_FINANCE = [
1224
1220
  DocCfg(id='024631', author='Ackrell Capital', description=f"Cannabis Investment Report 2018", is_interesting=True),
1225
1221
  DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
1226
- DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
1222
+ DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump's effect on MXN", date='2016-09-22'),
1227
1223
  DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
1228
1224
  DocCfg(id='014404', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
1229
1225
  DocCfg(id='014410', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
@@ -1515,8 +1511,8 @@ OTHER_FILES_ARTS = [
1515
1511
  ]
1516
1512
 
1517
1513
  OTHER_FILES_MISC = [
1518
- DocCfg(id='022780', category=FLIGHT_LOGS),
1519
- DocCfg(id='022816', category=FLIGHT_LOGS),
1514
+ DocCfg(id='022780', category=FLIGHT_LOG),
1515
+ DocCfg(id='022816', category=FLIGHT_LOG),
1520
1516
  DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1521
1517
  DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
1522
1518
  DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
@@ -1541,7 +1537,7 @@ OTHER_FILES_MISC = [
1541
1537
  DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
1542
1538
  DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
1543
1539
  DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
1544
- DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
1540
+ DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
1545
1541
  DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
1546
1542
  DocCfg(
1547
1543
  id='030142',
epstein_files/util/env.py CHANGED
@@ -2,14 +2,27 @@ import logging
2
2
  from argparse import ArgumentParser
3
3
  from os import environ
4
4
  from pathlib import Path
5
- from sys import argv, exit
6
5
 
7
6
  from rich_argparse_plus import RichHelpFormatterPlus
8
7
 
9
- from epstein_files.util.logging import env_log_level, logger
8
+ from epstein_files.util.logging import env_log_level, exit_with_error, logger
10
9
 
11
10
  DEFAULT_WIDTH = 145
12
- HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
11
+ EPSTEIN_GENERATE = 'epstein_generate'
12
+ HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
13
+
14
+ # Verify Epstein docs dir exists
15
+ EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
16
+ DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
17
+ DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
18
+
19
+ if not DOCS_DIR_ENV:
20
+ exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
21
+ elif not DOCS_DIR.exists():
22
+ exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
23
+
24
+ is_env_var_set = lambda s: len(environ.get(s) or '') > 0
25
+ is_output_arg = lambda arg: any([arg.startswith(pfx) for pfx in ['colors_only', 'json', 'make_clean', 'output']])
13
26
 
14
27
 
15
28
  RichHelpFormatterPlus.choose_theme('morning_glory')
@@ -21,13 +34,14 @@ parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='re-p
21
34
  output = parser.add_argument_group('OUTPUT', 'Options used by epstein_generate.')
22
35
  output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
23
36
  output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
24
- output.add_argument('--build', '-b', action='store_true', help='write HTML output to a file')
37
+ output.add_argument('--build', '-b', action='store_true', help='write output to an HTML file in docs/')
38
+ output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
25
39
  output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
26
40
  output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
27
41
  output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
28
42
  output.add_argument('--output-other', '-oo', action='store_true', help='generate other files section')
29
43
  output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
30
- output.add_argument('--sort-alphabetical', action='store_true', help='sort emailers alphabetically intead of by email count')
44
+ output.add_argument('--sort-alphabetical', action='store_true', help='sort tables alphabetically intead of by count')
31
45
  output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')
32
46
  output.add_argument('--uninteresting', action='store_true', help='only output uninteresting other files')
33
47
  output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use (in characters)')
@@ -35,7 +49,7 @@ output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='scre
35
49
  scripts = parser.add_argument_group('SCRIPTS', 'Options used by epstein_search, epstein_show, and epstein_diff.')
36
50
  scripts.add_argument('positional_args', nargs='*', help='strings to searchs for, file IDs to show or diff, etc.')
37
51
  scripts.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (used by epstein_show)')
38
- scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole file (used by epstein_search)')
52
+ scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole files')
39
53
 
40
54
  debug = parser.add_argument_group('DEBUG')
41
55
  debug.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
@@ -44,24 +58,11 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
44
58
  debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
45
59
  debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
46
60
  debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
47
- args = parser.parse_args()
48
-
49
-
50
- # Verify Epstein docs can be found
51
- EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
52
- DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
53
- DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
54
61
 
55
- if not DOCS_DIR_ENV:
56
- print(f"\n ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
57
- exit(1)
58
- elif not DOCS_DIR.exists():
59
- print(f"\n ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
60
- exit(1)
61
62
 
62
- current_script = Path(argv[0]).name
63
- is_env_var_set = lambda s: len(environ.get(s) or '') > 0
64
- is_html_script = current_script in HTML_SCRIPTS
63
+ # Parse args
64
+ args = parser.parse_args()
65
+ is_html_script = parser.prog in HTML_SCRIPTS
65
66
 
66
67
  args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
67
68
  args.names = [None if n == 'None' else n for n in (args.names or [])]
@@ -69,8 +70,25 @@ args.output_emails = args.output_emails or args.all_emails
69
70
  args.output_other = args.output_other or args.all_other_files or args.uninteresting
70
71
  args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
71
72
  args.width = args.width if is_html_script else None
72
- is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
73
- is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
73
+
74
+ if is_html_script:
75
+ if args.positional_args:
76
+ exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
77
+
78
+ if parser.prog == EPSTEIN_GENERATE:
79
+ if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
80
+ if args.email_timeline:
81
+ exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
82
+ elif not args.email_timeline:
83
+ logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
84
+ args.output_texts = args.output_emails = args.output_other = True
85
+ elif parser.prog.startswith('epstein_') and not args.positional_args:
86
+ exit_with_error(f"{parser.prog} requires positional arguments but got none!")
87
+
88
+ if args.names:
89
+ logger.warning(f"Output restricted to {args.names}")
90
+ args.output_other = False
91
+
74
92
 
75
93
  # Log level args
76
94
  if args.deep_debug:
@@ -82,15 +100,7 @@ elif args.suppress_logs:
82
100
  elif not env_log_level:
83
101
  logger.setLevel(logging.WARNING)
84
102
 
85
- logger.info(f'Log level set to {logger.level}...')
86
-
87
- # Massage args that depend on other args to the appropriate state
88
- if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
89
- logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
90
- args.output_texts = args.output_emails = args.output_other = True
91
-
92
- if args.debug:
93
- logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
94
-
95
- if args.names:
96
- logger.warning(f"Output restricted to {args.names}")
103
+ logger.debug(f'Log level set to {logger.level}...')
104
+ args_str = ',\n'.join([f"{k}={v}" for k, v in vars(args).items() if v])
105
+ logger.info(f"'{parser.prog}' script invoked\n{args_str}")
106
+ logger.debug(f"Reading Epstein documents from '{DOCS_DIR}'...")
@@ -11,11 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
11
11
  KB = 1024
12
12
  MB = KB * KB
13
13
 
14
- file_size = lambda file_path: Path(file_path).stat().st_size
15
-
16
14
  # Coerce methods handle both string and int arguments.
17
15
  coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
18
16
  coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
17
+ file_size = lambda file_path: Path(file_path).stat().st_size
19
18
  id_str = lambda id: f"{int(id):06d}"
20
19
 
21
20