PyPI - epstein-files - Versions diffs - 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

epstein_files/__init__.py +26 -17
epstein_files/documents/communication.py +10 -14
epstein_files/documents/document.py +5 -1
epstein_files/documents/email.py +164 -78
epstein_files/documents/imessage/text_message.py +42 -25
epstein_files/documents/messenger_log.py +31 -12
epstein_files/documents/other_file.py +13 -12
epstein_files/epstein_files.py +19 -80
epstein_files/util/constant/common_words.py +3 -3
epstein_files/util/constant/html.py +13 -6
epstein_files/util/constant/names.py +10 -7
epstein_files/util/constant/output_files.py +3 -0
epstein_files/util/constant/strings.py +6 -2
epstein_files/util/constant/urls.py +1 -1
epstein_files/util/constants.py +18 -22
epstein_files/util/env.py +46 -36
epstein_files/util/file_helper.py +1 -2
epstein_files/util/highlighted_group.py +1007 -187
epstein_files/util/logging.py +8 -1
epstein_files/util/output.py +166 -51
epstein_files/util/rich.py +55 -79
epstein_files/util/timer.py +1 -1
epstein_files/util/word_count.py +3 -4
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/METADATA +1 -1
epstein_files-1.1.2.dist-info/RECORD +33 -0
epstein_files-1.0.16.dist-info/RECORD +0 -33
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/LICENSE +0 -0
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/WHEEL +0 -0
{epstein_files-1.0.16.dist-info → epstein_files-1.1.2.dist-info}/entry_points.txt +0 -0

epstein_files/util/constants.py CHANGED Viewed

@@ -25,8 +25,8 @@ HEADER_ABBREVIATIONS = {
     'Jagland': 'Thorbjørn Jagland (former Norwegian prime minister)',
     'JEGE': "Epstein's airplane holding company",
     'Jeffrey Wernick': 'right wing crypto bro, former COO of Parler',
-    'Joi': 'Joi Ito (MIT Media Lab, MIT Digital Currency Initiative)',
-    "Hoffenberg": "Steven Hoffenberg (Epstein's ponzi scheme partner)",
+    'Joi': f"Joi Ito ({MIT_MEDIA_LAB}, MIT Digital Currency Initiative)",
+    "Hoffenberg": f"{STEVEN_HOFFENBERG} (Epstein's ponzi scheme partner)",
     'KSA': "Kingdom of Saudi Arabia",
     'Kurz': 'Sebastian Kurz (former Austrian Chancellor)',
     'Kwok': "Chinese criminal Miles Kwok AKA Miles Guo AKA Guo Wengui",
@@ -91,17 +91,17 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
     JONATHAN_FARKAS: re.compile(r'Jonathan Farka(s|il)', re.IGNORECASE),
     KATHRYN_RUEMMLER: re.compile(r'Kathr?yn? Ruemmler?', re.IGNORECASE),
-    KEN_STARR: re.compile(r'starr, ken|Ken(neth W.)?\s+starr?|starr', re.IGNORECASE),
+    KEN_STARR: re.compile(r'starr, ken|Ken(neth\s*(W.\s*)?)?\s+starr?|starr', re.IGNORECASE),
     LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
     LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|Ihsofficel', re.IGNORECASE),
     LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
-    LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|[jl]awkrauss|kruase', re.IGNORECASE),
-    LEON_BLACK: re.compile(r'Leon Black?', re.IGNORECASE),
+    LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus[es]?|[jl]awkrauss|kruase', re.IGNORECASE),
+    LEON_BLACK: re.compile(r'Leon\s*Black?|(?<!Marc )Leon(?! (Botstein|Jaworski|Wieseltier))', re.IGNORECASE),
+    LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
+    LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
     MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
     MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
     MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
-    LILLY_SANCHEZ: re.compile(r'Lilly.*Sanchez', re.IGNORECASE),
-    LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
     MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
     MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
     MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -128,10 +128,10 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     PRINCE_ANDREW: re.compile(r'Prince Andrew|The Duke', re.IGNORECASE),
     REID_WEINGARTEN: re.compile(r'Weingarten, Rei[cdi]|Rei[cdi] Weingarten', re.IGNORECASE),
     RICHARD_KAHN: re.compile(r'rich(ard)? kahn?', re.IGNORECASE),
-    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton Jr.?', re.IGNORECASE),
+    ROBERT_D_CRITTON_JR: re.compile(r'Robert D.? Critton,? Jr.?', re.IGNORECASE),
     ROBERT_LAWRENCE_KUHN: re.compile(r'Robert\s*(Lawrence)?\s*Kuhn', re.IGNORECASE),
     ROBERT_TRIVERS: re.compile(r'tri[vy]ersr@gmail|Robert\s*Trivers?', re.IGNORECASE),
-    ROSS_GOW: re.compile(fr"{ROSS_GOW}|ross@acuityreputation.com", re.IGNORECASE),
+    ROSS_GOW: re.compile(fr"Ross(acuity)? Gow|(ross@)?acuity\s*reputation(\.com)?", re.IGNORECASE),
     SAMUEL_LEFF: re.compile(r"Sam(uel)?(/Walli)? Leff", re.IGNORECASE),
     SCOTT_J_LINK: re.compile(r'scott j. link?', re.IGNORECASE),
     SEAN_BANNON: re.compile(r'sean bannon?', re.IGNORECASE),
@@ -145,7 +145,8 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     TERRY_KAFKA: re.compile(r'Terry Kafka?', re.IGNORECASE),
     THANU_BOONYAWATANA: re.compile(r"Thanu (BOONYAWATANA|Cnx)", re.IGNORECASE),
     THORBJORN_JAGLAND: re.compile(r'(Thor.{3,8})?Jag[il]and?', re.IGNORECASE),
-    TONJA_HADDAD_COLEMAN: re.compile(fr"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE)
+    TONJA_HADDAD_COLEMAN: re.compile(r"To(nj|rl)a Haddad Coleman|haddadfm@aol.com", re.IGNORECASE),
+    VINCENZO_IOZZO: re.compile(r"Vincenzo [IL]ozzo", re.IGNORECASE),
 }
 # If found as substring consider them the author
@@ -194,7 +195,6 @@ EMAILERS = [
     'Steven Victor MD',
     'Susan Edelman',
     TOM_BARRACK,
-    'Vincenzo Lozzo',
     'Vladimir Yudashkin',
 ]
@@ -387,6 +387,7 @@ EMAILS_CONFIG = [
     EmailCfg(
         id='023208',
         author=JEFFREY_EPSTEIN,
+        description=f"very long email chain about Leon Black's finances and things like Gratitude America",
         fwded_text_after='Date: Tue, Oct 27',
         recipients=[BRAD_WECHSLER, MELANIE_SPINELLA],
         duplicate_ids=['023291'],
@@ -499,7 +500,7 @@ EMAILS_CONFIG = [
         author=STEVEN_HOFFENBERG,
         recipients=["Players2"],
         timestamp=parse('2016-08-11 09:36:01'),
-        attribution_reason='Actually a fwd by Charles Michael but Hoffenberg email more interesting',
+        attribution_reason=f"Actually a fwd by Charles Michael but {STEVEN_HOFFENBERG} email more interesting",
     ),
     EmailCfg(
         id='026620',
@@ -859,7 +860,6 @@ UN_GENERAL_ASSEMBLY = '67th U.N. General Assembly'
 WOMEN_EMPOWERMENT = f"Women Empowerment (WE) conference"
 ZUBAIR_AND_ANYA = f"{ZUBAIR_KHAN} and Anya Rasulova"
 OTHER_FILES_BOOKS = [
     DocCfg(id='017088', author=ALAN_DERSHOWITZ, description=f'"Taking the Stand: My Life in the Law" (draft)'),
     DocCfg(id='013501', author='Arnold J. Mandell', description=f'The Nearness Of Grace: A Personal Science Of Spiritual Transformation', date='2005-01-01'),
@@ -1139,11 +1139,7 @@ OTHER_FILES_LEGAL = [
     DocCfg(id='025353', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-19', duplicate_ids=['010723', '019224'], dupe_type='redacted'),
     DocCfg(id='025704', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-05-27', duplicate_ids=['010732', '019221'], dupe_type='redacted'),
     DocCfg(id='012130', author=KEN_STARR, description=KEN_STARR_LETTER, date='2008-06-19', duplicate_ids=['012135']),
-    DocCfg(
-        id='031447',
-        author=MARTIN_WEINBERG,
-        description=f"letter from to Melanie Ann Pustay and Sean O'Neill re: an Epstein FOIA request"
-    ),
+    DocCfg(id='031447', author=MARTIN_WEINBERG, description=f"letter from to Melanie Ann Pustay & Sean O'Neill re: Epstein FOIA request"),
     DocCfg(
         id='028965',
         author=MARTIN_WEINBERG,
@@ -1223,7 +1219,7 @@ OTHER_FILES_CONFERENCES = [
 OTHER_FILES_FINANCE = [
     DocCfg(id='024631', author='Ackrell Capital', description=f"Cannabis Investment Report 2018", is_interesting=True),
     DocCfg(id='016111', author=BOFA_MERRILL, description=f"GEMs Paper #26 Saudi Arabia: beyond oil but not so fast", date='2016-06-30'),
-    DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump\'s effect on MXN", date='2016-09-22'),
+    DocCfg(id='010609', author=BOFA_MERRILL, description=f"Liquid Insight Trump's effect on MXN", date='2016-09-22'),
     DocCfg(id='025978', author=BOFA_MERRILL, description=f"Understanding when risk parity risk Increases", date='2016-08-09'),
     DocCfg(id='014404', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
     DocCfg(id='014410', author=BOFA_MERRILL, description=f'Japan Investment Strategy Report', date='2016-11-18'),
@@ -1515,8 +1511,8 @@ OTHER_FILES_ARTS = [
 ]
 OTHER_FILES_MISC = [
-    DocCfg(id='022780', category=FLIGHT_LOGS),
-    DocCfg(id='022816', category=FLIGHT_LOGS),
+    DocCfg(id='022780', category=FLIGHT_LOG),
+    DocCfg(id='022816', category=FLIGHT_LOG),
     DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
     DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
@@ -1541,7 +1537,7 @@ OTHER_FILES_MISC = [
     DocCfg(id='031743', description=f'a few pages describing the internet as a "New Nation State" (Network State?)'),
     DocCfg(id='012718', description=f"{CVRA} congressional record", date='2011-06-17'),
     DocCfg(id='024117', description=f"FAQ about anti-money laundering (AML) and terrorist financing (CFT) law in the U.S."),
-    DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel"),
+    DocCfg(id='019448', description=f"Haitian business investment proposal called Jacmel", attached_to_email_id='019448'),
     DocCfg(id='023644', description=f"interview with Mohammed bin Salman", date='2016-04-25'),
     DocCfg(
         id='030142',

epstein_files/util/env.py CHANGED Viewed

@@ -2,14 +2,27 @@ import logging
 from argparse import ArgumentParser
 from os import environ
 from pathlib import Path
-from sys import argv, exit
 from rich_argparse_plus import RichHelpFormatterPlus
-from epstein_files.util.logging import env_log_level, logger
+from epstein_files.util.logging import env_log_level, exit_with_error, logger
 DEFAULT_WIDTH = 145
-HTML_SCRIPTS = ['epstein_generate', 'epstein_word_count']
+EPSTEIN_GENERATE = 'epstein_generate'
+HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
+# Verify Epstein docs dir exists
+EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
+DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
+DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
+if not DOCS_DIR_ENV:
+    exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
+elif not DOCS_DIR.exists():
+    exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
+is_env_var_set = lambda s: len(environ.get(s) or '') > 0
+is_output_arg = lambda arg: any([arg.startswith(pfx) for pfx in ['colors_only', 'json', 'make_clean', 'output']])
 RichHelpFormatterPlus.choose_theme('morning_glory')
@@ -21,13 +34,14 @@ parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='re-p
 output = parser.add_argument_group('OUTPUT', 'Options used by epstein_generate.')
 output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
 output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
-output.add_argument('--build', '-b', action='store_true', help='write HTML output to a file')
+output.add_argument('--build', '-b', action='store_true', help='write output to an HTML file in docs/')
+output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
 output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
 output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
 output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
 output.add_argument('--output-other', '-oo', action='store_true', help='generate other files section')
 output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
-output.add_argument('--sort-alphabetical', action='store_true', help='sort emailers alphabetically intead of by email count')
+output.add_argument('--sort-alphabetical', action='store_true', help='sort tables alphabetically intead of by count')
 output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')
 output.add_argument('--uninteresting', action='store_true', help='only output uninteresting other files')
 output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use (in characters)')
@@ -35,7 +49,7 @@ output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='scre
 scripts = parser.add_argument_group('SCRIPTS', 'Options used by epstein_search, epstein_show, and epstein_diff.')
 scripts.add_argument('positional_args', nargs='*', help='strings to searchs for, file IDs to show or diff, etc.')
 scripts.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (used by epstein_show)')
-scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole file (used by epstein_search)')
+scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole files')
 debug = parser.add_argument_group('DEBUG')
 debug.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
@@ -44,24 +58,11 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
 debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
 debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
 debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
-args = parser.parse_args()
-# Verify Epstein docs can be found
-EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
-DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
-DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
-if not DOCS_DIR_ENV:
-    print(f"\n   ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
-    exit(1)
-elif not DOCS_DIR.exists():
-    print(f"\n   ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
-    exit(1)
-current_script = Path(argv[0]).name
-is_env_var_set = lambda s: len(environ.get(s) or '') > 0
-is_html_script = current_script in HTML_SCRIPTS
+# Parse args
+args = parser.parse_args()
+is_html_script = parser.prog in HTML_SCRIPTS
 args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
 args.names = [None if n == 'None' else n for n in (args.names or [])]
@@ -69,8 +70,25 @@ args.output_emails = args.output_emails or args.all_emails
 args.output_other = args.output_other or args.all_other_files or args.uninteresting
 args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
 args.width = args.width if is_html_script else None
-is_any_output_selected = any([arg.startswith('output_') and value for arg, value in vars(args).items()])
-is_any_output_selected = is_any_output_selected or args.json_metadata or args.colors_only
+if is_html_script:
+    if args.positional_args:
+        exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
+    if parser.prog == EPSTEIN_GENERATE:
+        if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
+            if args.email_timeline:
+                exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
+        elif not args.email_timeline:
+            logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
+            args.output_texts = args.output_emails = args.output_other = True
+elif parser.prog.startswith('epstein_') and not args.positional_args:
+    exit_with_error(f"{parser.prog} requires positional arguments but got none!")
+if args.names:
+    logger.warning(f"Output restricted to {args.names}")
+    args.output_other = False
 # Log level args
 if args.deep_debug:
@@ -82,15 +100,7 @@ elif args.suppress_logs:
 elif not env_log_level:
     logger.setLevel(logging.WARNING)
-logger.info(f'Log level set to {logger.level}...')
-# Massage args that depend on other args to the appropriate state
-if current_script == 'epstein_generate' and not (is_any_output_selected or args.make_clean):
-    logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
-    args.output_texts = args.output_emails = args.output_other = True
-if args.debug:
-    logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
-if args.names:
-    logger.warning(f"Output restricted to {args.names}")
+logger.debug(f'Log level set to {logger.level}...')
+args_str = ',\n'.join([f"{k}={v}" for k, v in vars(args).items() if v])
+logger.info(f"'{parser.prog}' script invoked\n{args_str}")
+logger.debug(f"Reading Epstein documents from '{DOCS_DIR}'...")

epstein_files/util/file_helper.py CHANGED Viewed

@@ -11,11 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
 KB = 1024
 MB = KB * KB
-file_size = lambda file_path: Path(file_path).stat().st_size
 # Coerce methods handle both string and int arguments.
 coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
 coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
+file_size = lambda file_path: Path(file_path).stat().st_size
 id_str = lambda id: f"{int(id):06d}"

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl

epstein-files 1.0.16__py3-none-any.whl → 1.1.2__py3-none-any.whl