PyPI - epstein-files - Versions diffs - 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl - Mend

epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

epstein_files/__init__.py +31 -6
epstein_files/documents/document.py +5 -1
epstein_files/documents/email.py +192 -203
epstein_files/epstein_files.py +2 -3
epstein_files/person.py +29 -9
epstein_files/util/constant/names.py +9 -7
epstein_files/util/constant/output_files.py +8 -5
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constant/urls.py +13 -2
epstein_files/util/constants.py +35 -11
epstein_files/util/data.py +1 -0
epstein_files/util/env.py +9 -6
epstein_files/util/highlighted_group.py +263 -117
epstein_files/util/output.py +18 -9
epstein_files/util/rich.py +7 -2
epstein_files/util/word_count.py +1 -1
{epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/METADATA +1 -1
epstein_files-1.2.5.dist-info/RECORD +34 -0
epstein_files-1.2.1.dist-info/RECORD +0 -34
{epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
{epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
{epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0

epstein_files/person.py CHANGED Viewed

@@ -120,7 +120,7 @@ class Person:
         else:
             email_count = len(self.unique_emails())
             num_days = self.email_conversation_length_in_days()
-            title_suffix = f"to/from {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
+            title_suffix = f"{TO_FROM} {self.name_str()} starting {self.earliest_email_date()} covering {num_days:,} days"
         title = f"Found {email_count} emails {title_suffix}"
         width = max(MIN_AUTHOR_PANEL_WIDTH, len(title) + 4, len(self.info_with_category()) + 8)
@@ -136,8 +136,12 @@ class Person:
         highlight_group = self.highlight_group()
         if highlight_group and isinstance(highlight_group, HighlightedNames) and self.name:
-            return highlight_group.info_for(self.name)
-        elif self.is_uninteresting_cc:
+            info = highlight_group.info_for(self.name)
+            if info:
+                return info
+        if self.is_uninteresting_cc:
             if self.has_any_epstein_emails():
                 return UNINTERESTING_CC_INFO
             else:
@@ -152,7 +156,7 @@ class Person:
         elif self.name is None:
             return Text('(emails whose author or recipient could not be determined)', style=ALT_INFO_STYLE)
         elif self.category() == JUNK:
-            return Text(f"({JUNK} mail)", style='tan dim')
+            return Text(f"({JUNK} mail)", style='bright_black dim')
         elif self.is_uninteresting_cc and (self.info_str() or '').startswith(UNINTERESTING_CC_INFO):
             if self.info_str() == UNINTERESTING_CC_INFO:
                 return Text(f"({self.info_str()})", style='wheat4 dim')
@@ -168,7 +172,11 @@ class Person:
             else:
                 return None
         else:
-            return Text(self.info_str())
+            return Text(self.info_str(), style=self.style())
+    def internal_link(self) -> Text:
+        """Kind of like an anchor link to the section of the page containing these emails."""
+        return link_text_obj(internal_link_to_emails(self.name_str()), self.name_str(), style=self.style())
     def is_a_mystery(self) -> bool:
         """Return True if this is someone we theroetically could know more about."""
@@ -237,7 +245,13 @@ class Person:
         console.line()
     def sort_key(self) -> list[int | str]:
-        counts = [len(self.unique_emails()), int(self.has_any_epstein_emails())]
+        counts = [
+            len(self.unique_emails()),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO_NO_CONTACT),
+            -1 * int((self.info_str() or '') == UNINTERESTING_CC_INFO),
+            int(self.has_any_epstein_emails()),
+        ]
         counts = [-1 * count for count in counts]
         if args.sort_alphabetical:
@@ -276,6 +290,11 @@ class Person:
         highlighted = highlighted or people
         highlighted_names = [p.name for p in highlighted]
         is_selection = len(people) != len(highlighted) or args.emailers_info
+        all_emails = Document.uniquify(flatten([list(p.unique_emails()) for p in people]))
+        email_authors = [p for p in people if p.emails_by() and p.name]
+        attributed_emails = [email for email in all_emails if email.author]
+        footer = f"(identified {len(email_authors)} authors of {len(attributed_emails):,}" \
+                 f" out of {len(all_emails):,} emails, {len(all_emails) - len(attributed_emails)} still unknown)"
         if is_selection:
             title = Text(f"{EMAILER_INFO_TITLE} in This Order for the Highlighted Names (", style=TABLE_TITLE_STYLE)
@@ -283,7 +302,7 @@ class Person:
         else:
             title = f"{EMAILER_INFO_TITLE} in Chronological Order Based on Timestamp of First Email"
-        table = build_table(title)
+        table = build_table(title, caption=footer)
         table.add_column('First')
         table.add_column('Name', max_width=24, no_wrap=True)
         table.add_column('Category', justify='left', style='dim italic')
@@ -298,6 +317,7 @@ class Person:
         for person in people:
             earliest_email_date = person.earliest_email_date()
+            is_on_page = False if show_epstein_total else person.name in highlighted_names
             year_months = (earliest_email_date.year * 12) + earliest_email_date.month
             # Color year rollovers more brightly
@@ -311,14 +331,14 @@ class Person:
             table.add_row(
                 Text(str(earliest_email_date), style=f"grey{GREY_NUMBERS[0 if is_selection else grey_idx]}"),
-                person.name_txt(),  # TODO: make link?
+                person.internal_link() if is_on_page and not person.is_uninteresting_cc else person.name_txt(),
                 person.category_txt(),
                 f"{len(person.unique_emails() if show_epstein_total else person._unique_printable_emails())}",
                 Text(f"{len(person.unique_emails_by())}", style='dim' if len(person.unique_emails_by()) == 0 else ''),
                 Text(f"{len(person.unique_emails_to())}", style='dim' if len(person.unique_emails_to()) == 0 else ''),
                 f"{person.email_conversation_length_in_days()}",
                 person.info_txt() or '',
-                style='' if person.name in highlighted_names else 'dim',
+                style='' if show_epstein_total or is_on_page else 'dim',
             )
         return table

epstein_files/util/constant/names.py CHANGED Viewed

@@ -61,6 +61,7 @@ DIANE_ZIMAN = 'Diane Ziman'
 DONALD_TRUMP = 'Donald Trump'
 EDUARDO_ROBLES = 'Eduardo Robles'
 EDWARD_JAY_EPSTEIN = 'Edward Jay Epstein'
+EDWARD_ROD_LARSEN = 'Edward Rod Larsen'
 EHUD_BARAK = 'Ehud Barak'
 ERIC_ROTH = 'Eric Roth'
 FAITH_KATES = 'Faith Kates'
@@ -129,6 +130,7 @@ MOSHE_HOFFMAN = 'Moshe Hoffman'
 NADIA_MARCINKO = 'Nadia Marcinko'
 NEAL_KASSELL = 'Neal Kassell'
 NICHOLAS_RIBIS = 'Nicholas Ribis'
+NILI_PRIELL_BARAK = 'Nili Priell Barak'
 NOAM_CHOMSKY = 'Noam Chomsky'
 NORMAN_D_RAU = 'Norman D. Rau'
 OLIVIER_COLOM = 'Olivier Colom'
@@ -215,13 +217,13 @@ UBS = 'UBS'
 # First and last names that should be made part of a highlighting regex for emailers
 NAMES_TO_NOT_HIGHLIGHT = """
     al alain alan alfredo allen alex alexander amanda andres andrew anthony
-    bard barrett barry bennet bill black bob boris brad bruce
+    bard barrett barry bennet bernard bill black bob boris brad brenner bruce
     caroline carolyn chris christina cohen
-    dan daniel danny darren dave david donald
+    dan daniel danny darren dave david debbie donald
     ed edward edwards enforcement enterprise enterprises entourage epstein eric erika etienne
-    faith forget fred friendly frost fuller
-    gerald george gold gordon
-    haddad harry hay heather henry hill hoffman
+    faith fisher forget fred friendly frost fuller
+    gates gerald george gold gordon
+    haddad harry hay heather henry hill hoffman howard
     ian ivan
     jack james jay jean jeff jeffrey jennifer jeremy jessica joel john jon jonathan joseph jr
     kahn karl kate katherine kelly ken kevin krassner
@@ -230,7 +232,7 @@ NAMES_TO_NOT_HIGHLIGHT = """
     nancy neal new nicole norman
     owen
     paul paula pen peter philip prince
-    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubin
+    randall rangel reid richard robert rodriguez roger rosenberg ross roth roy rubenstein rubin
     scott sean skip stanley stern stephen steve steven stone susan
     the thomas tim tom tony tyler
     victor
@@ -243,7 +245,7 @@ OTHER_NAMES = NAMES_TO_NOT_HIGHLIGHT + """
     aaron albert alberto alec alexandra alice anderson andre ann anna anne ariana arthur
     baldwin barack barrett ben benjamin berger bert binant bob bonner boyden bradley brady branson bright bruno bryant burton
     chapman charles charlie christopher clint cohen colin collins conway
-    davis dean debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
+    davis dean debbie debra deborah dennis diana diane diaz dickinson dixon dominique don dylan
     edmond elizabeth emily entwistle erik evelyn
     ferguson flachsbart francis franco frank
     gardner gary geoff geoffrey gilbert gloria goldberg gonzalez gould graham greene guarino gwyneth

epstein_files/util/constant/output_files.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from pathlib import Path
 from epstein_files.util.constant.strings import EMAIL, TEXT_MESSAGE, SiteType
+from epstein_files.util.logging import logger
 # Files output by the code
 HTML_DIR = Path('docs')
@@ -16,9 +17,10 @@ URLS_ENV = '.urls.env'
 EMAILERS_TABLE_PNG_PATH = HTML_DIR.joinpath('emailers_info_table.png')
 # Deployment URLS
-# NOTE: don't rename these variables without changing deploy.sh!
+# NOTE: don't rename these variables without changing deploy.sh
+GH_REPO_NAME = 'epstein_text_messages'
 GH_PAGES_BASE_URL = 'https://michelcrypt4d4mus.github.io'
-TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/epstein_text_messages"
+TEXT_MSGS_URL = f"{GH_PAGES_BASE_URL}/{GH_REPO_NAME}"
 ALL_EMAILS_URL = f"{TEXT_MSGS_URL}/{ALL_EMAILS_PATH.name}"
 CHRONOLOGICAL_EMAILS_URL = f"{TEXT_MSGS_URL}/{CHRONOLOGICAL_EMAILS_PATH.name}"
 JSON_FILES_URL = f"{TEXT_MSGS_URL}/{JSON_FILES_JSON_PATH.name}"
@@ -44,6 +46,7 @@ BUILD_ARTIFACTS = [
 def make_clean() -> None:
     """Delete all build artifacts."""
     for build_file in BUILD_ARTIFACTS:
-        if build_file.exists():
-            print(f"Removing build file '{build_file}'...")
-            build_file.unlink()
+        for file in [build_file, Path(f"{build_file}.txt")]:
+            if file.exists():
+                logger.warning(f"Removing build file '{file}'...")
+                file.unlink()

epstein_files/util/constant/strings.py CHANGED Viewed

@@ -64,7 +64,8 @@ REDACTED = '<REDACTED>'
 QUESTION_MARKS = '(???)'
 # Regexes
-FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
+ID_REGEX = re.compile(r"\d{6}(_\d{1,2})?")
+FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}({ID_REGEX.pattern})")
 FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
 QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')

epstein_files/util/constant/urls.py CHANGED Viewed

@@ -6,6 +6,7 @@ from inflection import parameterize
 from rich.text import Text
 from epstein_files.util.constant.output_files import *
+from epstein_files.util.constant.strings import remove_question_marks
 from epstein_files.util.env import args
 from epstein_files.util.file_helper import coerce_file_stem
@@ -22,10 +23,11 @@ JMAIL = 'Jmail'
 ROLLCALL = 'RollCall'
 TWITTER = 'search X'
-GH_PROJECT_URL = 'https://github.com/michelcrypt4d4mus/epstein_text_messages'
+GH_PROJECT_URL = f'https://github.com/michelcrypt4d4mus/{GH_REPO_NAME}'
 GH_MASTER_URL = f"{GH_PROJECT_URL}/blob/master"
 ATTRIBUTIONS_URL = f'{GH_MASTER_URL}/epstein_files/util/constants.py'
 EXTRACTS_BASE_URL = f'{GH_MASTER_URL}/emails_extracted_from_legal_filings'
+TO_FROM = 'to/from'
 extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
@@ -72,7 +74,6 @@ rollcall_doc_url = lambda file_stem: build_doc_url(DOC_LINK_BASE_URLS[ROLLCALL],
 search_jmail_url = lambda txt: f"{JMAIL_URL}/search?q={urllib.parse.quote(txt)}"
 search_twitter_url = lambda txt: f"https://x.com/search?q={urllib.parse.quote(txt)}&src=typed_query&f=live"
 PERSON_LINK_BUILDERS: dict[ExternalSite, Callable[[str], str]] = {
     EPSTEIN_MEDIA: epstein_media_person_url,
     EPSTEIN_WEB: epstein_web_person_url,
@@ -98,6 +99,12 @@ def external_doc_link_txt(site: ExternalSite, filename_or_id: int | str, style:
     return Text.from_markup(external_doc_link_markup(site, filename_or_id, style))
+def internal_link_to_emails(name: str) -> str:
+    """e.g. https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html#:~:text=to%2Ffrom%20Jack%20Goldberger"""
+    search_term = urllib.parse.quote(f"{TO_FROM} {remove_question_marks(name)}")
+    return f"{this_site_url()}#:~:text={search_term}"
 def link_markup(
     url: str,
     link_text: str | None = None,
@@ -121,6 +128,10 @@ def other_site_url() -> str:
     return SITE_URLS[other_site_type()]
+def this_site_url() -> str:
+    return SITE_URLS[EMAIL if other_site_type() == TEXT_MESSAGE else TEXT_MESSAGE]
 CRYPTADAMUS_TWITTER = link_markup('https://x.com/cryptadamist', '@cryptadamist')
 THE_OTHER_PAGE_MARKUP = link_markup(other_site_url(), 'the other page', style='light_slate_grey bold')
 THE_OTHER_PAGE_TXT = Text.from_markup(THE_OTHER_PAGE_MARKUP)

epstein_files/util/constants.py CHANGED Viewed

@@ -39,6 +39,7 @@ HEADER_ABBREVIATIONS = {
     'MBZ': "Mohamed bin Zayed Al Nahyan (Emirates sheikh)",
     "Miro": MIROSLAV_LAJCAK,
     "Mooch": "Anthony 'The Mooch' Scaramucci (Skybridge crypto bro)",
+    "NPA": 'non-prosecution agreement',
     "Terje": TERJE_ROD_LARSEN,
     "VI": f"U.S. {VIRGIN_ISLANDS}",
     "Woody": "Woody Allen",
@@ -52,14 +53,14 @@ HEADER_ABBREVIATIONS = {
 # Emailers
 EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
-    ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|tz)|AlanDersh', re.IGNORECASE),
+    ALAN_DERSHOWITZ: re.compile(r'(alan.{1,7})?dershowi(lz?|t?z)|AlanDersh', re.IGNORECASE),
     ALIREZA_ITTIHADIEH: re.compile(r'Alireza.[Il]ttihadieh', re.IGNORECASE),
     AMANDA_ENS: re.compile(r'ens, amanda?|Amanda.Ens', re.IGNORECASE),
     ANAS_ALRASHEED: re.compile(r'anas\s*al\s*rashee[cd]', re.IGNORECASE),
     ANIL_AMBANI: re.compile(r'Anil.Ambani', re.IGNORECASE),
     ANN_MARIE_VILLAFANA: re.compile(r'Villafana, Ann Marie|(A(\.|nn) Marie )?Villafa(c|n|ri)a', re.IGNORECASE),
     ANTHONY_SCARAMUCCI: re.compile(r"mooch|(Anthony ('The Mooch' )?)?Scaramucci", re.IGNORECASE),
-    ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) de )?Rothschild|Ariane', re.IGNORECASE),
+    ARIANE_DE_ROTHSCHILD: re.compile(r'AdeR|((Ariane|Edmond) (de )?)?Roths(ch|hc)?ild|Ariane', re.IGNORECASE),
     BARBRO_C_EHNBOM: re.compile(r'behnbom@aol.com|(Barbro\s.*)?Ehnbom', re.IGNORECASE),
     BARRY_J_COHEN: re.compile(r'barry\s*((j.?|james)\s*)?cohen?', re.IGNORECASE),
     BENNET_MOSKOWITZ: re.compile(r'Moskowitz.*Bennet|Bennet.*Moskowitz', re.IGNORECASE),
@@ -86,7 +87,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     JABOR_Y: re.compile(r'[ji]abor\s*y?', re.IGNORECASE),
     JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
     JANUSZ_BANASIAK: re.compile(r"Janu[is]z Banasiak", re.IGNORECASE),
-    JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
+    JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?|JeanLuc', re.IGNORECASE),
     JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
     JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?( VI Foundation)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!(Mark L.|ard Jay) )Epstein', re.IGNORECASE),
     JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
@@ -104,7 +105,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     LISA_NEW: re.compile(r'E?Lisa New?\b', re.IGNORECASE),
     MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
     MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
-    MARK_EPSTEIN: re.compile(r'Mark (L\. )?Epstein', re.IGNORECASE),
+    MARK_EPSTEIN: re.compile(r'Mark (L\. )?(Epstein|Lloyd)', re.IGNORECASE),
     MARC_LEON: re.compile(r'Marc[.\s]+(Kensington|Leon)|Kensington2', re.IGNORECASE),
     MARTIN_NOWAK: re.compile(r'(Martin.*?)?No[vw]ak|Nowak, Martin', re.IGNORECASE),
     MARTIN_WEINBERG: re.compile(r'martin.*?weinberg', re.IGNORECASE),
@@ -113,7 +114,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
     MICHAEL_BUCHHOLTZ: re.compile(r'Michael.*Buchholtz', re.IGNORECASE),
     MICHAEL_MILLER: re.compile(r'Micha(el)? Miller|Miller, Micha(el)?', re.IGNORECASE),
     MICHAEL_SITRICK: re.compile(r'(Mi(chael|ke).{0,5})?[CS]itrick', re.IGNORECASE),
-    MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]|i)|Wolff', re.IGNORECASE),
+    MICHAEL_WOLFF: re.compile(r'Michael\s*Wol(f[ef]e?|i)|Wolff', re.IGNORECASE),
     MIROSLAV_LAJCAK: re.compile(r"Miro(slav)?(\s+Laj[cč][aá]k)?"),
     MOHAMED_WAHEED_HASSAN: re.compile(r'Mohamed Waheed(\s+Hassan)?', re.IGNORECASE),
     NADIA_MARCINKO: re.compile(r"Na[dď]i?a\s+Marcinko(v[aá])?", re.IGNORECASE),
@@ -195,6 +196,7 @@ EMAILERS = [
     'Peter Aldhous',
     'Peter Green',
     ROGER_SCHANK,
+    'Roy Black',
     STEVEN_PFEIFFER,
     'Steven Victor MD',
     'Susan Edelman',
@@ -513,7 +515,7 @@ EMAILS_CONFIG = [
         recipients=['George Krassner', 'Nick Kazan', 'Mrisman02', 'Rebecca Risman', 'Linda W. Grossman'],
         duplicate_ids=['031973']
     ),
-    EmailCfg(id='032457', author=PAUL_KRASSNER),  # Bad OCR (nofix)
+    EmailCfg(id='032457', author=PAUL_KRASSNER, recipients=[JEFFREY_EPSTEIN, 'Nancy Cain']),  # Bad OCR (nofix)
     EmailCfg(id='029981', author=PAULA, attribution_reason='Name in reply + opera reference (Fisher now works in opera)'),
     EmailCfg(id='030482', author=PAULA, attribution_reason=PAULA_REASON),
     EmailCfg(id='033383', author=PAUL_PROSPERI, attribution_reason='Reply'),
@@ -643,7 +645,16 @@ EMAILS_CONFIG = [
     EmailCfg(id='029344', actual_text='I thought of you when I read this article. Was this your idea? Alan'),
     EmailCfg(id='032358', actual_text=REDACTED),  # Completely redacted
     EmailCfg(id='033050', actual_text='schwartman'),
+    EmailCfg(id='031036', description=f"{BARBRO_C_EHNBOM} related donation and Swedish girls discussion"),
     EmailCfg(id='022219', description="discussion of attempts to clean up Epstein's Google search results"),
+    EmailCfg(id='030648', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='030762', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='030649', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='026026', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='026030', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='026033', description="is the 'roger' Epstein is trying to meet Roger Stone?"),
+    EmailCfg(id='031320', description=f"Epstein and {RICHARD_KAHN} appear to be discussing routing donatings through {PEGGY_SIEGAL}"),
+    EmailCfg(id='016693', description='signed "MM"'),
     EmailCfg(id='028524', is_fwded_article=True, description='Zach Braff op-ed on Woody Allen in NYT'),
     EmailCfg(id='031333', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'),  # Russia Says IMF Chief Jailed For Discovering All US Gold is Gone
     EmailCfg(id='031335', is_fwded_article=True, description='Fort Knox conspiracy theory, looks like a Russian disinfo article'),  # DOMINQUE STRAUSS-KAHN ARRESTED, NOT BECAUSE HE RAPED A MAID, BUT BECAUSE HE HAD EVIDENCE US HAS NO GOLD IN FORT KNOX.
@@ -661,6 +672,9 @@ EMAILS_CONFIG = [
     EmailCfg(id='032023', is_fwded_article=True, duplicate_ids=['032012']),  # American-Israeli Cooperative Enterprise Newsletter
     EmailCfg(id='021758', is_fwded_article=True, duplicate_ids=['030616']),  # Radar Online article about Epstein's early prison release
     EmailCfg(id='033297', is_fwded_article=True, duplicate_ids=['033586']),  # Sultan Sulayem fwding article about Trump and Russia
+    EmailCfg(id='026829', is_fwded_article=True),  # Taxes
+    EmailCfg(id='020443', is_fwded_article=True),  # WSJ Deplorables Bannon
+    EmailCfg(id='030372', is_fwded_article=True),  # Bannon China Iran
     EmailCfg(id='030983', is_fwded_article=True),  # Power Line blog Alex Acosta and Jeffrey Epstein Plea Deal Analysis
     EmailCfg(id='031774', is_fwded_article=True),  # Krassner fwd of Palmer Report article
     EmailCfg(id='033345', is_fwded_article=True),  # Krassner fwd of Palmer Report article
@@ -711,6 +725,8 @@ EMAILS_CONFIG = [
     EmailCfg(id='031340', is_fwded_article=True),  # Article about Alex Jones threatening Robert Mueller
     EmailCfg(id='030209', is_fwded_article=True),  # Atlantic Council  Syria: Blackberry Diplomacy
     EmailCfg(id='026605', is_fwded_article=True),  # Article about Ruemmler turning down attorney general job by NEDRA PICKLER
+    EmailCfg(id='031990', is_fwded_article=True),  # newsmax on ken starr
+    EmailCfg(id='029433', is_fwded_article=True),  # Estate Planning After the Enactment of the Tax Cuts and Jobs Act
     EmailCfg(id='032475', timestamp=parse('2017-02-15 13:31:25')),
     EmailCfg(id='030373', timestamp=parse('2018-10-03 01:49:27')),
@@ -733,12 +749,12 @@ EMAILS_CONFIG = [
     EmailCfg(id='033512', duplicate_ids=['033361']),
     EmailCfg(id='030299', duplicate_ids=['021794']),
     EmailCfg(id='033575', duplicate_ids=['012898']),
-    EmailCfg(id='031428', duplicate_ids=['031388']),
+    EmailCfg(id='031428', is_fwded_article=True, duplicate_ids=['031388']),
     EmailCfg(id='031980', duplicate_ids=['019409']),
     EmailCfg(id='033486', duplicate_ids=['033156']),
     EmailCfg(id='025790', duplicate_ids=['031994']),
     EmailCfg(id='028497', duplicate_ids=['026228']),
-    EmailCfg(id='033528', duplicate_ids=['033517']),
+    EmailCfg(id='033528', is_fwded_article=True, duplicate_ids=['033517']),
     EmailCfg(id='019412', duplicate_ids=['028621']),
     EmailCfg(id='027053', duplicate_ids=['028765']),
     EmailCfg(id='027049', duplicate_ids=['028773']),
@@ -1355,7 +1371,12 @@ OTHER_FILES_FINANCE = [
     DocCfg(id='012048', description=f"{PRESS_RELEASE} 'Rockefeller Partners with Gregory J. Fleming to Create Independent Financial Services Firm' and other articles"),
     # private placement memoranda
-    DocCfg(id='024432', description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"),
+    DocCfg(
+        id='024432',
+        date='2006-09-27',
+        description=f"Michael Milken's Knowledge Universe Education (KUE) $1,000,000 corporate share placement notice (SEC filing?)"
+    ),
     DocCfg(id='024003', description=f"New Leaf Ventures ($375 million biotech fund) private placement memorandum"),
 ]
@@ -1689,13 +1710,16 @@ for cfg in ALL_CONFIGS:
 # Email related regexes (have to be here for circular dependencies reasons)
 FORWARDED_LINE_PATTERN = r"-+ ?(Forwarded|Original)\s*Message ?-*|Begin forwarded message:?"
+FRENCH_REPLY_PATTERN = r"Le .* a ecrit:"
+GERMAN_REPLY_PATTERN = r"Am \d\d\.\d\d\..*schrieb.*"
+NORWEGAIN_REPLY_PATTERN = r"(Den .* folgende|(fre|lor|son)\. .* skrev .*):"
 REPLY_LINE_IN_A_MSG_PATTERN = r"In a message dated \d+/\d+/\d+.*writes:"
 REPLY_LINE_ENDING_PATTERN = r"[_ \n](AM|PM|[<_]|wrote:?)"
 REPLY_LINE_ON_NUMERIC_DATE_PATTERN = fr"On \d+/\d+/\d+[, ].*{REPLY_LINE_ENDING_PATTERN}"
 REPLY_LINE_ON_DATE_PATTERN = fr"^On (\d+ )?((Mon|Tues?|Wed(nes)?|Thu(rs)?|Fri|Sat(ur)?|Sun)(day)?|(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)[, ].*{REPLY_LINE_ENDING_PATTERN}"
-REPLY_LINE_PATTERN = rf"({REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
+REPLY_LINE_PATTERN = rf"({FRENCH_REPLY_PATTERN}|{GERMAN_REPLY_PATTERN}|{NORWEGAIN_REPLY_PATTERN}|{REPLY_LINE_IN_A_MSG_PATTERN}|{REPLY_LINE_ON_NUMERIC_DATE_PATTERN}|{REPLY_LINE_ON_DATE_PATTERN}|{FORWARDED_LINE_PATTERN})"
 REPLY_REGEX = re.compile(REPLY_LINE_PATTERN, re.IGNORECASE | re.MULTILINE)
-SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)', re.M | re.I)
+SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4})?((Envoyé de mon|Sent (from|via)).*(and string|AT&T|Droid|iPad|Phone|Mail|BlackBerry(.*(smartphone|device|Handheld|AT&T|T- ?Mobile))?)\.?)|Co-authored with iPhone auto-correct', re.M | re.I)
 # No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients

epstein_files/util/data.py CHANGED Viewed

@@ -22,6 +22,7 @@ ALL_NAMES = [v for k, v in vars(names).items() if isinstance(v, str) and CONSTAN
 PACIFIC_TZ = tz.gettz("America/Los_Angeles")
 TIMEZONE_INFO = {"PDT": PACIFIC_TZ, "PST": PACIFIC_TZ}  # Suppresses annoying warnings from parse() calls
+all_elements_same = lambda _list: len(_list) == 0 or all(x == _list[0] for x in _list)
 collapse_newlines = lambda text: MULTINEWLINE_REGEX.sub('\n\n', text)
 date_str = lambda dt: dt.isoformat()[0:10] if dt else None
 escape_double_quotes = lambda text: text.replace('"', r'\"')

epstein_files/util/env.py CHANGED Viewed

@@ -38,7 +38,7 @@ output.add_argument('--all-emails', '-ae', action='store_true', help='all the em
 output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
 parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
 output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
-output.add_argument('--emailers-info', action='store_true', help='write a .png of the eeailers info table')
+output.add_argument('--emailers-info', '-ei', action='store_true', help='write a .png of the eeailers info table')
 output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
 output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
 output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
@@ -61,31 +61,34 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
 debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
 debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
 debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
+debug.add_argument('--truncate', '-t', type=int, help='truncate emails to this many characters')
+debug.add_argument('--write-txt', '-wt', action='store_true', help='write a plain text version of output')
 # Parse args
 args = parser.parse_args()
 is_html_script = parser.prog in HTML_SCRIPTS
-args.build = args.build
 args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
 args.names = [None if n == 'None' else n.strip() for n in (args.names or [])]
 args.output_emails = args.output_emails or args.all_emails
 args.output_other = args.output_other or args.all_other_files or args.uninteresting
 args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
 args.width = args.width if is_html_script else None
+args.any_output_selected = any([is_output_arg(arg) and val for arg, val in vars(args).items()])
+if not (args.any_output_selected or args.email_timeline or args.emailers_info):
+    logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
+    args.output_emails = args.output_other = args.output_texts = True
 if is_html_script:
     if args.positional_args:
         exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
     if parser.prog == EPSTEIN_GENERATE:
-        if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
+        if args.any_output_selected:
             if args.email_timeline:
                 exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
-        elif not args.email_timeline and not args.emailers_info:
-            logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
-            args.output_texts = args.output_emails = args.output_other = True
     if args.build == DEFAULT_FILE:
         if args.all_emails:

epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl

epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl