epstein-files 1.0.3__tar.gz → 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.3 → epstein_files-1.0.5}/PKG-INFO +38 -22
- epstein_files-1.0.5/README.md +69 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/__init__.py +5 -10
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/document.py +1 -2
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/email.py +15 -10
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/epstein_files.py +18 -18
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/output_files.py +2 -3
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/strings.py +8 -7
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/urls.py +1 -1
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constants.py +19 -18
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/doc_cfg.py +3 -1
- epstein_files-1.0.5/epstein_files/util/env.py +84 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/highlighted_group.py +4 -3
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/output.py +5 -10
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/rich.py +9 -5
- {epstein_files-1.0.3 → epstein_files-1.0.5}/pyproject.toml +1 -1
- epstein_files-1.0.3/README.md +0 -53
- epstein_files-1.0.3/epstein_files/util/env.py +0 -80
- {epstein_files-1.0.3 → epstein_files-1.0.5}/LICENSE +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/imessage/text_message.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/json_file.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/messenger_log.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/other_file.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/constant/names.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/data.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/file_helper.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/logging.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/timer.py +0 -0
- {epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/util/word_count.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
5
|
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -32,35 +32,51 @@ Description-Content-Type: text/markdown
|
|
|
32
32
|
|
|
33
33
|
# I Made Epstein's Text Messages Great Again
|
|
34
34
|
|
|
35
|
+

|
|
36
|
+
|
|
35
37
|
* [I Made Epstein's Text Messages Great Again (And You Should Read Them)](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great) post on [Substack](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great)
|
|
36
|
-
* The Epstein text messages (and some of the emails along with summary
|
|
37
|
-
* All of His Emails can be read at another page also generated by this code
|
|
38
|
-
* Word counts for the
|
|
38
|
+
* The Epstein text messages (and some of the emails along with summary information) generated by this code can be viewed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/).
|
|
39
|
+
* All of His Emails along with descriptions of the 496 files that were neither emails nor text messages can be read at [another page also generated by this code](https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html).
|
|
40
|
+
* Word counts for the communications are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html).
|
|
39
41
|
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json)
|
|
40
|
-
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `111111` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py`
|
|
42
|
+
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `111111` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py` appears in the JSON metadata linked above.
|
|
41
43
|
|
|
42
44
|
|
|
43
|
-
|
|
44
|
-
1. Requires you have a local copy of OCR text from the House Oversight document
|
|
45
|
-
1. Dependencies are in [pyproject.toml](./pyproject.toml). Use `poetry install` for easiest time installing. `pip install
|
|
45
|
+
## Usage
|
|
46
|
+
1. Requires you have a local copy of the OCR text files from the House Oversight document release in a directory `/path/to/epstein/ocr_txt_files`. You can download those OCR text files from [the Congressional Google Drive folder](https://drive.google.com/drive/folders/1ldncvdqIf6miiskDp_EDuGSDAaI_fJx8).
|
|
47
|
+
1. Dependencies are in [pyproject.toml](./pyproject.toml). Use `poetry install` for easiest time installing. `pip install epstein-files` should also work, though `pipx install epstein-files` is usually better.
|
|
46
48
|
|
|
47
|
-
You need to set the `
|
|
49
|
+
You need to set the `EPSTEIN_DOCS_DIR` environment variable with the path to the folder of files you just downloaded when running. You can either create a `.env` file modeled on [`.env.example`](./.env.example) (which will set it permanently) or you can run with:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
EPSTEIN_DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_generate --help
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
All the tools that come with the package require `EPSTEIN_DOCS_DIR` to be set. These are the available tools:
|
|
48
56
|
|
|
49
57
|
```bash
|
|
50
58
|
# Generate color highlighted texts/emails/other files
|
|
51
|
-
|
|
59
|
+
epstein_generate
|
|
60
|
+
|
|
61
|
+
# Search for a string:
|
|
62
|
+
epstein_search Bannon
|
|
63
|
+
# Or a regex:
|
|
64
|
+
epstein_search '\bSteve\s*Bannon\b'
|
|
52
65
|
|
|
53
|
-
#
|
|
54
|
-
|
|
66
|
+
# Show a file with color highlighting of keywords
|
|
67
|
+
epstein_show 030999
|
|
68
|
+
# Show both the highlighted and raw versions of the file:
|
|
69
|
+
epstein_show --raw 030999
|
|
70
|
+
# This also works:
|
|
71
|
+
epstein_show HOUSE_OVERSIGHT_030999
|
|
55
72
|
|
|
56
|
-
#
|
|
57
|
-
|
|
58
|
-
# This also works
|
|
59
|
-
DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_show HOUSE_OVERSIGHT_030999
|
|
73
|
+
# Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
|
|
74
|
+
epstein_diff 030999 020442
|
|
60
75
|
```
|
|
61
76
|
|
|
77
|
+
The first time you run anything it will take a few minutes to fix all the data, attribute the redacted emails, etc.
|
|
62
78
|
Run `epstein_generate --help` for command line option assistance.
|
|
63
|
-
|
|
79
|
+
|
|
64
80
|
|
|
65
81
|
#### As A Library
|
|
66
82
|
```python
|
|
@@ -69,18 +85,18 @@ epstein_files = EpsteinFiles.get_files()
|
|
|
69
85
|
|
|
70
86
|
# All files
|
|
71
87
|
for document in epstein_files.all_documents():
|
|
72
|
-
do_stuff()
|
|
88
|
+
do_stuff(document)
|
|
73
89
|
|
|
74
90
|
# Emails
|
|
75
91
|
for email in epstein_files.emails:
|
|
76
|
-
do_stuff()
|
|
92
|
+
do_stuff(email)
|
|
77
93
|
|
|
78
94
|
# iMessage Logs
|
|
79
95
|
for imessage_log in epstein_files.imessage_logs:
|
|
80
|
-
do_stuff()
|
|
96
|
+
do_stuff(imessage_log)
|
|
81
97
|
|
|
82
98
|
# Other Files
|
|
83
|
-
for
|
|
84
|
-
do_stuff()
|
|
99
|
+
for file in epstein_files.other_files:
|
|
100
|
+
do_stuff(file)
|
|
85
101
|
```
|
|
86
102
|
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# I Made Epstein's Text Messages Great Again
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
* [I Made Epstein's Text Messages Great Again (And You Should Read Them)](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great) post on [Substack](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great)
|
|
6
|
+
* The Epstein text messages (and some of the emails along with summary information) generated by this code can be viewed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/).
|
|
7
|
+
* All of His Emails along with descriptions of the 496 files that were neither emails nor text messages can be read at [another page also generated by this code](https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html).
|
|
8
|
+
* Word counts for the communications are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html).
|
|
9
|
+
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json)
|
|
10
|
+
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `111111` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py` appears in the JSON metadata linked above.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
1. Requires you have a local copy of the OCR text files from the House Oversight document release in a directory `/path/to/epstein/ocr_txt_files`. You can download those OCR text files from [the Congressional Google Drive folder](https://drive.google.com/drive/folders/1ldncvdqIf6miiskDp_EDuGSDAaI_fJx8).
|
|
15
|
+
1. Dependencies are in [pyproject.toml](./pyproject.toml). Use `poetry install` for easiest time installing. `pip install epstein-files` should also work, though `pipx install epstein-files` is usually better.
|
|
16
|
+
|
|
17
|
+
You need to set the `EPSTEIN_DOCS_DIR` environment variable with the path to the folder of files you just downloaded when running. You can either create a `.env` file modeled on [`.env.example`](./.env.example) (which will set it permanently) or you can run with:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
EPSTEIN_DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_generate --help
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
All the tools that come with the package require `EPSTEIN_DOCS_DIR` to be set. These are the available tools:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Generate color highlighted texts/emails/other files
|
|
27
|
+
epstein_generate
|
|
28
|
+
|
|
29
|
+
# Search for a string:
|
|
30
|
+
epstein_search Bannon
|
|
31
|
+
# Or a regex:
|
|
32
|
+
epstein_search '\bSteve\s*Bannon\b'
|
|
33
|
+
|
|
34
|
+
# Show a file with color highlighting of keywords
|
|
35
|
+
epstein_show 030999
|
|
36
|
+
# Show both the highlighted and raw versions of the file:
|
|
37
|
+
epstein_show --raw 030999
|
|
38
|
+
# This also works:
|
|
39
|
+
epstein_show HOUSE_OVERSIGHT_030999
|
|
40
|
+
|
|
41
|
+
# Diff two epstein files after all the cleanup (stripping BOMs, matching newline chars, etc):
|
|
42
|
+
epstein_diff 030999 020442
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The first time you run anything it will take a few minutes to fix all the data, attribute the redacted emails, etc.
|
|
46
|
+
Run `epstein_generate --help` for command line option assistance.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
#### As A Library
|
|
50
|
+
```python
|
|
51
|
+
from epstein_files.epstein_files import EpsteinFiles
|
|
52
|
+
epstein_files = EpsteinFiles.get_files()
|
|
53
|
+
|
|
54
|
+
# All files
|
|
55
|
+
for document in epstein_files.all_documents():
|
|
56
|
+
do_stuff(document)
|
|
57
|
+
|
|
58
|
+
# Emails
|
|
59
|
+
for email in epstein_files.emails:
|
|
60
|
+
do_stuff(email)
|
|
61
|
+
|
|
62
|
+
# iMessage Logs
|
|
63
|
+
for imessage_log in epstein_files.imessage_logs:
|
|
64
|
+
do_stuff(imessage_log)
|
|
65
|
+
|
|
66
|
+
# Other Files
|
|
67
|
+
for file in epstein_files.other_files:
|
|
68
|
+
do_stuff(file)
|
|
69
|
+
```
|
|
@@ -75,7 +75,7 @@ def epstein_diff():
|
|
|
75
75
|
def epstein_search():
|
|
76
76
|
"""Search the cleaned up text of the files."""
|
|
77
77
|
_assert_positional_args()
|
|
78
|
-
epstein_files = EpsteinFiles.get_files(
|
|
78
|
+
epstein_files = EpsteinFiles.get_files()
|
|
79
79
|
|
|
80
80
|
for search_term in args.positional_args:
|
|
81
81
|
temp_highlighter = build_highlighter(search_term)
|
|
@@ -103,27 +103,22 @@ def epstein_show():
|
|
|
103
103
|
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
104
104
|
_assert_positional_args()
|
|
105
105
|
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
106
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
107
|
+
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
106
108
|
console.line()
|
|
107
109
|
|
|
108
|
-
if args.pickled:
|
|
109
|
-
epstein_files = EpsteinFiles.get_files(use_pickled=True)
|
|
110
|
-
docs = epstein_files.get_documents_by_id(ids)
|
|
111
|
-
else:
|
|
112
|
-
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
113
|
-
docs = [document_cls(doc)(doc.file_path) for doc in raw_docs]
|
|
114
|
-
|
|
115
110
|
for doc in docs:
|
|
116
111
|
console.line()
|
|
117
112
|
console.print(doc)
|
|
118
113
|
|
|
119
114
|
if args.raw:
|
|
120
115
|
console.line()
|
|
121
|
-
console.print(Panel(f"
|
|
116
|
+
console.print(Panel(f"RAW {doc.filename} RAW", expand=False, style=doc._border_style()))
|
|
122
117
|
console.print(escape(doc.raw_text()))
|
|
123
118
|
|
|
124
119
|
if isinstance(doc, Email):
|
|
125
120
|
console.line()
|
|
126
|
-
console.print(Panel(f"
|
|
121
|
+
console.print(Panel(f"{doc.filename}: actual_text() output", expand=False, style=doc._border_style()))
|
|
127
122
|
console.print(escape(doc._actual_text()))
|
|
128
123
|
|
|
129
124
|
|
|
@@ -85,10 +85,9 @@ class Document:
|
|
|
85
85
|
|
|
86
86
|
if self.is_local_extract_file():
|
|
87
87
|
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
88
|
-
cfg_type = type(self.config).__name__ if self.config else None
|
|
89
88
|
|
|
90
89
|
# Coerce FileConfig for court docs etc. to MessageCfg for email files extracted from that document
|
|
91
|
-
if self.class_name() == EMAIL_CLASS and self.config and
|
|
90
|
+
if self.class_name() == EMAIL_CLASS and self.config and not isinstance(self.config, EmailCfg):
|
|
92
91
|
self.config = EmailCfg.from_doc_cfg(self.config)
|
|
93
92
|
else:
|
|
94
93
|
self.url_slug = self.file_path.stem
|
|
@@ -26,7 +26,7 @@ from epstein_files.util.logging import logger
|
|
|
26
26
|
from epstein_files.util.rich import *
|
|
27
27
|
|
|
28
28
|
BAD_FIRST_LINE_REGEX = re.compile(r'^(>>|Grant_Smith066474"eMailContent.htm|LOVE & KISSES)$')
|
|
29
|
-
BAD_LINE_REGEX = re.compile(r'^(>;?|\d{1,2}|Classification: External Communication|Importance:?\s*High|[iI,•]|i (_ )?i|, [-,]|L\._)$')
|
|
29
|
+
BAD_LINE_REGEX = re.compile(r'^(>;?|\d{1,2}|PAGE INTENTIONALLY LEFT BLANK|Classification: External Communication|Importance:?\s*High|[iI,•]|i (_ )?i|, [-,]|L\._)$')
|
|
30
30
|
DETECT_EMAIL_REGEX = re.compile(r'^(.*\n){0,2}From:')
|
|
31
31
|
LINK_LINE_REGEX = re.compile(f"^(> )?htt")
|
|
32
32
|
QUOTED_REPLY_LINE_REGEX = re.compile(r'wrote:\n', re.IGNORECASE)
|
|
@@ -245,12 +245,10 @@ TRUNCATE_TERMS = [
|
|
|
245
245
|
]
|
|
246
246
|
|
|
247
247
|
# Some Paul Krassner emails have a ton of CCed parties we don't care about
|
|
248
|
-
KRASSNER_RECIPIENTS = uniquify(flatten(ALL_FILE_CONFIGS[id].recipients for id in ['025329', '024923', '033568']))
|
|
248
|
+
KRASSNER_RECIPIENTS = uniquify(flatten([ALL_FILE_CONFIGS[id].recipients for id in ['025329', '024923', '033568']]))
|
|
249
249
|
|
|
250
250
|
# No point in ever displaying these; their emails show up elsewhere because they're mostly CC recipients
|
|
251
|
-
USELESS_EMAILERS =
|
|
252
|
-
KRASSNER_RECIPIENTS + \
|
|
253
|
-
FLIGHT_IN_2012_PEOPLE + [
|
|
251
|
+
USELESS_EMAILERS = FLIGHT_IN_2012_PEOPLE + IRAN_DEAL_RECIPIENTS + KRASSNER_RECIPIENTS + [
|
|
254
252
|
'Alan Rogers', # Random CC
|
|
255
253
|
'Andrew Friendly', # Presumably some relation of Kelly Friendly
|
|
256
254
|
'BS Stern', # A random fwd of email we have
|
|
@@ -322,11 +320,18 @@ class Email(Communication):
|
|
|
322
320
|
def __post_init__(self):
|
|
323
321
|
super().__post_init__()
|
|
324
322
|
|
|
325
|
-
|
|
326
|
-
self.
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
self.recipients
|
|
323
|
+
try:
|
|
324
|
+
if self.config and self.config.recipients:
|
|
325
|
+
self.recipients = cast(list[str | None], self.config.recipients)
|
|
326
|
+
else:
|
|
327
|
+
for recipient in self.header.recipients():
|
|
328
|
+
self.recipients.extend(self._get_names(recipient))
|
|
329
|
+
except Exception as e:
|
|
330
|
+
console.print_exception()
|
|
331
|
+
console.line(2)
|
|
332
|
+
logger.fatal(f"Failed on {self.file_id}")
|
|
333
|
+
console.line(2)
|
|
334
|
+
raise e
|
|
330
335
|
|
|
331
336
|
# Remove self CCs
|
|
332
337
|
recipients = [r for r in self.recipients if r != self.author or self.file_id in SELF_EMAILS_FILE_IDS]
|
|
@@ -19,7 +19,6 @@ from epstein_files.documents.emails.email_header import AUTHOR
|
|
|
19
19
|
from epstein_files.documents.json_file import JsonFile
|
|
20
20
|
from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
|
|
21
21
|
from epstein_files.documents.other_file import OtherFile
|
|
22
|
-
from epstein_files.util.constant.output_files import PICKLED_PATH
|
|
23
22
|
from epstein_files.util.constant.strings import *
|
|
24
23
|
from epstein_files.util.constant.urls import (EPSTEIN_MEDIA, EPSTEIN_WEB, JMAIL, epstein_media_person_url,
|
|
25
24
|
epsteinify_name_url, epstein_web_person_url, search_jmail_url, search_twitter_url)
|
|
@@ -35,9 +34,10 @@ from epstein_files.util.rich import (DEFAULT_NAME_STYLE, NA_TXT, add_cols_to_tab
|
|
|
35
34
|
from epstein_files.util.search_result import SearchResult
|
|
36
35
|
from epstein_files.util.timer import Timer
|
|
37
36
|
|
|
37
|
+
EXCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
|
|
38
|
+
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
38
39
|
DEVICE_SIGNATURE = 'Device Signature'
|
|
39
40
|
DEVICE_SIGNATURE_PADDING = (1, 0)
|
|
40
|
-
NOT_INCLUDED_EMAILERS = [e.lower() for e in (USELESS_EMAILERS + [JEFFREY_EPSTEIN])]
|
|
41
41
|
SLOW_FILE_SECONDS = 1.0
|
|
42
42
|
|
|
43
43
|
INVALID_FOR_EPSTEIN_WEB = JUNK_EMAILERS + KRASSNER_RECIPIENTS + [
|
|
@@ -94,23 +94,23 @@ class EpsteinFiles:
|
|
|
94
94
|
self._tally_email_data()
|
|
95
95
|
|
|
96
96
|
@classmethod
|
|
97
|
-
def get_files(cls, timer: Timer | None = None
|
|
97
|
+
def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
|
|
98
98
|
"""Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
|
|
99
99
|
timer = timer or Timer()
|
|
100
100
|
|
|
101
|
-
if
|
|
101
|
+
if PICKLED_PATH.exists() and not args.overwrite_pickle:
|
|
102
102
|
with gzip.open(PICKLED_PATH, 'rb') as file:
|
|
103
103
|
epstein_files = pickle.load(file)
|
|
104
104
|
timer.print_at_checkpoint(f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})")
|
|
105
105
|
epstein_files.timer = timer
|
|
106
106
|
return epstein_files
|
|
107
107
|
|
|
108
|
+
logger.warning(f"Building new cache file, this will take a few minutes...")
|
|
108
109
|
epstein_files = EpsteinFiles(timer=timer)
|
|
109
110
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
|
|
111
|
+
with gzip.open(PICKLED_PATH, 'wb') as file:
|
|
112
|
+
pickle.dump(epstein_files, file)
|
|
113
|
+
logger.warning(f"Pickled data to '{PICKLED_PATH}' ({file_size_str(PICKLED_PATH)})...")
|
|
114
114
|
|
|
115
115
|
timer.print_at_checkpoint(f'Processed {len(epstein_files.all_files):,} documents')
|
|
116
116
|
return epstein_files
|
|
@@ -119,9 +119,9 @@ class EpsteinFiles:
|
|
|
119
119
|
return self.imessage_logs + self.emails + self.other_files
|
|
120
120
|
|
|
121
121
|
def all_emailers(self, include_useless: bool = False) -> list[str | None]:
|
|
122
|
-
"""Returns all emailers except Epstein and
|
|
122
|
+
"""Returns all emailers except Epstein and EXCLUDED_EMAILERS, sorted from least frequent to most."""
|
|
123
123
|
names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
|
|
124
|
-
names = names if include_useless else [e for e in names if e is None or e.lower() not in NOT_INCLUDED_EMAILERS]
|
|
124
|
+
names = names if include_useless else [e for e in names if e is None or e.lower() not in EXCLUDED_EMAILERS]
|
|
125
125
|
return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
|
|
126
126
|
|
|
127
127
|
def attributed_email_count(self) -> int:
|
|
@@ -200,10 +200,10 @@ class EpsteinFiles:
|
|
|
200
200
|
def json_metadata(self) -> str:
|
|
201
201
|
"""Create a JSON string containing metadata for all the files."""
|
|
202
202
|
metadata = {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
203
|
+
Email.__name__: _sorted_metadata(self.emails),
|
|
204
|
+
JsonFile.__name__: _sorted_metadata(self.json_files),
|
|
205
|
+
MessengerLog.__name__: _sorted_metadata(self.imessage_logs),
|
|
206
|
+
OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
|
|
207
207
|
}
|
|
208
208
|
|
|
209
209
|
return json.dumps(metadata, indent=4, sort_keys=True)
|
|
@@ -372,12 +372,12 @@ def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
|
|
|
372
372
|
return counts
|
|
373
373
|
|
|
374
374
|
|
|
375
|
-
def document_cls(
|
|
376
|
-
search_area =
|
|
375
|
+
def document_cls(doc: Document) -> Type[Document]:
|
|
376
|
+
search_area = doc.text[0:5000] # Limit search area to avoid pointless scans of huge files
|
|
377
377
|
|
|
378
|
-
if
|
|
378
|
+
if doc.text[0] == '{':
|
|
379
379
|
return JsonFile
|
|
380
|
-
elif isinstance(
|
|
380
|
+
elif isinstance(doc.config, EmailCfg) or (DETECT_EMAIL_REGEX.match(search_area) and doc.config is None):
|
|
381
381
|
return Email
|
|
382
382
|
elif MSG_REGEX.search(search_area):
|
|
383
383
|
return MessengerLog
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
PICKLED_PATH = Path("the_epstein_files.pkl.gz")
|
|
4
|
-
|
|
5
|
-
EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
|
|
6
3
|
URLS_ENV = '.urls.env'
|
|
7
4
|
|
|
5
|
+
# Files output by the code
|
|
8
6
|
HTML_DIR = Path('docs')
|
|
7
|
+
EPSTEIN_FILES_NOV_2025 = 'epstein_files_nov_2025'
|
|
9
8
|
ALL_EMAILS_PATH = HTML_DIR.joinpath(f'all_emails_{EPSTEIN_FILES_NOV_2025}.html')
|
|
10
9
|
JSON_METADATA_PATH = HTML_DIR.joinpath(f'file_metadata_{EPSTEIN_FILES_NOV_2025}.json')
|
|
11
10
|
TEXT_MSGS_HTML_PATH = HTML_DIR.joinpath('index.html')
|
|
@@ -2,13 +2,6 @@ import re
|
|
|
2
2
|
from typing import Literal
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
# Document subclass names (this sucks)
|
|
6
|
-
DOCUMENT_CLASS = 'Document'
|
|
7
|
-
EMAIL_CLASS = 'Email'
|
|
8
|
-
JSON_FILE_CLASS = 'JsonFile'
|
|
9
|
-
MESSENGER_LOG_CLASS = 'MessengerLog'
|
|
10
|
-
OTHER_FILE_CLASS = 'OtherFile'
|
|
11
|
-
|
|
12
5
|
# categories
|
|
13
6
|
ACADEMIA = 'academia'
|
|
14
7
|
ARTS = 'arts'
|
|
@@ -27,6 +20,7 @@ POLITICS = 'politics'
|
|
|
27
20
|
PROPERTY = 'property'
|
|
28
21
|
PUBLICIST = 'publicist'
|
|
29
22
|
REPUTATION = 'reputation'
|
|
23
|
+
SKYPE_LOG= 'skype log'
|
|
30
24
|
SOCIAL = 'social'
|
|
31
25
|
SPEECH = 'speech'
|
|
32
26
|
|
|
@@ -76,5 +70,12 @@ FILE_STEM_REGEX = re.compile(fr"{HOUSE_OVERSIGHT_PREFIX}(\d{{6}}(_\d{{1,2}})?)")
|
|
|
76
70
|
FILE_NAME_REGEX = re.compile(fr"{FILE_STEM_REGEX.pattern}(\.txt(\.json)?)?")
|
|
77
71
|
QUESTION_MARKS_REGEX = re.compile(fr' {re.escape(QUESTION_MARKS)}$')
|
|
78
72
|
|
|
73
|
+
# Document subclass names (this sucks)
|
|
74
|
+
DOCUMENT_CLASS = 'Document'
|
|
75
|
+
EMAIL_CLASS = 'Email'
|
|
76
|
+
JSON_FILE_CLASS = 'JsonFile'
|
|
77
|
+
MESSENGER_LOG_CLASS = 'MessengerLog'
|
|
78
|
+
OTHER_FILE_CLASS = 'OtherFile'
|
|
79
|
+
|
|
79
80
|
|
|
80
81
|
remove_question_marks = lambda name: QUESTION_MARKS_REGEX.sub('', name)
|
|
@@ -47,7 +47,7 @@ extracted_file_url = lambda f: f"{EXTRACTS_BASE_URL}/{f}"
|
|
|
47
47
|
COFFEEZILLA_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=061ce61c9e70bdfd'
|
|
48
48
|
COURIER_NEWSROOM_ARCHIVE_URL = 'https://journaliststudio.google.com/pinpoint/search?collection=092314e384a58618'
|
|
49
49
|
EPSTEINIFY_URL = 'https://epsteinify.com'
|
|
50
|
-
EPSTEIN_MEDIA_URL = 'https://
|
|
50
|
+
EPSTEIN_MEDIA_URL = 'https://epstein.media'
|
|
51
51
|
EPSTEIN_WEB_URL = 'https://epsteinweb.org'
|
|
52
52
|
JMAIL_URL = 'https://jmail.world'
|
|
53
53
|
OVERSIGHT_REPUBLICANS_PRESSER_URL = 'https://oversight.house.gov/release/oversight-committee-releases-additional-epstein-estate-documents/'
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from copy import deepcopy
|
|
3
|
+
from typing import cast
|
|
3
4
|
|
|
4
5
|
from dateutil.parser import parse
|
|
5
6
|
|
|
@@ -84,7 +85,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
84
85
|
JAMES_HILL: re.compile(r"hill, james e.|james.e.hill@abc.com", re.IGNORECASE),
|
|
85
86
|
JEAN_LUC_BRUNEL: re.compile(r'Jean[- ]Luc Brunel?', re.IGNORECASE),
|
|
86
87
|
JEFF_FULLER: re.compile(r"jeff@mc2mm.com|Jeff Fuller", re.IGNORECASE),
|
|
87
|
-
JEFFREY_EPSTEIN: re.compile(r'[djl]ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!Mark L. )Epstein', re.IGNORECASE),
|
|
88
|
+
JEFFREY_EPSTEIN: re.compile(r'[djl]\s?ee[vy]acation[©@]?g?(mail.com)?|Epstine|\bJEE?\b|Jeffrey E((sp|ps)tein?)?|jeeproject@yahoo.com|J Jep|Jeffery Edwards|(?<!Mark L. )Epstein', re.IGNORECASE),
|
|
88
89
|
JESSICA_CADWELL: re.compile(r'Jessica Cadwell?', re.IGNORECASE),
|
|
89
90
|
JOHNNY_EL_HACHEM: re.compile(r'el hachem johnny|johnny el hachem', re.IGNORECASE),
|
|
90
91
|
JOI_ITO: re.compile(r'ji@media.mit.?edu|(joichi|joi)( Ito)?', re.IGNORECASE),
|
|
@@ -94,7 +95,7 @@ EMAILER_ID_REGEXES: dict[str, re.Pattern] = {
|
|
|
94
95
|
LANDON_THOMAS: re.compile(r'lando[nr] thomas( jr)?|thomas jr.?, lando[nr]', re.IGNORECASE),
|
|
95
96
|
LARRY_SUMMERS: re.compile(r'(La(wrence|rry).{1,5})?Summers?|^LH$|LHS|Ihsofficel', re.IGNORECASE),
|
|
96
97
|
LAWRANCE_VISOSKI: re.compile(r'La(rry|wrance) Visoski?|Lvjet', re.IGNORECASE),
|
|
97
|
-
LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|
|
|
98
|
+
LAWRENCE_KRAUSS: re.compile(r'Lawrence Kraus|[jl]awkrauss', re.IGNORECASE),
|
|
98
99
|
LEON_BLACK: re.compile(r'Leon Black?', re.IGNORECASE),
|
|
99
100
|
MANUELA_MARTINEZ: re.compile(fr'Manuela (- Mega Partners|Martinez)', re.IGNORECASE),
|
|
100
101
|
MARIANA_IDZKOWSKA: re.compile(r'Mariana [Il]d[źi]kowska?', re.IGNORECASE),
|
|
@@ -268,7 +269,7 @@ SHIMON_POST = 'The Shimon Post'
|
|
|
268
269
|
SHIMON_POST_ARTICLE = f'selection of articles about the mideast'
|
|
269
270
|
SINGLE_PAGE = 'single page of'
|
|
270
271
|
STRANGE_BEDFELLOWS = "'Strange Bedfellows' list of invitees f. Johnny Depp, Woody Allen, Obama, and more"
|
|
271
|
-
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit"
|
|
272
|
+
SWEDISH_LIFE_SCIENCES_SUMMIT = f"{BARBRO_C_EHNBOM}'s Swedish American Life Science Summit (SALSS)"
|
|
272
273
|
THE_REAL_DEAL_ARTICLE = 'article by Keith Larsen'
|
|
273
274
|
TRUMP_DISCLOSURES = f"Donald Trump financial disclosures from U.S. Office of Government Ethics"
|
|
274
275
|
UBS_CIO_REPORT = 'CIO Monthly Extended report'
|
|
@@ -371,8 +372,8 @@ TEXTS_CONFIG = CONFIRMED_TEXTS_CONFIG + UNCONFIRMED_TEXTS_CONFIG
|
|
|
371
372
|
########################################################################################################
|
|
372
373
|
|
|
373
374
|
# Some emails have a lot of uninteresting CCs
|
|
374
|
-
|
|
375
|
-
FLIGHT_IN_2012_PEOPLE
|
|
375
|
+
IRAN_DEAL_RECIPIENTS = ['Allen West', 'Rafael Bardaji', 'Philip Kafka', 'Herb Goodman', 'Grant Seeger', 'Lisa Albert', 'Janet Kafka', 'James Ramsey', 'ACT for America', 'John Zouzelka', 'Joel Dunn', 'Nate McClain', 'Bennet Greenwald', 'Taal Safdie', 'Uri Fouzailov', 'Neil Anderson', 'Nate White', 'Rita Hortenstine', 'Henry Hortenstine', 'Gary Gross', 'Forrest Miller', 'Bennett Schmidt', 'Val Sherman', 'Marcie Brown', 'Michael Horowitz', 'Marshall Funk']
|
|
376
|
+
FLIGHT_IN_2012_PEOPLE = ['Francis Derby', 'Januiz Banasiak', 'Louella Rabuyo', 'Richard Barnnet']
|
|
376
377
|
|
|
377
378
|
EMAILS_CONFIG = [
|
|
378
379
|
EmailCfg(id='032436', author=ALIREZA_ITTIHADIEH, attribution_reason='Signature'),
|
|
@@ -491,9 +492,6 @@ EMAILS_CONFIG = [
|
|
|
491
492
|
EmailCfg(id='032727', author=KATHRYN_RUEMMLER, attribution_reason=KATHY_REASON, is_attribution_uncertain=True),
|
|
492
493
|
EmailCfg(id='030478', author=LANDON_THOMAS),
|
|
493
494
|
EmailCfg(id='029013', author=LARRY_SUMMERS, recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
494
|
-
EmailCfg(id='032206', author=LAWRENCE_KRAUSS), # More of a text convo?
|
|
495
|
-
EmailCfg(id='032208', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
|
|
496
|
-
EmailCfg(id='032209', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
|
|
497
495
|
EmailCfg(id='029196', author=LAWRENCE_KRAUSS, recipients=[JEFFREY_EPSTEIN], actual_text='Talk in 40?'),
|
|
498
496
|
EmailCfg(id='033593', author=LAWRANCE_VISOSKI, attribution_reason='Signature'),
|
|
499
497
|
EmailCfg(id='033370', author=LAWRANCE_VISOSKI, attribution_reason=LARRY_REASON),
|
|
@@ -575,7 +573,7 @@ EMAILS_CONFIG = [
|
|
|
575
573
|
attribution_reason='ends with "Respectfully, terry"',
|
|
576
574
|
author=TERRY_KAFKA,
|
|
577
575
|
fwded_text_after='From: Mike Cohen',
|
|
578
|
-
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] +
|
|
576
|
+
recipients=[JEFFREY_EPSTEIN, MARK_EPSTEIN, MICHAEL_BUCHHOLTZ] + IRAN_DEAL_RECIPIENTS,
|
|
579
577
|
duplicate_ids=['028482'],
|
|
580
578
|
),
|
|
581
579
|
EmailCfg(id='029992', author=TERRY_KAFKA, attribution_reason='Quoted reply'),
|
|
@@ -600,7 +598,6 @@ EMAILS_CONFIG = [
|
|
|
600
598
|
EmailCfg(id='022202', recipients=[JEAN_LUC_BRUNEL], attribution_reason='Follow up / reply', duplicate_ids=['029975']),
|
|
601
599
|
EmailCfg(id='022187', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
602
600
|
EmailCfg(id='031489', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (unfixable)
|
|
603
|
-
EmailCfg(id='032210', recipients=[JEFFREY_EPSTEIN]), # More of a text convo?
|
|
604
601
|
EmailCfg(id='030347', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
605
602
|
EmailCfg(id='030367', recipients=[JEFFREY_EPSTEIN]), # Bad OCR (nofix)
|
|
606
603
|
EmailCfg(id='033274', recipients=[JEFFREY_EPSTEIN]), # this is a note sent to self
|
|
@@ -751,7 +748,7 @@ EMAILS_CONFIG = [
|
|
|
751
748
|
EmailCfg(id='031118', duplicate_ids=['019465']),
|
|
752
749
|
EmailCfg(id='031912', duplicate_ids=['032158']),
|
|
753
750
|
EmailCfg(id='030587', duplicate_ids=['030514']),
|
|
754
|
-
EmailCfg(id='029773', duplicate_ids=['012685']),
|
|
751
|
+
EmailCfg(id='029773', duplicate_ids=['012685'], fwded_text_after='Omar Quadhafi'),
|
|
755
752
|
EmailCfg(id='033297', duplicate_ids=['033586']),
|
|
756
753
|
EmailCfg(id='031089', duplicate_ids=['018084']),
|
|
757
754
|
EmailCfg(id='031088', duplicate_ids=['030885']),
|
|
@@ -1195,7 +1192,7 @@ OTHER_FILES_CONFERENCES = [
|
|
|
1195
1192
|
DocCfg(id='019300', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} f. {KATHRYN_RUEMMLER}', date='2019-04-05'),
|
|
1196
1193
|
DocCfg(id='022267', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} founder essay about growing the seminar business'),
|
|
1197
1194
|
DocCfg(id='022407', author=SVETLANA_POZHIDAEVA, description=f'{WOMEN_EMPOWERMENT} seminar pitch deck'),
|
|
1198
|
-
DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program"),
|
|
1195
|
+
DocCfg(id='017524', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2012 program emailed to epstein BY {BARBRO_C_EHNBOM} in 031226", date='2012-08-18'),
|
|
1199
1196
|
DocCfg(id='026747', author=SWEDISH_LIFE_SCIENCES_SUMMIT, description=f"2017 program", date='2017-08-23'),
|
|
1200
1197
|
DocCfg(id='014951', author='TED Talks', description=f"2017 program", date='2017-04-20'),
|
|
1201
1198
|
DocCfg(id='024179', author=UN_GENERAL_ASSEMBLY, description=f'president and first lady schedule', date='2012-09-21'),
|
|
@@ -1326,7 +1323,7 @@ OTHER_FILES_LETTERS = [
|
|
|
1326
1323
|
]
|
|
1327
1324
|
|
|
1328
1325
|
OTHER_FILES_PROPERTY = [
|
|
1329
|
-
DocCfg(id='026759', author='Great Bay Condominium Owners Association', description=f'{PRESS_RELEASE}
|
|
1326
|
+
DocCfg(id='026759', author='Great Bay Condominium Owners Association', description=f'{PRESS_RELEASE} about Hurricane Irma damage', date='2017-09-13'),
|
|
1330
1327
|
DocCfg(id='016602', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-04-17'),
|
|
1331
1328
|
DocCfg(id='016554', author=PALM_BEACH_CODE_ENFORCEMENT, description='board minutes', date='2008-07-17', duplicate_ids=['016616', '016574']),
|
|
1332
1329
|
DocCfg(id='027068', author=THE_REAL_DEAL, description=f"{THE_REAL_DEAL_ARTICLE} Palm House Hotel Bankruptcy and EB-5 Visa Fraud Allegations"),
|
|
@@ -1379,8 +1376,8 @@ OTHER_FILES_SOCIAL = [
|
|
|
1379
1376
|
]
|
|
1380
1377
|
|
|
1381
1378
|
OTHER_FILES_POLITICS = [
|
|
1382
|
-
DocCfg(id='029918', author=DIANA_DEGETTE_CAMPAIGN, description=f"bio", date='2012-
|
|
1383
|
-
DocCfg(id='031184', author=DIANA_DEGETTE_CAMPAIGN, description=f"fundraiser
|
|
1379
|
+
DocCfg(id='029918', author=DIANA_DEGETTE_CAMPAIGN, description=f"bio", date='2012-09-27'),
|
|
1380
|
+
DocCfg(id='031184', author=DIANA_DEGETTE_CAMPAIGN, description=f"invitation to fundraiser hosted by {BARBRO_C_EHNBOM}", date='2012-09-27'),
|
|
1384
1381
|
DocCfg(id='026827', author='Scowcroft Group', description=f'report on ISIS', date='2015-11-14'),
|
|
1385
1382
|
DocCfg(id='024294', author=STACEY_PLASKETT, description=f"campaign flier", date='2016-10-01'),
|
|
1386
1383
|
DocCfg(
|
|
@@ -1482,6 +1479,11 @@ OTHER_FILES_ARTS = [
|
|
|
1482
1479
|
OTHER_FILES_MISC = [
|
|
1483
1480
|
DocCfg(id='022780', category=FLIGHT_LOGS),
|
|
1484
1481
|
DocCfg(id='022816', category=FLIGHT_LOGS),
|
|
1482
|
+
DocCfg(id='032206', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1483
|
+
DocCfg(id='032208', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1484
|
+
DocCfg(id='032209', category=SKYPE_LOG, author=LAWRENCE_KRAUSS),
|
|
1485
|
+
DocCfg(id='018224', category=SKYPE_LOG, author=LAWRENCE_KRAUSS, description=f'conversations with linkspirit (French?) and {LAWRENCE_KRAUSS}'),
|
|
1486
|
+
DocCfg(id='032210', category=SKYPE_LOG, description=f'conversation with linkspirit'),
|
|
1485
1487
|
DocCfg(
|
|
1486
1488
|
id='025147',
|
|
1487
1489
|
author=BROCKMAN_INC,
|
|
@@ -1496,7 +1498,6 @@ OTHER_FILES_MISC = [
|
|
|
1496
1498
|
DocCfg(id='027074', author=FEMALE_HEALTH_COMPANY, description=f"pitch deck (USAID was a customer)"),
|
|
1497
1499
|
DocCfg(id='032735', author=GORDON_GETTY, description=f"on Trump", date='2018-03-20'), # Dated based on concurrent emails from Getty
|
|
1498
1500
|
DocCfg(id='025540', author=JEFFREY_EPSTEIN, description=f"rough draft of Epstein's side of the story?"),
|
|
1499
|
-
DocCfg(id='018224', author=LAWRENCE_KRAUSS, description=f"Skype conversation log"),
|
|
1500
1501
|
DocCfg(id='026634', author='Michael Carrier', description=f"comments about an Apollo linked hedge fund 'DE Fund VIII'"),
|
|
1501
1502
|
DocCfg(id='031425', author=SCOTT_J_LINK, description=f'completely redacted email from'),
|
|
1502
1503
|
DocCfg(id='020447', author='Working Group on Chinese Influence Activities in the U.S.', description=f'Promoting Constructive Vigilance'),
|
|
@@ -1589,8 +1590,8 @@ SENT_FROM_REGEX = re.compile(r'^(?:(Please forgive|Sorry for all the) typos.{1,4
|
|
|
1589
1590
|
|
|
1590
1591
|
|
|
1591
1592
|
# Error checking.
|
|
1592
|
-
if len(OTHER_FILES_CONFIG) !=
|
|
1593
|
-
logger.warning(f"
|
|
1593
|
+
if len(OTHER_FILES_CONFIG) != 442:
|
|
1594
|
+
logger.warning(f"Found {len(OTHER_FILES_CONFIG)} configured other files!")
|
|
1594
1595
|
|
|
1595
1596
|
encountered_file_ids = set()
|
|
1596
1597
|
|
|
@@ -109,7 +109,9 @@ class DocCfg:
|
|
|
109
109
|
|
|
110
110
|
def info_str(self) -> str | None:
|
|
111
111
|
"""String that summarizes what is known about this document."""
|
|
112
|
-
if self.category
|
|
112
|
+
if self.category and not self.description:
|
|
113
|
+
return self.category
|
|
114
|
+
elif self.category == REPUTATION:
|
|
113
115
|
return f"{REPUTATION_MGMT}: {self.description}"
|
|
114
116
|
elif self.author and self.description:
|
|
115
117
|
if self.category in [ACADEMIA, BOOK]:
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from argparse import ArgumentParser
|
|
3
|
+
from os import environ
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from sys import argv
|
|
6
|
+
|
|
7
|
+
from epstein_files.util.logging import datefinder_logger, env_log_level, logger
|
|
8
|
+
|
|
9
|
+
COUNT_WORDS_SCRIPT = 'count_words.py'
|
|
10
|
+
DEFAULT_WIDTH = 145
|
|
11
|
+
HTML_SCRIPTS = ['epstein_generate', 'generate_html.py', COUNT_WORDS_SCRIPT]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
|
|
15
|
+
parser.add_argument('--name', '-n', action='append', dest='names', help='specify the name(s) whose communications should be output')
|
|
16
|
+
# Flag asking for the cached EpsteinFiles to be overwritten (help text typo "ovewrite" fixed).
parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='overwrite cached EpsteinFiles')
|
|
17
|
+
|
|
18
|
+
output = parser.add_argument_group('OUTPUT')
|
|
19
|
+
output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
|
|
20
|
+
output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
|
|
21
|
+
output.add_argument('--build', '-b', action='store_true', help='write output to HTML file')
|
|
22
|
+
output.add_argument('--make-clean', '-mc', action='store_true', help='delete all build artifact HTML and JSON files')
|
|
23
|
+
# Opt-in flag for the emails section; the help text previously described the "other files" section.
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
24
|
+
output.add_argument('--output-other-files', '-oo', action='store_true', help='generate other files section')
|
|
25
|
+
# Opt-in flag for the text messages section; the help text previously described the "other files" section.
output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
|
|
26
|
+
output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')
|
|
27
|
+
output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use (in characters)')
|
|
28
|
+
output.add_argument('--use-epstein-web-links', action='store_true', help='use epsteinweb.org links instead of epstein.media')
|
|
29
|
+
|
|
30
|
+
scripts = parser.add_argument_group('SCRIPTS', 'Arguments used only by epstein_search, epstein_show, epstein_diff')
|
|
31
|
+
# Free-form positional arguments collected into a list (nargs='*'); help text typo "searchs" fixed.
scripts.add_argument('positional_args', nargs='*', help='strings to search for, file IDs to show or diff, etc.')
|
|
32
|
+
scripts.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (only used by scripts)')
|
|
33
|
+
scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole file (only used by epstein_search)')
|
|
34
|
+
|
|
35
|
+
debug = parser.add_argument_group('DEBUG')
|
|
36
|
+
debug.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
|
|
37
|
+
debug.add_argument('--debug', '-d', action='store_true', help='set debug level to INFO')
|
|
38
|
+
debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug level to DEBUG')
|
|
39
|
+
debug.add_argument('--json-metadata', '-jm', action='store_true', help='dump JSON metadata for all files')
|
|
40
|
+
debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats at the end')
|
|
41
|
+
debug.add_argument('--sort-alphabetical', action='store_true', help='sort emailers alphabetically in counts table')
|
|
42
|
+
debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
43
|
+
args = parser.parse_args()
|
|
44
|
+
|
|
45
|
+
current_script = Path(argv[0]).name
|
|
46
|
+
def is_env_var_set(s: str) -> bool:
    """Return True if the environment variable named `s` exists and is a non-empty string."""
    # A missing variable yields None and an empty one yields ''; both are falsy.
    return bool(environ.get(s))
|
|
47
|
+
is_html_script = current_script in HTML_SCRIPTS
|
|
48
|
+
|
|
49
|
+
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
50
|
+
args.output_emails = args.output_emails or args.all_emails
|
|
51
|
+
args.output_other_files = args.output_other_files or args.all_other_files
|
|
52
|
+
args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
|
|
53
|
+
args.width = args.width if is_html_script else None
|
|
54
|
+
specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Log level args
|
|
58
|
+
if args.deep_debug:
|
|
59
|
+
logger.setLevel(logging.DEBUG)
|
|
60
|
+
elif args.debug:
|
|
61
|
+
logger.setLevel(logging.INFO)
|
|
62
|
+
elif args.suppress_logs:
|
|
63
|
+
logger.setLevel(logging.FATAL)
|
|
64
|
+
elif not env_log_level:
|
|
65
|
+
logger.setLevel(logging.WARNING)
|
|
66
|
+
|
|
67
|
+
logger.info(f'Log level set to {logger.level}...')
|
|
68
|
+
datefinder_logger.setLevel(logger.level)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Massage args that depend on other args to the appropriate state
|
|
72
|
+
if not (args.json_metadata or args.output_texts or args.output_emails or args.output_other_files):
|
|
73
|
+
if is_html_script and current_script != COUNT_WORDS_SCRIPT and not args.make_clean and not args.colors_only:
|
|
74
|
+
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
75
|
+
|
|
76
|
+
args.output_texts = True
|
|
77
|
+
args.output_emails = True
|
|
78
|
+
args.output_other_files = True
|
|
79
|
+
|
|
80
|
+
if args.use_epstein_web_links:
|
|
81
|
+
logger.warning(f"Using links to epsteinweb.org links instead of epsteinify.com...")
|
|
82
|
+
|
|
83
|
+
if args.debug:
|
|
84
|
+
logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nspecified_names={specified_names},\nargs={args}")
|
|
@@ -159,7 +159,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
159
159
|
pattern=r'Gruterite|(John\s*)?Kluge|Marc Rich|(Mi(chael|ke)\s*)?Ovitz|(Steve\s+)?Wynn|(Les(lie)?\s+)?Wexner|SALSS|Swedish[-\s]*American\s*Life\s*Science\s*Summit|Valhi|(Yves\s*)?Bouvier',
|
|
160
160
|
emailers = {
|
|
161
161
|
ALIREZA_ITTIHADIEH: 'CEO Freestream Aircraft Limited',
|
|
162
|
-
BARBRO_C_EHNBOM: 'Swedish pharmaceuticals',
|
|
162
|
+
BARBRO_C_EHNBOM: 'Swedish pharmaceuticals, SALSS',
|
|
163
163
|
FRED_HADDAD: "co-founder of Heck's in West Virginia",
|
|
164
164
|
GERALD_BARTON: "Maryland property developer Landmark Land Company, fan of Trump's Irish golf course",
|
|
165
165
|
GORDON_GETTY: 'heir of oil tycoon J. Paul Getty',
|
|
@@ -296,6 +296,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
296
296
|
emailers = {
|
|
297
297
|
DAVID_STERN: f'emailed Epstein from Moscow, appears to know chairman of {DEUTSCHE_BANK}',
|
|
298
298
|
JONATHAN_FARKAS: "heir to the Alexander's department store fortune",
|
|
299
|
+
'linkspirit': "Skype username of someone Epstein communicated with",
|
|
299
300
|
'Peter Thomas Roth': 'student of Epstein at Dalton, skincare company founder',
|
|
300
301
|
STEPHEN_HANSON: None,
|
|
301
302
|
TOM_BARRACK: 'long time friend of Trump',
|
|
@@ -304,7 +305,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
304
305
|
HighlightedNames(
|
|
305
306
|
label='finance',
|
|
306
307
|
style='green',
|
|
307
|
-
pattern=r'Apollo|Ari\s*Glass|(Bernie\s*)?Madoff|Black(rock|stone)|
|
|
308
|
+
pattern=r'Apollo|Ari\s*Glass|Bank|(Bernie\s*)?Madoff|Black(rock|stone)|B\s*of\s*A|Boothbay(\sFund\sManagement)?|Chase\s*Bank|Credit\s*Suisse|DB|Deutsche\s*(Asset|Bank)|Electron\s*Capital\s*(Partners)?|Fenner|FRBNY|Goldman(\s*Sachs)|HSBC|Invesco|(Janet\s*)?Yellen|(Jerome\s*)?Powell(?!M\. Cabot)|(Jimmy\s*)?Cayne|JPMC?|j\.?p\.?\s*morgan(\.?com|\s*Chase)?|Madoff|Merrill(\s*Lynch)?|(Michael\s*)?(Cembalest|Milken)|Mizrahi\s*Bank|MLPF&S|(money\s+)?launder(s?|ers?|ing)?(\s+money)?|Morgan Stanley|(Peter L. )?Scher|(Ray\s*)?Dalio|Schwartz?man|Serageldin|UBS|us.gio@jpmorgan.com',
|
|
308
309
|
emailers={
|
|
309
310
|
AMANDA_ENS: 'Citigroup',
|
|
310
311
|
DANIEL_SABBA: 'UBS Investment Bank',
|
|
@@ -587,7 +588,7 @@ HIGHLIGHTED_NAMES = [
|
|
|
587
588
|
HighlightedText(
|
|
588
589
|
label='phone_number',
|
|
589
590
|
style='bright_green',
|
|
590
|
-
pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})
|
|
591
|
+
pattern=r"\+?(1?\(?\d{3}\)?[- ]\d{3}[- ]\d{4}|\d{2}[- ]\(?0?\)?\d{2}[- ]\d{4}[- ]\d{4})|\b[\d+]{10,12}\b",
|
|
591
592
|
),
|
|
592
593
|
]
|
|
593
594
|
|
|
@@ -7,7 +7,6 @@ from epstein_files.util.constant.output_files import JSON_METADATA_PATH
|
|
|
7
7
|
from epstein_files.util.constant import urls
|
|
8
8
|
from epstein_files.util.constant.html import *
|
|
9
9
|
from epstein_files.util.constant.names import *
|
|
10
|
-
from epstein_files.util.constant.strings import EMAIL_CLASS, MESSENGER_LOG_CLASS
|
|
11
10
|
from epstein_files.util.data import dict_sets_to_lists
|
|
12
11
|
from epstein_files.util.env import args, specified_names
|
|
13
12
|
from epstein_files.util.logging import log_file_write, logger
|
|
@@ -122,9 +121,9 @@ def print_json_metadata(epstein_files: EpsteinFiles) -> None:
|
|
|
122
121
|
def print_json_stats(epstein_files: EpsteinFiles) -> None:
|
|
123
122
|
console.line(5)
|
|
124
123
|
console.print(Panel('JSON Stats Dump', expand=True, style='reverse bold'), '\n')
|
|
125
|
-
print_json(f"
|
|
126
|
-
print_json(f"
|
|
127
|
-
print_json(f"
|
|
124
|
+
print_json(f"MessengerLog Sender Counts", MessengerLog.count_authors(epstein_files.imessage_logs), skip_falsey=True)
|
|
125
|
+
print_json(f"Email Author Counts", epstein_files.email_author_counts, skip_falsey=True)
|
|
126
|
+
print_json(f"Email Recipient Counts", epstein_files.email_recipient_counts, skip_falsey=True)
|
|
128
127
|
print_json("Email signature_substitution_countss", epstein_files.email_signature_substitution_counts(), skip_falsey=True)
|
|
129
128
|
print_json("email_author_device_signatures", dict_sets_to_lists(epstein_files.email_authors_to_device_signatures))
|
|
130
129
|
print_json("email_sent_from_devices", dict_sets_to_lists(epstein_files.email_device_signatures_to_authors))
|
|
@@ -147,16 +146,12 @@ def print_text_messages(epstein_files: EpsteinFiles) -> None:
|
|
|
147
146
|
|
|
148
147
|
def write_urls() -> None:
|
|
149
148
|
"""Write _URL style constant variables to a file bash scripts can load as env vars."""
|
|
150
|
-
if args.output_file == 'index.html':
|
|
151
|
-
logger.warning(f"Can't write env vars to '{args.output_file}', writing to '{URLS_ENV}' instead.\n")
|
|
152
|
-
args.output_file = URLS_ENV
|
|
153
|
-
|
|
154
149
|
url_vars = {
|
|
155
150
|
k: v for k, v in vars(urls).items()
|
|
156
151
|
if isinstance(v, str) and k.split('_')[-1] in ['URL'] and 'github.io' in v and 'BASE' not in k
|
|
157
152
|
}
|
|
158
153
|
|
|
159
|
-
with open(
|
|
154
|
+
with open(URLS_ENV, 'w') as f:
|
|
160
155
|
for var_name, url in url_vars.items():
|
|
161
156
|
key_value = f"{var_name}='{url}'"
|
|
162
157
|
|
|
@@ -166,7 +161,7 @@ def write_urls() -> None:
|
|
|
166
161
|
f.write(f"{key_value}\n")
|
|
167
162
|
|
|
168
163
|
console.line()
|
|
169
|
-
logger.warning(f"Wrote {len(url_vars)} URL variables to '{
|
|
164
|
+
logger.warning(f"Wrote {len(url_vars)} URL variables to '{URLS_ENV}'\n")
|
|
170
165
|
|
|
171
166
|
|
|
172
167
|
def _verify_all_emails_were_printed(epstein_files: EpsteinFiles, already_printed_emails: list[Email]) -> None:
|
|
@@ -231,10 +231,13 @@ def print_other_site_link(is_header: bool = True) -> None:
|
|
|
231
231
|
other_site_msg += f" Epstein's {other_site_type}s also generated by this code"
|
|
232
232
|
markup_msg = link_markup(SITE_URLS[other_site_type], other_site_msg, OTHER_SITE_LINK_STYLE)
|
|
233
233
|
print_centered(parenthesize(Text.from_markup(markup_msg)), style='bold')
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
234
|
+
|
|
235
|
+
if is_header:
|
|
236
|
+
metadata_link = link_text_obj(JSON_METADATA_URL, 'metadata with author attribution explanations', OTHER_SITE_LINK_STYLE)
|
|
237
|
+
print_centered(parenthesize(metadata_link))
|
|
238
|
+
word_count_link = link_text_obj(WORD_COUNT_URL, 'most frequently used words', OTHER_SITE_LINK_STYLE)
|
|
239
|
+
print_centered(parenthesize(word_count_link))
|
|
240
|
+
print_centered(parenthesize(link_text_obj(GH_PROJECT_URL, '@github', 'dark_orange3 bold')))
|
|
238
241
|
|
|
239
242
|
|
|
240
243
|
def print_page_title(expand: bool = True, width: int | None = None) -> None:
|
|
@@ -247,8 +250,8 @@ def print_page_title(expand: bool = True, width: int | None = None) -> None:
|
|
|
247
250
|
def print_panel(msg: str, style: str = 'black on white', padding: tuple | None = None, centered: bool = False) -> None:
|
|
248
251
|
_padding: list[int] = list(padding or [0, 0, 0, 0])
|
|
249
252
|
_padding[2] += 1 # Bottom pad
|
|
250
|
-
panel = Panel(Text.from_markup(msg, justify='center'), width=70, style=style)
|
|
251
253
|
actual_padding: tuple[int, int, int, int] = tuple(_padding)
|
|
254
|
+
panel = Panel(Text.from_markup(msg, justify='center'), width=70, style=style)
|
|
252
255
|
|
|
253
256
|
if centered:
|
|
254
257
|
console.print(Align.center(Padding(panel, actual_padding)))
|
|
@@ -335,6 +338,7 @@ def _print_external_links() -> None:
|
|
|
335
338
|
print_centered(link_markup(COURIER_NEWSROOM_ARCHIVE_URL, 'Searchable Archive') + " (Courier Newsroom)")
|
|
336
339
|
print_centered(link_markup(EPSTEINIFY_URL) + " (raw document images)")
|
|
337
340
|
print_centered(link_markup(EPSTEIN_WEB_URL) + " (character summaries)")
|
|
341
|
+
print_centered(link_markup(EPSTEIN_MEDIA_URL) + " (raw document images)")
|
|
338
342
|
|
|
339
343
|
|
|
340
344
|
# if args.deep_debug:
|
epstein_files-1.0.3/README.md
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
# I Made Epstein's Text Messages Great Again
|
|
2
|
-
|
|
3
|
-
* [I Made Epstein's Text Messages Great Again (And You Should Read Them)](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great) post on [Substack](https://cryptadamus.substack.com/p/i-made-epsteins-text-messages-great)
|
|
4
|
-
* The Epstein text messages (and some of the emails along with summary counts of sent emails to/from Epstein) generated by this code can be viewed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/).
|
|
5
|
-
* All of His Emails can be read at another page also generated by this code [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/all_emails_epstein_files_nov_2025.html).
|
|
6
|
-
* Word counts for the emails and text messages are [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/communication_word_count_epstein_files_nov_2025.html).
|
|
7
|
-
* Metadata containing what I have figured out about who sent or received the communications in a given file (and a brief explanation for how I figured it out for each file) is deployed [here](https://michelcrypt4d4mus.github.io/epstein_text_messages/file_metadata_epstein_files_nov_2025.json)
|
|
8
|
-
* Configuration variables assigning specific `HOUSE_OVERSIGHT_XXXXXX.txt` file IDs (the `XXXXXX` part) as being emails to or from particular people based on various research and contributions can be found in [constants.py](./epstein_files/util/constants.py). Everything in `constants.py` should also appear in the JSON metadata.
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
### Usage
|
|
12
|
-
1. Requires you have a local copy of OCR text from the House Oversight document dump in a directory `/path/to/epstein/ocr_txt_files`. You can download them from [the Congressional Google Drive folder](https://drive.google.com/drive/folders/1ldncvdqIf6miiskDp_EDuGSDAaI_fJx8).
|
|
13
|
-
1. Dependencies are in [pyproject.toml](./pyproject.toml). Use `poetry install` for easiest time installing. `pip install .` may or may not work.
|
|
14
|
-
|
|
15
|
-
You need to set the `DOCS_DIR` environment variable with the path to the folder of files you just downloaded when running. You can either create a `.env` file modeled on [`.env.example`](./.env.example) (which will set it permanently) or you can run with:
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
# Generate color highlighted texts/emails/other files
|
|
19
|
-
DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_generate
|
|
20
|
-
|
|
21
|
-
# Search
|
|
22
|
-
DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_search Bannon
|
|
23
|
-
|
|
24
|
-
# Show a color highlighted file
|
|
25
|
-
DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_show 030999
|
|
26
|
-
# This also works
|
|
27
|
-
DOCS_DIR=/path/to/epstein/ocr_txt_files epstein_show HOUSE_OVERSIGHT_030999
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
Run `epstein_generate --help` for command line option assistance.
|
|
31
|
-
The first time you run anything it will take a few minutes to fix all the data, attribute the redacted emails, etc. After that first run you can use `epstein_generate --pickled` to load the cached, fixed-up data and things will be quick.
|
|
32
|
-
|
|
33
|
-
#### As A Library
|
|
34
|
-
```python
|
|
35
|
-
from epstein_files.epstein_files import EpsteinFiles
|
|
36
|
-
epstein_files = EpsteinFiles.get_files()
|
|
37
|
-
|
|
38
|
-
# All files
|
|
39
|
-
for document in epstein_files.all_documents():
|
|
40
|
-
do_stuff()
|
|
41
|
-
|
|
42
|
-
# Emails
|
|
43
|
-
for email in epstein_files.emails:
|
|
44
|
-
do_stuff()
|
|
45
|
-
|
|
46
|
-
# iMessage Logs
|
|
47
|
-
for imessage_log in epstein_files.imessage_logs:
|
|
48
|
-
do_stuff()
|
|
49
|
-
|
|
50
|
-
# Other Files
|
|
51
|
-
for document in epstein_files.other_files:
|
|
52
|
-
do_stuff()
|
|
53
|
-
```
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from argparse import ArgumentParser
|
|
3
|
-
from os import environ
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from sys import argv
|
|
6
|
-
|
|
7
|
-
from epstein_files.util.logging import datefinder_logger, env_log_level, logger
|
|
8
|
-
|
|
9
|
-
COUNT_WORDS_SCRIPT = 'count_words.py'
|
|
10
|
-
DEFAULT_WIDTH = 154
|
|
11
|
-
HTML_SCRIPTS = ['epstein_generate', 'generate_html.py', COUNT_WORDS_SCRIPT]
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
parser = ArgumentParser(description="Parse epstein OCR docs and generate HTML page.")
|
|
15
|
-
parser.add_argument('--build', '-b', action='store_true', help='write output to file')
|
|
16
|
-
parser.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
|
|
17
|
-
parser.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just interesting ones')
|
|
18
|
-
parser.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
|
|
19
|
-
parser.add_argument('--name', '-n', action='append', dest='names', help='specify the name(s) whose communications should be output')
|
|
20
|
-
parser.add_argument('--output-file', '-out', metavar='FILE', default='index.html', help='write output to FILE in docs/ (default=index.html)')
|
|
21
|
-
parser.add_argument('--output-emails', '-oe', action='store_true', help='generate other files section')
|
|
22
|
-
parser.add_argument('--output-other-files', '-oo', action='store_true', help='generate other files section')
|
|
23
|
-
parser.add_argument('--output-texts', '-ot', action='store_true', help='generate other files section')
|
|
24
|
-
parser.add_argument('--pickled', '-p', action='store_true', help='use pickled EpsteinFiles object')
|
|
25
|
-
parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='generate new pickled EpsteinFiles object')
|
|
26
|
-
parser.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (only used by scripts)')
|
|
27
|
-
parser.add_argument('--sort-alphabetical', '-alpha', action='store_true', help='sort emailers alphabetically in counts table')
|
|
28
|
-
parser.add_argument('--suppress-output', '-s', action='store_true', help='no output to terminal (use with --build)')
|
|
29
|
-
parser.add_argument('--use-epstein-web-links', '-use', action='store_true', help='use epsteinweb.org links instead of epstein.media')
|
|
30
|
-
parser.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use')
|
|
31
|
-
parser.add_argument('--whole-file', '-wf', action='store_true', help='print whole file (only used by search script)')
|
|
32
|
-
parser.add_argument('--debug', '-d', action='store_true', help='set debug level to INFO')
|
|
33
|
-
parser.add_argument('--deep-debug', '-dd', action='store_true', help='set debug level to DEBUG')
|
|
34
|
-
parser.add_argument('--make-clean', '-mc', action='store_true', help='delete all build artifact HTML and JSON files')
|
|
35
|
-
parser.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
36
|
-
parser.add_argument('--json-metadata', '-jm', action='store_true', help='dump JSON metadata for all files')
|
|
37
|
-
parser.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats at the end')
|
|
38
|
-
parser.add_argument('positional_args', nargs='*', help='Optional args (only used by helper scripts)')
|
|
39
|
-
args = parser.parse_args()
|
|
40
|
-
|
|
41
|
-
current_script = Path(argv[0]).name
|
|
42
|
-
is_env_var_set = lambda s: len(environ.get(s) or '') > 0
|
|
43
|
-
is_html_script = current_script in HTML_SCRIPTS
|
|
44
|
-
|
|
45
|
-
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
46
|
-
args.output_emails = args.output_emails or args.all_emails
|
|
47
|
-
args.output_other_files = args.output_other_files or args.all_other_files
|
|
48
|
-
args.pickled = args.pickled or is_env_var_set('PICKLED') or args.colors_only or len(args.names or []) > 0
|
|
49
|
-
args.width = args.width if is_html_script else None
|
|
50
|
-
specified_names: list[str | None] = [None if n == 'None' else n for n in (args.names or [])]
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# Log level args
|
|
54
|
-
if args.deep_debug:
|
|
55
|
-
logger.setLevel(logging.DEBUG)
|
|
56
|
-
elif args.debug:
|
|
57
|
-
logger.setLevel(logging.INFO)
|
|
58
|
-
elif args.suppress_logs:
|
|
59
|
-
logger.setLevel(logging.FATAL)
|
|
60
|
-
elif not env_log_level:
|
|
61
|
-
logger.setLevel(logging.WARNING)
|
|
62
|
-
|
|
63
|
-
logger.info(f'Log level set to {logger.level}...')
|
|
64
|
-
datefinder_logger.setLevel(logger.level)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Massage args that depend on other args to the appropriate state
|
|
68
|
-
if not (args.json_metadata or args.output_texts or args.output_emails or args.output_other_files):
|
|
69
|
-
if is_html_script and current_script != COUNT_WORDS_SCRIPT and not args.make_clean:
|
|
70
|
-
logger.warning(f"No output section chosen; outputting default of texts, selected emails, and other files...")
|
|
71
|
-
|
|
72
|
-
args.output_texts = True
|
|
73
|
-
args.output_emails = True
|
|
74
|
-
args.output_other_files = True
|
|
75
|
-
|
|
76
|
-
if args.use_epstein_web_links:
|
|
77
|
-
logger.warning(f"Using links to epsteinweb.org links instead of epsteinify.com...")
|
|
78
|
-
|
|
79
|
-
if args.debug:
|
|
80
|
-
logger.warning(f"Invocation args:\nis_html_script={is_html_script},\nspecified_names={specified_names},\nargs={args}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{epstein_files-1.0.3 → epstein_files-1.0.5}/epstein_files/documents/imessage/text_message.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|