epstein-files 1.0.10__tar.gz → 1.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epstein_files-1.0.10 → epstein_files-1.0.11}/PKG-INFO +1 -1
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/__init__.py +4 -6
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/document.py +92 -49
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/email.py +7 -4
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/imessage/text_message.py +3 -12
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/json_file.py +13 -1
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/messenger_log.py +32 -19
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/other_file.py +66 -43
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/epstein_files.py +22 -15
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/names.py +2 -2
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constants.py +84 -78
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/doc_cfg.py +17 -25
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/env.py +29 -17
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/file_helper.py +13 -24
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/highlighted_group.py +22 -14
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/logging.py +0 -6
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/output.py +12 -7
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/rich.py +15 -10
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/word_count.py +65 -5
- {epstein_files-1.0.10 → epstein_files-1.0.11}/pyproject.toml +1 -1
- epstein_files-1.0.10/epstein_files/count_words.py +0 -72
- {epstein_files-1.0.10 → epstein_files-1.0.11}/LICENSE +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/README.md +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/communication.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/emails/email_header.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/common_words.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/html.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/output_files.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/strings.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/constant/urls.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/data.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/search_result.py +0 -0
- {epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/util/timer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: epstein-files
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.11
|
|
4
4
|
Summary: Tools for working with the Jeffrey Epstein documents released in November 2025.
|
|
5
5
|
Home-page: https://michelcrypt4d4mus.github.io/epstein_text_messages/
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
"""
|
|
3
3
|
Reformat Epstein text message files for readability and count email senders.
|
|
4
|
-
For use with iMessage log files from https://drive.google.com/drive/folders/1hTNH5woIRio578onLGElkTWofUSWRoH_
|
|
5
4
|
|
|
6
|
-
Install: 'poetry install'
|
|
7
5
|
Run: 'EPSTEIN_DOCS_DIR=/path/to/TXT epstein_generate'
|
|
8
6
|
"""
|
|
9
7
|
from sys import exit
|
|
@@ -15,7 +13,6 @@ from rich.padding import Padding
|
|
|
15
13
|
from rich.panel import Panel
|
|
16
14
|
from rich.text import Text
|
|
17
15
|
|
|
18
|
-
from epstein_files.count_words import write_word_counts_html
|
|
19
16
|
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
20
17
|
from epstein_files.documents.document import INFO_PADDING, Document
|
|
21
18
|
from epstein_files.documents.email import Email
|
|
@@ -27,6 +24,7 @@ from epstein_files.util.output import (print_emails, print_json_files, print_jso
|
|
|
27
24
|
print_text_messages, write_urls)
|
|
28
25
|
from epstein_files.util.rich import build_highlighter, console, print_header, print_panel, write_html
|
|
29
26
|
from epstein_files.util.timer import Timer
|
|
27
|
+
from epstein_files.util.word_count import write_word_counts_html
|
|
30
28
|
|
|
31
29
|
|
|
32
30
|
def generate_html() -> None:
|
|
@@ -41,7 +39,7 @@ def generate_html() -> None:
|
|
|
41
39
|
if args.json_metadata:
|
|
42
40
|
print_json_metadata(epstein_files)
|
|
43
41
|
exit()
|
|
44
|
-
elif args.
|
|
42
|
+
elif args.json_files:
|
|
45
43
|
print_json_files(epstein_files)
|
|
46
44
|
exit()
|
|
47
45
|
|
|
@@ -58,7 +56,7 @@ def generate_html() -> None:
|
|
|
58
56
|
emails_printed = print_emails(epstein_files)
|
|
59
57
|
timer.print_at_checkpoint(f"Printed {emails_printed:,} emails")
|
|
60
58
|
|
|
61
|
-
if args.
|
|
59
|
+
if args.output_other:
|
|
62
60
|
files_printed = epstein_files.print_other_files_table()
|
|
63
61
|
timer.print_at_checkpoint(f"Printed {len(files_printed)} other files")
|
|
64
62
|
|
|
@@ -96,7 +94,7 @@ def epstein_search():
|
|
|
96
94
|
|
|
97
95
|
console.print(search_result.document)
|
|
98
96
|
else:
|
|
99
|
-
console.print(search_result.document.
|
|
97
|
+
console.print(search_result.document.summary_panel())
|
|
100
98
|
|
|
101
99
|
for matching_line in search_result.lines:
|
|
102
100
|
line_txt = matching_line.__rich__()
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
from copy import deepcopy
|
|
3
4
|
from dataclasses import asdict, dataclass, field
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from pathlib import Path
|
|
@@ -15,13 +16,13 @@ from epstein_files.util.constant.names import *
|
|
|
15
16
|
from epstein_files.util.constant.strings import *
|
|
16
17
|
from epstein_files.util.constant.urls import *
|
|
17
18
|
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
18
|
-
from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp,
|
|
19
|
+
from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, patternize, without_falsey
|
|
19
20
|
from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
|
|
20
|
-
from epstein_files.util.env import args
|
|
21
|
-
from epstein_files.util.file_helper import (
|
|
21
|
+
from epstein_files.util.env import DOCS_DIR, args
|
|
22
|
+
from epstein_files.util.file_helper import (file_stem_for_id, extract_file_id, file_size,
|
|
22
23
|
file_size_str, is_local_extract_file)
|
|
23
24
|
from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
|
|
24
|
-
from epstein_files.util.rich import SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
|
|
25
|
+
from epstein_files.util.rich import INFO_STYLE, SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
|
|
25
26
|
from epstein_files.util.search_result import MatchedLine
|
|
26
27
|
|
|
27
28
|
CLOSE_PROPERTIES_CHAR = ']'
|
|
@@ -33,6 +34,7 @@ MIN_DOCUMENT_ID = 10477
|
|
|
33
34
|
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
34
35
|
WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
|
|
35
36
|
|
|
37
|
+
EXTRACTED_FROM = 'Extracted from'
|
|
36
38
|
MIN_TIMESTAMP = datetime(1991, 1, 1)
|
|
37
39
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
38
40
|
MAX_TIMESTAMP = datetime(2020, 1, 1)
|
|
@@ -59,14 +61,27 @@ OCR_REPAIRS = {
|
|
|
59
61
|
|
|
60
62
|
@dataclass
|
|
61
63
|
class Document:
|
|
62
|
-
"""
|
|
64
|
+
"""
|
|
65
|
+
Base class for all Epstein Files documents.
|
|
66
|
+
|
|
67
|
+
Attributes:
|
|
68
|
+
file_path (Path): Local path to file
|
|
69
|
+
author (str | None): Who is responsible for the text in the file
|
|
70
|
+
config (DocCfg): Information about this file
|
|
71
|
+
file_id (str): 6 digit (or 8 digits if it's a local extract file) string ID
|
|
72
|
+
filename (str): File's basename
|
|
73
|
+
length (int): Number of characters in the file after all the cleanup
|
|
74
|
+
lines (list[str]): Lines of the file after all the cleanup
|
|
75
|
+
text (str): Contents of the file
|
|
76
|
+
timestamp (datetime | None): When the file was originally created
|
|
77
|
+
url_slug (str): Version of the filename that works in links to epsteinify etc.
|
|
78
|
+
"""
|
|
63
79
|
file_path: Path
|
|
64
80
|
# Optional fields
|
|
65
81
|
author: str | None = None
|
|
66
82
|
config: EmailCfg | DocCfg | TextCfg | None = None
|
|
67
83
|
file_id: str = field(init=False)
|
|
68
84
|
filename: str = field(init=False)
|
|
69
|
-
is_duplicate: bool = False
|
|
70
85
|
length: int = field(init=False)
|
|
71
86
|
lines: list[str] = field(init=False)
|
|
72
87
|
num_lines: int = field(init=False)
|
|
@@ -74,21 +89,21 @@ class Document:
|
|
|
74
89
|
timestamp: datetime | None = None
|
|
75
90
|
url_slug: str = field(init=False) # e.g. 'HOUSE_OVERSIGHT_123456'
|
|
76
91
|
|
|
77
|
-
# Class
|
|
78
|
-
|
|
92
|
+
# Class variables
|
|
93
|
+
include_description_in_summary_panel: ClassVar[bool] = False
|
|
94
|
+
strip_whitespace: ClassVar[bool] = True # Overridden in JsonFile
|
|
79
95
|
|
|
80
96
|
def __post_init__(self):
|
|
81
97
|
self.filename = self.file_path.name
|
|
82
98
|
self.file_id = extract_file_id(self.filename)
|
|
83
|
-
self.config = ALL_FILE_CONFIGS.get(self.file_id)
|
|
84
|
-
self.is_duplicate = bool(self.config.dupe_of_id) if self.config else False
|
|
99
|
+
self.config = deepcopy(ALL_FILE_CONFIGS.get(self.file_id))
|
|
85
100
|
|
|
86
101
|
if self.is_local_extract_file():
|
|
87
102
|
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
103
|
+
extracted_from_doc_id = self.url_slug.split('_')[-1]
|
|
88
104
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
self.config = EmailCfg.from_doc_cfg(self.config)
|
|
105
|
+
if extracted_from_doc_id in ALL_FILE_CONFIGS:
|
|
106
|
+
self._set_extract_config(deepcopy(ALL_FILE_CONFIGS[extracted_from_doc_id]))
|
|
92
107
|
else:
|
|
93
108
|
self.url_slug = self.file_path.stem
|
|
94
109
|
|
|
@@ -97,11 +112,7 @@ class Document:
|
|
|
97
112
|
self._extract_author()
|
|
98
113
|
self.timestamp = self._extract_timestamp()
|
|
99
114
|
|
|
100
|
-
def
|
|
101
|
-
"""Annoying workaround for circular import issues and isinstance()."""
|
|
102
|
-
return str(type(self).__name__)
|
|
103
|
-
|
|
104
|
-
def configured_description(self) -> str | None:
|
|
115
|
+
def config_description(self) -> str | None:
|
|
105
116
|
"""Overloaded in OtherFile."""
|
|
106
117
|
if self.config and self.config.description:
|
|
107
118
|
return f"({self.config.description})"
|
|
@@ -109,40 +120,32 @@ class Document:
|
|
|
109
120
|
def date_str(self) -> str | None:
|
|
110
121
|
return date_str(self.timestamp)
|
|
111
122
|
|
|
112
|
-
def description_panel(self, include_hints: bool = False) -> Panel:
|
|
113
|
-
"""Panelized description() with info_txt(), used in search results."""
|
|
114
|
-
hints = [Text('', style='italic').append(h) for h in (self.hints() if include_hints else [])]
|
|
115
|
-
return Panel(Group(*([self.summary()] + hints)), border_style=self.document_type_style(), expand=False)
|
|
116
|
-
|
|
117
|
-
def document_type_style(self) -> str:
|
|
118
|
-
return DOC_TYPE_STYLES[self.class_name()]
|
|
119
|
-
|
|
120
123
|
def duplicate_file_txt(self) -> Text:
|
|
121
124
|
"""If the file is a dupe make a nice message to explain what file it's a duplicate of."""
|
|
122
125
|
if not self.config or not self.config.dupe_of_id:
|
|
123
126
|
raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
|
|
124
127
|
|
|
125
|
-
txt = Text(f"Not showing ", style=
|
|
128
|
+
txt = Text(f"Not showing ", style=INFO_STYLE).append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
|
|
126
129
|
txt.append(f" because it's {self.config.duplicate_reason()} ")
|
|
127
130
|
return txt.append(epstein_media_doc_link_txt(self.config.dupe_of_id, style='royal_blue1'))
|
|
128
131
|
|
|
129
132
|
def epsteinify_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
130
133
|
"""Create a Text obj link to this document on epsteinify.com."""
|
|
131
|
-
return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.
|
|
134
|
+
return link_text_obj(epsteinify_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
132
135
|
|
|
133
136
|
def epstein_media_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
134
137
|
"""Create a Text obj link to this document on epstein.media."""
|
|
135
|
-
return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.
|
|
138
|
+
return link_text_obj(epstein_media_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
136
139
|
|
|
137
140
|
def epstein_web_link(self, style: str = ARCHIVE_LINK_COLOR, link_txt: str | None = None) -> Text:
|
|
138
141
|
"""Create a Text obj link to this document on EpsteinWeb."""
|
|
139
|
-
return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.
|
|
142
|
+
return link_text_obj(epstein_web_doc_url(self.url_slug), link_txt or self.file_path.stem, style)
|
|
140
143
|
|
|
141
144
|
def file_info_panel(self) -> Group:
|
|
142
|
-
"""Panel with filename linking to raw file plus any
|
|
145
|
+
"""Panel with filename linking to raw file plus any additional info about the file."""
|
|
143
146
|
panel = Panel(self.raw_document_link_txt(include_alt_link=True), border_style=self._border_style(), expand=False)
|
|
144
|
-
|
|
145
|
-
return Group(*([panel] +
|
|
147
|
+
padded_info = [Padding(sentence, INFO_PADDING) for sentence in self.info()]
|
|
148
|
+
return Group(*([panel] + padded_info))
|
|
146
149
|
|
|
147
150
|
def file_size(self) -> int:
|
|
148
151
|
return file_size(self.file_path)
|
|
@@ -150,34 +153,35 @@ class Document:
|
|
|
150
153
|
def file_size_str(self) -> str:
|
|
151
154
|
return file_size_str(self.file_path)
|
|
152
155
|
|
|
153
|
-
def
|
|
154
|
-
"""
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
hints.append(highlighter(Text(hint_msg, style='white dim italic')))
|
|
156
|
+
def info(self) -> list[Text]:
|
|
157
|
+
"""0 to 2 sentences containing the info_txt() as well as any configured description."""
|
|
158
|
+
sentences = [
|
|
159
|
+
self.info_txt(),
|
|
160
|
+
highlighter(Text(self.config_description(), style=INFO_STYLE)) if self.config_description() else None
|
|
161
|
+
]
|
|
160
162
|
|
|
161
|
-
return without_falsey(
|
|
163
|
+
return without_falsey(sentences)
|
|
162
164
|
|
|
163
165
|
def info_txt(self) -> Text | None:
|
|
164
166
|
"""Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
|
|
165
167
|
return None
|
|
166
168
|
|
|
169
|
+
def is_duplicate(self) -> bool:
|
|
170
|
+
return bool(self.config and self.config.dupe_of_id)
|
|
171
|
+
|
|
167
172
|
def is_local_extract_file(self) -> bool:
|
|
168
173
|
"""True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
|
|
169
174
|
return is_local_extract_file(self.filename)
|
|
170
175
|
|
|
171
|
-
def log(self, msg: str, level: int = logging.
|
|
176
|
+
def log(self, msg: str, level: int = logging.INFO):
|
|
172
177
|
"""Log with filename as a prefix."""
|
|
173
|
-
logger.log(level, f"{self.
|
|
178
|
+
logger.log(level, f"{self.file_path.stem} {msg}")
|
|
174
179
|
|
|
175
180
|
def log_top_lines(self, n: int = 10, msg: str = '', level: int = logging.INFO) -> None:
|
|
176
181
|
"""Log first 'n' lines of self.text at 'level'. 'msg' can be optionally provided."""
|
|
177
182
|
separator = '\n\n' if '\n' in msg else '. '
|
|
178
183
|
msg = (msg + separator) if msg else ''
|
|
179
|
-
|
|
180
|
-
logger.log(level, f"{msg}\n\n{self.top_lines(n)}\n")
|
|
184
|
+
self.log(f"{msg}First {n} lines:\n\n{self.top_lines(n)}\n", level)
|
|
181
185
|
|
|
182
186
|
def matching_lines(self, _pattern: re.Pattern | str) -> list[MatchedLine]:
|
|
183
187
|
"""Return lines matching a regex as colored list[Text]."""
|
|
@@ -189,7 +193,7 @@ class Document:
|
|
|
189
193
|
metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
|
|
190
194
|
metadata['bytes'] = self.file_size()
|
|
191
195
|
metadata['filename'] = f"{self.url_slug}.txt"
|
|
192
|
-
metadata['type'] = self.
|
|
196
|
+
metadata['type'] = self._class_name()
|
|
193
197
|
|
|
194
198
|
if self.is_local_extract_file():
|
|
195
199
|
metadata['extracted_file'] = {
|
|
@@ -208,7 +212,7 @@ class Document:
|
|
|
208
212
|
"""Returns colored links to epstein.media and epsteinweb in a Text object."""
|
|
209
213
|
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
210
214
|
|
|
211
|
-
if args.
|
|
215
|
+
if args.use_epstein_web:
|
|
212
216
|
txt.append(self.epstein_web_link(style=style))
|
|
213
217
|
|
|
214
218
|
if include_alt_link:
|
|
@@ -234,7 +238,7 @@ class Document:
|
|
|
234
238
|
return text
|
|
235
239
|
|
|
236
240
|
def sort_key(self) -> tuple[datetime, str, int]:
|
|
237
|
-
if self.
|
|
241
|
+
if self.is_duplicate():
|
|
238
242
|
sort_id = self.config.dupe_of_id
|
|
239
243
|
dupe_idx = 1
|
|
240
244
|
else:
|
|
@@ -245,7 +249,7 @@ class Document:
|
|
|
245
249
|
|
|
246
250
|
def summary(self) -> Text:
|
|
247
251
|
"""Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
|
|
248
|
-
txt = Text('').append(self.
|
|
252
|
+
txt = Text('').append(self._class_name(), style=self._class_style())
|
|
249
253
|
txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
|
|
250
254
|
|
|
251
255
|
if self.timestamp:
|
|
@@ -261,13 +265,32 @@ class Document:
|
|
|
261
265
|
|
|
262
266
|
return txt
|
|
263
267
|
|
|
268
|
+
def summary_panel(self) -> Panel:
|
|
269
|
+
"""Panelized summary() plus any info() sentences, used in search results."""
|
|
270
|
+
sentences = [self.summary()]
|
|
271
|
+
|
|
272
|
+
if self.include_description_in_summary_panel:
|
|
273
|
+
sentences += [Text('', style='italic').append(h) for h in self.info()]
|
|
274
|
+
|
|
275
|
+
return Panel(Group(*sentences), border_style=self._class_style(), expand=False)
|
|
276
|
+
|
|
264
277
|
def top_lines(self, n: int = 10) -> str:
|
|
265
278
|
return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
|
|
266
279
|
|
|
280
|
+
def warn(self, msg: str) -> None:
|
|
281
|
+
self.log(msg, level=logging.WARNING)
|
|
282
|
+
|
|
267
283
|
def _border_style(self) -> str:
|
|
268
284
|
"""Should be overloaded in subclasses."""
|
|
269
285
|
return 'white'
|
|
270
286
|
|
|
287
|
+
def _class_name(self) -> str:
|
|
288
|
+
"""Annoying workaround for circular import issues and isinstance()."""
|
|
289
|
+
return str(type(self).__name__)
|
|
290
|
+
|
|
291
|
+
def _class_style(self) -> str:
|
|
292
|
+
return DOC_TYPE_STYLES[self._class_name()]
|
|
293
|
+
|
|
271
294
|
def _extract_author(self) -> None:
|
|
272
295
|
"""Get author from config. Extended in Email subclass to also check headers."""
|
|
273
296
|
if self.config and self.config.author:
|
|
@@ -304,6 +327,26 @@ class Document:
|
|
|
304
327
|
self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
|
|
305
328
|
self.num_lines = len(self.lines)
|
|
306
329
|
|
|
330
|
+
def _set_extract_config(self, doc_cfg: DocCfg | EmailCfg) -> None:
|
|
331
|
+
"""Copy info from original config for file this document was extracted from."""
|
|
332
|
+
if self.config:
|
|
333
|
+
self.warn(f"Merging existing config with config for file this document was extracted from")
|
|
334
|
+
else:
|
|
335
|
+
self.config = EmailCfg(id=self.file_id)
|
|
336
|
+
|
|
337
|
+
extracted_from_description = doc_cfg.complete_description()
|
|
338
|
+
|
|
339
|
+
if extracted_from_description:
|
|
340
|
+
extracted_description = f"{EXTRACTED_FROM} {extracted_from_description}"
|
|
341
|
+
|
|
342
|
+
if self.config.description:
|
|
343
|
+
self.warn(f"Overwriting description '{self.config.description}' with extract description '{doc_cfg.description}'")
|
|
344
|
+
|
|
345
|
+
self.config.description = extracted_description
|
|
346
|
+
|
|
347
|
+
self.config.is_interesting = self.config.is_interesting or doc_cfg.is_interesting
|
|
348
|
+
self.warn(f"Constructed local config\n{self.config}")
|
|
349
|
+
|
|
307
350
|
def _write_clean_text(self, output_path: Path) -> None:
|
|
308
351
|
"""Write self.text to 'output_path'. Used only for diffing files."""
|
|
309
352
|
if output_path.exists():
|
|
@@ -591,6 +591,10 @@ class Email(Communication):
|
|
|
591
591
|
self._merge_lines(7, 9)
|
|
592
592
|
elif self.file_id == '030299':
|
|
593
593
|
self._merge_lines(7, 10)
|
|
594
|
+
elif self.file_id == '014860':
|
|
595
|
+
self._merge_lines(3)
|
|
596
|
+
self._merge_lines(4)
|
|
597
|
+
self._merge_lines(4)
|
|
594
598
|
elif self.file_id == '029977':
|
|
595
599
|
self._set_computed_fields(text=self.text.replace('Sent 9/28/2012 2:41:02 PM', 'Sent: 9/28/2012 2:41:02 PM'))
|
|
596
600
|
|
|
@@ -606,9 +610,8 @@ class Email(Communication):
|
|
|
606
610
|
self._remove_line(3)
|
|
607
611
|
|
|
608
612
|
if old_text != self.text:
|
|
609
|
-
self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n'
|
|
610
|
-
self.log_top_lines(12, 'Result of modifications'
|
|
611
|
-
self.log('', logging.INFO)
|
|
613
|
+
self.log(f"Modified text, old:\n\n" + '\n'.join(old_text.split('\n')[0:12]) + '\n')
|
|
614
|
+
self.log_top_lines(12, 'Result of modifications')
|
|
612
615
|
|
|
613
616
|
lines = self.repair_ocr_text(OCR_REPAIRS, self.text).split('\n')
|
|
614
617
|
new_lines = []
|
|
@@ -697,7 +700,7 @@ class Email(Communication):
|
|
|
697
700
|
yield Padding(email_txt_panel, (0, 0, 1, INFO_INDENT))
|
|
698
701
|
|
|
699
702
|
if should_rewrite_header:
|
|
700
|
-
self.log_top_lines(self.header.num_header_rows + 4, f'Original header:'
|
|
703
|
+
self.log_top_lines(self.header.num_header_rows + 4, f'Original header:')
|
|
701
704
|
|
|
702
705
|
@staticmethod
|
|
703
706
|
def build_table(emails: list['Email'], _author: str | None) -> Table:
|
{epstein_files-1.0.10 → epstein_files-1.0.11}/epstein_files/documents/imessage/text_message.py
RENAMED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from dataclasses import dataclass
|
|
2
|
+
from dataclasses import dataclass
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
|
|
5
5
|
from rich.text import Text
|
|
6
6
|
|
|
7
|
-
from epstein_files.util.constant.names import JEFFREY_EPSTEIN,
|
|
7
|
+
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, STEVE_BANNON, UNKNOWN
|
|
8
8
|
from epstein_files.util.data import extract_last_name
|
|
9
9
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
10
10
|
from epstein_files.util.logging import logger
|
|
@@ -19,15 +19,6 @@ DISPLAY_LAST_NAME_ONLY = [
|
|
|
19
19
|
STEVE_BANNON,
|
|
20
20
|
]
|
|
21
21
|
|
|
22
|
-
PHONE_NUMBER_MAPPING = {
|
|
23
|
-
'+19174393646': ANTHONY_SCARAMUCCI,
|
|
24
|
-
'+13109906526': STEVE_BANNON,
|
|
25
|
-
'+16463880059': EVA,
|
|
26
|
-
'+13108737937': CELINA_DUBIN,
|
|
27
|
-
'+13108802851': STEVE_BANNON,
|
|
28
|
-
|
|
29
|
-
}
|
|
30
|
-
|
|
31
22
|
TEXTER_MAPPING = {
|
|
32
23
|
'e:': JEFFREY_EPSTEIN,
|
|
33
24
|
'e:jeeitunes@gmail.com': JEFFREY_EPSTEIN,
|
|
@@ -48,7 +39,7 @@ class TextMessage:
|
|
|
48
39
|
|
|
49
40
|
if self.author is None:
|
|
50
41
|
self.author_str = UNKNOWN
|
|
51
|
-
elif self.author in DISPLAY_LAST_NAME_ONLY:
|
|
42
|
+
elif self.author in DISPLAY_LAST_NAME_ONLY and not self.author_str:
|
|
52
43
|
self.author_str = extract_last_name(self.author)
|
|
53
44
|
else:
|
|
54
45
|
self.author_str = self.author_str or self.author
|
|
@@ -8,11 +8,23 @@ from rich.text import Text
|
|
|
8
8
|
|
|
9
9
|
from epstein_files.documents.other_file import OtherFile
|
|
10
10
|
from epstein_files.util.constant.strings import JSON
|
|
11
|
+
from epstein_files.util.rich import INFO_STYLE
|
|
12
|
+
|
|
13
|
+
TEXT_FIELDS = [
|
|
14
|
+
'caption',
|
|
15
|
+
'standard',
|
|
16
|
+
'subtitle',
|
|
17
|
+
'text',
|
|
18
|
+
'title',
|
|
19
|
+
'to',
|
|
20
|
+
]
|
|
11
21
|
|
|
12
22
|
|
|
13
23
|
@dataclass
|
|
14
24
|
class JsonFile(OtherFile):
|
|
15
25
|
"""File containing JSON data."""
|
|
26
|
+
|
|
27
|
+
include_description_in_summary_panel: ClassVar[bool] = False
|
|
16
28
|
strip_whitespace: ClassVar[bool] = False
|
|
17
29
|
|
|
18
30
|
def __post_init__(self):
|
|
@@ -27,7 +39,7 @@ class JsonFile(OtherFile):
|
|
|
27
39
|
return JSON
|
|
28
40
|
|
|
29
41
|
def info_txt(self) -> Text | None:
|
|
30
|
-
return Text(f"JSON file,
|
|
42
|
+
return Text(f"JSON file, seems to contain link unfurl/embed data for iMessage or similar", style=INFO_STYLE)
|
|
31
43
|
|
|
32
44
|
def is_interesting(self):
|
|
33
45
|
return False
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import re
|
|
2
3
|
from collections import defaultdict
|
|
3
4
|
from dataclasses import dataclass, field
|
|
@@ -15,7 +16,7 @@ from epstein_files.util.data import iso_timestamp, listify, sort_dict
|
|
|
15
16
|
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
16
17
|
from epstein_files.util.highlighted_group import get_style_for_name
|
|
17
18
|
from epstein_files.util.logging import logger
|
|
18
|
-
from epstein_files.util.rich import build_table
|
|
19
|
+
from epstein_files.util.rich import build_table, highlighter
|
|
19
20
|
|
|
20
21
|
CONFIRMED_MSG = 'Found confirmed counterparty'
|
|
21
22
|
GUESSED_MSG = 'This is probably a conversation with'
|
|
@@ -27,7 +28,12 @@ REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
|
|
|
27
28
|
class MessengerLog(Communication):
|
|
28
29
|
"""Class representing one iMessage log file (one conversation between Epstein and some counterparty)."""
|
|
29
30
|
config: TextCfg | None = None
|
|
30
|
-
|
|
31
|
+
messages: list[TextMessage] = field(default_factory=list)
|
|
32
|
+
phone_number: str | None = None
|
|
33
|
+
|
|
34
|
+
def __post_init__(self):
|
|
35
|
+
super().__post_init__()
|
|
36
|
+
self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
|
|
31
37
|
|
|
32
38
|
def first_message_at(self, name: str | None) -> datetime:
|
|
33
39
|
return self.messages_by(name)[0].timestamp()
|
|
@@ -36,27 +42,29 @@ class MessengerLog(Communication):
|
|
|
36
42
|
if self.author is None:
|
|
37
43
|
return None
|
|
38
44
|
|
|
39
|
-
|
|
40
|
-
author_txt = Text(self.
|
|
41
|
-
|
|
45
|
+
info_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
|
|
46
|
+
author_txt = Text(self.author, style=self.author_style + ' bold')
|
|
47
|
+
txt = Text(f"({info_msg} ", style='dim').append(author_txt)
|
|
42
48
|
|
|
43
|
-
|
|
44
|
-
|
|
49
|
+
if self.phone_number:
|
|
50
|
+
txt.append(f" using the phone number {self.phone_number}")
|
|
45
51
|
|
|
46
|
-
|
|
47
|
-
"""Lazily evaluated accessor for self._messages."""
|
|
48
|
-
if not self._messages:
|
|
49
|
-
self._messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
|
|
52
|
+
return highlighter(txt.append(')'))
|
|
50
53
|
|
|
51
|
-
|
|
54
|
+
def last_message_at(self, name: str | None) -> datetime:
|
|
55
|
+
return self.messages_by(name)[-1].timestamp()
|
|
52
56
|
|
|
53
57
|
def messages_by(self, name: str | None) -> list[TextMessage]:
|
|
54
58
|
"""Return all messages by 'name'."""
|
|
55
|
-
return [m for m in self.messages
|
|
59
|
+
return [m for m in self.messages if m.author == name]
|
|
56
60
|
|
|
57
61
|
def metadata(self) -> Metadata:
|
|
58
62
|
metadata = super().metadata()
|
|
59
|
-
metadata.update({'num_messages': len(self.messages
|
|
63
|
+
metadata.update({'num_messages': len(self.messages)})
|
|
64
|
+
|
|
65
|
+
if self.phone_number:
|
|
66
|
+
metadata['phone_number'] = self.phone_number
|
|
67
|
+
|
|
60
68
|
return metadata
|
|
61
69
|
|
|
62
70
|
def _border_style(self) -> str:
|
|
@@ -65,11 +73,16 @@ class MessengerLog(Communication):
|
|
|
65
73
|
def _build_message(self, match: re.Match) -> TextMessage:
|
|
66
74
|
"""Turn a regex match into a TextMessage."""
|
|
67
75
|
author_str = REDACTED_AUTHOR_REGEX.sub('', match.group(1).strip())
|
|
76
|
+
is_phone_number = author_str.startswith('+')
|
|
77
|
+
|
|
78
|
+
if is_phone_number:
|
|
79
|
+
logger.warning(f"{self.summary()} Found phone number: {author_str}")
|
|
80
|
+
self.phone_number = author_str
|
|
68
81
|
|
|
69
|
-
# If the Sender: is redacted that means it's from self.author
|
|
82
|
+
# If the Sender: is redacted or if it's an unredacted phone number that means it's from self.author
|
|
70
83
|
return TextMessage(
|
|
71
|
-
author=self.author if (
|
|
72
|
-
author_str=author_str if
|
|
84
|
+
author=self.author if (is_phone_number or not author_str) else author_str,
|
|
85
|
+
author_str=author_str if is_phone_number else None, # Preserve phone numbers
|
|
73
86
|
id_confirmed=not self.is_attribution_uncertain(),
|
|
74
87
|
text=match.group(4).strip(),
|
|
75
88
|
timestamp_str=match.group(2).strip(),
|
|
@@ -90,7 +103,7 @@ class MessengerLog(Communication):
|
|
|
90
103
|
yield self.file_info_panel()
|
|
91
104
|
yield Text('')
|
|
92
105
|
|
|
93
|
-
for message in self.messages
|
|
106
|
+
for message in self.messages:
|
|
94
107
|
yield message
|
|
95
108
|
|
|
96
109
|
@classmethod
|
|
@@ -99,7 +112,7 @@ class MessengerLog(Communication):
|
|
|
99
112
|
sender_counts: dict[str | None, int] = defaultdict(int)
|
|
100
113
|
|
|
101
114
|
for message_log in imessage_logs:
|
|
102
|
-
for message in message_log.messages
|
|
115
|
+
for message in message_log.messages:
|
|
103
116
|
sender_counts[message.author] += 1
|
|
104
117
|
|
|
105
118
|
return sender_counts
|