epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +59 -51
- epstein_files/documents/communication.py +9 -9
- epstein_files/documents/document.py +111 -87
- epstein_files/documents/email.py +154 -85
- epstein_files/documents/emails/email_header.py +7 -6
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/json_file.py +17 -0
- epstein_files/documents/messenger_log.py +62 -3
- epstein_files/documents/other_file.py +165 -17
- epstein_files/epstein_files.py +100 -143
- epstein_files/util/constant/names.py +6 -0
- epstein_files/util/constant/strings.py +27 -0
- epstein_files/util/constant/urls.py +22 -9
- epstein_files/util/constants.py +968 -1015
- epstein_files/util/data.py +14 -28
- epstein_files/util/{file_cfg.py → doc_cfg.py} +120 -34
- epstein_files/util/env.py +16 -18
- epstein_files/util/file_helper.py +56 -17
- epstein_files/util/highlighted_group.py +227 -175
- epstein_files/util/logging.py +57 -0
- epstein_files/util/rich.py +18 -13
- epstein_files/util/search_result.py +14 -6
- epstein_files/util/timer.py +24 -0
- epstein_files/util/word_count.py +2 -1
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/METADATA +3 -2
- epstein_files-1.0.1.dist-info/RECORD +30 -0
- epstein_files-1.0.0.dist-info/RECORD +0 -28
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.1.dist-info}/WHEEL +0 -0
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from collections import defaultdict
|
|
2
3
|
from dataclasses import dataclass, field
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
|
|
5
6
|
from rich.console import Console, ConsoleOptions, RenderResult
|
|
7
|
+
from rich.table import Table
|
|
6
8
|
from rich.text import Text
|
|
7
9
|
|
|
8
10
|
from epstein_files.documents.communication import Communication
|
|
9
11
|
from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
|
|
10
|
-
from epstein_files.util.
|
|
12
|
+
from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
|
|
13
|
+
from epstein_files.util.constant.strings import AUTHOR
|
|
14
|
+
from epstein_files.util.data import iso_timestamp, listify, sort_dict
|
|
15
|
+
from epstein_files.util.doc_cfg import Metadata, TextCfg
|
|
16
|
+
from epstein_files.util.highlighted_group import get_style_for_name
|
|
17
|
+
from epstein_files.util.logging import logger
|
|
11
18
|
|
|
12
19
|
CONFIRMED_MSG = 'Found confirmed counterparty'
|
|
13
20
|
GUESSED_MSG = 'This is probably a conversation with'
|
|
@@ -18,12 +25,16 @@ REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
|
|
|
18
25
|
@dataclass
|
|
19
26
|
class MessengerLog(Communication):
|
|
20
27
|
"""Class representing one iMessage log file (one conversation between Epstein and some counterparty)."""
|
|
28
|
+
config: TextCfg | None = None
|
|
21
29
|
_messages: list[TextMessage] = field(default_factory=list)
|
|
22
30
|
|
|
23
31
|
def first_message_at(self, name: str | None) -> datetime:
|
|
24
32
|
return self.messages_by(name)[0].timestamp()
|
|
25
33
|
|
|
26
34
|
def info_txt(self) -> Text | None:
|
|
35
|
+
if self.author is None:
|
|
36
|
+
return None
|
|
37
|
+
|
|
27
38
|
hint_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
|
|
28
39
|
author_txt = Text(self.author_or_unknown(), style=self.author_style + ' bold')
|
|
29
40
|
return Text(f"({hint_msg} ", style='dim').append(author_txt).append(')')
|
|
@@ -51,6 +62,11 @@ class MessengerLog(Communication):
|
|
|
51
62
|
"""Return all messages by 'name'."""
|
|
52
63
|
return [m for m in self.messages() if m.author == name]
|
|
53
64
|
|
|
65
|
+
def metadata(self) -> Metadata:
|
|
66
|
+
metadata = super().metadata()
|
|
67
|
+
metadata.update({'num_messages': len(self.messages())})
|
|
68
|
+
return metadata
|
|
69
|
+
|
|
54
70
|
def _border_style(self) -> str:
|
|
55
71
|
return self.author_style
|
|
56
72
|
|
|
@@ -61,13 +77,56 @@ class MessengerLog(Communication):
|
|
|
61
77
|
try:
|
|
62
78
|
return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
|
|
63
79
|
except ValueError as e:
|
|
64
|
-
logger.info(f"
|
|
80
|
+
logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
|
|
65
81
|
|
|
66
82
|
raise RuntimeError(f"{self}: No timestamp found!")
|
|
67
83
|
|
|
68
|
-
def __rich_console__(self,
|
|
84
|
+
def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
|
|
69
85
|
yield self.file_info_panel()
|
|
70
86
|
yield Text('')
|
|
71
87
|
|
|
72
88
|
for message in self.messages():
|
|
73
89
|
yield message
|
|
90
|
+
|
|
91
|
+
@classmethod
|
|
92
|
+
def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
|
|
93
|
+
"""Count up how many texts were sent by each author."""
|
|
94
|
+
sender_counts: dict[str | None, int] = defaultdict(int)
|
|
95
|
+
|
|
96
|
+
for message_log in imessage_logs:
|
|
97
|
+
for message in message_log.messages():
|
|
98
|
+
sender_counts[message.author] += 1
|
|
99
|
+
|
|
100
|
+
return sender_counts
|
|
101
|
+
|
|
102
|
+
@classmethod
|
|
103
|
+
def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
|
|
104
|
+
authors = listify(author)
|
|
105
|
+
return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
|
|
106
|
+
|
|
107
|
+
@classmethod
|
|
108
|
+
def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
|
|
109
|
+
"""Build a table summarizing the text messages in 'imessage_logs'."""
|
|
110
|
+
counts_table = Table(title="Text Message Counts By Author", header_style="bold")
|
|
111
|
+
counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
|
|
112
|
+
counts_table.add_column('Files', justify='right', style='white')
|
|
113
|
+
counts_table.add_column("Msgs", justify='right')
|
|
114
|
+
counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
|
|
115
|
+
counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
|
|
116
|
+
counts_table.add_column('Days', justify='right', style='dim')
|
|
117
|
+
|
|
118
|
+
for name, count in sort_dict(cls.count_authors(imessage_logs)):
|
|
119
|
+
logs = cls.logs_for(name, imessage_logs)
|
|
120
|
+
first_at = logs[0].first_message_at(name)
|
|
121
|
+
last_at = logs[-1].first_message_at(name)
|
|
122
|
+
|
|
123
|
+
counts_table.add_row(
|
|
124
|
+
Text(name or UNKNOWN, get_style_for_name(name)),
|
|
125
|
+
str(len(logs)),
|
|
126
|
+
f"{count:,}",
|
|
127
|
+
iso_timestamp(first_at),
|
|
128
|
+
iso_timestamp(last_at),
|
|
129
|
+
str((last_at - first_at).days + 1),
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
return counts_table
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
import logging
|
|
2
3
|
import warnings
|
|
3
4
|
from dataclasses import dataclass
|
|
@@ -5,18 +6,25 @@ from datetime import datetime
|
|
|
5
6
|
|
|
6
7
|
import datefinder
|
|
7
8
|
import dateutil
|
|
9
|
+
from rich.console import Group
|
|
8
10
|
from rich.markup import escape
|
|
9
11
|
from rich.panel import Panel
|
|
12
|
+
from rich.table import Table
|
|
10
13
|
from rich.text import Text
|
|
11
14
|
|
|
12
15
|
from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
|
|
13
|
-
from epstein_files.util.
|
|
16
|
+
from epstein_files.util.constant.strings import *
|
|
17
|
+
from epstein_files.util.constants import *
|
|
18
|
+
from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg
|
|
14
19
|
from epstein_files.util.data import escape_single_quotes, remove_timezone, uniquify
|
|
15
|
-
from epstein_files.util.
|
|
16
|
-
from epstein_files.util.
|
|
20
|
+
from epstein_files.util.file_helper import FILENAME_LENGTH
|
|
21
|
+
from epstein_files.util.env import args
|
|
22
|
+
from epstein_files.util.highlighted_group import get_style_for_category
|
|
23
|
+
from epstein_files.util.rich import QUESTION_MARK_TXT, highlighter
|
|
24
|
+
from epstein_files.util.logging import logger
|
|
17
25
|
|
|
18
|
-
MAX_EXTRACTED_TIMESTAMPS = 100
|
|
19
26
|
MAX_DAYS_SPANNED_TO_BE_VALID = 10
|
|
27
|
+
MAX_EXTRACTED_TIMESTAMPS = 100
|
|
20
28
|
MIN_TIMESTAMP = datetime(2000, 1, 1)
|
|
21
29
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
22
30
|
MAX_TIMESTAMP = datetime(2022, 12, 31)
|
|
@@ -24,23 +32,110 @@ PREVIEW_CHARS = int(580 * (1 if args.all_other_files else 1.5))
|
|
|
24
32
|
LOG_INDENT = '\n '
|
|
25
33
|
TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
|
|
26
34
|
VAST_HOUSE = 'vast house' # Michael Wolff article draft about Epstein indicator
|
|
35
|
+
VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
|
|
36
|
+
|
|
37
|
+
UNINTERESTING_CATEGORES = [
|
|
38
|
+
ARTS,
|
|
39
|
+
BOOK,
|
|
40
|
+
JUNK,
|
|
41
|
+
SPEECH,
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
UNINTERESTING_IDS = [
|
|
45
|
+
'031794',
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
# OtherFiles whose description/hints match these prefixes are not displayed unless --all-other-files is used
|
|
49
|
+
UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
|
|
50
|
+
'article about',
|
|
51
|
+
ARTICLE_DRAFT,
|
|
52
|
+
'Aviation International',
|
|
53
|
+
BBC,
|
|
54
|
+
BLOOMBERG,
|
|
55
|
+
'Boston Globe',
|
|
56
|
+
BROCKMAN_INC,
|
|
57
|
+
CHINA_DAILY,
|
|
58
|
+
CNN,
|
|
59
|
+
'completely redacted',
|
|
60
|
+
CVRA,
|
|
61
|
+
DAILY_MAIL,
|
|
62
|
+
DAILY_TELEGRAPH,
|
|
63
|
+
DAVID_SCHOEN_CVRA_LEXIS_SEARCH[0:-12], # Because date at end :(
|
|
64
|
+
DERSH_GIUFFRE_TWEET,
|
|
65
|
+
'Financial Times',
|
|
66
|
+
'Forbes',
|
|
67
|
+
'Frontlines',
|
|
68
|
+
'Future Science',
|
|
69
|
+
'Globe and Mail',
|
|
70
|
+
GORDON_GETTY,
|
|
71
|
+
f"{HARVARD} Econ",
|
|
72
|
+
HARVARD_POETRY,
|
|
73
|
+
'Inference',
|
|
74
|
+
JASTA,
|
|
75
|
+
'JetGala',
|
|
76
|
+
JOHN_BOLTON_PRESS_CLIPPING,
|
|
77
|
+
'Journal of Criminal',
|
|
78
|
+
LA_TIMES,
|
|
79
|
+
'Litigation Daily',
|
|
80
|
+
LAWRENCE_KRAUSS,
|
|
81
|
+
'MarketWatch',
|
|
82
|
+
MARTIN_NOWAK,
|
|
83
|
+
NOBEL_CHARITABLE_TRUST,
|
|
84
|
+
'Nautilus',
|
|
85
|
+
'New Yorker',
|
|
86
|
+
NYT_ARTICLE,
|
|
87
|
+
NYT_COLUMN,
|
|
88
|
+
PALM_BEACH_CODE_ENFORCEMENT,
|
|
89
|
+
PALM_BEACH_DAILY_ARTICLE,
|
|
90
|
+
PALM_BEACH_POST_ARTICLE,
|
|
91
|
+
PALM_BEACH_TSV,
|
|
92
|
+
PALM_BEACH_WATER_COMMITTEE,
|
|
93
|
+
PAUL_KRASSNER,
|
|
94
|
+
PEGGY_SIEGAL,
|
|
95
|
+
'Politifact',
|
|
96
|
+
'Rafanelli',
|
|
97
|
+
ROBERT_LAWRENCE_KUHN,
|
|
98
|
+
ROBERT_TRIVERS,
|
|
99
|
+
'SCMP',
|
|
100
|
+
'SciencExpress',
|
|
101
|
+
'Scowcroft',
|
|
102
|
+
SHIMON_POST_ARTICLE,
|
|
103
|
+
SINGLE_PAGE,
|
|
104
|
+
STACEY_PLASKETT,
|
|
105
|
+
TERJE_ROD_LARSEN,
|
|
106
|
+
TEXT_OF_US_LAW,
|
|
107
|
+
TRANSLATION,
|
|
108
|
+
TWEET,
|
|
109
|
+
THE_REAL_DEAL_ARTICLE,
|
|
110
|
+
TRUMP_DISCLOSURES,
|
|
111
|
+
UBS_CIO_REPORT,
|
|
112
|
+
UN_GENERAL_ASSEMBLY,
|
|
113
|
+
'U.S. News',
|
|
114
|
+
'US Office',
|
|
115
|
+
'Vanity Fair',
|
|
116
|
+
VI_DAILY_NEWS_ARTICLE,
|
|
117
|
+
WAPO,
|
|
118
|
+
]
|
|
27
119
|
|
|
28
120
|
|
|
29
121
|
@dataclass
|
|
30
122
|
class OtherFile(Document):
|
|
31
123
|
"""File that is not an email, an iMessage log, or JSON data."""
|
|
32
124
|
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
if self.config is None:
|
|
36
|
-
return None
|
|
125
|
+
def __post_init__(self):
|
|
126
|
+
super().__post_init__()
|
|
37
127
|
|
|
38
|
-
|
|
39
|
-
|
|
128
|
+
if self.config is None and VI_DAILY_NEWS_REGEX.search(self.text):
|
|
129
|
+
self.log(f"Creating synthetic config for VI Daily News article...", logging.INFO)
|
|
130
|
+
self.config = DocCfg(id=self.file_id, description=VI_DAILY_NEWS_ARTICLE, category=ARTICLE)
|
|
40
131
|
|
|
41
|
-
def
|
|
42
|
-
|
|
43
|
-
|
|
132
|
+
def category(self) -> str | None:
|
|
133
|
+
return self.config and self.config.category
|
|
134
|
+
|
|
135
|
+
def configured_description(self) -> str | None:
|
|
136
|
+
"""Overloads superclass method."""
|
|
137
|
+
if self.config is not None:
|
|
138
|
+
return self.config.info_str()
|
|
44
139
|
|
|
45
140
|
def description_panel(self, include_hints=True) -> Panel:
|
|
46
141
|
"""Panelized description() with info_txt(), used in search results."""
|
|
@@ -57,13 +152,22 @@ class OtherFile(Document):
|
|
|
57
152
|
return Text(escape(self.preview_text()))
|
|
58
153
|
|
|
59
154
|
def is_interesting(self):
|
|
60
|
-
"""False for lame prefixes and
|
|
155
|
+
"""False for lame prefixes, duplicates, and other boring files."""
|
|
61
156
|
hints = self.hints()
|
|
62
157
|
|
|
63
158
|
if self.is_duplicate:
|
|
64
159
|
return False
|
|
160
|
+
elif self.file_id in UNINTERESTING_IDS:
|
|
161
|
+
return False
|
|
65
162
|
elif len(hints) == 0:
|
|
66
163
|
return True
|
|
164
|
+
elif self.config:
|
|
165
|
+
if self.config.is_interesting:
|
|
166
|
+
return True
|
|
167
|
+
elif self.category() == FINANCE and self.author is not None:
|
|
168
|
+
return False
|
|
169
|
+
elif self.category() in UNINTERESTING_CATEGORES:
|
|
170
|
+
return False
|
|
67
171
|
|
|
68
172
|
for prefix in UNINTERESTING_PREFIXES:
|
|
69
173
|
if hints[0].plain.startswith(prefix):
|
|
@@ -74,6 +178,10 @@ class OtherFile(Document):
|
|
|
74
178
|
def preview_text(self) -> str:
|
|
75
179
|
return WHITESPACE_REGEX.sub(' ', self.text)[0:PREVIEW_CHARS]
|
|
76
180
|
|
|
181
|
+
def summary(self) -> Text:
|
|
182
|
+
"""One line summary mostly for logging."""
|
|
183
|
+
return super().summary().append(CLOSE_PROPERTIES_CHAR)
|
|
184
|
+
|
|
77
185
|
def _extract_timestamp(self) -> datetime | None:
|
|
78
186
|
"""Return configured timestamp or value extracted by scanning text with datefinder."""
|
|
79
187
|
if self.config and self.config.timestamp:
|
|
@@ -98,7 +206,9 @@ class OtherFile(Document):
|
|
|
98
206
|
logger.warning(f"Error while iterating through datefinder.find_dates(): {e}")
|
|
99
207
|
|
|
100
208
|
if len(timestamps) == 0:
|
|
101
|
-
|
|
209
|
+
if not self.is_duplicate and VAST_HOUSE not in self.text:
|
|
210
|
+
self.log_top_lines(15, msg=f"No timestamps found", level=logging.INFO)
|
|
211
|
+
|
|
102
212
|
return None
|
|
103
213
|
elif len(timestamps) == 1:
|
|
104
214
|
return timestamps[0]
|
|
@@ -113,5 +223,43 @@ class OtherFile(Document):
|
|
|
113
223
|
timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
|
|
114
224
|
|
|
115
225
|
if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
|
|
116
|
-
|
|
117
|
-
|
|
226
|
+
self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
|
|
227
|
+
|
|
228
|
+
@staticmethod
|
|
229
|
+
def build_table(docs: list['OtherFile']) -> Table:
|
|
230
|
+
"""Build a table of OtherFile documents."""
|
|
231
|
+
table = Table(header_style='bold', show_lines=True)
|
|
232
|
+
table.add_column('File', justify='center', width=FILENAME_LENGTH)
|
|
233
|
+
table.add_column('Date', justify='center')
|
|
234
|
+
table.add_column('Size', justify='center')
|
|
235
|
+
table.add_column('Type', justify='center')
|
|
236
|
+
table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
|
|
237
|
+
|
|
238
|
+
for doc in docs:
|
|
239
|
+
link_and_info = [doc.raw_document_link_txt()]
|
|
240
|
+
category = doc.category()
|
|
241
|
+
date_str = doc.date_str()
|
|
242
|
+
|
|
243
|
+
if doc.is_duplicate:
|
|
244
|
+
preview_text = doc.duplicate_file_txt()
|
|
245
|
+
row_style = ' dim'
|
|
246
|
+
else:
|
|
247
|
+
link_and_info += doc.hints()
|
|
248
|
+
preview_text = doc.highlighted_preview_text()
|
|
249
|
+
row_style = ''
|
|
250
|
+
|
|
251
|
+
if category:
|
|
252
|
+
category_txt = Text(category, get_style_for_category(category) or 'wheat4')
|
|
253
|
+
else:
|
|
254
|
+
category_txt = Text('')
|
|
255
|
+
|
|
256
|
+
table.add_row(
|
|
257
|
+
Group(*link_and_info),
|
|
258
|
+
Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
|
|
259
|
+
doc.file_size_str(),
|
|
260
|
+
category_txt,
|
|
261
|
+
preview_text,
|
|
262
|
+
style=row_style
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
return table
|