epstein-files 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,20 @@
1
1
  import re
2
+ from collections import defaultdict
2
3
  from dataclasses import dataclass, field
3
4
  from datetime import datetime
4
5
 
5
6
  from rich.console import Console, ConsoleOptions, RenderResult
7
+ from rich.table import Table
6
8
  from rich.text import Text
7
9
 
8
10
  from epstein_files.documents.communication import Communication
9
11
  from epstein_files.documents.imessage.text_message import MSG_DATE_FORMAT, TextMessage
10
- from epstein_files.util.rich import logger
12
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
13
+ from epstein_files.util.constant.strings import AUTHOR
14
+ from epstein_files.util.data import iso_timestamp, listify, sort_dict
15
+ from epstein_files.util.doc_cfg import Metadata, TextCfg
16
+ from epstein_files.util.highlighted_group import get_style_for_name
17
+ from epstein_files.util.logging import logger
11
18
 
12
19
  CONFIRMED_MSG = 'Found confirmed counterparty'
13
20
  GUESSED_MSG = 'This is probably a conversation with'
@@ -18,12 +25,16 @@ REDACTED_AUTHOR_REGEX = re.compile(r"^([-+•_1MENO.=F]+|[4Ide])$")
18
25
  @dataclass
19
26
  class MessengerLog(Communication):
20
27
  """Class representing one iMessage log file (one conversation between Epstein and some counterparty)."""
28
+ config: TextCfg | None = None
21
29
  _messages: list[TextMessage] = field(default_factory=list)
22
30
 
23
31
  def first_message_at(self, name: str | None) -> datetime:
24
32
  return self.messages_by(name)[0].timestamp()
25
33
 
26
34
  def info_txt(self) -> Text | None:
35
+ if self.author is None:
36
+ return None
37
+
27
38
  hint_msg = GUESSED_MSG if self.is_attribution_uncertain() else CONFIRMED_MSG
28
39
  author_txt = Text(self.author_or_unknown(), style=self.author_style + ' bold')
29
40
  return Text(f"({hint_msg} ", style='dim').append(author_txt).append(')')
@@ -51,6 +62,11 @@ class MessengerLog(Communication):
51
62
  """Return all messages by 'name'."""
52
63
  return [m for m in self.messages() if m.author == name]
53
64
 
65
+ def metadata(self) -> Metadata:
66
+ metadata = super().metadata()
67
+ metadata.update({'num_messages': len(self.messages())})
68
+ return metadata
69
+
54
70
  def _border_style(self) -> str:
55
71
  return self.author_style
56
72
 
@@ -61,13 +77,56 @@ class MessengerLog(Communication):
61
77
  try:
62
78
  return datetime.strptime(timestamp_str, MSG_DATE_FORMAT)
63
79
  except ValueError as e:
64
- logger.info(f"[WARNING] Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
80
+ logger.info(f"Failed to parse '{timestamp_str}' to datetime! Using next match. Error: {e}'")
65
81
 
66
82
  raise RuntimeError(f"{self}: No timestamp found!")
67
83
 
68
- def __rich_console__(self, _console: Console, _options: ConsoleOptions) -> RenderResult:
84
+ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
69
85
  yield self.file_info_panel()
70
86
  yield Text('')
71
87
 
72
88
  for message in self.messages():
73
89
  yield message
90
+
91
+ @classmethod
92
+ def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
93
+ """Count up how many texts were sent by each author."""
94
+ sender_counts: dict[str | None, int] = defaultdict(int)
95
+
96
+ for message_log in imessage_logs:
97
+ for message in message_log.messages():
98
+ sender_counts[message.author] += 1
99
+
100
+ return sender_counts
101
+
102
+ @classmethod
103
+ def logs_for(cls, author: str | None | list[str | None], logs: list['MessengerLog']) -> list['MessengerLog']:
104
+ authors = listify(author)
105
+ return logs if JEFFREY_EPSTEIN in authors else [log for log in logs if log.author in authors]
106
+
107
+ @classmethod
108
+ def summary_table(cls, imessage_logs: list['MessengerLog']) -> Table:
109
+ """Build a table summarizing the text messages in 'imessage_logs'."""
110
+ counts_table = Table(title="Text Message Counts By Author", header_style="bold")
111
+ counts_table.add_column(AUTHOR.title(), justify='left', style="steel_blue bold", width=30)
112
+ counts_table.add_column('Files', justify='right', style='white')
113
+ counts_table.add_column("Msgs", justify='right')
114
+ counts_table.add_column('First Sent At', justify='center', highlight=True, width=21)
115
+ counts_table.add_column('Last Sent At', justify='center', style='wheat4', width=21)
116
+ counts_table.add_column('Days', justify='right', style='dim')
117
+
118
+ for name, count in sort_dict(cls.count_authors(imessage_logs)):
119
+ logs = cls.logs_for(name, imessage_logs)
120
+ first_at = logs[0].first_message_at(name)
121
+ last_at = logs[-1].first_message_at(name)
122
+
123
+ counts_table.add_row(
124
+ Text(name or UNKNOWN, get_style_for_name(name)),
125
+ str(len(logs)),
126
+ f"{count:,}",
127
+ iso_timestamp(first_at),
128
+ iso_timestamp(last_at),
129
+ str((last_at - first_at).days + 1),
130
+ )
131
+
132
+ return counts_table
@@ -1,3 +1,4 @@
1
+ import re
1
2
  import logging
2
3
  import warnings
3
4
  from dataclasses import dataclass
@@ -5,18 +6,25 @@ from datetime import datetime
5
6
 
6
7
  import datefinder
7
8
  import dateutil
9
+ from rich.console import Group
8
10
  from rich.markup import escape
9
11
  from rich.panel import Panel
12
+ from rich.table import Table
10
13
  from rich.text import Text
11
14
 
12
15
  from epstein_files.documents.document import CLOSE_PROPERTIES_CHAR, WHITESPACE_REGEX, Document
13
- from epstein_files.util.constants import UNINTERESTING_PREFIXES
16
+ from epstein_files.util.constant.strings import *
17
+ from epstein_files.util.constants import *
18
+ from epstein_files.util.doc_cfg import FINANCIAL_REPORTS_AUTHORS, DocCfg
14
19
  from epstein_files.util.data import escape_single_quotes, remove_timezone, uniquify
15
- from epstein_files.util.env import args, logger
16
- from epstein_files.util.rich import highlighter, logger
20
+ from epstein_files.util.file_helper import FILENAME_LENGTH
21
+ from epstein_files.util.env import args
22
+ from epstein_files.util.highlighted_group import get_style_for_category
23
+ from epstein_files.util.rich import QUESTION_MARK_TXT, highlighter
24
+ from epstein_files.util.logging import logger
17
25
 
18
- MAX_EXTRACTED_TIMESTAMPS = 100
19
26
  MAX_DAYS_SPANNED_TO_BE_VALID = 10
27
+ MAX_EXTRACTED_TIMESTAMPS = 100
20
28
  MIN_TIMESTAMP = datetime(2000, 1, 1)
21
29
  MID_TIMESTAMP = datetime(2007, 1, 1)
22
30
  MAX_TIMESTAMP = datetime(2022, 12, 31)
@@ -24,23 +32,110 @@ PREVIEW_CHARS = int(580 * (1 if args.all_other_files else 1.5))
24
32
  LOG_INDENT = '\n '
25
33
  TIMESTAMP_LOG_INDENT = f'{LOG_INDENT} '
26
34
  VAST_HOUSE = 'vast house' # Michael Wolff article draft about Epstein indicator
35
+ VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
36
+
37
+ UNINTERESTING_CATEGORES = [
38
+ ARTS,
39
+ BOOK,
40
+ JUNK,
41
+ SPEECH,
42
+ ]
43
+
44
+ UNINTERESTING_IDS = [
45
+ '031794',
46
+ ]
47
+
48
+ # OtherFiles whose description/hints match these prefixes are not displayed unless --all-other-files is used
49
+ UNINTERESTING_PREFIXES = FINANCIAL_REPORTS_AUTHORS + [
50
+ 'article about',
51
+ ARTICLE_DRAFT,
52
+ 'Aviation International',
53
+ BBC,
54
+ BLOOMBERG,
55
+ 'Boston Globe',
56
+ BROCKMAN_INC,
57
+ CHINA_DAILY,
58
+ CNN,
59
+ 'completely redacted',
60
+ CVRA,
61
+ DAILY_MAIL,
62
+ DAILY_TELEGRAPH,
63
+ DAVID_SCHOEN_CVRA_LEXIS_SEARCH[0:-12], # Because date at end :(
64
+ DERSH_GIUFFRE_TWEET,
65
+ 'Financial Times',
66
+ 'Forbes',
67
+ 'Frontlines',
68
+ 'Future Science',
69
+ 'Globe and Mail',
70
+ GORDON_GETTY,
71
+ f"{HARVARD} Econ",
72
+ HARVARD_POETRY,
73
+ 'Inference',
74
+ JASTA,
75
+ 'JetGala',
76
+ JOHN_BOLTON_PRESS_CLIPPING,
77
+ 'Journal of Criminal',
78
+ LA_TIMES,
79
+ 'Litigation Daily',
80
+ LAWRENCE_KRAUSS,
81
+ 'MarketWatch',
82
+ MARTIN_NOWAK,
83
+ NOBEL_CHARITABLE_TRUST,
84
+ 'Nautilus',
85
+ 'New Yorker',
86
+ NYT_ARTICLE,
87
+ NYT_COLUMN,
88
+ PALM_BEACH_CODE_ENFORCEMENT,
89
+ PALM_BEACH_DAILY_ARTICLE,
90
+ PALM_BEACH_POST_ARTICLE,
91
+ PALM_BEACH_TSV,
92
+ PALM_BEACH_WATER_COMMITTEE,
93
+ PAUL_KRASSNER,
94
+ PEGGY_SIEGAL,
95
+ 'Politifact',
96
+ 'Rafanelli',
97
+ ROBERT_LAWRENCE_KUHN,
98
+ ROBERT_TRIVERS,
99
+ 'SCMP',
100
+ 'SciencExpress',
101
+ 'Scowcroft',
102
+ SHIMON_POST_ARTICLE,
103
+ SINGLE_PAGE,
104
+ STACEY_PLASKETT,
105
+ TERJE_ROD_LARSEN,
106
+ TEXT_OF_US_LAW,
107
+ TRANSLATION,
108
+ TWEET,
109
+ THE_REAL_DEAL_ARTICLE,
110
+ TRUMP_DISCLOSURES,
111
+ UBS_CIO_REPORT,
112
+ UN_GENERAL_ASSEMBLY,
113
+ 'U.S. News',
114
+ 'US Office',
115
+ 'Vanity Fair',
116
+ VI_DAILY_NEWS_ARTICLE,
117
+ WAPO,
118
+ ]
27
119
 
28
120
 
29
121
  @dataclass
30
122
  class OtherFile(Document):
31
123
  """File that is not an email, an iMessage log, or JSON data."""
32
124
 
33
- def configured_description(self) -> str | None:
34
- """Overloads superclass method."""
35
- if self.config is None:
36
- return None
125
+ def __post_init__(self):
126
+ super().__post_init__()
37
127
 
38
- pieces = [p for p in [self.config.author, self.config.description] if p]
39
- return ' '.join(pieces) if pieces else None
128
+ if self.config is None and VI_DAILY_NEWS_REGEX.search(self.text):
129
+ self.log(f"Creating synthetic config for VI Daily News article...", logging.INFO)
130
+ self.config = DocCfg(id=self.file_id, description=VI_DAILY_NEWS_ARTICLE, category=ARTICLE)
40
131
 
41
- def description(self) -> Text:
42
- """One line summary mostly for logging."""
43
- return super().description().append(CLOSE_PROPERTIES_CHAR)
132
+ def category(self) -> str | None:
133
+ return self.config and self.config.category
134
+
135
+ def configured_description(self) -> str | None:
136
+ """Overloads superclass method."""
137
+ if self.config is not None:
138
+ return self.config.info_str()
44
139
 
45
140
  def description_panel(self, include_hints=True) -> Panel:
46
141
  """Panelized description() with info_txt(), used in search results."""
@@ -57,13 +152,22 @@ class OtherFile(Document):
57
152
  return Text(escape(self.preview_text()))
58
153
 
59
154
  def is_interesting(self):
60
- """False for lame prefixes and duplicates."""
155
+ """False for lame prefixes, duplicates, and other boring files."""
61
156
  hints = self.hints()
62
157
 
63
158
  if self.is_duplicate:
64
159
  return False
160
+ elif self.file_id in UNINTERESTING_IDS:
161
+ return False
65
162
  elif len(hints) == 0:
66
163
  return True
164
+ elif self.config:
165
+ if self.config.is_interesting:
166
+ return True
167
+ elif self.category() == FINANCE and self.author is not None:
168
+ return False
169
+ elif self.category() in UNINTERESTING_CATEGORES:
170
+ return False
67
171
 
68
172
  for prefix in UNINTERESTING_PREFIXES:
69
173
  if hints[0].plain.startswith(prefix):
@@ -74,6 +178,10 @@ class OtherFile(Document):
74
178
  def preview_text(self) -> str:
75
179
  return WHITESPACE_REGEX.sub(' ', self.text)[0:PREVIEW_CHARS]
76
180
 
181
+ def summary(self) -> Text:
182
+ """One line summary mostly for logging."""
183
+ return super().summary().append(CLOSE_PROPERTIES_CHAR)
184
+
77
185
  def _extract_timestamp(self) -> datetime | None:
78
186
  """Return configured timestamp or value extracted by scanning text with datefinder."""
79
187
  if self.config and self.config.timestamp:
@@ -98,7 +206,9 @@ class OtherFile(Document):
98
206
  logger.warning(f"Error while iterating through datefinder.find_dates(): {e}")
99
207
 
100
208
  if len(timestamps) == 0:
101
- self.log_top_lines(15, msg=f"{self.file_id}: No timestamps found", level=logging.INFO)
209
+ if not self.is_duplicate and VAST_HOUSE not in self.text:
210
+ self.log_top_lines(15, msg=f"No timestamps found", level=logging.INFO)
211
+
102
212
  return None
103
213
  elif len(timestamps) == 1:
104
214
  return timestamps[0]
@@ -113,5 +223,43 @@ class OtherFile(Document):
113
223
  timestamps_log_msg += TIMESTAMP_LOG_INDENT.join([str(dt) for dt in timestamps])
114
224
 
115
225
  if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
116
- log_level = logging.DEBUG if VAST_HOUSE in self.text else logging.INFO
117
- self.log_top_lines(15, msg=timestamps_log_msg, level=log_level)
226
+ self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
227
+
228
+ @staticmethod
229
+ def build_table(docs: list['OtherFile']) -> Table:
230
+ """Build a table of OtherFile documents."""
231
+ table = Table(header_style='bold', show_lines=True)
232
+ table.add_column('File', justify='center', width=FILENAME_LENGTH)
233
+ table.add_column('Date', justify='center')
234
+ table.add_column('Size', justify='center')
235
+ table.add_column('Type', justify='center')
236
+ table.add_column(FIRST_FEW_LINES, justify='left', style='pale_turquoise4')
237
+
238
+ for doc in docs:
239
+ link_and_info = [doc.raw_document_link_txt()]
240
+ category = doc.category()
241
+ date_str = doc.date_str()
242
+
243
+ if doc.is_duplicate:
244
+ preview_text = doc.duplicate_file_txt()
245
+ row_style = ' dim'
246
+ else:
247
+ link_and_info += doc.hints()
248
+ preview_text = doc.highlighted_preview_text()
249
+ row_style = ''
250
+
251
+ if category:
252
+ category_txt = Text(category, get_style_for_category(category) or 'wheat4')
253
+ else:
254
+ category_txt = Text('')
255
+
256
+ table.add_row(
257
+ Group(*link_and_info),
258
+ Text(date_str, style=TIMESTAMP_DIM) if date_str else QUESTION_MARK_TXT,
259
+ doc.file_size_str(),
260
+ category_txt,
261
+ preview_text,
262
+ style=row_style
263
+ )
264
+
265
+ return table