epstein-files 1.1.5__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,11 +10,11 @@ from rich.text import Text
10
10
 
11
11
  from epstein_files.documents.communication import Communication
12
12
  from epstein_files.documents.imessage.text_message import TextMessage
13
- from epstein_files.util.constant.names import JEFFREY_EPSTEIN, UNKNOWN
13
+ from epstein_files.util.constant.names import JEFFREY_EPSTEIN, Name
14
14
  from epstein_files.util.constant.strings import AUTHOR, TIMESTAMP_STYLE
15
15
  from epstein_files.util.data import days_between, days_between_str, iso_timestamp, sort_dict
16
16
  from epstein_files.util.doc_cfg import Metadata, TextCfg
17
- from epstein_files.util.highlighted_group import get_style_for_name, styled_name
17
+ from epstein_files.util.highlighted_group import styled_name
18
18
  from epstein_files.util.logging import logger
19
19
  from epstein_files.util.rich import LAST_TIMESTAMP_STYLE, build_table, highlighter
20
20
 
@@ -35,7 +35,7 @@ class MessengerLog(Communication):
35
35
  super().__post_init__()
36
36
  self.messages = [self._build_message(match) for match in MSG_REGEX.finditer(self.text)]
37
37
 
38
- def first_message_at(self, name: str | None) -> datetime:
38
+ def first_message_at(self, name: Name) -> datetime:
39
39
  return self.messages_by(name)[0].parse_timestamp()
40
40
 
41
41
  def info_txt(self) -> Text | None:
@@ -54,10 +54,10 @@ class MessengerLog(Communication):
54
54
 
55
55
  return txt.append(')')
56
56
 
57
- def last_message_at(self, name: str | None) -> datetime:
57
+ def last_message_at(self, name: Name) -> datetime:
58
58
  return self.messages_by(name)[-1].parse_timestamp()
59
59
 
60
- def messages_by(self, name: str | None) -> list[TextMessage]:
60
+ def messages_by(self, name: Name) -> list[TextMessage]:
61
61
  """Return all messages by 'name'."""
62
62
  return [m for m in self.messages if m.author == name]
63
63
 
@@ -129,9 +129,9 @@ class MessengerLog(Communication):
129
129
  yield message
130
130
 
131
131
  @classmethod
132
- def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[str | None, int]:
132
+ def count_authors(cls, imessage_logs: list['MessengerLog']) -> dict[Name, int]:
133
133
  """Count up how many texts were sent by each author."""
134
- sender_counts: dict[str | None, int] = defaultdict(int)
134
+ sender_counts: dict[Name, int] = defaultdict(int)
135
135
 
136
136
  for message_log in imessage_logs:
137
137
  for message in message_log.messages:
@@ -22,7 +22,7 @@ from epstein_files.util.data import days_between, escape_single_quotes, remove_t
22
22
  from epstein_files.util.file_helper import FILENAME_LENGTH, file_size_to_str
23
23
  from epstein_files.util.env import args
24
24
  from epstein_files.util.highlighted_group import QUESTION_MARKS_TXT, styled_category
25
- from epstein_files.util.rich import build_table, highlighter
25
+ from epstein_files.util.rich import add_cols_to_table, build_table, highlighter
26
26
  from epstein_files.util.logging import logger
27
27
 
28
28
  FIRST_FEW_LINES = 'First Few Lines'
@@ -209,39 +209,8 @@ class OtherFile(Document):
209
209
  if num_days_spanned > MAX_DAYS_SPANNED_TO_BE_VALID and VAST_HOUSE not in self.text:
210
210
  self.log_top_lines(15, msg=timestamps_log_msg, level=logging.DEBUG)
211
211
 
212
- @staticmethod
213
- def count_by_category_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
214
- counts = defaultdict(int)
215
- category_bytes = defaultdict(int)
216
-
217
- for file in files:
218
- if file.category() is None:
219
- logger.warning(f"file {file.file_id} has no category")
220
-
221
- counts[file.category()] += 1
222
- category_bytes[file.category()] += file.file_size()
223
-
224
- table = build_table(f'{title_pfx}Other Files Summary', ['Category', 'Count', 'Has Author', 'No Author', 'Size'])
225
- table.columns[-1].justify = 'right'
226
- table.columns[0].min_width = 14
227
- table.columns[-1].style = 'dim'
228
-
229
- for (category, count) in sort_dict(counts):
230
- category_files = [f for f in files if f.category() == category]
231
- known_author_count = Document.known_author_count(category_files)
232
-
233
- table.add_row(
234
- styled_category(category),
235
- str(count),
236
- str(known_author_count),
237
- str(count - known_author_count),
238
- file_size_to_str(category_bytes[category]),
239
- )
240
-
241
- return table
242
-
243
- @staticmethod
244
- def files_preview_table(files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
212
+ @classmethod
213
+ def files_preview_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
245
214
  """Build a table of OtherFile documents."""
246
215
  table = build_table(f'{title_pfx}Other Files Details in Chronological Order', show_lines=True)
247
216
  table.add_column('File', justify='center', width=FILENAME_LENGTH)
@@ -272,3 +241,16 @@ class OtherFile(Document):
272
241
  )
273
242
 
274
243
  return table
244
+
245
+ @classmethod
246
+ def summary_table(cls, files: Sequence['OtherFile'], title_pfx: str = '') -> Table:
247
+ categories = uniquify([f.category() for f in files])
248
+ categories = sorted(categories, key=lambda c: -len([f for f in files if f.category() == c]))
249
+ table = cls.file_info_table(f'{title_pfx}Other Files Summary', 'Category')
250
+
251
+ for category in categories:
252
+ category_files = [f for f in files if f.category() == category]
253
+ table.add_row(styled_category(category), *cls.files_info_row(category_files))
254
+
255
+ table.columns = table.columns[:-2] + [table.columns[-1]] # Remove unknown author col
256
+ return table
@@ -3,39 +3,44 @@ import json
3
3
  import pickle
4
4
  import re
5
5
  from collections import defaultdict
6
+ from copy import copy
6
7
  from dataclasses import dataclass, field
7
8
  from datetime import datetime
8
9
  from pathlib import Path
9
- from typing import Sequence, Type
10
+ from typing import Sequence, Type, cast
10
11
 
11
- from rich.padding import Padding
12
12
  from rich.table import Table
13
- from rich.text import Text
14
13
 
15
14
  from epstein_files.documents.document import Document
16
- from epstein_files.documents.email import DETECT_EMAIL_REGEX, USELESS_EMAILERS, Email
17
- from epstein_files.documents.emails.email_header import AUTHOR
15
+ from epstein_files.documents.email import DETECT_EMAIL_REGEX, Email
18
16
  from epstein_files.documents.json_file import JsonFile
19
17
  from epstein_files.documents.messenger_log import MSG_REGEX, MessengerLog
20
18
  from epstein_files.documents.other_file import OtherFile
19
+ from epstein_files.person import Person
21
20
  from epstein_files.util.constant.strings import *
22
21
  from epstein_files.util.constants import *
23
- from epstein_files.util.data import days_between, dict_sets_to_lists, json_safe, listify
22
+ from epstein_files.util.data import flatten, json_safe, listify, uniquify
24
23
  from epstein_files.util.doc_cfg import EmailCfg, Metadata
25
24
  from epstein_files.util.env import DOCS_DIR, args, logger
26
25
  from epstein_files.util.file_helper import file_size_str
27
- from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames, get_info_for_name, get_style_for_name
28
- from epstein_files.util.rich import (NA_TXT, add_cols_to_table, build_table, console, highlighter,
29
- print_author_panel, print_centered, print_subtitle_panel)
26
+ from epstein_files.util.highlighted_group import HIGHLIGHTED_NAMES, HighlightedNames
30
27
  from epstein_files.util.search_result import SearchResult
31
28
  from epstein_files.util.timer import Timer
32
29
 
33
- DEVICE_SIGNATURE_SUBTITLE = f"Email [italic]Sent from \\[DEVICE][/italic] Signature Breakdown"
34
- DEVICE_SIGNATURE = 'Device Signature'
35
- DEVICE_SIGNATURE_PADDING = (1, 0)
30
+ DUPLICATE_PROPS_TO_COPY = ['author', 'recipients', 'timestamp']
36
31
  PICKLED_PATH = Path("the_epstein_files.pkl.gz")
37
32
  SLOW_FILE_SECONDS = 1.0
38
33
 
34
+ EMAILS_WITH_UNINTERESTING_CCS = [
35
+ '025329', # Krassner
36
+ '024923', # Krassner
37
+ '033568', # Krassner
38
+ ]
39
+
40
+ EMAILS_WITH_UNINTERESTING_BCCS = [
41
+ '014797_1', # Ross Gow
42
+ ]
43
+
39
44
 
40
45
  @dataclass
41
46
  class EpsteinFiles:
@@ -45,19 +50,13 @@ class EpsteinFiles:
45
50
  json_files: list[JsonFile] = field(default_factory=list)
46
51
  other_files: list[OtherFile] = field(default_factory=list)
47
52
  timer: Timer = field(default_factory=lambda: Timer())
48
-
49
- # Analytics / calculations
50
- email_author_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
51
- email_authors_to_device_signatures: dict[str, set] = field(default_factory=lambda: defaultdict(set))
52
- email_device_signatures_to_authors: dict[str, set] = field(default_factory=lambda: defaultdict(set))
53
- email_recipient_counts: dict[str | None, int] = field(default_factory=lambda: defaultdict(int))
54
- unknown_recipient_email_ids: set[str] = field(default_factory=set)
53
+ uninteresting_ccs: list[Name] = field(default_factory=list)
55
54
 
56
55
  def __post_init__(self):
57
56
  """Iterate through files and build appropriate objects."""
58
57
  self.all_files = sorted([f for f in DOCS_DIR.iterdir() if f.is_file() and not f.name.startswith('.')])
59
58
  documents = []
60
- file_type_count = defaultdict(int)
59
+ file_type_count = defaultdict(int) # Hack used by --skip-other-files option
61
60
 
62
61
  # Read through and classify all the files
63
62
  for file_arg in self.all_files:
@@ -83,23 +82,23 @@ class EpsteinFiles:
83
82
  self.imessage_logs = Document.sort_by_timestamp([d for d in documents if isinstance(d, MessengerLog)])
84
83
  self.other_files = Document.sort_by_timestamp([d for d in documents if isinstance(d, (JsonFile, OtherFile))])
85
84
  self.json_files = [doc for doc in self.other_files if isinstance(doc, JsonFile)]
86
- self._tally_email_data()
85
+ self._set_uninteresting_ccs()
86
+ self._copy_duplicate_email_properties()
87
87
 
88
88
  @classmethod
89
89
  def get_files(cls, timer: Timer | None = None) -> 'EpsteinFiles':
90
90
  """Alternate constructor that reads/writes a pickled version of the data ('timer' arg is for logging)."""
91
91
  timer = timer or Timer()
92
92
 
93
- if PICKLED_PATH.exists() and not args.overwrite_pickle:
93
+ if PICKLED_PATH.exists() and not args.overwrite_pickle and not args.skip_other_files:
94
94
  with gzip.open(PICKLED_PATH, 'rb') as file:
95
95
  epstein_files = pickle.load(file)
96
- epstein_files.timer = timer
97
96
  timer_msg = f"Loaded {len(epstein_files.all_files):,} documents from '{PICKLED_PATH}'"
98
- epstein_files.timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
97
+ timer.print_at_checkpoint(f"{timer_msg} ({file_size_str(PICKLED_PATH)})")
99
98
  return epstein_files
100
99
 
101
100
  logger.warning(f"Building new cache file, this will take a few minutes...")
102
- epstein_files = EpsteinFiles(timer=timer)
101
+ epstein_files = EpsteinFiles()
103
102
 
104
103
  if args.skip_other_files:
105
104
  logger.warning(f"Not writing pickled data because --skip-other-files")
@@ -114,17 +113,7 @@ class EpsteinFiles:
114
113
  def all_documents(self) -> Sequence[Document]:
115
114
  return self.imessage_logs + self.emails + self.other_files
116
115
 
117
- def all_emailers(self, include_useless: bool = False) -> list[str | None]:
118
- """Returns all emailers USELESS_EMAILERS, sorted from least frequent to most."""
119
- names = [a for a in self.email_author_counts.keys()] + [r for r in self.email_recipient_counts.keys()]
120
- names = names if include_useless else [e for e in names if e not in USELESS_EMAILERS]
121
- return sorted(list(set(names)), key=lambda e: self.email_author_counts[e] + self.email_recipient_counts[e])
122
-
123
- def docs_matching(
124
- self,
125
- pattern: re.Pattern | str,
126
- names: list[str | None] | None = None
127
- ) -> list[SearchResult]:
116
+ def docs_matching(self, pattern: re.Pattern | str, names: list[Name] | None = None) -> list[SearchResult]:
128
117
  """Find documents whose text matches a pattern (file_type and names args limit the documents searched)."""
129
118
  results: list[SearchResult] = []
130
119
 
@@ -139,14 +128,39 @@ class EpsteinFiles:
139
128
 
140
129
  return results
141
130
 
142
- def earliest_email_at(self, author: str | None) -> datetime:
143
- return self.emails_for(author)[0].timestamp
131
+ def earliest_email_at(self, name: Name) -> datetime:
132
+ return self.emails_for(name)[0].timestamp
133
+
134
+ def last_email_at(self, name: Name) -> datetime:
135
+ return self.emails_for(name)[-1].timestamp
136
+
137
+ def email_author_counts(self) -> dict[Name, int]:
138
+ return {
139
+ person.name: len(person.unique_emails_by())
140
+ for person in self.emailers() if len(person.unique_emails_by()) > 0
141
+ }
142
+
143
+ def email_authors_to_device_signatures(self) -> dict[str, set[str]]:
144
+ signatures = defaultdict(set)
145
+
146
+ for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
147
+ signatures[email.author_or_unknown()].add(email.sent_from_device)
144
148
 
145
- def last_email_at(self, author: str | None) -> datetime:
146
- return self.emails_for(author)[-1].timestamp
149
+ return signatures
147
150
 
148
- def email_conversation_length_in_days(self, author: str | None) -> int:
149
- return days_between(self.earliest_email_at(author), self.last_email_at(author))
151
+ def email_device_signatures_to_authors(self) -> dict[str, set[str]]:
152
+ signatures = defaultdict(set)
153
+
154
+ for email in [e for e in self.non_duplicate_emails() if e.sent_from_device]:
155
+ signatures[email.sent_from_device].add(email.author_or_unknown())
156
+
157
+ return signatures
158
+
159
+ def email_recipient_counts(self) -> dict[Name, int]:
160
+ return {
161
+ person.name: len(person.unique_emails_to())
162
+ for person in self.emailers() if len(person.unique_emails_to()) > 0
163
+ }
150
164
 
151
165
  def email_signature_substitution_counts(self) -> dict[str, int]:
152
166
  """Return the number of times an email signature was replaced with "<...snipped...>" for each author."""
@@ -158,32 +172,40 @@ class EpsteinFiles:
158
172
 
159
173
  return substitution_counts
160
174
 
161
- def email_unknown_recipient_file_ids(self) -> list[str]:
162
- return sorted(list(self.unknown_recipient_email_ids))
175
+ def emailers(self) -> list[Person]:
176
+ """All the people who sent or received an email."""
177
+ authors = [email.author for email in self.emails]
178
+ recipients = flatten([email.recipients for email in self.emails])
179
+ return self.person_objs(uniquify(authors + recipients))
163
180
 
164
- def emails_by(self, author: str | None) -> list[Email]:
181
+ def emails_by(self, author: Name) -> list[Email]:
165
182
  return Document.sort_by_timestamp([e for e in self.emails if e.author == author])
166
183
 
167
- def emails_for(self, author: str | None) -> list[Email]:
184
+ def emails_for(self, name: Name) -> list[Email]:
168
185
  """Returns emails to or from a given 'author' sorted chronologically."""
169
- if author == JEFFREY_EPSTEIN:
170
- emails = [e for e in self.emails_by(JEFFREY_EPSTEIN) if e.is_note_to_self()]
171
- else:
172
- emails = self.emails_by(author) + self.emails_to(author)
186
+ emails = self.emails_by(name) + self.emails_to(name)
173
187
 
174
188
  if len(emails) == 0:
175
- raise RuntimeError(f"No emails found for '{author}'")
189
+ raise RuntimeError(f"No emails found for '{name}'")
176
190
 
177
191
  return Document.sort_by_timestamp(Document.uniquify(emails))
178
192
 
179
- def emails_to(self, author: str | None) -> list[Email]:
180
- if author is None:
193
+ def emails_to(self, name: Name) -> list[Email]:
194
+ if name is None:
181
195
  emails = [e for e in self.emails if len(e.recipients) == 0 or None in e.recipients]
182
196
  else:
183
- emails = [e for e in self.emails if author in e.recipients]
197
+ emails = [e for e in self.emails if name in e.recipients]
184
198
 
185
199
  return Document.sort_by_timestamp(emails)
186
200
 
201
+ def email_for_id(self, file_id: str) -> Email:
202
+ docs = self.for_ids([file_id])
203
+
204
+ if docs and isinstance(docs[0], Email):
205
+ return docs[0]
206
+ else:
207
+ raise ValueError(f"No email found for {file_id}")
208
+
187
209
  def for_ids(self, file_ids: str | list[str]) -> list[Document]:
188
210
  file_ids = listify(file_ids)
189
211
  docs = [doc for doc in self.all_documents() if doc.file_id in file_ids]
@@ -193,6 +215,9 @@ class EpsteinFiles:
193
215
 
194
216
  return docs
195
217
 
218
+ def imessage_logs_for(self, name: Name) -> list[MessengerLog]:
219
+ return [log for log in self.imessage_logs if name == log.author]
220
+
196
221
  def json_metadata(self) -> str:
197
222
  """Create a JSON string containing metadata for all the files."""
198
223
  metadata = {
@@ -203,7 +228,7 @@ class EpsteinFiles:
203
228
  OtherFile.__name__: _sorted_metadata(self.non_json_other_files()),
204
229
  },
205
230
  'people': {
206
- name: highlighted_group.get_info(name)
231
+ name: highlighted_group.info_for(name, include_category=True)
207
232
  for highlighted_group in HIGHLIGHTED_NAMES
208
233
  if isinstance(highlighted_group, HighlightedNames)
209
234
  for name, description in highlighted_group.emailers.items()
@@ -214,89 +239,71 @@ class EpsteinFiles:
214
239
  return json.dumps(metadata, indent=4, sort_keys=True)
215
240
 
216
241
  def non_duplicate_emails(self) -> list[Email]:
217
- return [email for email in self.emails if not email.is_duplicate()]
242
+ return Document.without_dupes(self.emails)
218
243
 
219
244
  def non_json_other_files(self) -> list[OtherFile]:
220
245
  return [doc for doc in self.other_files if not isinstance(doc, JsonFile)]
221
246
 
222
- def print_files_summary(self) -> None:
223
- table = build_table('File Overview')
224
- add_cols_to_table(table, ['File Type', 'Count', 'Author Known', 'Author Unknown', 'Duplicates'])
225
- table.columns[1].justify = 'right'
226
-
227
- def add_row(label: str, docs: list):
228
- known = None if isinstance(docs[0], JsonFile) else Document.known_author_count(docs)
229
-
230
- table.add_row(
231
- label,
232
- f"{len(docs):,}",
233
- f"{known:,}" if known is not None else NA_TXT,
234
- f"{len(docs) - known:,}" if known is not None else NA_TXT,
235
- f"{len([d for d in docs if d.is_duplicate()])}",
247
+ def person_objs(self, names: list[Name]) -> list[Person]:
248
+ """Construct Person objects for a list of names."""
249
+ return [
250
+ Person(
251
+ name=name,
252
+ emails=self.emails_for(name),
253
+ imessage_logs=self.imessage_logs_for(name),
254
+ is_uninteresting_cc=name in self.uninteresting_emailers(),
255
+ other_files=[f for f in self.other_files if name and name == f.author]
236
256
  )
257
+ for name in names
258
+ ]
259
+
260
+ def overview_table(self) -> Table:
261
+ table = Document.file_info_table('Files Overview', 'File Type')
262
+ table.add_row('Emails', *Document.files_info_row(self.emails))
263
+ table.add_row('iMessage Logs', *Document.files_info_row(self.imessage_logs))
264
+ table.add_row('JSON Data', *Document.files_info_row(self.json_files, True))
265
+ table.add_row('Other', *Document.files_info_row(self.non_json_other_files()))
266
+ return table
267
+
268
+ def unknown_recipient_ids(self) -> list[str]:
269
+ """IDs of emails whose recipient is not known."""
270
+ return sorted([e.file_id for e in self.emails if None in e.recipients or not e.recipients])
271
+
272
+ def uninteresting_emailers(self) -> list[Name]:
273
+ """Emailers whom we don't want to print a separate section for because they're just CCed."""
274
+ if '_uninteresting_emailers' not in vars(self):
275
+ self._uninteresting_emailers = sorted(uniquify(UNINTERESTING_EMAILERS + self.uninteresting_ccs))
276
+
277
+ return self._uninteresting_emailers
278
+
279
+ def _copy_duplicate_email_properties(self) -> None:
280
+ """Ensure dupe emails have the properties of the emails they duplicate to capture any repairs, config etc."""
281
+ for email in self.emails:
282
+ if not email.is_duplicate():
283
+ continue
237
284
 
238
- add_row('Emails', self.emails)
239
- add_row('iMessage Logs', self.imessage_logs)
240
- add_row('JSON Data', self.json_files)
241
- add_row('Other', self.non_json_other_files())
242
- print_centered(table)
243
- console.line()
244
-
245
- def print_emails_for(self, _author: str | None) -> list[Email]:
246
- """Print complete emails to or from a particular 'author'. Returns the Emails that were printed."""
247
- emails = self.emails_for(_author)
248
- num_days = self.email_conversation_length_in_days(_author)
249
- unique_emails = [email for email in emails if not email.is_duplicate()]
250
- start_date = emails[0].timestamp.date()
251
- author = _author or UNKNOWN
252
- title = f"Found {len(unique_emails)} emails"
253
-
254
- if author == JEFFREY_EPSTEIN:
255
- title += f" sent by {JEFFREY_EPSTEIN} to himself"
256
- else:
257
- title += f" to/from {author} starting {start_date} covering {num_days:,} days"
258
-
259
- print_author_panel(title, get_info_for_name(author), get_style_for_name(author))
260
- self.print_emails_table_for(_author)
261
- last_printed_email_was_duplicate = False
262
-
263
- for email in emails:
264
- if email.is_duplicate():
265
- console.print(Padding(email.duplicate_file_txt().append('...'), (0, 0, 0, 4)))
266
- last_printed_email_was_duplicate = True
267
- else:
268
- if last_printed_email_was_duplicate:
269
- console.line()
270
-
271
- console.print(email)
272
- last_printed_email_was_duplicate = False
285
+ original = self.email_for_id(email.duplicate_of_id())
273
286
 
274
- return emails
287
+ for field_name in DUPLICATE_PROPS_TO_COPY:
288
+ original_prop = getattr(original, field_name)
289
+ duplicate_prop = getattr(email, field_name)
275
290
 
276
- def print_emails_table_for(self, author: str | None) -> None:
277
- emails = [email for email in self.emails_for(author) if not email.is_duplicate()] # Remove dupes
278
- print_centered(Padding(Email.build_emails_table(emails, author), (0, 5, 1, 5)))
291
+ if original_prop != duplicate_prop:
292
+ email.warn(f"Replacing {field_name} {duplicate_prop} with {original_prop} from duplicated '{original.file_id}'")
293
+ setattr(email, field_name, original_prop)
279
294
 
280
- def print_email_device_info(self) -> None:
281
- print_subtitle_panel(DEVICE_SIGNATURE_SUBTITLE)
282
- console.print(_build_signature_table(self.email_device_signatures_to_authors, (DEVICE_SIGNATURE, AUTHOR), ', '))
283
- console.print(_build_signature_table(self.email_authors_to_device_signatures, (AUTHOR, DEVICE_SIGNATURE)))
295
+ # Re-sort in case any timestamps were updated
296
+ self.emails = Document.sort_by_timestamp(self.emails)
284
297
 
285
- def _tally_email_data(self) -> None:
286
- """Tally up summary info about Email objects."""
287
- for email in self.non_duplicate_emails():
288
- self.email_author_counts[email.author] += 1
298
+ def _set_uninteresting_ccs(self) -> None:
299
+ for id in EMAILS_WITH_UNINTERESTING_BCCS:
300
+ self.uninteresting_ccs += copy(cast(list[Name], self.email_for_id(id).header.bcc))
289
301
 
290
- if len(email.recipients) == 0:
291
- self.unknown_recipient_email_ids.add(email.file_id)
292
- self.email_recipient_counts[None] += 1
293
- else:
294
- for recipient in email.recipients:
295
- self.email_recipient_counts[recipient] += 1
302
+ for id in EMAILS_WITH_UNINTERESTING_CCS:
303
+ self.uninteresting_ccs += self.email_for_id(id).recipients
296
304
 
297
- if email.sent_from_device:
298
- self.email_authors_to_device_signatures[email.author_or_unknown()].add(email.sent_from_device)
299
- self.email_device_signatures_to_authors[email.sent_from_device].add(email.author_or_unknown())
305
+ self.uninteresting_ccs = sorted(uniquify(self.uninteresting_ccs))
306
+ logger.info(f"Extracted uninteresting_ccs: {self.uninteresting_ccs}")
300
307
 
301
308
 
302
309
  def count_by_month(docs: Sequence[Document]) -> dict[str | None, int]:
@@ -326,21 +333,6 @@ def document_cls(doc: Document) -> Type[Document]:
326
333
  return OtherFile
327
334
 
328
335
 
329
- def _build_signature_table(keyed_sets: dict[str, set[str]], cols: tuple[str, str], join_char: str = '\n') -> Padding:
330
- title = 'Signatures Used By Authors' if cols[0] == AUTHOR else 'Authors Seen Using Signatures'
331
- table = build_table(title, header_style="bold reverse", show_lines=True)
332
-
333
- for i, col in enumerate(cols):
334
- table.add_column(col.title() + ('s' if i == 1 else ''))
335
-
336
- new_dict = dict_sets_to_lists(keyed_sets)
337
-
338
- for k in sorted(new_dict.keys()):
339
- table.add_row(highlighter(k or UNKNOWN), highlighter(join_char.join(sorted(new_dict[k]))))
340
-
341
- return Padding(table, DEVICE_SIGNATURE_PADDING)
342
-
343
-
344
336
  def _sorted_metadata(docs: Sequence[Document]) -> list[Metadata]:
345
337
  docs_sorted_by_id = sorted(docs, key=lambda d: d.file_id)
346
338
  return [json_safe(d.metadata()) for d in docs_sorted_by_id]