epstein-files 1.2.1__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +31 -6
- epstein_files/documents/document.py +5 -1
- epstein_files/documents/email.py +192 -203
- epstein_files/epstein_files.py +2 -3
- epstein_files/person.py +29 -9
- epstein_files/util/constant/names.py +9 -7
- epstein_files/util/constant/output_files.py +8 -5
- epstein_files/util/constant/strings.py +2 -1
- epstein_files/util/constant/urls.py +13 -2
- epstein_files/util/constants.py +35 -11
- epstein_files/util/data.py +1 -0
- epstein_files/util/env.py +9 -6
- epstein_files/util/highlighted_group.py +263 -117
- epstein_files/util/output.py +18 -9
- epstein_files/util/rich.py +7 -2
- epstein_files/util/word_count.py +1 -1
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/METADATA +1 -1
- epstein_files-1.2.5.dist-info/RECORD +34 -0
- epstein_files-1.2.1.dist-info/RECORD +0 -34
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/LICENSE +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/WHEEL +0 -0
- {epstein_files-1.2.1.dist-info → epstein_files-1.2.5.dist-info}/entry_points.txt +0 -0
epstein_files/__init__.py
CHANGED
|
@@ -16,14 +16,18 @@ from rich.text import Text
|
|
|
16
16
|
from epstein_files.epstein_files import EpsteinFiles, document_cls
|
|
17
17
|
from epstein_files.documents.document import INFO_PADDING, Document
|
|
18
18
|
from epstein_files.documents.email import Email
|
|
19
|
+
from epstein_files.documents.messenger_log import MessengerLog
|
|
20
|
+
from epstein_files.documents.other_file import OtherFile
|
|
19
21
|
from epstein_files.util.constant.output_files import make_clean
|
|
22
|
+
from epstein_files.util.constant.strings import ID_REGEX
|
|
23
|
+
from epstein_files.util.data import flatten
|
|
20
24
|
from epstein_files.util.env import args
|
|
21
25
|
from epstein_files.util.file_helper import coerce_file_path, extract_file_id
|
|
22
26
|
from epstein_files.util.logging import exit_with_error, logger
|
|
23
27
|
from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
|
|
24
28
|
print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info,
|
|
25
29
|
print_json_metadata, write_urls)
|
|
26
|
-
from epstein_files.util.rich import (build_highlighter, console, print_color_key, print_title_page_header,
|
|
30
|
+
from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key, print_title_page_header,
|
|
27
31
|
print_title_page_tables, print_subtitle_panel, write_html)
|
|
28
32
|
from epstein_files.util.timer import Timer
|
|
29
33
|
from epstein_files.util.word_count import write_word_counts_html
|
|
@@ -76,6 +80,9 @@ def generate_html() -> None:
|
|
|
76
80
|
write_html(args.build)
|
|
77
81
|
logger.warning(f"Total time: {timer.seconds_since_start_str()}")
|
|
78
82
|
|
|
83
|
+
if args.debug:
|
|
84
|
+
highlighter.print_highlight_counts(console)
|
|
85
|
+
|
|
79
86
|
# JSON stats (mostly used for building pytest checks)
|
|
80
87
|
if args.json_stats:
|
|
81
88
|
print_json_stats(epstein_files)
|
|
@@ -90,23 +97,36 @@ def epstein_search():
|
|
|
90
97
|
"""Search the cleaned up text of the files."""
|
|
91
98
|
epstein_files = EpsteinFiles.get_files()
|
|
92
99
|
|
|
100
|
+
if ID_REGEX.match(args.positional_args[0]):
|
|
101
|
+
logger.warning(f"'{args.positional_args[0]}' seems to be an ID, running epstein_show instead...")
|
|
102
|
+
epstein_show()
|
|
103
|
+
return
|
|
104
|
+
|
|
93
105
|
for search_term in args.positional_args:
|
|
94
106
|
temp_highlighter = build_highlighter(search_term)
|
|
95
107
|
search_results = epstein_files.docs_matching(search_term, args.names)
|
|
96
108
|
print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
|
|
97
109
|
|
|
98
110
|
for search_result in search_results:
|
|
99
|
-
|
|
111
|
+
document = search_result.document
|
|
112
|
+
|
|
113
|
+
if (isinstance(document, Email) and not args.output_emails) \
|
|
114
|
+
or (isinstance(document, OtherFile) and not args.output_other) \
|
|
115
|
+
or (isinstance(document, MessengerLog) and not args.output_texts):
|
|
116
|
+
document.warn(f"{type(document).__name__} Skipping search result...")
|
|
117
|
+
continue
|
|
100
118
|
|
|
101
119
|
if args.whole_file:
|
|
102
|
-
console.print(
|
|
120
|
+
console.print(document)
|
|
103
121
|
else:
|
|
104
|
-
console.print(
|
|
122
|
+
console.print(document.summary_panel())
|
|
105
123
|
|
|
106
124
|
for matching_line in search_result.lines:
|
|
107
125
|
line_txt = matching_line.__rich__()
|
|
108
126
|
console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
|
|
109
127
|
|
|
128
|
+
console.line()
|
|
129
|
+
|
|
110
130
|
|
|
111
131
|
def epstein_show():
|
|
112
132
|
"""Show the color highlighted file. If --raw arg is passed, show the raw text of the file as well."""
|
|
@@ -114,8 +134,13 @@ def epstein_show():
|
|
|
114
134
|
console.line()
|
|
115
135
|
|
|
116
136
|
try:
|
|
117
|
-
|
|
118
|
-
|
|
137
|
+
if args.names:
|
|
138
|
+
people = EpsteinFiles.get_files().person_objs(args.names)
|
|
139
|
+
raw_docs = [doc for doc in flatten([p.emails for p in people])]
|
|
140
|
+
else:
|
|
141
|
+
ids = [extract_file_id(arg) for arg in args.positional_args]
|
|
142
|
+
raw_docs = [Document(coerce_file_path(id)) for id in ids]
|
|
143
|
+
|
|
119
144
|
docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
|
|
120
145
|
except Exception as e:
|
|
121
146
|
exit_with_error(str(e))
|
|
@@ -271,7 +271,7 @@ class Document:
|
|
|
271
271
|
txt.append(", ").append(key_value_txt('lines', self.num_lines()))
|
|
272
272
|
|
|
273
273
|
if self.config and self.config.duplicate_of_id:
|
|
274
|
-
txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.duplicate_of_id, style='
|
|
274
|
+
txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.duplicate_of_id, style='cyan dim')))
|
|
275
275
|
|
|
276
276
|
return txt
|
|
277
277
|
|
|
@@ -435,6 +435,10 @@ class Document:
|
|
|
435
435
|
"""Count of how many Document objects have an author attribution."""
|
|
436
436
|
return len([doc for doc in docs if doc.author])
|
|
437
437
|
|
|
438
|
+
@staticmethod
|
|
439
|
+
def sort_by_id(docs: Sequence['DocumentType']) -> list['DocumentType']:
|
|
440
|
+
return sorted(docs, key=lambda d: d.file_id)
|
|
441
|
+
|
|
438
442
|
@staticmethod
|
|
439
443
|
def sort_by_timestamp(docs: Sequence['DocumentType']) -> list['DocumentType']:
|
|
440
444
|
return sorted(docs, key=lambda doc: doc.timestamp_sort_key())
|