epstein-files 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +75 -135
- epstein_files/documents/communication.py +9 -9
- epstein_files/documents/document.py +115 -87
- epstein_files/documents/email.py +154 -85
- epstein_files/documents/emails/email_header.py +7 -6
- epstein_files/documents/imessage/text_message.py +3 -2
- epstein_files/documents/json_file.py +17 -0
- epstein_files/documents/messenger_log.py +62 -3
- epstein_files/documents/other_file.py +165 -17
- epstein_files/epstein_files.py +128 -169
- epstein_files/util/constant/names.py +8 -1
- epstein_files/util/constant/output_files.py +29 -0
- epstein_files/util/constant/strings.py +27 -0
- epstein_files/util/constant/urls.py +25 -9
- epstein_files/util/constants.py +1018 -1045
- epstein_files/util/data.py +20 -55
- epstein_files/util/{file_cfg.py → doc_cfg.py} +121 -43
- epstein_files/util/env.py +19 -20
- epstein_files/util/file_helper.py +38 -21
- epstein_files/util/highlighted_group.py +229 -177
- epstein_files/util/logging.py +63 -0
- epstein_files/util/output.py +180 -0
- epstein_files/util/rich.py +29 -17
- epstein_files/util/search_result.py +14 -6
- epstein_files/util/timer.py +24 -0
- epstein_files/util/word_count.py +2 -1
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/METADATA +20 -4
- epstein_files-1.0.2.dist-info/RECORD +33 -0
- epstein_files-1.0.2.dist-info/entry_points.txt +7 -0
- epstein_files-1.0.0.dist-info/RECORD +0 -28
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/LICENSE +0 -0
- {epstein_files-1.0.0.dist-info → epstein_files-1.0.2.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
-
from dataclasses import dataclass, field
|
|
3
|
+
from dataclasses import asdict, dataclass, field
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from subprocess import run
|
|
@@ -14,33 +14,28 @@ from rich.text import Text
|
|
|
14
14
|
from epstein_files.util.constant.names import *
|
|
15
15
|
from epstein_files.util.constant.strings import *
|
|
16
16
|
from epstein_files.util.constant.urls import *
|
|
17
|
-
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
18
|
-
from epstein_files.util.
|
|
19
|
-
from epstein_files.util.
|
|
20
|
-
from epstein_files.util.env import args
|
|
21
|
-
from epstein_files.util.file_helper import DOCS_DIR, file_stem_for_id, extract_file_id,
|
|
22
|
-
|
|
17
|
+
from epstein_files.util.constants import ALL_FILE_CONFIGS, FALLBACK_TIMESTAMP
|
|
18
|
+
from epstein_files.util.data import collapse_newlines, date_str, iso_timestamp, listify, patternize, without_nones
|
|
19
|
+
from epstein_files.util.doc_cfg import EmailCfg, DocCfg, Metadata, TextCfg
|
|
20
|
+
from epstein_files.util.env import args
|
|
21
|
+
from epstein_files.util.file_helper import (DOCS_DIR, file_stem_for_id, extract_file_id, file_size,
|
|
22
|
+
file_size_str, is_local_extract_file)
|
|
23
|
+
from epstein_files.util.logging import DOC_TYPE_STYLES, FILENAME_STYLE, logger
|
|
24
|
+
from epstein_files.util.rich import SYMBOL_STYLE, console, highlighter, key_value_txt, link_text_obj
|
|
25
|
+
from epstein_files.util.search_result import MatchedLine
|
|
23
26
|
|
|
24
|
-
|
|
27
|
+
CLOSE_PROPERTIES_CHAR = ']'
|
|
25
28
|
HOUSE_OVERSIGHT = HOUSE_OVERSIGHT_PREFIX.replace('_', ' ').strip()
|
|
26
|
-
MIN_DOCUMENT_ID = 10477
|
|
27
29
|
INFO_INDENT = 2
|
|
28
30
|
INFO_PADDING = (0, 0, 0, INFO_INDENT)
|
|
31
|
+
MAX_TOP_LINES_LEN = 4000 # Only for logging
|
|
32
|
+
MIN_DOCUMENT_ID = 10477
|
|
33
|
+
LOCAL_EXTRACT_REGEX = re.compile(r"_\d$")
|
|
34
|
+
WHITESPACE_REGEX = re.compile(r"\s{2,}|\t|\n", re.MULTILINE)
|
|
29
35
|
|
|
30
|
-
CLOSE_PROPERTIES_CHAR = ']'
|
|
31
|
-
MAX_EXTRACTED_TIMESTAMPS = 6
|
|
32
36
|
MIN_TIMESTAMP = datetime(1991, 1, 1)
|
|
33
37
|
MID_TIMESTAMP = datetime(2007, 1, 1)
|
|
34
38
|
MAX_TIMESTAMP = datetime(2020, 1, 1)
|
|
35
|
-
VI_DAILY_NEWS_REGEX = re.compile(r'virgin\s*is[kl][ai]nds\s*daily\s*news', re.IGNORECASE)
|
|
36
|
-
|
|
37
|
-
DOC_TYPE_STYLES = {
|
|
38
|
-
DOCUMENT_CLASS: 'grey69',
|
|
39
|
-
EMAIL_CLASS: 'sea_green2',
|
|
40
|
-
JSON_FILE_CLASS: 'sandy_brown',
|
|
41
|
-
MESSENGER_LOG_CLASS: 'cyan',
|
|
42
|
-
OTHER_FILE_CLASS: 'grey69',
|
|
43
|
-
}
|
|
44
39
|
|
|
45
40
|
FILENAME_MATCH_STYLES = [
|
|
46
41
|
'dark_green',
|
|
@@ -48,6 +43,13 @@ FILENAME_MATCH_STYLES = [
|
|
|
48
43
|
'spring_green4',
|
|
49
44
|
]
|
|
50
45
|
|
|
46
|
+
METADATA_FIELDS = [
|
|
47
|
+
'author',
|
|
48
|
+
'file_id',
|
|
49
|
+
'num_lines',
|
|
50
|
+
'timestamp'
|
|
51
|
+
]
|
|
52
|
+
|
|
51
53
|
OCR_REPAIRS = {
|
|
52
54
|
re.compile(r'\.corn\b'): '.com',
|
|
53
55
|
re.compile('ln(adequate|dyke)'): r'In\1',
|
|
@@ -61,7 +63,7 @@ class Document:
|
|
|
61
63
|
file_path: Path
|
|
62
64
|
# Optional fields
|
|
63
65
|
author: str | None = None
|
|
64
|
-
config:
|
|
66
|
+
config: EmailCfg | DocCfg | TextCfg | None = None
|
|
65
67
|
file_id: str = field(init=False)
|
|
66
68
|
filename: str = field(init=False)
|
|
67
69
|
is_duplicate: bool = False
|
|
@@ -72,8 +74,8 @@ class Document:
|
|
|
72
74
|
timestamp: datetime | None = None
|
|
73
75
|
url_slug: str = field(init=False) # e.g. 'HOUSE_OVERSIGHT_123456
|
|
74
76
|
|
|
75
|
-
# Class variable
|
|
76
|
-
|
|
77
|
+
# Class variable overridden in JsonFile
|
|
78
|
+
strip_whitespace: ClassVar[bool] = True
|
|
77
79
|
|
|
78
80
|
def __post_init__(self):
|
|
79
81
|
self.filename = self.file_path.name
|
|
@@ -82,12 +84,12 @@ class Document:
|
|
|
82
84
|
self.is_duplicate = bool(self.config.dupe_of_id) if self.config else False
|
|
83
85
|
|
|
84
86
|
if self.is_local_extract_file():
|
|
85
|
-
self.url_slug = file_stem_for_id(self.file_id)
|
|
87
|
+
self.url_slug = LOCAL_EXTRACT_REGEX.sub('', file_stem_for_id(self.file_id))
|
|
86
88
|
cfg_type = type(self.config).__name__ if self.config else None
|
|
87
89
|
|
|
88
90
|
# Coerce FileConfig for court docs etc. to MessageCfg for email files extracted from that document
|
|
89
|
-
if self.
|
|
90
|
-
self.config =
|
|
91
|
+
if self.class_name() == EMAIL_CLASS and self.config and cfg_type != EmailCfg.__name__:
|
|
92
|
+
self.config = EmailCfg.from_doc_cfg(self.config)
|
|
91
93
|
else:
|
|
92
94
|
self.url_slug = self.file_path.stem
|
|
93
95
|
|
|
@@ -96,41 +98,30 @@ class Document:
|
|
|
96
98
|
self._extract_author()
|
|
97
99
|
self.timestamp = self._extract_timestamp()
|
|
98
100
|
|
|
101
|
+
def class_name(self) -> str:
|
|
102
|
+
"""Annoying workaround for circular import issues and isinstance()."""
|
|
103
|
+
return str(type(self).__name__)
|
|
104
|
+
|
|
99
105
|
def configured_description(self) -> str | None:
|
|
100
|
-
|
|
106
|
+
"""Overloaded in OtherFile."""
|
|
107
|
+
if self.config and self.config.description:
|
|
108
|
+
return f"({self.config.description})"
|
|
101
109
|
|
|
102
110
|
def date_str(self) -> str | None:
|
|
103
111
|
return date_str(self.timestamp)
|
|
104
112
|
|
|
105
|
-
def description(self) -> Text:
|
|
106
|
-
"""Mostly for logging. Brackets are left open for subclasses to add stuff."""
|
|
107
|
-
txt = Text('').append(self.url_slug, style='magenta')
|
|
108
|
-
txt.append(f' {self.document_type()}', style=self.document_type_style())
|
|
109
|
-
|
|
110
|
-
if self.timestamp:
|
|
111
|
-
txt.append(' (', style=SYMBOL_STYLE)
|
|
112
|
-
txt.append(f"{iso_timestamp(self.timestamp)}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
113
|
-
|
|
114
|
-
txt.append(" [").append(key_value_txt('num_lines', Text(f"{self.num_lines}", style='cyan')))
|
|
115
|
-
txt.append(', ').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
|
|
116
|
-
return txt
|
|
117
|
-
|
|
118
113
|
def description_panel(self, include_hints: bool = False) -> Panel:
|
|
119
114
|
"""Panelized description() with info_txt(), used in search results."""
|
|
120
115
|
hints = [Text('', style='italic').append(h) for h in (self.hints() if include_hints else [])]
|
|
121
|
-
return Panel(Group(*([self.
|
|
122
|
-
|
|
123
|
-
def document_type(self) -> str:
|
|
124
|
-
"""Annoying workaround for circular import issues and isinstance()."""
|
|
125
|
-
return str(type(self).__name__)
|
|
116
|
+
return Panel(Group(*([self.summary()] + hints)), border_style=self.document_type_style(), expand=False)
|
|
126
117
|
|
|
127
118
|
def document_type_style(self) -> str:
|
|
128
|
-
return DOC_TYPE_STYLES[self.
|
|
119
|
+
return DOC_TYPE_STYLES[self.class_name()]
|
|
129
120
|
|
|
130
121
|
def duplicate_file_txt(self) -> Text:
|
|
131
122
|
"""If the file is a dupe make a nice message to explain what file it's a duplicate of."""
|
|
132
123
|
if not self.config or not self.config.dupe_of_id:
|
|
133
|
-
raise RuntimeError(f"duplicate_file_txt() called on {self.
|
|
124
|
+
raise RuntimeError(f"duplicate_file_txt() called on {self.summary()} but not a dupe! config:\n\n{self.config}")
|
|
134
125
|
|
|
135
126
|
txt = Text(f"Not showing ", style='white dim italic').append(epstein_media_doc_link_txt(self.file_id, style='cyan'))
|
|
136
127
|
txt.append(f" because it's {self.config.duplicate_reason()} ")
|
|
@@ -154,6 +145,9 @@ class Document:
|
|
|
154
145
|
hints = [Padding(hint, INFO_PADDING) for hint in self.hints()]
|
|
155
146
|
return Group(*([panel] + hints))
|
|
156
147
|
|
|
148
|
+
def file_size(self) -> int:
|
|
149
|
+
return file_size(self.file_path)
|
|
150
|
+
|
|
157
151
|
def file_size_str(self) -> str:
|
|
158
152
|
return file_size_str(self.file_path)
|
|
159
153
|
|
|
@@ -162,16 +156,10 @@ class Document:
|
|
|
162
156
|
hints = listify(self.info_txt())
|
|
163
157
|
hint_msg = self.configured_description()
|
|
164
158
|
|
|
165
|
-
if self.document_type() == OTHER_FILE_CLASS:
|
|
166
|
-
if not hint_msg and VI_DAILY_NEWS_REGEX.search(self.text):
|
|
167
|
-
hint_msg = VI_DAILY_NEWS_ARTICLE
|
|
168
|
-
elif hint_msg:
|
|
169
|
-
hint_msg = f"({hint_msg})"
|
|
170
|
-
|
|
171
159
|
if hint_msg:
|
|
172
160
|
hints.append(highlighter(Text(hint_msg, style='white dim italic')))
|
|
173
161
|
|
|
174
|
-
return hints
|
|
162
|
+
return without_nones(hints)
|
|
175
163
|
|
|
176
164
|
def info_txt(self) -> Text | None:
|
|
177
165
|
"""Secondary info about this file (recipients, level of certainty, etc). Overload in subclasses."""
|
|
@@ -181,32 +169,42 @@ class Document:
|
|
|
181
169
|
"""True if file created by extracting text from a court doc (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
|
|
182
170
|
return is_local_extract_file(self.filename)
|
|
183
171
|
|
|
184
|
-
def lines_matching_txt(self, _pattern: re.Pattern | str) -> list[Text]:
|
|
185
|
-
"""Return lines matching a regex as colored list[Text]."""
|
|
186
|
-
pattern = patternize(_pattern)
|
|
187
|
-
matched_lines = [line for line in self.lines if pattern.search(line)]
|
|
188
|
-
|
|
189
|
-
if len(matched_lines) == 0:
|
|
190
|
-
return []
|
|
191
|
-
|
|
192
|
-
file_style = FILENAME_MATCH_STYLES[type(self).file_matching_idx % len(FILENAME_MATCH_STYLES)]
|
|
193
|
-
type(self).file_matching_idx += 1
|
|
194
|
-
|
|
195
|
-
return [
|
|
196
|
-
Text('').append(self.file_path.name, style=file_style).append(':').append(line)
|
|
197
|
-
for line in matched_lines
|
|
198
|
-
]
|
|
199
|
-
|
|
200
172
|
def log(self, msg: str, level: int = logging.WARNING):
|
|
201
|
-
"""Log with
|
|
202
|
-
logger.log(level, f"
|
|
173
|
+
"""Log with filename as a prefix."""
|
|
174
|
+
logger.log(level, f"{self.url_slug} {msg}")
|
|
203
175
|
|
|
204
176
|
def log_top_lines(self, n: int = 10, msg: str = '', level: int = logging.INFO) -> None:
|
|
205
177
|
"""Log first 'n' lines of self.text at 'level'. 'msg' can be optionally provided."""
|
|
206
178
|
separator = '\n\n' if '\n' in msg else '. '
|
|
207
|
-
msg =
|
|
179
|
+
msg = (msg + separator) if msg else ''
|
|
180
|
+
msg = f"{self.filename}: {msg}First {n} lines:"
|
|
208
181
|
logger.log(level, f"{msg}\n\n{self.top_lines(n)}\n")
|
|
209
182
|
|
|
183
|
+
def matching_lines(self, _pattern: re.Pattern | str) -> list[MatchedLine]:
|
|
184
|
+
"""Return lines matching a regex as colored list[Text]."""
|
|
185
|
+
pattern = patternize(_pattern)
|
|
186
|
+
return [MatchedLine(line, i) for i, line in enumerate(self.lines) if pattern.search(line)]
|
|
187
|
+
|
|
188
|
+
def metadata(self) -> Metadata:
|
|
189
|
+
metadata = self.config.metadata() if self.config else {}
|
|
190
|
+
metadata.update({k: v for k, v in asdict(self).items() if k in METADATA_FIELDS and v is not None})
|
|
191
|
+
metadata['bytes'] = self.file_size()
|
|
192
|
+
metadata['filename'] = f"{self.url_slug}.txt"
|
|
193
|
+
metadata['type'] = self.class_name()
|
|
194
|
+
|
|
195
|
+
if self.is_local_extract_file():
|
|
196
|
+
metadata['extracted_file'] = {
|
|
197
|
+
'explanation': 'This file was extracted from a court filing, not distributed directly. A copy can be found on github.',
|
|
198
|
+
'extracted_from_file': self.url_slug + '.txt',
|
|
199
|
+
'extracted_file_url': extracted_file_url(self.filename),
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
return metadata
|
|
203
|
+
|
|
204
|
+
def raw_text(self) -> str:
|
|
205
|
+
with open(self.file_path) as f:
|
|
206
|
+
return f.read()
|
|
207
|
+
|
|
210
208
|
def raw_document_link_txt(self, style: str = '', include_alt_link: bool = False) -> Text:
|
|
211
209
|
"""Returns colored links to epstein.media and and epsteinweb in a Text object."""
|
|
212
210
|
txt = Text('', style='white' if include_alt_link else ARCHIVE_LINK_COLOR)
|
|
@@ -215,11 +213,13 @@ class Document:
|
|
|
215
213
|
txt.append(self.epstein_web_link(style=style))
|
|
216
214
|
|
|
217
215
|
if include_alt_link:
|
|
216
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
218
217
|
txt.append(' (').append(self.epstein_media_link(style='white dim', link_txt=EPSTEIN_MEDIA)).append(')')
|
|
219
218
|
else:
|
|
220
219
|
txt.append(self.epstein_media_link(style=style))
|
|
221
220
|
|
|
222
221
|
if include_alt_link:
|
|
222
|
+
txt.append(' (').append(self.epsteinify_link(style='white dim', link_txt=EPSTEINIFY)).append(')')
|
|
223
223
|
txt.append(' (').append(self.epstein_web_link(style='white dim', link_txt=EPSTEIN_WEB)).append(')')
|
|
224
224
|
|
|
225
225
|
return txt
|
|
@@ -234,8 +234,36 @@ class Document:
|
|
|
234
234
|
|
|
235
235
|
return text
|
|
236
236
|
|
|
237
|
+
def sort_key(self) -> tuple[datetime, str, int]:
|
|
238
|
+
if self.config and self.config.dupe_of_id:
|
|
239
|
+
sort_id = self.config.dupe_of_id
|
|
240
|
+
dupe_idx = 1
|
|
241
|
+
else:
|
|
242
|
+
sort_id = self.file_id
|
|
243
|
+
dupe_idx = 0
|
|
244
|
+
|
|
245
|
+
return (self.timestamp or FALLBACK_TIMESTAMP, sort_id, dupe_idx)
|
|
246
|
+
|
|
247
|
+
def summary(self) -> Text:
|
|
248
|
+
"""Summary of this file for logging. Brackets are left open for subclasses to add stuff."""
|
|
249
|
+
txt = Text('').append(self.class_name(), style=self.document_type_style())
|
|
250
|
+
txt.append(f" {self.url_slug}", style=FILENAME_STYLE)
|
|
251
|
+
|
|
252
|
+
if self.timestamp:
|
|
253
|
+
timestamp_str = iso_timestamp(self.timestamp).removesuffix(' 00:00:00')
|
|
254
|
+
txt.append(' (', style=SYMBOL_STYLE)
|
|
255
|
+
txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
|
|
256
|
+
|
|
257
|
+
txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(), style='aquamarine1')))
|
|
258
|
+
txt.append(", ").append(key_value_txt('lines', self.num_lines))
|
|
259
|
+
|
|
260
|
+
if self.config and self.config.dupe_of_id:
|
|
261
|
+
txt.append(", ").append(key_value_txt('dupe_of', Text(self.config.dupe_of_id, style='magenta')))
|
|
262
|
+
|
|
263
|
+
return txt
|
|
264
|
+
|
|
237
265
|
def top_lines(self, n: int = 10) -> str:
|
|
238
|
-
return '\n'.join(self.lines[0:n])
|
|
266
|
+
return '\n'.join(self.lines[0:n])[:MAX_TOP_LINES_LEN]
|
|
239
267
|
|
|
240
268
|
def _border_style(self) -> str:
|
|
241
269
|
"""Should be overloaded in subclasses."""
|
|
@@ -250,21 +278,20 @@ class Document:
|
|
|
250
278
|
"""Should be implemented in subclasses."""
|
|
251
279
|
pass
|
|
252
280
|
|
|
253
|
-
def _load_file(self):
|
|
281
|
+
def _load_file(self) -> str:
|
|
254
282
|
"""Remove BOM and HOUSE OVERSIGHT lines, strip whitespace."""
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
lines = lines[1:] if (len(lines) > 1 and lines[0] == '>>') else lines
|
|
261
|
-
return collapse_newlines('\n'.join(lines))
|
|
283
|
+
text = self.raw_text()
|
|
284
|
+
text = text[1:] if (len(text) > 0 and text[0] == '\ufeff') else text # remove BOM
|
|
285
|
+
text = self.repair_ocr_text(OCR_REPAIRS, text.strip())
|
|
286
|
+
lines = [l.strip() for l in text.split('\n') if not l.startswith(HOUSE_OVERSIGHT)]
|
|
287
|
+
return collapse_newlines('\n'.join(lines))
|
|
262
288
|
|
|
263
289
|
def _repair(self) -> None:
|
|
264
|
-
"""Can optionally be overloaded in subclasses."""
|
|
290
|
+
"""Can optionally be overloaded in subclasses to further improve self.text."""
|
|
265
291
|
pass
|
|
266
292
|
|
|
267
293
|
def _set_computed_fields(self, lines: list[str] | None = None, text: str | None = None) -> None:
|
|
294
|
+
"""Sets all fields derived from self.text based on either 'lines' or 'text' arg."""
|
|
268
295
|
if (lines and text):
|
|
269
296
|
raise RuntimeError(f"[{self.filename}] Either 'lines' or 'text' arg must be provided (got both)")
|
|
270
297
|
elif lines is not None:
|
|
@@ -275,7 +302,7 @@ class Document:
|
|
|
275
302
|
raise RuntimeError(f"[{self.filename}] Either 'lines' or 'text' arg must be provided (neither was)")
|
|
276
303
|
|
|
277
304
|
self.length = len(self.text)
|
|
278
|
-
self.lines = [line.strip() for line in self.text.split('\n')]
|
|
305
|
+
self.lines = [line.strip() if self.strip_whitespace else line for line in self.text.split('\n')]
|
|
279
306
|
self.num_lines = len(self.lines)
|
|
280
307
|
|
|
281
308
|
def _write_clean_text(self, output_path: Path) -> None:
|
|
@@ -291,16 +318,17 @@ class Document:
|
|
|
291
318
|
|
|
292
319
|
logger.warning(f"Wrote {self.length} chars of cleaned {self.filename} to {output_path}.")
|
|
293
320
|
|
|
294
|
-
def __rich_console__(self,
|
|
321
|
+
def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
|
|
295
322
|
yield self.file_info_panel()
|
|
296
323
|
text_panel = Panel(highlighter(self.text), border_style=self._border_style(), expand=False)
|
|
297
324
|
yield Padding(text_panel, (0, 0, 1, INFO_INDENT))
|
|
298
325
|
|
|
299
326
|
def __str__(self) -> str:
|
|
300
|
-
return self.
|
|
327
|
+
return self.summary().plain
|
|
301
328
|
|
|
302
329
|
@staticmethod
|
|
303
330
|
def diff_files(files: list[str]) -> None:
|
|
331
|
+
"""Diff the contents of two Documents after all cleanup, BOM removal, etc."""
|
|
304
332
|
if len(files) != 2:
|
|
305
333
|
raise RuntimeError('Need 2 files')
|
|
306
334
|
elif files[0] == files[1]:
|
|
@@ -330,7 +358,7 @@ class Document:
|
|
|
330
358
|
|
|
331
359
|
@staticmethod
|
|
332
360
|
def sort_by_timestamp(docs: Sequence['DocumentType']) -> list['DocumentType']:
|
|
333
|
-
return sorted(docs, key=lambda doc:
|
|
361
|
+
return sorted(docs, key=lambda doc: doc.sort_key())
|
|
334
362
|
|
|
335
363
|
@classmethod
|
|
336
364
|
def uniquify(cls, documents: Sequence['DocumentType']) -> Sequence['DocumentType']:
|