epstein-files 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. epstein_files/__init__.py +31 -18
  2. epstein_files/documents/communication.py +9 -5
  3. epstein_files/documents/document.py +225 -136
  4. epstein_files/documents/doj_file.py +242 -0
  5. epstein_files/documents/doj_files/full_text.py +166 -0
  6. epstein_files/documents/email.py +138 -163
  7. epstein_files/documents/emails/email_header.py +21 -11
  8. epstein_files/documents/emails/emailers.py +223 -0
  9. epstein_files/documents/imessage/text_message.py +2 -3
  10. epstein_files/documents/json_file.py +18 -14
  11. epstein_files/documents/messenger_log.py +23 -39
  12. epstein_files/documents/other_file.py +48 -44
  13. epstein_files/epstein_files.py +54 -33
  14. epstein_files/person.py +142 -110
  15. epstein_files/util/constant/names.py +29 -6
  16. epstein_files/util/constant/output_files.py +2 -0
  17. epstein_files/util/constant/strings.py +12 -6
  18. epstein_files/util/constant/urls.py +17 -0
  19. epstein_files/util/constants.py +101 -174
  20. epstein_files/util/data.py +2 -0
  21. epstein_files/util/doc_cfg.py +20 -15
  22. epstein_files/util/env.py +24 -16
  23. epstein_files/util/file_helper.py +28 -6
  24. epstein_files/util/helpers/debugging_helper.py +13 -0
  25. epstein_files/util/helpers/env_helpers.py +21 -0
  26. epstein_files/util/highlighted_group.py +57 -16
  27. epstein_files/util/layout/left_bar_panel.py +26 -0
  28. epstein_files/util/logging.py +28 -13
  29. epstein_files/util/output.py +33 -10
  30. epstein_files/util/rich.py +28 -2
  31. epstein_files/util/word_count.py +7 -7
  32. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/METADATA +14 -1
  33. epstein_files-1.5.0.dist-info/RECORD +40 -0
  34. epstein_files-1.4.1.dist-info/RECORD +0 -34
  35. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/LICENSE +0 -0
  36. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/WHEEL +0 -0
  37. {epstein_files-1.4.1.dist-info → epstein_files-1.5.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,242 @@
1
+ import re
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime
4
+ from typing import ClassVar, Self
5
+
6
+ from rich.console import Console, ConsoleOptions, RenderableType, RenderResult
7
+ from rich.padding import Padding
8
+ from rich.panel import Panel
9
+ from rich.text import Text
10
+
11
+ from epstein_files.documents.document import INFO_INDENT, Document
12
+ from epstein_files.documents.email import Email
13
+ from epstein_files.documents.emails.email_header import FIELDS_COLON_PATTERN
14
+ from epstein_files.documents.other_file import Metadata, OtherFile
15
+ from epstein_files.util.constant.names import RENATA_BOLOTOVA
16
+ from epstein_files.util.constants import FALLBACK_TIMESTAMP
17
+ from epstein_files.util.data import without_falsey
18
+ from epstein_files.util.layout.left_bar_panel import LeftBarPanel
19
+ from epstein_files.util.logging import logger
20
+ from epstein_files.util.rich import RAINBOW, INFO_STYLE, SKIPPED_FILE_MSG_PADDING, highlighter, link_text_obj
21
+
22
# Tail of the parenthesised placeholder messages built in DojFile.prettified_text
# when the real body text is not shown.
CHECK_LINK_FOR_DETAILS = 'not shown here, check original PDF for details'
# Matches one boxed "Page N, Image M" panel: a newline, the "╭" corner with any run of "─",
# the " Page N, Image M" label, then (lazily) everything up to the first "╯" followed by a newline.
# re.DOTALL lets ".*?" cross the line breaks inside the panel.
IMAGE_PANEL_REGEX = re.compile(r"\n╭─* Page \d+, Image \d+.*?╯\n", re.DOTALL)
# Matches a line made up only of digits (optionally followed by a newline), or only of
# two or more characters drawn from whitespace, "+" and "❑".
IGNORE_LINE_REGEX = re.compile(r"^(\d+\n?|[\s+❑]{2,})$")
# DojFile.is_empty treats text shorter than this many characters as empty.
MIN_VALID_LENGTH = 10
26
+
27
# Maps a PDF file name to the justice.gov URL it can be downloaded from.
# NOTE(review): not referenced anywhere in the visible part of this module - presumably
# consumed elsewhere in the package; confirm before removing.
OTHER_DOC_URLS: dict[str, str] = {
    '245-22.pdf': 'https://www.justice.gov/multimedia/Court%20Records/Government%20of%20the%20United%20States%20Virgin%20Islands%20v.%20JPMorgan%20Chase%20Bank,%20N.A.,%20No.%20122-cv-10904%20(S.D.N.Y.%202022)/245-22.pdf'
}
30
+
31
# DojFile specific repair, applied by DojFile._repair() via repair_ocr_text().
# Keys are the text/pattern to find, values are the replacement.
# The single entry removes an empty line sitting between a line that contains FIELDS_COLON_PATTERN
# and a following "Subject:" line, so the two lines become adjacent.
# NOTE(review): FIELDS_COLON_PATTERN is defined in email_header.py - presumably it matches email
# header field names followed by a colon; confirm there.
OCR_REPAIRS: dict[str | re.Pattern, str] = {
    re.compile(fr"({FIELDS_COLON_PATTERN}.*\n)\nSubject:", re.MULTILINE): r'\1Subject:',
}
35
+
36
# IDs of DOJ files flagged as bad; DojFile.is_bad_ocr is True for exactly these IDs.
# Each ID is listed once (the original list repeated EFTA00002812, EFTA00002543 and
# EFTA00002523); order of first appearance is preserved and the container stays a list
# so existing callers that index, slice or concatenate it keep working.
BAD_DOJ_FILE_IDS: list[str] = [
    'EFTA00008511',
    'EFTA00008503',
    'EFTA00002512',
    'EFTA00008501',
    'EFTA00008500',
    'EFTA00008514',
    'EFTA00001940',
    'EFTA00008410',
    'EFTA00008411',
    'EFTA00008519',
    'EFTA00008493',
    'EFTA00008527',
    'EFTA00008473',
    'EFTA00001846',
    'EFTA00000052',
    'EFTA00008445',
    'EFTA00008480',
    'EFTA00001124',
    'EFTA00002509',
    'EFTA00008497',
    'EFTA00001031',
    'EFTA00005495',
    'EFTA00002830',
    'EFTA00001937',
    'EFTA00008496',
    'EFTA00008441',
    'EFTA00008415',
    'EFTA00000675',
    'EFTA00002538',
    'EFTA00000672',
    'EFTA00002814',
    'EFTA00002812',
    'EFTA00002543',
    'EFTA00002813',
    'EFTA00002523',
    'EFTA00002079',
    'EFTA00002805',
    'EFTA00001840',
    'EFTA00001114',
    'EFTA00002786',
    'EFTA00001271',
    'EFTA00001979',
    'EFTA00002110',
    'EFTA00008504',
    'EFTA00001368',
    'EFTA00000134',
    'EFTA00000471',
    'EFTA00001848',
    'EFTA00008506',
]
90
+
91
# Maps the file ID of a phone bill to a short note on the date range it covers.
# For these IDs DojFile strips the image OCR panels in __post_init__, and prettified_text
# shows only the text before the first 'MetroPCS' plus a "(Redacted phone bill ...)" note
# that embeds the value stored here.
PHONE_BILL_IDS: dict[str, str] = {
    'EFTA00006770': 'covering 2006-02-01 to 2006-06-16',
    'EFTA00006870': 'covering 2006-02-09 to 2006-07',
    'EFTA00006970': 'covering 2006-04-15 to 2006-07-16',
    # TODO: 'EFTA00007070' is deliberately left out for now: it is not a messy phone bill,
    #       it is short and has additional info at the end.
}
97
+
98
# Maps a DOJ file ID to a one-line note describing what the file contains.
# NOTE(review): not referenced anywhere in the visible part of this module - presumably
# consumed elsewhere in the package; confirm before removing.
INTERESTING_DOJ_FILES: dict[str, str] = {
    'EFTA02640711': 'Jabor Y home address (HBJ)',
    'EFTA00039689': 'Dilorio emails to SEC about Signature Bank, Hapoalim, Bioptix / RIOT, Honig, etc.',
    'EFTA00039025': 'Investigation and Review of the Federal Bureau of Prisons Custody, Care, and Supervision of Jeffrey Epstein',
    'EFTA02296929': f"{RENATA_BOLOTOVA} appears to know Epstein's final girlfriend",
    'EFTA01273102': f"payment from Epstein to {RENATA_BOLOTOVA}'s father's account at Sberbank",
}
105
+
106
# The boxed "(no text found in image)" panel for page 1, image 1, with the surrounding
# newlines of the literal stripped. DojFile.is_empty removes this from the end of a file's
# text before measuring how much text is left.
NO_IMAGE_SUFFIX: str = """
╭──── Page 1, Image 1 ─────╮
│ (no text found in image) │
╰──────────────────────────╯
""".strip()
111
+
112
+
113
@dataclass
class DojFile(OtherFile):
    """
    Class for the files released by DOJ on 2026-01-30 with `EFTA000` prefix.

    When rendered by `rich`, a file that `Document.is_email()` recognises is delegated to an
    `Email` object; any other file is shown as its info panel followed by a left-bar panel
    containing `prettified_text`.
    """
    # Cache for the `border_style` property; stays `None` until the property is first read.
    _border_style: str | None = None

    # ClassVar to help change color as we print, no impact beyond fancier output
    border_style_rainbow_idx: ClassVar[int] = 0
    # NOTE(review): the original comment here reads "Overloaded in DojFile", so this presumably
    # overrides a superclass default. The class docstring gives a 2026-01-30 release date while
    # this value is 2025-01-29 - confirm the year is intended.
    max_timestamp: ClassVar[datetime] = datetime(2025, 1, 29)

    @property
    def border_style(self) -> str:
        """
        Use a rainbow to make sure each printed object has different color for those before and after.

        The style is chosen on first access and cached in `_border_style`, so repeated reads
        on the same object return the same value.
        """
        if self._border_style is None:
            # int % int is already an int, so it can index RAINBOW directly.
            self._border_style = RAINBOW[self.border_style_rainbow_idx % len(RAINBOW)]
            # Bump the counter on the class, not the instance, so the next object gets the next color.
            type(self).border_style_rainbow_idx += 1

        return self._border_style

    @property
    def info(self) -> list[Text]:
        """Overloads superclass to adjust formatting: each superclass entry gets a `' '` prefix."""
        return [Text(' ').append(sentence) for sentence in super().info]

    @property
    def is_bad_ocr(self) -> bool:
        """True if this file's ID is listed in `BAD_DOJ_FILE_IDS`."""
        return self.file_id in BAD_DOJ_FILE_IDS

    @property
    def is_empty(self) -> bool:
        """Overloads superclass method: True if fewer than `MIN_VALID_LENGTH` characters remain
        once the text is stripped and a trailing `NO_IMAGE_SUFFIX` panel is removed."""
        return len(self.text.strip().removesuffix(NO_IMAGE_SUFFIX)) < MIN_VALID_LENGTH

    @property
    def prettified_text(self) -> Text:
        """
        Returns the string we want to print as the body of the document.

        - Phone bills (`PHONE_BILL_IDS`): the text before the first 'MetroPCS' plus a redaction note.
        - Files whose config sets `replace_text_with`: a short value (< 300 chars) is wrapped in a
          "(Text of ...)" note styled with `INFO_STYLE`; a longer value is used verbatim.
        - Everything else: the file's own text, unstyled.
        """
        style = ''

        if self.file_id in PHONE_BILL_IDS:
            # partition() yields the whole text when 'MetroPCS' is absent, same as split()[0].
            preamble = self.text.partition('MetroPCS')[0]
            text = f"{preamble}\n\n(Redacted phone bill {PHONE_BILL_IDS[self.file_id]} {CHECK_LINK_FOR_DETAILS})"
        elif self.config and self.config.replace_text_with:
            if len(self.config.replace_text_with) < 300:
                style = INFO_STYLE
                text = f'(Text of {self.config.replace_text_with} {CHECK_LINK_FOR_DETAILS})'
            else:
                text = self.config.replace_text_with
        else:
            text = self.text

        return Text(text, style)

    @property
    def timestamp_sort_key(self) -> tuple[datetime, str, int]:
        """Overloads parent method. Returns `(timestamp, file_id, 0)`, substituting
        `FALLBACK_TIMESTAMP` when the timestamp is missing or its year is 2001 or earlier."""
        dupe_idx = 0
        # TODO: Years of 2001 are often garbage parsed from '1.6' etc.
        sort_timestamp = self.timestamp or FALLBACK_TIMESTAMP

        if sort_timestamp.year <= 2001:
            sort_timestamp = FALLBACK_TIMESTAMP

        return (sort_timestamp, self.file_id, dupe_idx)

    def __post_init__(self):
        super().__post_init__()

        # Only the phone bills get their image OCR panels stripped at load time.
        if self.file_id in PHONE_BILL_IDS:
            self.strip_image_ocr_panels()

    def doj_link(self) -> Text:
        """Link to this file on the DOJ site."""
        return link_text_obj(self.external_url, self.url_slug)

    def external_links_txt(self, _style: str = '', include_alt_links: bool = True) -> Text:
        """Overrides super() method to apply self.border_style. The `_style` argument is ignored."""
        return super().external_links_txt(self.border_style, include_alt_links=include_alt_links)

    def image_with_no_text_msg(self) -> RenderableType:
        """One line of linked text to show if this file doesn't seem to have any OCR text."""
        return Padding(
            Text('').append(self.doj_link()).append(" is a single image with no text..."),
            (0, 0, 0, 1)
        )

    def printable_document(self) -> Self | Email:
        """
        Return the object that should actually be printed for this file.

        Returns an `Email` built from this file when `Document.is_email()` says it is one,
        otherwise returns `self` unchanged.
        """
        if not Document.is_email(self):
            return self

        try:
            return Email(self.file_path, text=self.text)  # Pass text= to avoid reprocessing
        except Exception as e:
            # Deliberate best-effort fallback: whatever went wrong, report it (including the
            # cause, which used to be dropped) and retry without the pre-processed text.
            self.warn(f"Error creating Email object ({type(e).__name__}: {e}), trying full reload of text...")
            return Email(self.file_path)

    def strip_image_ocr_panels(self) -> None:
        """Removes the ╭--- Page 5, Image 1 ---- panels from the text."""
        new_text, num_replaced = IMAGE_PANEL_REGEX.subn('', self.text)
        self.warn(f"Stripped {num_replaced} image panels.")
        self._set_computed_fields(text=new_text)

    def _repair(self) -> None:
        """Overloads superclass method: applies `OCR_REPAIRS` to the text, then drops
        number-only lines if there are enough of them."""
        new_text = self.repair_ocr_text(OCR_REPAIRS, self.text)
        self._set_computed_fields(text=new_text)
        self._remove_number_only_lines()

    def _remove_number_only_lines(self) -> None:
        """Remove number only lines (which happen a lot in legal doc OCR) if there are more than 20 of them."""
        non_number_lines = [line for line in self.lines if not IGNORE_LINE_REGEX.match(line)]
        number_only_line_count = len(self.lines) - len(non_number_lines)

        # Leave files with only a handful of such lines untouched.
        if number_only_line_count > 20:
            self.warn(f"Reduced line count from {len(self.lines)} to {len(non_number_lines)}")
            self._set_computed_fields(lines=non_number_lines)

    def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
        doc = self.printable_document()

        # Emails handle their own formatting
        if isinstance(doc, Email):
            yield doc
        else:
            yield (info_panel := self.file_info_panel())
            # Reuse the style of the info panel's first renderable so header and body match.
            border_style = info_panel.renderables[0].border_style
            panel_args = [self.prettified_text, border_style]

            # The bracketed timestamp title is only passed along when there is one to show.
            if self.panel_title_timestamp:
                panel_args.append(Text(f"[{self.panel_title_timestamp}]", style='dim italic ' + border_style))

            table = LeftBarPanel.build(*panel_args)
            yield Padding(table, (0, 0, 1, 1))
@@ -0,0 +1,166 @@
1
+ EFTA00009622_TEXT = """
2
+ [redacted] s GJ Test. 7/19/2006
3
+
4
+ [redacted] dob 16 yrs
5
+
6
+ [redacted] - Loxahatchee
7
+
8
+ 2005 [redacted] 9th grade
9
+
10
+ [redacted] was outside asked to date [redacted]
11
+
12
+ 1/10/19 [redacted]
13
+
14
+ ~~talked in depth~~
15
+
16
+ met for first time the night before went to JE
17
+ 11:30 ish 10
18
+ do you want to make $200
19
+
20
+ met my friend Jeff bighouse
21
+
22
+ but you have to give massage 45 min + that's all you have to do get $200
23
+
24
+ [redacted] told [redacted] + [redacted] went to Bathroom
25
+ to argue
26
+ came back out [redacted] called Jeff on the phone.
27
+ 1 assistant answered the pho...
28
+
29
+ Page 2
30
+ Jeff asst answered phone - [redacted] had someone to give massage Lady asked how old I was
31
+ she told the lady 18 + went to [redacted]
32
+
33
+ Its $200 didn't care what
34
+ [redacted] - Lady trying to verify 18 Yes
35
+ Graduated from [redacted] Known [redacted] for a long time
36
+ & the next morning
37
+
38
+ [redacted] + another girl - going to Mall he [redacted] didn't care
39
+
40
+ [redacted] asked dad for $ yeh sure fine ok
41
+
42
+ House in Palm Beach
43
+ all got out of car gate at back door security guard the saw us the [redacted] asked us where...
44
+ Page 3
45
+
46
+ [redacted] said Jeffrey
47
+ Went to kitchen waited 15 min
48
+
49
+ JE + asst walked in back door
50
+
51
+ shook hands met both JE asst.
52
+ (A) JE chose gonna go first
53
+
54
+ JE said how about you
55
+ [redacted] upstairs "asst lady"
56
+
57
+ walked me up there stripping out all this picture
58
+
59
+ She pulled out massage table me front of couch blanket over table opened drawer whole bunch of lotions + pick some out
60
+ She told me keep bra + panties on + get undress
61
+
62
+ No [redacted] told her I thought a massage keep your clothing
63
+
64
+ Page 4
65
+
66
+ stay in the room
67
+ I came in the room HE + shook hands
68
+ Came back in the room
69
+
70
+ JE: old 50ish
71
+
72
+ initially guy golfy khaki pants pullover
73
+
74
+ t-shirt + jeans thong bra
75
+
76
+ hesitant thinking what's going on. I didn't care cause I wanted $200
77
+
78
+ He laid on massage table He said grab any 3 lotions
79
+
80
+ Started to give him
81
+
82
+ What M.S. did you go to told him she was 16 Not do you like to do small talk [redacted]
83
+
84
+ Sat on table 15-20 minute conversation another house in NY
85
+
86
+ Page 5
87
+
88
+ He asked if I wanted to make an extra $300
89
+ Straddle him
90
+
91
+ massage over -
92
+ but if you're not comfortable giving a massage can I [redacted] on you ~~anything~~
93
+ [redacted] said ok
94
+ JE left + came ba
95
+
96
+ [redacted] kind of like a [redacted]
97
+ grabbed that + layed down back on table ~~old~~ ~~old~~ hands
98
+
99
+ Give me a massage on chest use the [redacted] 10 minutes or so ( after a couple ~~of~~ minutes ( on [redacted]
100
+ (A) [redacted] said if he did that he would have to pay her extra money if he >
101
+ Page 6
102
+
103
+ [redacted] not she would have to didn't tell her ~~get~~ ~~over~~ do her clothes at He [redacted] you get paid
104
+
105
+ [redacted]
106
+
107
+ Yes Saw [redacted]
108
+
109
+ He opened wallet got the $200
110
+
111
+ He told her to write Name + #
112
+
113
+ after that left the room
114
+
115
+ I didn't see him [redacted]
116
+
117
+ I was scared to tell
118
+
119
+ [redacted] her body not his body
120
+ Went down by herself got 1st No didn't do anything but [redacted] $200 ~~too~~
121
+
122
+ Page 7
123
+ I figure it was probably because she brought me
124
+
125
+ What happened I got $300 I was excited
126
+
127
+ What did you have to do with him
128
+ If you do this you get that
129
+ Laughing ~~in~~ Thanks for fully me I was going to ~~much~~ ~~go~~ ~~many~~ Oh sorry.
130
+ Didn't plan it
131
+
132
+ If you ever need money
133
+ Oh yeh - everybody
134
+
135
+ I did not ever go back to house
136
+
137
+ at school my best friend I told her if you ever want to make 300.-o massage Told [redacted] [redacted]
138
+
139
+ Page 8
140
+ [redacted] telling everybody I confronted her altercation get into a fight [redacted]
141
+
142
+ Spreading rumors about me Let me see in your purse you don't have to lie Stefand I worked at [redacted]
143
+
144
+ I didn't think it was her business and why would tell on myself
145
+
146
+ You don't have to lie I knew what happen
147
+ Everything came out
148
+
149
+ [redacted] then I didn't want to tell them what had happened,
150
+
151
+ I [redacted]
152
+
153
+ [redacted] Family Setty a lot problems
154
+
155
+ No the only [redacted] sometimes
156
+ Page 9
157
+
158
+ [redacted] Have you ever da[ting] [redacted] Survey - Yeh
159
+ [redacted] not anymore
160
+ 14 birthday my dad
161
+ I did [redacted] lots of time
162
+
163
+ $250,000 its a joke
164
+ tall skinny dark hair
165
+ Page 10
166
+ """.strip()