PyPI - epstein-files - Versions diffs - 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

epstein_files/__init__.py +32 -13
epstein_files/documents/document.py +8 -1
epstein_files/documents/email.py +179 -97
epstein_files/documents/emails/email_header.py +17 -8
epstein_files/documents/other_file.py +8 -6
epstein_files/epstein_files.py +16 -1
epstein_files/person.py +40 -15
epstein_files/util/constant/names.py +10 -6
epstein_files/util/constant/strings.py +2 -1
epstein_files/util/constants.py +463 -225
epstein_files/util/doc_cfg.py +33 -27
epstein_files/util/env.py +10 -3
epstein_files/util/file_helper.py +2 -0
epstein_files/util/highlighted_group.py +66 -23
epstein_files/util/output.py +17 -31
epstein_files/util/rich.py +2 -1
epstein_files/util/word_count.py +1 -1
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/METADATA +3 -3
epstein_files-1.4.1.dist-info/RECORD +34 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/entry_points.txt +1 -1
epstein_files-1.2.5.dist-info/RECORD +0 -34
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/LICENSE +0 -0
{epstein_files-1.2.5.dist-info → epstein_files-1.4.1.dist-info}/WHEEL +0 -0

epstein_files/__init__.py CHANGED Viewed

@@ -27,8 +27,8 @@ from epstein_files.util.logging import exit_with_error, logger
 from epstein_files.util.output import (print_emails_section, print_json_files, print_json_stats,
      print_other_files_section, print_text_messages_section, print_email_timeline, print_emailers_info,
      print_json_metadata, write_urls)
-from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key, print_title_page_header,
-     print_title_page_tables, print_subtitle_panel, write_html)
+from epstein_files.util.rich import (build_highlighter, console, highlighter, print_color_key, print_json,
+     print_title_page_header, print_title_page_tables, print_subtitle_panel, write_html)
 from epstein_files.util.timer import Timer
 from epstein_files.util.word_count import write_word_counts_html
@@ -93,7 +93,7 @@ def epstein_diff():
     Document.diff_files(args.positional_args)
-def epstein_search():
+def epstein_grep():
     """Search the cleaned up text of the files."""
     epstein_files = EpsteinFiles.get_files()
@@ -106,22 +106,36 @@ def epstein_search():
         temp_highlighter = build_highlighter(search_term)
         search_results = epstein_files.docs_matching(search_term, args.names)
         print_subtitle_panel(f"Found {len(search_results)} documents matching '{search_term}'")
+        last_document = None
         for search_result in search_results:
-            document = search_result.document
+            doc = search_result.document
+            lines = search_result.lines
-            if (isinstance(document, Email) and not args.output_emails) \
-                    or (isinstance(document, OtherFile) and not args.output_other) \
-                    or (isinstance(document, MessengerLog) and not args.output_texts):
-                document.warn(f"{type(document).__name__} Skipping search result...")
+            if (isinstance(doc, Email) and not args.output_emails) \
+                    or (isinstance(doc, OtherFile) and not args.output_other) \
+                    or (isinstance(doc, MessengerLog) and not args.output_texts):
+                doc.log(f"{type(doc).__name__} Skipping search result...")
                 continue
+            elif isinstance(doc, Email) and args.email_body:
+                lines = [l for l in search_result.lines if l.line_number > doc.header.num_header_rows]
-            if args.whole_file:
-                console.print(document)
+                if not lines:
+                    doc.log(f"None of the matches for '{search_term}' seem to be in the body of the email")
+                    continue
+            if doc.is_duplicate():
+                if last_document and not last_document.is_duplicate():
+                    console.line()
+                last_document = doc
+                console.print(doc.duplicate_file_txt())
+            elif args.whole_file:
+                console.print(doc)
             else:
-                console.print(document.summary_panel())
+                console.print(doc.summary_panel())
-                for matching_line in search_result.lines:
+                for matching_line in lines:
                     line_txt = matching_line.__rich__()
                     console.print(Padding(temp_highlighter(line_txt), INFO_PADDING), style='gray37')
@@ -138,7 +152,7 @@ def epstein_show():
             people = EpsteinFiles.get_files().person_objs(args.names)
             raw_docs = [doc for doc in flatten([p.emails for p in people])]
         else:
-            ids = [extract_file_id(arg) for arg in args.positional_args]
+            ids = [extract_file_id(arg.strip().strip('_')) for arg in args.positional_args]
             raw_docs = [Document(coerce_file_path(id)) for id in ids]
         docs = Document.sort_by_timestamp([document_cls(doc)(doc.file_path) for doc in raw_docs])
@@ -155,6 +169,11 @@ def epstein_show():
             if isinstance(doc, Email):
                 console.print(Panel(Text("actual_text: ").append(doc.summary()), expand=False, style=doc._border_style()))
                 console.print(escape(doc._actual_text()), '\n')
+                metadata = doc.metadata()
+                metadata['is_fwded_article'] = doc.is_fwded_article()
+                metadata['is_word_count_worthy'] = doc.is_word_count_worthy()
+                metadata['_is_first_for_user'] = doc._is_first_for_user
+                print_json(f"{doc.file_id} Metadata", metadata)
 def epstein_word_count() -> None:

epstein_files/documents/document.py CHANGED Viewed

@@ -197,6 +197,9 @@ class Document:
     def is_duplicate(self) -> bool:
         return bool(self.duplicate_of_id())
+    def is_interesting(self) -> bool:
+        return bool(self.config and self.config.is_interesting)
     def is_local_extract_file(self) -> bool:
         """True if extracted from other file (identifiable from filename e.g. HOUSE_OVERSIGHT_012345_1.txt)."""
         return is_local_extract_file(self.filename)
@@ -267,7 +270,7 @@ class Document:
             txt.append(' (', style=SYMBOL_STYLE)
             txt.append(f"{timestamp_str}", style=TIMESTAMP_DIM).append(')', style=SYMBOL_STYLE)
-        txt.append(' [').append(key_value_txt('size', Text(self.file_size_str(0), style='aquamarine1')))
+        txt.append(' [').append(key_value_txt('size', Text(str(self.length()), style='aquamarine1')))
         txt.append(", ").append(key_value_txt('lines', self.num_lines()))
         if self.config and self.config.duplicate_of_id:
@@ -439,6 +442,10 @@ class Document:
     def sort_by_id(docs: Sequence['DocumentType']) -> list['DocumentType']:
         return sorted(docs, key=lambda d: d.file_id)
+    @staticmethod
+    def sort_by_length(docs: Sequence['DocumentType']) -> list['DocumentType']:
+        return sorted(docs, key=lambda d: d.file_size(), reverse=True)
     @staticmethod
     def sort_by_timestamp(docs: Sequence['DocumentType']) -> list['DocumentType']:
         return sorted(docs, key=lambda doc: doc.timestamp_sort_key())

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

epstein-files 1.2.5__py3-none-any.whl → 1.4.1__py3-none-any.whl