dirshot 0.1.2.tar.gz → 0.1.3.tar.gz

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dirshot
-Version: 0.1.2
+Version: 0.1.3
 Summary: A flexible utility for creating project snapshots and searching for files.
 Author-email: init-helpful <init.helpful@gmail.com>
 Project-URL: Homepage, https://github.com/init-helpful/dirshot
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dirshot"
-version = "0.1.2"
+version = "0.1.3"
 authors = [
   { name="init-helpful", email="init.helpful@gmail.com" },
 ]
@@ -1,7 +1,7 @@
 import os
 import sys
 import re
-import time # Imported for the fallback progress bar
+import time
 from pathlib import Path
 from dataclasses import dataclass, field
 from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
@@ -18,7 +18,9 @@ except ImportError:
     class tqdm:
         """A simple, text-based progress bar fallback if tqdm is not installed."""
 
-        def __init__(self, iterable=None, total=None, desc="", unit="it", **kwargs):
+        def __init__(
+            self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
+        ):
             self.iterable = iterable
             self.total = (
                 total
@@ -30,21 +32,20 @@ except ImportError:
             self.current = 0
             self.start_time = time.time()
             self._last_update_time = 0
+            self._postfix = postfix or {}
 
         def __iter__(self):
+            if self.iterable is None:
+                raise TypeError("tqdm fallback must be initialized with an iterable.")
             for obj in self.iterable:
                 yield obj
                 self.update(1)
-            # The loop is finished, ensure the bar is 100% and close
-            if self.total is not None and self.current < self.total:
-                self.update(self.total - self.current)
             self.close()
 
         def update(self, n=1):
             """Update the progress bar by n steps."""
             self.current += n
             now = time.time()
-            # Throttle screen updates to prevent flickering and performance loss
             if (
                 self.total is None
                 or now - self._last_update_time > 0.1
@@ -58,19 +59,29 @@ except ImportError:
             self.desc = desc
             self._draw()
 
+        def set_postfix_str(self, s: str):
+            self._postfix["info"] = s
+            self._draw()
+
         def _draw(self):
             """Draw the progress bar to the console."""
-            if self.total:
+            postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
+
+            if self.total and self.total > 0:
                 percent = int((self.current / self.total) * 100)
                 bar_length = 25
                 filled_length = int(bar_length * self.current // self.total)
                 bar = "█" * filled_length + "-" * (bar_length - filled_length)
-                # Use carriage return to print on the same line
                 progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
-                sys.stdout.write(progress_line)
             else: # Case where total is not known
-                sys.stdout.write(f"\r{self.desc}: {self.current} {self.unit}")
+                progress_line = f"\r{self.desc}: {self.current} {self.unit}"
+
+            if postfix_str:
+                progress_line += f" [{postfix_str}]"
 
+            # Pad with spaces to clear previous, longer lines
+            terminal_width = 80
+            sys.stdout.write(progress_line.ljust(terminal_width))
             sys.stdout.flush()
 
         def close(self):
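Aside (not part of the diff): the fallback class above drives a single-line console bar by combining a carriage return, ljust() padding, and a time-based redraw throttle. Below is a minimal, self-contained sketch of that same pattern; it is not code from the dirshot package, and all names and the dummy workload are hypothetical.

    # Illustrative sketch only -- a simplified version of the fallback bar above.
    import sys
    import time

    def draw_bar(desc, current, total, width=25, line_width=80):
        # Carriage return redraws in place; ljust() clears leftover characters
        # from a previous, longer line (the same trick the fallback's _draw uses).
        filled = int(width * current // total)
        bar = "#" * filled + "-" * (width - filled)
        percent = int((current / total) * 100)
        line = f"\r{desc}: {percent}%|{bar}| {current}/{total}"
        sys.stdout.write(line.ljust(line_width))
        sys.stdout.flush()

    last_draw = 0.0
    total = 200
    for i in range(1, total + 1):
        time.sleep(0.01)  # stand-in for real work
        now = time.time()
        if now - last_draw > 0.1 or i == total:  # throttle redraws, like update()
            draw_bar("Demo", i, total)
            last_draw = now
    print()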
@@ -88,6 +99,72 @@ TREE_HEADER_TEXT = "Project File Structure"
 FILE_HEADER_PREFIX = "FILE: "
 TOKEN_APPROX_MODE = "CHAR_COUNT"
 
+# List of binary file extensions to skip during content search
+BINARY_FILE_EXTENSIONS = {
+    # Images
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".ico",
+    ".tiff",
+    ".webp",
+    # Documents
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".odt",
+    ".ods",
+    # Archives
+    ".zip",
+    ".gz",
+    ".tar",
+    ".rar",
+    ".7z",
+    ".bz2",
+    ".xz",
+    # Executables & Binaries
+    ".exe",
+    ".dll",
+    ".so",
+    ".o",
+    ".a",
+    ".lib",
+    ".bin",
+    ".dat",
+    ".db",
+    ".sqlite",
+    ".img",
+    ".iso",
+    # Compiled Code
+    ".class",
+    ".jar",
+    ".war",
+    ".pyc",
+    ".pyo",
+    # Audio/Video
+    ".mp3",
+    ".wav",
+    ".flac",
+    ".ogg",
+    ".mp4",
+    ".mkv",
+    ".avi",
+    ".mov",
+    ".wmv",
+    # Fonts
+    ".ttf",
+    ".otf",
+    ".woff",
+    ".woff2",
+}
+
+
 # --- Public Enums for Import and Usage ---
 
 
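Aside (not part of the diff): the new BINARY_FILE_EXTENSIONS set is consulted through a lower-cased suffix check before any file content is read (see the process_file_for_search hunk below). A small illustrative sketch of that check follows; the set here is abbreviated and the helper name is hypothetical.

    # Illustrative sketch only -- abbreviated stand-in for the full set above.
    from pathlib import Path

    BINARY_FILE_EXTENSIONS = {".png", ".zip", ".exe", ".pyc"}

    def looks_binary(path: Path) -> bool:
        # Same shape of check the diff adds: compare the lower-cased suffix
        # against the known-binary extension set.
        return path.suffix.lower() in BINARY_FILE_EXTENSIONS

    print(looks_binary(Path("logo.PNG")))  # True  -> content search is skipped
    print(looks_binary(Path("main.py")))   # False -> file contents are searched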
@@ -273,17 +350,25 @@ def process_file_for_search(
     search_file_contents: bool,
     full_path_compare: bool,
 ) -> Optional[Path]:
+    """
+    Checks a single file for keyword matches. Skips content search for binary files.
+    """
     compare_target = str(file_path) if full_path_compare else file_path.name
     if any(key in compare_target.lower() for key in normalized_keywords):
         return file_path
+
     if search_file_contents:
+        # Before reading content, check if it's a known binary file type
+        if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
+            return None # Do not attempt to read binary file content
+
         try:
             with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
                 for line in f:
                     if any(key in line.lower() for key in normalized_keywords):
                         return file_path
         except (IOError, OSError):
-            pass
+            pass # Ignore files that can't be opened
     return None
 
 
@@ -487,13 +572,12 @@ def _collate_content_to_file(
 ) -> None:
     """
     Collates content to a string buffer, calculates token count,
-    and then writes to the output file.
+    and then writes to the output file with a progress bar.
     """
     output_file_path = Path(output_file_path_str).resolve()
     output_file_path.parent.mkdir(parents=True, exist_ok=True)
     separator_line = separator_char * separator_line_len
 
-    # Use an in-memory buffer to build the output first
     buffer = StringIO()
 
     if tree_content_lines:
@@ -505,40 +589,41 @@
                     "Key: [I: Included f/d | T: Total f/d in original dir]\n"
                     " (f=files, d=directories)\n\n"
                 )
-            else: # ProjectMode.SEARCH
+            else:
                 stats_key = (
                     "Key: [M: Matched files/dirs]\n" " (f=files, d=directories)\n\n"
                 )
             buffer.write(stats_key)
-        tree_content = "\n".join(tree_content_lines)
-        buffer.write(tree_content + "\n")
+        buffer.write("\n".join(tree_content_lines) + "\n")
         buffer.write(f"\n{separator_line}\n\n")
 
-    # This message is for the file content, not the console.
     if not files_to_process:
         message = (
             "No files found matching the specified criteria.\n"
             if mode == ProjectMode.SEARCH
-            else "No files found matching the specified criteria for content aggregation.\n"
+            else "No files found matching specified criteria for content aggregation.\n"
         )
         buffer.write(message)
     else:
-        for file_info in files_to_process:
+
+        collation_bar = tqdm(
+            files_to_process, desc="Phase 3: Collating files", unit="file", leave=False
+        )
+        for file_info in collation_bar:
+            collation_bar.set_postfix_str(file_info.relative_path_posix, refresh=True)
             header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
             buffer.write(header_content)
             try:
                 with open(
                     file_info.absolute_path, "r", encoding=encoding, errors="replace"
                 ) as infile:
-                    file_content = infile.read()
-                    buffer.write(file_content)
+                    buffer.write(infile.read())
                 buffer.write("\n\n")
             except Exception:
                 buffer.write(
                     f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
                 )
 
-    # Get the complete content from the buffer
     final_content = buffer.getvalue()
     total_token_count = 0
     mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
@@ -549,24 +634,19 @@ def _collate_content_to_file(
    elif TOKEN_APPROX_MODE == "WORD_COUNT":
        total_token_count = len(final_content.split())
 
-    # Now, write everything to the actual file
     try:
         with open(output_file_path, "w", encoding=encoding) as outfile:
             if show_token_count:
-                # Add the token count at the top of the file as requested
                 outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
-
-            # Write the main content
             outfile.write(final_content)
     except IOError as e:
         print(f"\nError: Could not write to output file '{output_file_path}': {e}")
         return
 
-    # Final console output for user feedback
     if mode == ProjectMode.SEARCH:
         if files_to_process:
-            print("\nSuccess! Collation complete.")
-    else: # Filter mode has its own messaging pattern
+            print("Success! Collation complete.")
+    else:
         print(f"\nProcess complete. Output written to: {output_file_path}")
         if len(files_to_process) > 0:
             print(
@@ -609,22 +689,34 @@ def filter_and_append_content(
         else None
     )
     files_to_process: List[FileToProcess] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        orig_dirnames = list(dirnames)
-        dirnames[:] = []
-        for d_name in orig_dirnames:
-            dir_abs_path = current_dir_path / d_name
-            if _should_include_entry(dir_abs_path, root_dir, criteria, is_dir=True):
-                dirnames.append(d_name)
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            if _should_include_entry(file_abs_path, root_dir, criteria, is_dir=False):
-                files_to_process.append(
-                    FileToProcess(
-                        file_abs_path, file_abs_path.relative_to(root_dir).as_posix()
-                    )
+
+    with tqdm(desc="Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+
+            current_dir_path = Path(dirpath_str)
+            orig_dirnames = list(dirnames)
+            dirnames[:] = [
+                d
+                for d in orig_dirnames
+                if _should_include_entry(
+                    current_dir_path / d, root_dir, criteria, is_dir=True
                 )
+            ]
+
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                if _should_include_entry(
+                    file_abs_path, root_dir, criteria, is_dir=False
+                ):
+                    files_to_process.append(
+                        FileToProcess(
+                            file_abs_path,
+                            file_abs_path.relative_to(root_dir).as_posix(),
+                        )
+                    )
+
     files_to_process.sort(key=lambda f_info: f_info.relative_path_posix.lower())
     _collate_content_to_file(
         output_file_path_str,
@@ -654,7 +746,7 @@ def search_and_collate_content(
     show_token_count: bool,
     show_tree_stats: bool,
 ) -> None:
-    """SEARCH MODE: Scans for files that match a substring in their path/name or content."""
+    """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
     criteria = FilterCriteria.normalize_inputs(
         file_extensions_to_check,
         None,
@@ -670,41 +762,52 @@
         print("Error: Search mode requires 'search_keywords' to be provided.")
         return
 
-    print("Phase 1: Finding all matching files...")
     if criteria.ignore_path_components:
         print(
-            f"Ignoring directories and files containing: {', '.join(criteria.ignore_path_components)}"
+            f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
        )
 
     candidate_files: List[Path] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        # Prune directories based on ignore criteria
-        dirnames[:] = [
-            d
-            for d in dirnames
-            if (current_dir_path / d).name.lower()
-            not in criteria.ignore_path_components
-        ]
 
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            # Also ignore individual files based on path components
-            try:
-                relative_parts = file_abs_path.relative_to(root_dir).parts
-                if any(
-                    part.lower() in criteria.ignore_path_components
-                    for part in relative_parts
-                ):
+    with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+            current_dir_path = Path(dirpath_str)
+            dirnames[:] = [
+                d
+                for d in dirnames
+                if (current_dir_path / d).name.lower()
+                not in criteria.ignore_path_components
+            ]
+
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                try:
+                    relative_parts = file_abs_path.relative_to(root_dir).parts
+                    if any(
+                        part.lower() in criteria.ignore_path_components
+                        for part in relative_parts
+                    ):
+                        continue
+                except ValueError:
                     continue
-            except ValueError:
-                continue
 
-            if (
-                not criteria.file_extensions
-                or file_abs_path.suffix.lower() in criteria.file_extensions
-            ):
-                candidate_files.append(file_abs_path)
+                if (
+                    not criteria.file_extensions
+                    or file_abs_path.suffix.lower() in criteria.file_extensions
+                ):
+                    candidate_files.append(file_abs_path)
+
+    print(f"Discovered {len(candidate_files)} candidate files to process.")
+
+    if not candidate_files:
+        print(
+            "\nScan complete. No files matched the initial criteria (extensions and ignores)."
+        )
+        with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
+            f_out.write("No files found matching the specified criteria.\n")
+        return
 
     matched_files: Set[Path] = set()
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -718,30 +821,34 @@ def search_and_collate_content(
             ): file
             for file in candidate_files
         }
+
+        progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
         progress_bar = tqdm(
             as_completed(future_to_file),
-            total=len(candidate_files),
+            total=len(future_to_file),
             unit="file",
-            desc="Scanning",
+            desc=progress_bar_desc,
         )
+
        for future in progress_bar:
            result = future.result()
            if result:
                matched_files.add(result)
 
     if not matched_files:
-        print("\nScan complete. No matching files were found.")
-        # Still create the output file with a "not found" message
+        print(
+            "\nScan complete. No matching files were found after processing keywords."
+        )
         with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
-            f_out.write("No files found matching the specified criteria.\n")
+            f_out.write("No files found matching the specified search keywords.\n")
         return
 
     sorted_matched_files = sorted(
         list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
     )
 
-    print(f"\nPhase 1 Complete: Found {len(sorted_matched_files)} matching files.")
-    print(f"\nPhase 2: Generating output file at '{Path(output_file).resolve()}'...")
+    print(f"Found {len(sorted_matched_files)} matching files.")
+    print(f"Generating output file at '{Path(output_file).resolve()}'...")
 
     tree_content_lines = _generate_tree_from_paths(
         root_dir, sorted_matched_files, tree_style, show_tree_stats
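Aside (not part of the diff): Phase 2 above submits each candidate file to a ThreadPoolExecutor and wraps as_completed() in a progress bar. The sketch below shows the same submit/as_completed shape with a hypothetical stand-in worker instead of process_file_for_search; it is illustrative only.

    # Illustrative sketch only -- executor fan-out with results gathered as they finish.
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Optional

    def check(item: int) -> Optional[int]:
        # Stand-in for the real per-file keyword check.
        return item if item % 3 == 0 else None

    items = list(range(20))
    matches = set()
    with ThreadPoolExecutor(max_workers=4) as executor:
        future_to_item = {executor.submit(check, i): i for i in items}
        # tqdm (or the fallback above) can wrap as_completed() to show progress.
        for future in as_completed(future_to_item):
            result = future.result()
            if result is not None:
                matches.add(result)
    print(sorted(matches))  # -> [0, 3, 6, 9, 12, 15, 18]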
@@ -750,6 +857,7 @@ def search_and_collate_content(
         FileToProcess(f, f.relative_to(root_dir).as_posix())
         for f in sorted_matched_files
     ]
+
     _collate_content_to_file(
         output_file,
         tree_content_lines,
@@ -999,7 +1107,6 @@ __all__ = [
 ]
 
 if __name__ == "__main__":
-    # --- Example: Scan with Custom Filters and the New Readable Stats ---
     print("\n--- Running a custom filter scan with new stats format ---")
     filter_project(
         root_dir_param=".",
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dirshot
-Version: 0.1.2
+Version: 0.1.3
 Summary: A flexible utility for creating project snapshots and searching for files.
 Author-email: init-helpful <init.helpful@gmail.com>
 Project-URL: Homepage, https://github.com/init-helpful/dirshot
5 files without changes