PyPI - dirshot - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

dirshot-0.1.1-py3-none-any.whl → dirshot-0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

dirshot/dirshot.py +275 -88
{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/METADATA +1 -1
dirshot-0.1.3.dist-info/RECORD +7 -0
dirshot-0.1.1.dist-info/RECORD +0 -7
{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/WHEEL +0 -0
{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/top_level.txt +0 -0

dirshot/dirshot.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import sys
 import re
+import time
 from pathlib import Path
 from dataclasses import dataclass, field
 from typing import List, Optional, Set, Tuple, Callable, NamedTuple, Dict, Any
@@ -13,21 +14,81 @@ try:
     from tqdm import tqdm
 except ImportError:
+    # Define a functional fallback dummy tqdm class if the import fails.
     class tqdm:
-        def __init__(self, iterable=None, **kwargs):
+        """A simple, text-based progress bar fallback if tqdm is not installed."""
+        def __init__(
+            self, iterable=None, total=None, desc="", unit="it", postfix=None, **kwargs
+        ):
             self.iterable = iterable
+            self.total = (
+                total
+                if total is not None
+                else (len(iterable) if hasattr(iterable, "__len__") else None)
+            )
+            self.desc = desc
+            self.unit = unit
+            self.current = 0
+            self.start_time = time.time()
+            self._last_update_time = 0
+            self._postfix = postfix or {}
         def __iter__(self):
-            return iter(self.iterable)
+            if self.iterable is None:
+                raise TypeError("tqdm fallback must be initialized with an iterable.")
+            for obj in self.iterable:
+                yield obj
+                self.update(1)
+            self.close()
         def update(self, n=1):
-            pass
-        def set_description(self, desc):
-            pass
+            """Update the progress bar by n steps."""
+            self.current += n
+            now = time.time()
+            if (
+                self.total is None
+                or now - self._last_update_time > 0.1
+                or self.current == self.total
+            ):
+                self._last_update_time = now
+                self._draw()
+        def set_description(self, desc: str):
+            """Set the description of the progress bar."""
+            self.desc = desc
+            self._draw()
+        def set_postfix_str(self, s: str):
+            self._postfix["info"] = s
+            self._draw()
+        def _draw(self):
+            """Draw the progress bar to the console."""
+            postfix_str = ", ".join([f"{k}={v}" for k, v in self._postfix.items()])
+            if self.total and self.total > 0:
+                percent = int((self.current / self.total) * 100)
+                bar_length = 25
+                filled_length = int(bar_length * self.current // self.total)
+                bar = "█" * filled_length + "-" * (bar_length - filled_length)
+                progress_line = f"\r{self.desc}: {percent}%|{bar}| {self.current}/{self.total} [{self.unit}]"
+            else:  # Case where total is not known
+                progress_line = f"\r{self.desc}: {self.current} {self.unit}"
+            if postfix_str:
+                progress_line += f" [{postfix_str}]"
+            # Pad with spaces to clear previous, longer lines
+            terminal_width = 80
+            sys.stdout.write(progress_line.ljust(terminal_width))
+            sys.stdout.flush()
         def close(self):
-            pass
+            """Clean up the progress bar line."""
+            # Print a newline to move off the progress bar line
+            sys.stdout.write("\n")
+            sys.stdout.flush()
 # --- Configuration Constants ---
@@ -38,6 +99,72 @@ TREE_HEADER_TEXT = "Project File Structure"
 FILE_HEADER_PREFIX = "FILE: "
 TOKEN_APPROX_MODE = "CHAR_COUNT"
+# List of binary file extensions to skip during content search
+BINARY_FILE_EXTENSIONS = {
+    # Images
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".bmp",
+    ".ico",
+    ".tiff",
+    ".webp",
+    # Documents
+    ".pdf",
+    ".doc",
+    ".docx",
+    ".xls",
+    ".xlsx",
+    ".ppt",
+    ".pptx",
+    ".odt",
+    ".ods",
+    # Archives
+    ".zip",
+    ".gz",
+    ".tar",
+    ".rar",
+    ".7z",
+    ".bz2",
+    ".xz",
+    # Executables & Binaries
+    ".exe",
+    ".dll",
+    ".so",
+    ".o",
+    ".a",
+    ".lib",
+    ".bin",
+    ".dat",
+    ".db",
+    ".sqlite",
+    ".img",
+    ".iso",
+    # Compiled Code
+    ".class",
+    ".jar",
+    ".war",
+    ".pyc",
+    ".pyo",
+    # Audio/Video
+    ".mp3",
+    ".wav",
+    ".flac",
+    ".ogg",
+    ".mp4",
+    ".mkv",
+    ".avi",
+    ".mov",
+    ".wmv",
+    # Fonts
+    ".ttf",
+    ".otf",
+    ".woff",
+    ".woff2",
+}
 # --- Public Enums for Import and Usage ---
@@ -223,17 +350,25 @@ def process_file_for_search(
     search_file_contents: bool,
     full_path_compare: bool,
 ) -> Optional[Path]:
+    """
+    Checks a single file for keyword matches. Skips content search for binary files.
+    """
     compare_target = str(file_path) if full_path_compare else file_path.name
     if any(key in compare_target.lower() for key in normalized_keywords):
         return file_path
     if search_file_contents:
+        # Before reading content, check if it's a known binary file type
+        if file_path.suffix.lower() in BINARY_FILE_EXTENSIONS:
+            return None  # Do not attempt to read binary file content
         try:
             with open(str(file_path), "r", encoding="utf-8", errors="ignore") as f:
                 for line in f:
                     if any(key in line.lower() for key in normalized_keywords):
                         return file_path
         except (IOError, OSError):
-            pass
+            pass  # Ignore files that can't be opened
     return None
@@ -437,13 +572,12 @@ def _collate_content_to_file(
 ) -> None:
     """
     Collates content to a string buffer, calculates token count,
-    and then writes to the output file.
+    and then writes to the output file with a progress bar.
     """
     output_file_path = Path(output_file_path_str).resolve()
     output_file_path.parent.mkdir(parents=True, exist_ok=True)
     separator_line = separator_char * separator_line_len
-    # Use an in-memory buffer to build the output first
     buffer = StringIO()
     if tree_content_lines:
@@ -455,37 +589,41 @@ def _collate_content_to_file(
                     "Key: [I: Included f/d | T: Total f/d in original dir]\n"
                     "     (f=files, d=directories)\n\n"
                 )
-            else:  # ProjectMode.SEARCH
+            else:
                 stats_key = (
-                    "Key: [M: Matched files/dirs]\n"
-                    "     (f=files, d=directories)\n\n"
+                    "Key: [M: Matched files/dirs]\n" "     (f=files, d=directories)\n\n"
                 )
             buffer.write(stats_key)
-        tree_content = "\n".join(tree_content_lines)
-        buffer.write(tree_content + "\n")
+        buffer.write("\n".join(tree_content_lines) + "\n")
         buffer.write(f"\n{separator_line}\n\n")
-    for file_info in files_to_process:
-        header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
-        buffer.write(header_content)
-        try:
-            with open(
-                file_info.absolute_path, "r", encoding=encoding, errors="replace"
-            ) as infile:
-                file_content = infile.read()
-                buffer.write(file_content)
-            buffer.write("\n\n")
-        except Exception:
-            buffer.write(
-                f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
-            )
-    if not files_to_process and not tree_content_lines:
-        buffer.write(
-            "No files found matching the specified criteria for content aggregation.\n"
+    if not files_to_process:
+        message = (
+            "No files found matching the specified criteria.\n"
+            if mode == ProjectMode.SEARCH
+            else "No files found matching specified criteria for content aggregation.\n"
         )
+        buffer.write(message)
+    else:
+        collation_bar = tqdm(
+            files_to_process, desc="Phase 3: Collating files", unit="file", leave=False
+        )
+        for file_info in collation_bar:
+            collation_bar.set_postfix_str(file_info.relative_path_posix, refresh=True)
+            header_content = f"{separator_line}\n{FILE_HEADER_PREFIX}{file_info.relative_path_posix}\n{separator_line}\n\n"
+            buffer.write(header_content)
+            try:
+                with open(
+                    file_info.absolute_path, "r", encoding=encoding, errors="replace"
+                ) as infile:
+                    buffer.write(infile.read())
+                buffer.write("\n\n")
+            except Exception:
+                buffer.write(
+                    f"Error: Could not read file '{file_info.relative_path_posix}'.\n\n"
+                )
-    # Get the complete content from the buffer
     final_content = buffer.getvalue()
     total_token_count = 0
     mode_display = "Characters" if TOKEN_APPROX_MODE == "CHAR_COUNT" else "Words"
@@ -496,27 +634,27 @@ def _collate_content_to_file(
         elif TOKEN_APPROX_MODE == "WORD_COUNT":
             total_token_count = len(final_content.split())
-    # Now, write everything to the actual file
     try:
         with open(output_file_path, "w", encoding=encoding) as outfile:
             if show_token_count:
-                # Add the token count at the top of the file as requested
                 outfile.write(f"Token Count ({mode_display}): {total_token_count}\n\n")
-            # Write the main content
             outfile.write(final_content)
     except IOError as e:
-        print(f"Error: Could not write to output file '{output_file_path}': {e}")
+        print(f"\nError: Could not write to output file '{output_file_path}': {e}")
         return
-    # Final console output remains for user feedback
-    print(f"\nProcess complete. Output written to: {output_file_path}")
+    if mode == ProjectMode.SEARCH:
+        if files_to_process:
+            print("Success! Collation complete.")
+    else:
+        print(f"\nProcess complete. Output written to: {output_file_path}")
+        if len(files_to_process) > 0:
+            print(
+                f"Summary: {len(files_to_process)} files selected for content processing."
+            )
     if show_token_count:
         print(f"Total Approximated Tokens ({mode_display}): {total_token_count}")
-    if len(files_to_process) > 0:
-        print(
-            f"Summary: {len(files_to_process)} files selected for content processing."
-        )
 def filter_and_append_content(
@@ -551,22 +689,34 @@ def filter_and_append_content(
         else None
     )
     files_to_process: List[FileToProcess] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        orig_dirnames = list(dirnames)
-        dirnames[:] = []
-        for d_name in orig_dirnames:
-            dir_abs_path = current_dir_path / d_name
-            if _should_include_entry(dir_abs_path, root_dir, criteria, is_dir=True):
-                dirnames.append(d_name)
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            if _should_include_entry(file_abs_path, root_dir, criteria, is_dir=False):
-                files_to_process.append(
-                    FileToProcess(
-                        file_abs_path, file_abs_path.relative_to(root_dir).as_posix()
-                    )
+    with tqdm(desc="Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+            current_dir_path = Path(dirpath_str)
+            orig_dirnames = list(dirnames)
+            dirnames[:] = [
+                d
+                for d in orig_dirnames
+                if _should_include_entry(
+                    current_dir_path / d, root_dir, criteria, is_dir=True
                 )
+            ]
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                if _should_include_entry(
+                    file_abs_path, root_dir, criteria, is_dir=False
+                ):
+                    files_to_process.append(
+                        FileToProcess(
+                            file_abs_path,
+                            file_abs_path.relative_to(root_dir).as_posix(),
+                        )
+                    )
     files_to_process.sort(key=lambda f_info: f_info.relative_path_posix.lower())
     _collate_content_to_file(
         output_file_path_str,
@@ -596,7 +746,7 @@ def search_and_collate_content(
     show_token_count: bool,
     show_tree_stats: bool,
 ) -> None:
-    """SEARCH MODE: Scans for files that match a substring in their path/name or content."""
+    """SEARCH MODE: Scans for files that match a substring with multi-phase progress bars."""
     criteria = FilterCriteria.normalize_inputs(
         file_extensions_to_check,
         None,
@@ -611,19 +761,54 @@ def search_and_collate_content(
     if not normalized_keywords:
         print("Error: Search mode requires 'search_keywords' to be provided.")
         return
+    if criteria.ignore_path_components:
+        print(
+            f"Ignoring directories containing: {', '.join(sorted(list(criteria.ignore_path_components)))}"
+        )
     candidate_files: List[Path] = []
-    for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
-        current_dir_path = Path(dirpath_str)
-        dirnames[:] = [
-            d for d in dirnames if d.lower() not in criteria.ignore_path_components
-        ]
-        for filename in filenames:
-            file_abs_path = current_dir_path / filename
-            if (
-                file_abs_path.suffix.lower() in criteria.file_extensions
-                or not criteria.file_extensions
-            ):
-                candidate_files.append(file_abs_path)
+    with tqdm(desc="Phase 1: Discovering files", unit="dir") as discovery_bar:
+        for dirpath_str, dirnames, filenames in os.walk(str(root_dir), topdown=True):
+            discovery_bar.update(1)
+            discovery_bar.set_postfix_str(os.path.basename(dirpath_str), refresh=True)
+            current_dir_path = Path(dirpath_str)
+            dirnames[:] = [
+                d
+                for d in dirnames
+                if (current_dir_path / d).name.lower()
+                not in criteria.ignore_path_components
+            ]
+            for filename in filenames:
+                file_abs_path = current_dir_path / filename
+                try:
+                    relative_parts = file_abs_path.relative_to(root_dir).parts
+                    if any(
+                        part.lower() in criteria.ignore_path_components
+                        for part in relative_parts
+                    ):
+                        continue
+                except ValueError:
+                    continue
+                if (
+                    not criteria.file_extensions
+                    or file_abs_path.suffix.lower() in criteria.file_extensions
+                ):
+                    candidate_files.append(file_abs_path)
+    print(f"Discovered {len(candidate_files)} candidate files to process.")
+    if not candidate_files:
+        print(
+            "\nScan complete. No files matched the initial criteria (extensions and ignores)."
+        )
+        with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
+            f_out.write("No files found matching the specified criteria.\n")
+        return
     matched_files: Set[Path] = set()
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         future_to_file = {
@@ -636,33 +821,35 @@ def search_and_collate_content(
             ): file
             for file in candidate_files
         }
+        progress_bar_desc = f"Phase 2: Processing {len(candidate_files)} files"
         progress_bar = tqdm(
             as_completed(future_to_file),
-            total=len(candidate_files),
+            total=len(future_to_file),
             unit="file",
-            desc="Scanning",
+            desc=progress_bar_desc,
         )
         for future in progress_bar:
             result = future.result()
             if result:
                 matched_files.add(result)
     if not matched_files:
-        print("\nScan complete. No matching files were found.")
-        _collate_content_to_file(
-            output_file,
-            None,
-            [],
-            DEFAULT_ENCODING,
-            DEFAULT_SEPARATOR_CHAR,
-            DEFAULT_SEPARATOR_LINE_LENGTH,
-            show_token_count,
-            show_tree_stats,
-            ProjectMode.SEARCH,
+        print(
+            "\nScan complete. No matching files were found after processing keywords."
         )
+        with open(output_file, "w", encoding=DEFAULT_ENCODING) as f_out:
+            f_out.write("No files found matching the specified search keywords.\n")
         return
     sorted_matched_files = sorted(
         list(matched_files), key=lambda p: p.relative_to(root_dir).as_posix().lower()
     )
+    print(f"Found {len(sorted_matched_files)} matching files.")
+    print(f"Generating output file at '{Path(output_file).resolve()}'...")
     tree_content_lines = _generate_tree_from_paths(
         root_dir, sorted_matched_files, tree_style, show_tree_stats
     )
@@ -670,6 +857,7 @@ def search_and_collate_content(
         FileToProcess(f, f.relative_to(root_dir).as_posix())
         for f in sorted_matched_files
     ]
     _collate_content_to_file(
         output_file,
         tree_content_lines,
@@ -919,7 +1107,6 @@ __all__ = [
 ]
 if __name__ == "__main__":
-    # --- Example: Scan with Custom Filters and the New Readable Stats ---
     print("\n--- Running a custom filter scan with new stats format ---")
     filter_project(
         root_dir_param=".",
@@ -928,4 +1115,4 @@ if __name__ == "__main__":
         ignore_dirs_in_path=["venv", "build", "node_modules", "static", "templates"],
         show_tree_stats=True,
         show_token_count=True,
-    )
+    )

{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dirshot
-Version: 0.1.1
+Version: 0.1.3
 Summary: A flexible utility for creating project snapshots and searching for files.
 Author-email: init-helpful <init.helpful@gmail.com>
 Project-URL: Homepage, https://github.com/init-helpful/dirshot

dirshot-0.1.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
+dirshot/dirshot.py,sha256=2zx4ghzYi5Rsh-C0maHATapF2ArremgRLFWJlWlRu34,40365
+dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
+dirshot-0.1.3.dist-info/METADATA,sha256=9mdpQmEFer0rY-kineW0bSU2OZHHYo7FNw1eDCZ_M4I,4172
+dirshot-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dirshot-0.1.3.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
+dirshot-0.1.3.dist-info/RECORD,,

dirshot-0.1.1.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-dirshot/__init__.py,sha256=ss4HC5VTyD9j6GFGCLMU6VxPlXy0qaGFzXlZB3_d2WM,403
-dirshot/dirshot.py,sha256=ItCwC4BsSbPzBLlHddiFlYsqdB3Hh3PEpwN89EuplIc,34693
-dirshot/examples.py,sha256=q--iNqxmA4xX8nyXYdOP-HPsqzpLHBFo1PTseQ9ki7M,2344
-dirshot-0.1.1.dist-info/METADATA,sha256=z72qXvnkUFizL4qkdXEXF6QWu3yZs28szf9wuaru4kI,4172
-dirshot-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dirshot-0.1.1.dist-info/top_level.txt,sha256=ROGW8gTcmwJ2jJ1Fp7TV1REZLRUGbL3L-Lfoy8tPxOA,8
-dirshot-0.1.1.dist-info/RECORD,,

{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{dirshot-0.1.1.dist-info → dirshot-0.1.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

dirshot-0.1.1-py3-none-any.whl → dirshot-0.1.3-py3-none-any.whl

dirshot-0.1.1-py3-none-any.whl → dirshot-0.1.3-py3-none-any.whl