PyPI - code-to-txt - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

code-to-txt: 0.2.0 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

code_to_txt/.extensions +34 -0
code_to_txt/.ignore +20 -0
code_to_txt/__init__.py +1 -0
code_to_txt/cli.py +113 -96
code_to_txt/code_to_txt.py +190 -120
code_to_txt/config.py +13 -7
code_to_txt/utils.py +13 -0
{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/METADATA +93 -67
code_to_txt-0.3.0.dist-info/RECORD +12 -0
code_to_txt-0.2.0.dist-info/RECORD +0 -9
{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/WHEEL +0 -0
{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/entry_points.txt +0 -0
{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/licenses/LICENSE +0 -0

code_to_txt/.extensions ADDED Viewed

@@ -0,0 +1,34 @@
+.py
+.js
+.ts
+.jsx
+.tsx
+.java
+.c
+.cpp
+.h
+.hpp
+.cs
+.go
+.rs
+.rb
+.php
+.swift
+.kt
+.scala
+.r
+.sql
+.sh
+.bash
+.zsh
+.yaml
+.yml
+.json
+.toml
+.xml
+.html
+.css
+.scss
+.md
+.txt
+.rst

code_to_txt/.ignore ADDED Viewed

@@ -0,0 +1,20 @@
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.git
+.svn
+.hg
+node_modules
+.venv
+venv
+.env
+*.egg-info
+dist
+build
+.pytest_cache
+.mypy_cache
+.ruff_cache
+*.so
+*.dylib
+*.dll

code_to_txt/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from .code_to_txt import CodeToText
+__version__ = "0.3.0"
 __all__ = ["CodeToText"]

code_to_txt/cli.py CHANGED Viewed

@@ -4,96 +4,81 @@ from pathlib import Path
 import click
 import pyperclip
+from . import __version__
 from .code_to_txt import CodeToText
 from .config import create_default_config, load_config
+def display_statistics(stats: dict) -> None:
+    """Display statistics about the codebase."""
+    divider = "=" * 60
+    click.echo(f"\n{divider}")
+    click.echo("CODEBASE STATISTICS")
+    click.echo(divider)
+    click.echo(f"Total files: {stats['total_files']}")
+    click.echo(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
+    click.echo(f"Total lines: {stats['total_lines']:,}")
+    if stats["skipped_files"] > 0:
+        click.echo(f"Skipped files: {stats['skipped_files']}")
+    click.echo("\nFiles by extension:")
+    by_ext = sorted(stats["by_extension"].items(), key=lambda x: x[1]["count"], reverse=True)
+    for ext, data in by_ext[:10]:
+        size_mb = data["size"] / 1024 / 1024
+        click.echo(f"  {ext:15} {data['count']:5} files  {size_mb:8.2f} MB")
+    if len(by_ext) > 10:
+        click.echo(f"  ... and {len(by_ext) - 10} more extensions")
+    if stats["largest_files"]:
+        click.echo("\nLargest files:")
+        for file_info in stats["largest_files"][:5]:
+            click.echo(f"  {file_info['size_kb']:8.2f} KB  {file_info['path']}")
+    click.echo(f"{divider}\n")
 @click.command()
 @click.argument("path", type=click.Path(exists=True), default=".")
-@click.option(
-    "-o",
-    "--output",
-    default=None,
-    help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)",
-    type=click.Path(),
-)
-@click.option(
-    "-e",
-    "--extensions",
-    default=None,
-    help="File extensions to include. Space-separated list (e.g., '.py .js .ts') or comma-separated (e.g., '.py,.js,.ts')",
-)
-@click.option(
-    "-x",
-    "--exclude",
-    multiple=True,
-    help="Patterns to exclude (gitignore style). Can be specified multiple times.",
-)
-@click.option(
-    "-g",
-    "--glob",
-    multiple=True,
-    help="Glob patterns to include (e.g., '*.py' 'src/**/*.js'). Can be specified multiple times.",
-)
-@click.option(
-    "--no-gitignore",
-    is_flag=True,
-    help="Don't respect .gitignore files",
-)
-@click.option(
-    "--no-tree",
-    is_flag=True,
-    help="Don't include directory tree in output",
-)
-@click.option(
-    "--separator",
-    default="=" * 80,
-    help="Separator between files",
-)
-@click.option(
-    "--clipboard",
-    "-c",
-    is_flag=True,
-    help="Copy output to clipboard in addition to saving to file",
-)
-@click.option(
-    "--clipboard-only",
-    is_flag=True,
-    help="Copy output to clipboard only (don't save to file)",
-)
-@click.option(
-    "--config",
-    type=click.Path(exists=True),
-    help="Path to config file (.yml or .yaml)",
-)
-@click.option(
-    "--init-config",
-    is_flag=True,
-    help="Create default configuration file (.code-to-txt.yml)",
-)
-@click.option(
-    "--timestamp",
-    "-t",
-    is_flag=True,
-    help="Add timestamp to output filename",
-)
+@click.option("-o", "--output", default=None, type=click.Path(),
+              help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)")
+@click.option("-e", "--extensions", default=None, help="File extensions to include (space or comma separated)")
+@click.option("-x", "--exclude", multiple=True, help="Patterns to exclude (can be used multiple times)")
+@click.option("-g", "--glob", multiple=True, help="Glob patterns to include (can be used multiple times)")
+@click.option("--no-gitignore", is_flag=True, help="Don't respect .gitignore files")
+@click.option("--no-tree", is_flag=True, help="Don't include directory tree in output")
+@click.option("--separator", default="=" * 80, help="Separator between files")
+@click.option("-c", "--clipboard", is_flag=True, help="Copy output to clipboard in addition to file")
+@click.option("--clipboard-only", is_flag=True, help="Copy to clipboard only (don't save file)")
+@click.option("--config", type=click.Path(exists=True), help="Path to config file (.yml or .yaml)")
+@click.option("--init-config", is_flag=True, help="Create default configuration file")
+@click.option("-t", "--timestamp", is_flag=True, help="Add timestamp to output filename")
+@click.option("-v", "--version", is_flag=True, help="Show version and exit")
+@click.option("--dry-run", is_flag=True, help="Show which files would be processed without creating output")
+@click.option("--stats", is_flag=True, help="Show detailed statistics about the codebase")
+@click.option("--max-file-size", type=int, default=None, help="Skip files larger than N KB")
 def main(
-    path: str,
-    output: str | None,
-    extensions: str | None,
-    exclude: tuple[str, ...],
-    glob: tuple[str, ...],
-    no_gitignore: bool,
-    no_tree: bool,
-    separator: str,
-    clipboard: bool,
-    clipboard_only: bool,
-    config: str | None,
-    init_config: bool,
-    timestamp: bool,
+        path: str,
+        output: str | None,
+        extensions: str | None,
+        exclude: tuple[str, ...],
+        glob: tuple[str, ...],
+        no_gitignore: bool,
+        no_tree: bool,
+        separator: str,
+        clipboard: bool,
+        clipboard_only: bool,
+        config: str | None,
+        init_config: bool,
+        timestamp: bool,
+        version: bool,
+        dry_run: bool,
+        stats: bool,
+        max_file_size: int | None,
 ) -> None:
     """
-    Convert code files to a single text file for easy LLM consumption.
+    Convert code files to a single text file for LLM consumption.
     PATH: Directory to scan (default: current directory)
@@ -127,13 +112,14 @@ def main(
         # Use config file
         code-to-txt --config .code-to-txt.yml
     """
+    if version:
+        click.echo(f"v{__version__}")
+        return
     if init_config:
         config_path = Path(".code-to-txt.yml")
         if config_path.exists():
-            click.confirm(
-                f"Config file {config_path} already exists. Overwrite?",
-                abort=True,
-            )
+            click.confirm(f"Config file {config_path} already exists. Overwrite?", abort=True)
         create_default_config(config_path)
         click.echo(f"Created default config file: {config_path}")
         click.echo("You can now edit this file and use it with --config flag")
@@ -155,6 +141,7 @@ def main(
     clipboard = clipboard or config_data.get("clipboard", False)
     clipboard_only = clipboard_only or config_data.get("clipboard_only", False)
     timestamp = timestamp or config_data.get("timestamp", False)
+    max_file_size = max_file_size or config_data.get("max_file_size")
     if not output or timestamp:
         timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -183,30 +170,54 @@ def main(
             if ext:
                 include_extensions.add(ext)
-    codetotxt = CodeToText(
+    code_to_txt = CodeToText(
         root_path=path,
-        output_file=output if not clipboard_only else None,
+        output_file=output if not clipboard_only and not dry_run and not stats else None,
         include_extensions=include_extensions,
         exclude_patterns=list(exclude),
         glob_patterns=list(glob_patterns),
         gitignore=not no_gitignore,
+        max_file_size_kb=max_file_size,
     )
     try:
+        if stats or dry_run:
+            statistics = code_to_txt.calculate_statistics()
+            display_statistics(statistics)
+            if stats:
+                return
+            if dry_run:
+                files = code_to_txt._collect_files()
+                click.echo("Files that would be processed:")
+                for i, file_path in enumerate(files, 1):
+                    relative_path = file_path.relative_to(Path(path).resolve())
+                    size_kb = file_path.stat().st_size / 1024
+                    click.echo(f"  {i:4}. {relative_path} ({size_kb:.1f} KB)")
+                if code_to_txt.skipped_files:
+                    click.echo(f"\nSkipped {len(code_to_txt.skipped_files)} files:")
+                    for file_path, reason in code_to_txt.skipped_files[:20]:
+                        relative_path = file_path.relative_to(Path(path).resolve())
+                        click.echo(f"  - {relative_path} ({reason})")
+                    if len(code_to_txt.skipped_files) > 20:
+                        click.echo(f"  ... and {len(code_to_txt.skipped_files) - 20} more")
+            if not stats or dry_run:
+                return
         if clipboard_only:
-            content = codetotxt.generate_content(
-                add_tree=not no_tree,
-                separator=separator,
-            )
+            content = code_to_txt.generate_content(add_tree=not no_tree, separator=separator)
             pyperclip.copy(content)
             click.echo("Content copied to clipboard")
-            click.echo(f"Processed {codetotxt.file_count} files")
+            click.echo(f"Processed {code_to_txt.file_count} files")
             click.echo(f"Content size: {len(content) / 1024:.2f} KB")
+            estimated_tokens = len(content) / 4
+            click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
         else:
-            num_files = codetotxt.convert(
-                add_tree=not no_tree,
-                separator=separator,
-            )
+            num_files = code_to_txt.convert(add_tree=not no_tree, separator=separator)
             output_path = Path(output).resolve()
             click.echo(f"Successfully processed {num_files} files")
@@ -215,11 +226,17 @@ def main(
             size_kb = output_path.stat().st_size / 1024
             click.echo(f"File size: {size_kb:.2f} KB")
+            estimated_tokens = size_kb * 1024 / 4
+            click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
             if clipboard:
                 content = output_path.read_text(encoding="utf-8")
                 pyperclip.copy(content)
                 click.echo("Content also copied to clipboard")
+            if code_to_txt.skipped_files:
+                click.echo(f"\nNote: Skipped {len(code_to_txt.skipped_files)} files (use --dry-run to see details)")
     except Exception as e:
         click.echo(f"Error: {e}", err=True)
         raise click.Abort()

code_to_txt/code_to_txt.py CHANGED Viewed

@@ -6,41 +6,11 @@ from typing import Any
 import pathspec
 from pathspec import PathSpec
+from .utils import load_patterns_from_file
 class CodeToText:
-    DEFAULT_IGNORE = {
-        "__pycache__",
-        "*.pyc",
-        "*.pyo",
-        "*.pyd",
-        ".git",
-        ".svn",
-        ".hg",
-        "node_modules",
-        ".venv",
-        "venv",
-        ".env",
-        "*.egg-info",
-        "dist",
-        "build",
-        ".pytest_cache",
-        ".mypy_cache",
-        ".ruff_cache",
-        "*.so",
-        "*.dylib",
-        "*.dll",
-    }
-    DEFAULT_EXTENSIONS = {
-        ".py", ".js", ".ts", ".jsx", ".tsx",
-        ".java", ".c", ".cpp", ".h", ".hpp",
-        ".cs", ".go", ".rs", ".rb", ".php",
-        ".swift", ".kt", ".scala", ".r",
-        ".sql", ".sh", ".bash", ".zsh",
-        ".yaml", ".yml", ".json", ".toml",
-        ".xml", ".html", ".css", ".scss",
-        ".md", ".txt", ".rst",
-    }
+    """Convert code files to a single text file for LLM consumption."""
     def __init__(
             self,
@@ -50,9 +20,10 @@ class CodeToText:
             exclude_patterns: list[str] | None = None,
             glob_patterns: list[str] | None = None,
             gitignore: bool = True,
+            max_file_size_kb: int | None = None,
     ):
         """
-        Initialize the instance of CodeToText.
+        Initialize CodeToText instance.
         Args:
             root_path: Root directory to scan
@@ -61,89 +32,188 @@ class CodeToText:
             exclude_patterns: List of patterns to exclude (gitignore style)
             glob_patterns: List of glob patterns to include (e.g., '*.py', 'src/**/*.js')
             gitignore: Whether to respect .gitignore files
+            max_file_size_kb: Skip files larger than this size in KB
         """
         self.root_path = Path(root_path).resolve()
         self.output_file = output_file
-        self.include_extensions = include_extensions or self.DEFAULT_EXTENSIONS
-        self.exclude_patterns = exclude_patterns or []
         self.glob_patterns = glob_patterns or []
         self.gitignore = gitignore
+        self.max_file_size_kb = max_file_size_kb
         self.spec: PathSpec | None = None
         self.file_count = 0
+        self.skipped_files: list[tuple[Path, str]] = []
-        if self.gitignore:
-            self._load_gitignore()
+        config_dir = Path(__file__).parent
+        default_extensions = load_patterns_from_file(config_dir / ".extensions")
+        default_ignore = load_patterns_from_file(config_dir / ".ignore")
-    def _load_gitignore(self) -> None:
-        """Load .gitignore patterns if present."""
-        gitignore_path = self.root_path / ".gitignore"
-        patterns = list(self.DEFAULT_IGNORE)
+        self.include_extensions = include_extensions or default_extensions
+        self.exclude_patterns = exclude_patterns or []
+        self.default_ignore = default_ignore
-        if gitignore_path.exists():
-            with open(gitignore_path, encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if line and not line.startswith("#"):
-                        patterns.append(line)
+        if self.gitignore:
+            self._init_pathspec()
+    def _init_pathspec(self) -> None:
+        """Initialize pathspec from .gitignore files and default patterns."""
+        patterns = list(self.default_ignore)
+        current_path = self.root_path
+        for _ in range(5):
+            gitignore_path = current_path / ".gitignore"
+            if gitignore_path.exists():
+                try:
+                    with open(gitignore_path, encoding="utf-8") as f:
+                        for line in f:
+                            clean_line = line.strip()
+                            if clean_line and not clean_line.startswith("#"):
+                                patterns.append(clean_line)
+                except Exception:
+                    pass
+            parent = current_path.parent
+            if parent == current_path:
+                break
+            current_path = parent
         patterns.extend(self.exclude_patterns)
         self.spec = pathspec.PathSpec.from_lines("gitignore", patterns)
-    def _matches_glob_pattern(self, file_path: Path) -> bool:
+    def _check_glob_match(self, file_path: Path) -> bool:
         """Check if file matches any glob pattern."""
         if not self.glob_patterns:
             return False
         relative_path = file_path.relative_to(self.root_path)
-        relative_str = str(relative_path)
+        path_str = str(relative_path)
         for pattern in self.glob_patterns:
-            if fnmatch(relative_str, pattern):
+            if fnmatch(path_str, pattern):
                 return True
             if fnmatch(file_path.name, pattern):
                 return True
+            if fnmatch(path_str.replace(os.sep, "/"), pattern):
+                return True
         return False
-    def _should_include_file(self, file_path: Path) -> bool:
-        """Check if a file should be included."""
+    def _check_file_inclusion(self, file_path: Path) -> bool:
+        """Determine if a file should be included in the output."""
+        if self.max_file_size_kb is not None:
+            try:
+                file_size_kb = file_path.stat().st_size / 1024
+                if file_size_kb > self.max_file_size_kb:
+                    self.skipped_files.append(
+                        (file_path, f"exceeds size limit ({file_size_kb:.1f}KB)")
+                    )
+                    return False
+            except Exception:
+                pass
         if self.glob_patterns:
-            if not self._matches_glob_pattern(file_path):
-                return False
-        else:
-            if file_path.suffix not in self.include_extensions:
+            if not self._check_glob_match(file_path):
                 return False
         if self.spec:
-            relative_path = file_path.relative_to(self.root_path)
-            if self.spec.match_file(str(relative_path)):
+            try:
+                relative_path = file_path.relative_to(self.root_path)
+                relative_str = str(relative_path).replace(os.sep, "/")
+                if self.spec.match_file(relative_str):
+                    self.skipped_files.append((file_path, "matches ignore pattern"))
+                    return False
+            except ValueError:
                 return False
+        if file_path.suffix not in self.include_extensions:
+            return False
         return True
-    def _get_files(self) -> list[Path]:
-        """Get all files to process."""
+    def _collect_files(self) -> list[Path]:
+        """Collect all files to process based on filters."""
         files = []
+        self.skipped_files = []
         for root, dirs, filenames in os.walk(self.root_path):
             root_path = Path(root)
             if self.spec:
-                relative_root = root_path.relative_to(self.root_path)
-                dirs[:] = [
-                    d for d in dirs
-                    if not self.spec.match_file(str(relative_root / d))
-                ]
+                try:
+                    relative_root = root_path.relative_to(self.root_path)
+                    root_str = str(relative_root).replace(os.sep, "/") if str(relative_root) != "." else ""
+                    filtered_dirs = []
+                    for d in dirs:
+                        dir_path = f"{root_str}/{d}" if root_str else d
+                        if not self.spec.match_file(dir_path) and not self.spec.match_file(f"{dir_path}/"):
+                            filtered_dirs.append(d)
+                    dirs[:] = filtered_dirs
+                except ValueError:
+                    pass
             for filename in filenames:
                 file_path = root_path / filename
-                if self._should_include_file(file_path):
+                if self._check_file_inclusion(file_path):
                     files.append(file_path)
         return sorted(files)
+    def calculate_statistics(self) -> dict[str, Any]:
+        """
+        Calculate statistics about the codebase.
+        Returns:
+            Dictionary containing total files, size, lines, breakdown by extension, etc.
+        """
+        files = self._collect_files()
+        stats: dict = {
+            "total_files": len(files),
+            "total_size_bytes": 0,
+            "total_lines": 0,
+            "by_extension": {},
+            "skipped_files": len(self.skipped_files),
+            "largest_files": [],
+        }
+        file_sizes = []
+        for file_path in files:
+            try:
+                size = file_path.stat().st_size
+                stats["total_size_bytes"] += size
+                file_sizes.append((file_path, size))
+                try:
+                    with open(file_path, encoding="utf-8") as f:
+                        lines = sum(1 for _ in f)
+                        stats["total_lines"] += lines
+                except Exception:
+                    pass
+                ext = file_path.suffix or "(no extension)"
+                if ext not in stats["by_extension"]:
+                    stats["by_extension"][ext] = {"count": 0, "size": 0}
+                stats["by_extension"][ext]["count"] += 1
+                stats["by_extension"][ext]["size"] += size
+            except Exception:
+                pass
+        file_sizes.sort(key=lambda x: x[1], reverse=True)
+        stats["largest_files"] = [
+            {"path": str(f.relative_to(self.root_path)), "size_kb": s / 1024}
+            for f, s in file_sizes[:10]
+        ]
+        return stats
     def generate_content(self, add_tree: bool = True, separator: str = "=" * 80) -> str:
         """
-        Generate content as string (for clipboard).
+        Generate content as string without writing to file.
         Args:
             add_tree: Whether to add directory tree at the beginning
@@ -152,43 +222,43 @@ class CodeToText:
         Returns:
             Generated content as string
         """
-        files = self._get_files()
+        files = self._collect_files()
         self.file_count = len(files)
-        lines = []
-        lines.append(f"Code Export from: {self.root_path}")
-        lines.append(f"Total files: {len(files)}")
-        lines.append(separator)
-        lines.append("")
+        output_lines = []
+        output_lines.append(f"Code Export from: {self.root_path}")
+        output_lines.append(f"Total files: {len(files)}")
+        output_lines.append(separator)
+        output_lines.append("")
         if add_tree:
-            lines.append("DIRECTORY TREE:")
-            lines.append(separator)
-            lines.append(self._generate_tree())
-            lines.append("")
-            lines.append(separator)
-            lines.append("")
-        for i, file_path in enumerate(files, 1):
+            output_lines.append("DIRECTORY TREE:")
+            output_lines.append(separator)
+            output_lines.append(self._build_tree_structure())
+            output_lines.append("")
+            output_lines.append(separator)
+            output_lines.append("")
+        for idx, file_path in enumerate(files, 1):
             relative_path = file_path.relative_to(self.root_path)
-            lines.append(f"FILE {i}/{len(files)}: {relative_path}")
-            lines.append(separator)
+            output_lines.append(f"FILE {idx}/{len(files)}: {relative_path}")
+            output_lines.append(separator)
             try:
                 with open(file_path, encoding="utf-8") as f:
                     content = f.read()
-                lines.append(content)
+                output_lines.append(content)
             except UnicodeDecodeError:
-                lines.append("[Binary file - skipped]")
+                output_lines.append("[Binary file - skipped]")
             except Exception as e:
-                lines.append(f"[Error reading file: {e}]")
+                output_lines.append(f"[Error reading file: {e}]")
-            lines.append("")
-            lines.append(separator)
-            lines.append("")
+            output_lines.append("")
+            output_lines.append(separator)
+            output_lines.append("")
-        return "\n".join(lines)
+        return "\n".join(output_lines)
     def convert(self, add_tree: bool = True, separator: str = "=" * 80) -> int:
         """
@@ -211,49 +281,49 @@ class CodeToText:
         return self.file_count
-    def _generate_tree(self) -> str:
-        """Generate a directory tree representation."""
-        tree_lines = []
-        files = self._get_files()
+    def _build_tree_structure(self) -> str:
+        """Build a directory tree representation of included files."""
+        tree_output = []
+        files = self._collect_files()
         if not files:
             return "(no files to display)"
-        dir_structure: dict[str, Any] = {}
+        structure: dict[str, Any] = {}
         for file_path in files:
             relative_path = file_path.relative_to(self.root_path)
             parts = relative_path.parts
-            current = dir_structure
+            current_level = structure
             for part in parts[:-1]:
-                if part not in current:
-                    current[part] = {}
-                current = current[part]
+                if part not in current_level:
+                    current_level[part] = {}
+                current_level = current_level[part]
-            if "__files__" not in current:
-                current["__files__"] = []
-            current["__files__"].append(parts[-1])
+            if "__files__" not in current_level:
+                current_level["__files__"] = []
+            current_level["__files__"].append(parts[-1])
-        def print_tree(structure: dict[str, Any], prefix: str = "", is_last: bool = True) -> None:
-            items = []
-            for key in sorted(structure.keys()):
+        def render_tree(node: dict[str, Any], prefix: str = "", is_final: bool = True) -> None:
+            entries = []
+            for key in sorted(node.keys()):
                 if key != "__files__":
-                    items.append((key, True))  # directory
+                    entries.append((key, True))
-            if "__files__" in structure:
-                for file in sorted(structure["__files__"]):
-                    items.append((file, False))  # file
+            if "__files__" in node:
+                for file in sorted(node["__files__"]):
+                    entries.append((file, False))
-            for i, (name, is_dir) in enumerate(items):
-                is_last_item = i == len(items) - 1
-                connector = "└── " if is_last_item else "├── "
-                tree_lines.append(f"{prefix}{connector}{name}{'/' if is_dir else ''}")
+            for i, (name, is_directory) in enumerate(entries):
+                is_last_entry = i == len(entries) - 1
+                connector = "└── " if is_last_entry else "├── "
+                tree_output.append(f"{prefix}{connector}{name}{'/' if is_directory else ''}")
-                if is_dir:
-                    extension = "    " if is_last_item else "│   "
-                    print_tree(structure[name], prefix + extension, is_last_item)
+                if is_directory:
+                    extension = "    " if is_last_entry else "│   "
+                    render_tree(node[name], prefix + extension, is_last_entry)
-        tree_lines.append(f"{self.root_path.name}/")
-        print_tree(dir_structure)
+        tree_output.append(f"{self.root_path.name}/")
+        render_tree(structure)
-        return "\n".join(tree_lines)
+        return "\n".join(tree_output)

code_to_txt/config.py CHANGED Viewed

@@ -4,8 +4,8 @@ from typing import Any
 import yaml
 DEFAULT_CONFIG = {
-    "output": "code_output.txt",
-    "extensions": None,  # None means use defaults
+    "output": "code-to-txt.txt",
+    "extensions": None,
     "exclude": [
         "tests/*",
         "*.test.js",
@@ -13,13 +13,14 @@ DEFAULT_CONFIG = {
         "*.spec.js",
         "*.spec.ts",
     ],
-    "glob": [],  # e.g., ["*.py", "src/**/*.js"]
+    "glob": [],
     "no_gitignore": False,
     "no_tree": False,
     "separator": "=" * 80,
     "clipboard": False,
     "clipboard_only": False,
-    "timestamp": False,
+    "timestamp": True,
+    "max_file_size": None,
 }
@@ -31,7 +32,7 @@ def load_config(config_path: str) -> dict[str, Any]:
         config_path: Path to the configuration file
     Returns:
-        Dictionary with configuration values
+        Dictionary with validated configuration values
     """
     path = Path(config_path)
@@ -75,6 +76,9 @@ def load_config(config_path: str) -> dict[str, Any]:
         if field in config:
             validated_config[field] = bool(config[field])
+    if "max_file_size" in config and config["max_file_size"] is not None:
+        validated_config["max_file_size"] = int(config["max_file_size"])
     return validated_config
@@ -91,7 +95,7 @@ def create_default_config(config_path: Path) -> None:
 # Output file name (supports strftime formatting)
 # Use timestamp: true to automatically add timestamp
-output: code_output.txt
+output: code-to-txt.txt
 # File extensions to include
 # Can be a list or space/comma-separated string
@@ -136,7 +140,9 @@ clipboard: false
 clipboard_only: false
 # Add timestamp to output filename
-timestamp: false
+timestamp: true
+max_file_size: null
 # Example configurations:
 #

code_to_txt/utils.py ADDED Viewed

@@ -0,0 +1,13 @@
+from pathlib import Path
+def load_patterns_from_file(file_path: Path) -> set[str]:
+    """Load patterns from a text file, one per line."""
+    patterns = set()
+    if file_path.exists():
+        with open(file_path, encoding="utf-8") as f:
+            for line in f:
+                stripped = line.strip()
+                if stripped and not stripped.startswith("#"):
+                    patterns.add(stripped)
+    return patterns

{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-to-txt
-Version: 0.2.0
+Version: 0.3.0
 Summary: Convert code files to a single text file for LLM consumption
 License: MIT
 License-File: LICENSE
@@ -28,15 +28,6 @@ Models (LLMs) or for easy code review and documentation.
 ## Features
-✨ **New in v0.2.0:**
-- 🕐 **Automatic timestamps** in output filenames
-- 📋 **Clipboard support** - copy output directly to clipboard
-- 🎯 **Better extension handling** - specify multiple extensions without repeating `-e` flag
-- 🔍 **Glob pattern support** - use patterns like `*.py` or `src/**/*.js`
-- ⚙️ **Configuration file support** - save your preferences in `.code-to-txt.yml`
-- 🚀 **Enhanced defaults** - more file types and ignore patterns out of the box
 **Core Features:**
 - 📁 Convert entire directories of code into a single text file
@@ -63,9 +54,18 @@ poetry add code-to-txt
 ### Basic Usage
 ```bash
-# Convert all code files in current directory with timestamp
+# Show version
+code-to-txt --version
+# Convert all code files with timestamp
 code-to-txt -t
+# Preview what would be processed
+code-to-txt --dry-run
+# Get codebase statistics
+code-to-txt --stats
 # Convert specific directory
 code-to-txt ./my-project -o project.txt
@@ -88,6 +88,9 @@ code-to-txt -g "*.py" -g "*.md"
 ### Advanced Usage
 ```bash
+# Limit file sizes (useful for LLM token limits)
+code-to-txt --max-file-size 500
 # Exclude patterns
 code-to-txt -x "tests/*" -x "*.test.js"
@@ -116,7 +119,7 @@ This creates `.code-to-txt.yml` with default settings:
 ```yaml
 # Output file name
-output: codetotxt.txt
+output: code-to-txt.txt
 # File extensions to include (null = use defaults)
 extensions: null
@@ -125,7 +128,12 @@ extensions: null
 exclude:
   - "tests/*"
   - "*.test.js"
+  - "*.test.ts"
+  - "*.spec.js"
+  - "*.spec.ts"
   - "node_modules/*"
+  - "__pycache__/*"
+  - "*.pyc"
 # Glob patterns (alternative to extensions)
 glob: [ ]
@@ -137,6 +145,7 @@ separator: "================"
 clipboard: false
 clipboard_only: false
 timestamp: false
+max_file_size: null
 ```
 Use the config file:
@@ -155,6 +164,7 @@ code-to-txt --config .code-to-txt.yml
 extensions: [ .py ]
 exclude: [ "tests/*", "*.pyc", "__pycache__/*", "venv/*", ".venv/*" ]
 timestamp: true
+max_file_size: 500
 ```
 **JavaScript/TypeScript Project:**
@@ -163,20 +173,18 @@ timestamp: true
 extensions: [ .js, .ts, .jsx, .tsx ]
 exclude: [ "node_modules/*", "dist/*", "build/*", "*.test.js", "*.spec.ts" ]
 no_tree: false
+max_file_size: 1000
 ```
-**C/C++ Project:**
-```yaml
-extensions: [ .c, .cpp, .h, .hpp ]
-exclude: [ "build/*", "*.o", "*.a", "cmake-build-*" ]
-```
-**Using Glob Patterns:**
+**LLM-Optimized:**
 ```yaml
-glob: [ "src/**/*.py", "lib/**/*.py", "*.md" ]
-extensions: null  # Ignore extensions when using glob
+extensions: [ .py, .js, .md ]
+exclude: [ "tests/*", "*.test.*", "node_modules/*", "dist/*", "build/*" ]
+timestamp: true
+clipboard: true
+max_file_size: 200
+no_tree: false
 ```
 ## Command Line Options
@@ -194,12 +202,16 @@ Options:
   -g, --glob TEXT         Glob patterns to include (can be used multiple times)
   --no-gitignore          Don't respect .gitignore files
   --no-tree               Don't include directory tree in output
-  --separator TEXT        Separator between files (default: ====...)
+  --separator TEXT        Separator between files
   -c, --clipboard         Copy output to clipboard in addition to file
   --clipboard-only        Copy to clipboard only (don't save file)
   --config PATH           Path to config file (.yml or .yaml)
   --init-config           Create default configuration file
   -t, --timestamp         Add timestamp to output filename
+  -v, --version           Show version and exit
+  --dry-run               Show which files would be processed
+  --stats                 Show detailed statistics
+  --max-file-size INT     Skip files larger than N KB
   --help                  Show this message and exit
 ```
@@ -210,15 +222,13 @@ Options:
 ```python
 from code_to_txt import CodeToText
-# Create instance
-code_to_text = CodeToText(
+code_to_txt = CodeToText(
     root_path="./my-project",
     output_file="output.txt",
     include_extensions={".py", ".js"},
 )
-# Convert to file
-num_files = code_to_text.convert(add_tree=True)
+num_files = code_to_txt.convert(add_tree=True)
 print(f"Processed {num_files} files")
 ```
@@ -226,54 +236,47 @@ print(f"Processed {num_files} files")
 ```python
 from code_to_txt import CodeToText
+import pyperclip
-# Generate content without writing to file
-code_to_text = CodeToText(
+code_to_txt = CodeToText(
     root_path="./my-project",
-    output_file=None,  # No file needed
+    output_file=None,
     include_extensions={".py"},
 )
-content = code_to_text.generate_content(add_tree=True)
-print(f"Generated {len(content)} characters")
-# Copy to clipboard using pyperclip
-import pyperclip
+content = code_to_txt.generate_content(add_tree=True)
 pyperclip.copy(content)
 ```
-### Using Glob Patterns
+### Get Statistics
 ```python
 from code_to_txt import CodeToText
-code_to_text = CodeToText(
+code_to_txt = CodeToText(
     root_path="./my-project",
-    output_file="output.txt",
-    glob_patterns=["*.py", "src/**/*.js", "**/*.md"],
+    output_file=None,
+    max_file_size_kb=500,
 )
-num_files = code_to_text.convert()
+stats = code_to_txt.calculate_statistics()
+print(f"Total files: {stats['total_files']}")
+print(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
+print(f"Total lines: {stats['total_lines']:,}")
 ```
-### Advanced Configuration
+### Using Glob Patterns
 ```python
 from code_to_txt import CodeToText
-code_to_text = CodeToText(
+code_to_txt = CodeToText(
     root_path="./my-project",
-    output_file="detailed_output.txt",
-    include_extensions={".py", ".js", ".ts"},
-    exclude_patterns=["tests/*", "*.test.js", "node_modules/*"],
-    gitignore=True,  # Respect .gitignore (default)
+    output_file="output.txt",
+    glob_patterns=["*.py", "src/**/*.js", "**/*.md"],
 )
-num_files = code_to_text.convert(
-    add_tree=True,
-    separator="=" * 100,
-)
+num_files = code_to_txt.convert()
 ```
 ## Default File Extensions
@@ -301,7 +304,7 @@ CodeToTxt automatically ignores common build artifacts and dependencies:
 - `.pytest_cache`, `.mypy_cache`, `.ruff_cache`
 - `*.so`, `*.dylib`, `*.dll`
-Plus any patterns in your `.gitignore` file.
+Plus any patterns in your `.gitignore` files (including those in parent directories).
 ## Output Format
@@ -353,34 +356,43 @@ if __name__ == "__main__":
 ## Tips & Tricks
-### For Large Projects
+### For LLM Consumption
 ```bash
-# Use specific extensions to reduce size
-code-to-txt -e ".py" -t
+# Step 1: Check what you're working with
+code-to-txt --stats
-# Exclude heavy directories
-code-to-txt -x "node_modules/*" -x "venv/*" -x "dist/*"
+# Step 2: Preview files
+code-to-txt --dry-run --max-file-size 200
+# Step 3: Copy to clipboard with size limit
+code-to-txt --clipboard-only --max-file-size 200 -e ".py .md"
+# The command output includes a token estimate, for example:
+# Estimated tokens: ~95,000
 ```
-### For LLM Consumption
+### For Large Projects
 ```bash
-# Copy directly to clipboard for pasting into ChatGPT/Claude
-code-to-txt --clipboard-only -e ".py .md"
+# Use specific extensions to reduce size
+code-to-txt -e ".py" -t --max-file-size 500
-# Or save and copy
-code-to-txt -t -c -e ".py .js"
+# Exclude heavy directories
+code-to-txt -x "node_modules/*" -x "venv/*" -x "dist/*"
+# Get statistics first
+code-to-txt --stats --max-file-size 300
 ```
-### For Specific Features
+### Debug Ignore Patterns
 ```bash
-# Only include source files, exclude tests
-code-to-txt -g "src/**/*.py" -g "lib/**/*.py"
+# See which files are being skipped and why
+code-to-txt --dry-run
-# Only documentation
-code-to-txt -e ".md .rst .txt"
+# Compare with and without gitignore
+code-to-txt --dry-run --no-gitignore
 ```
 ## Requirements
@@ -416,6 +428,20 @@ MIT License - see LICENSE file for details.
 ## Changelog
+### v0.3.0
+- 🔧 Refactored codebase for better maintainability
+- 📁 Externalized default extensions and ignore patterns to separate files
+- 🐛 Fixed critical gitignore bug (now checks parent directories)
+- 🔍 Improved cross-platform path handling
+- 📊 Added `--stats` flag for detailed codebase statistics
+- 🎯 Added `--dry-run` mode to preview without processing
+- 📏 Added `--max-file-size` to skip large files
+- 🔢 Added token estimation for LLM consumption
+- 📝 Added skip tracking to see which files were excluded
+- 🚀 Improved method naming and code structure
+- ✅ Enhanced test coverage
 ### v0.2.0
 - ✨ Added automatic timestamp generation for output files

code_to_txt-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+code_to_txt/.extensions,sha256=wmqH99IE9LSVPBQjOlmFH7e32aBhW-Gyx5pUk_aHTTw,164
+code_to_txt/.ignore,sha256=h-2N-vrqYosVthADpYPSMwvHmZJXhdr9sUutlPtoEyw,151
+code_to_txt/__init__.py,sha256=0BlnuJBBoiatWYgynf7iIw8LnMl-vyksXOwiSxLg7AI,84
+code_to_txt/cli.py,sha256=Gg45vpewnQWZcQmbWgArMaXa6HYovCL7BK_nDdgQKqg,9633
+code_to_txt/code_to_txt.py,sha256=h9UXYUdbXbPd4vaGn-EhgxSUlCGtN-JdTLw86lIakYE,11607
+code_to_txt/config.py,sha256=DRjZ5uLXYbSwfTu36dGDbVUagYSMDhiw6TKgjAQkMU8,4292
+code_to_txt/utils.py,sha256=K-eKT05eTCgkWuRDwSzPdmcmMZECRB4gubabO2vOgVE,434
+code_to_txt-0.3.0.dist-info/METADATA,sha256=CTNP9Yjp9F7FeG7gakv2Lk1jO_da5jaKmgSNWMXAtNQ,11160
+code_to_txt-0.3.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
+code_to_txt-0.3.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
+code_to_txt-0.3.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
+code_to_txt-0.3.0.dist-info/RECORD,,

code_to_txt-0.2.0.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-code_to_txt/__init__.py,sha256=0_iks7Uz24B1pc-Na1n8C97vgMms6haaFNqIRkpq_Cg,62
-code_to_txt/cli.py,sha256=5NEXWGts1JBSXpAWsgrAfz9O0YFHzu6uRPf4NUePOj0,6662
-code_to_txt/code_to_txt.py,sha256=ZCqc-Yk-hc5AexDIGaphCe_2Ck3LhfVeQP8-pDVRCec,8417
-code_to_txt/config.py,sha256=KMlpeKO0F8YRbEmlXMnCs_PrR3iYQNTYOgZISZfCzVU,4148
-code_to_txt-0.2.0.dist-info/METADATA,sha256=AS-XxI1i8Au96Y1_y04nhgY2U6A8whMEshnGHUfHNgc,10519
-code_to_txt-0.2.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
-code_to_txt-0.2.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
-code_to_txt-0.2.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
-code_to_txt-0.2.0.dist-info/RECORD,,

{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

code-to-txt 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

code-to-txt 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl