PyPI - ai-codeindex - Versions diffs - 0.7.0__py3-none-any.whl - Mend

ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

ai_codeindex-0.7.0.dist-info/METADATA +966 -0
ai_codeindex-0.7.0.dist-info/RECORD +41 -0
ai_codeindex-0.7.0.dist-info/WHEEL +4 -0
ai_codeindex-0.7.0.dist-info/entry_points.txt +2 -0
ai_codeindex-0.7.0.dist-info/licenses/LICENSE +21 -0
codeindex/README_AI.md +767 -0
codeindex/__init__.py +11 -0
codeindex/adaptive_config.py +83 -0
codeindex/adaptive_selector.py +171 -0
codeindex/ai_helper.py +48 -0
codeindex/cli.py +40 -0
codeindex/cli_common.py +10 -0
codeindex/cli_config.py +97 -0
codeindex/cli_docs.py +66 -0
codeindex/cli_hooks.py +765 -0
codeindex/cli_scan.py +562 -0
codeindex/cli_symbols.py +295 -0
codeindex/cli_tech_debt.py +238 -0
codeindex/config.py +479 -0
codeindex/directory_tree.py +229 -0
codeindex/docstring_processor.py +342 -0
codeindex/errors.py +62 -0
codeindex/extractors/__init__.py +9 -0
codeindex/extractors/thinkphp.py +132 -0
codeindex/file_classifier.py +148 -0
codeindex/framework_detect.py +323 -0
codeindex/hierarchical.py +428 -0
codeindex/incremental.py +278 -0
codeindex/invoker.py +260 -0
codeindex/parallel.py +155 -0
codeindex/parser.py +740 -0
codeindex/route_extractor.py +98 -0
codeindex/route_registry.py +77 -0
codeindex/scanner.py +167 -0
codeindex/semantic_extractor.py +408 -0
codeindex/smart_writer.py +737 -0
codeindex/symbol_index.py +199 -0
codeindex/symbol_scorer.py +283 -0
codeindex/tech_debt.py +619 -0
codeindex/tech_debt_formatters.py +234 -0
codeindex/writer.py +164 -0

codeindex/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""
+codeindex - AI-native code indexing tool for large codebases
+Usage:
+    codeindex scan <path>     # Scan a directory and generate README_AI.md
+    codeindex init            # Initialize .codeindex.yaml
+    codeindex status          # Show indexing status
+"""
+# Package version string for this release.
+__version__ = "0.7.0"
+# Star-imports of the package expose only the version string.
+__all__ = ["__version__"]

codeindex/adaptive_config.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Adaptive symbols configuration.
+This module defines the configuration structure for adaptive symbol extraction,
+which allows dynamically adjusting the number of symbols to extract based on
+file size and other factors.
+"""
+from dataclasses import dataclass, field
+@dataclass
+class AdaptiveSymbolsConfig:
+    """Settings that scale the per-file symbol count with file length.
+    With adaptive extraction switched on, the number of symbols listed in a
+    README_AI.md entry grows with the size of the source file: long files get
+    broader coverage while short files stay brief. With it switched off, the
+    traditional max_per_file setting is used instead.
+    Size categories (each threshold is an exclusive upper bound, in lines):
+        tiny   -> fewer than thresholds["tiny"]
+        small  -> fewer than thresholds["small"]
+        medium -> fewer than thresholds["medium"]
+        large  -> fewer than thresholds["large"]
+        xlarge -> fewer than thresholds["xlarge"]
+        huge   -> fewer than thresholds["huge"]
+        mega   -> thresholds["huge"] lines or more
+    Attributes:
+        enabled: Turns adaptive symbol extraction on or off.
+        thresholds: Line-count boundaries keyed by category name
+            (tiny, small, medium, large, xlarge, huge).
+        limits: Maximum symbols to display per category
+            (tiny, small, medium, large, xlarge, huge, mega).
+        min_symbols: Floor on the number of symbols displayed.
+        max_symbols: Ceiling on the number of symbols displayed.
+    Example:
+        >>> config = AdaptiveSymbolsConfig(
+        ...     enabled=True,
+        ...     thresholds={"small": 200, "medium": 500, "large": 1000},
+        ...     limits={"small": 15, "medium": 30, "large": 50},
+        ... )
+        >>> config.enabled
+        True
+        >>> config.limits["medium"]
+        30
+    """
+    # Off unless explicitly requested.
+    enabled: bool = False
+    # default_factory gives every instance its own dict (no shared mutable default).
+    thresholds: dict[str, int] = field(default_factory=dict)
+    limits: dict[str, int] = field(default_factory=dict)
+    # Global floor / ceiling applied regardless of category.
+    min_symbols: int = 5
+    max_symbols: int = 200
+# Default configuration matching the planning document.
+# Each threshold is an exclusive upper bound in lines; each limit is the
+# symbol count for the category of the same name.
+DEFAULT_ADAPTIVE_CONFIG = AdaptiveSymbolsConfig(
+    enabled=False,  # Disabled by default for backward compatibility
+    thresholds={
+        "tiny": 100,  # tiny: fewer than 100 lines
+        "small": 200,  # small: 100-199 lines
+        "medium": 500,  # medium: 200-499 lines
+        "large": 1000,  # large: 500-999 lines
+        "xlarge": 2000,  # xlarge: 1000-1999 lines
+        "huge": 5000,  # huge: 2000-4999 lines
+    },
+    limits={
+        "tiny": 10,
+        "small": 15,
+        "medium": 30,
+        "large": 50,
+        "xlarge": 80,
+        "huge": 120,
+        "mega": 150,  # For files >= 5000 lines (everything at or above the "huge" threshold)
+    },
+    min_symbols=5,
+    max_symbols=200,
+)

codeindex/adaptive_selector.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""Adaptive symbol selector for dynamic symbol limit calculation.
+This module implements the core algorithm for adaptive symbol extraction,
+which adjusts the number of symbols to display based on file size.
+"""
+from codeindex.adaptive_config import DEFAULT_ADAPTIVE_CONFIG, AdaptiveSymbolsConfig
+class AdaptiveSymbolSelector:
+    """Pick how many symbols to show for a file, scaled by its line count.
+    Larger files are allowed more symbols so their coverage improves, while
+    smaller files stay concise. A limit is computed in three stages:
+    1. Map the line count to a size category
+       (tiny/small/medium/large/xlarge/huge/mega).
+    2. Look up the symbol limit configured for that category.
+    3. Clamp the limit by the available symbol count and min/max settings.
+    Attributes:
+        config: AdaptiveSymbolsConfig holding thresholds, limits and bounds.
+    Example:
+        >>> selector = AdaptiveSymbolSelector()
+        >>> selector.calculate_limit(8891, 57)  # mega allows 150, only 57 exist
+        57
+        >>> selector.calculate_limit(500, 100)  # large category allows 50
+        50
+    """
+    def __init__(self, config: AdaptiveSymbolsConfig | None = None):
+        """Build the effective configuration for this selector.
+        Args:
+            config: Custom settings. When None, DEFAULT_ADAPTIVE_CONFIG is used.
+                Thresholds and limits missing from a custom config are filled
+                in from the defaults.
+        """
+        defaults = DEFAULT_ADAPTIVE_CONFIG
+        chosen = defaults if config is None else config
+        # Layering the chosen dicts over the defaults always builds new dicts,
+        # so neither the caller's config nor the module-level default is shared.
+        self.config = AdaptiveSymbolsConfig(
+            enabled=chosen.enabled,
+            thresholds={**defaults.thresholds, **chosen.thresholds},
+            limits={**defaults.limits, **chosen.limits},
+            min_symbols=chosen.min_symbols,
+            max_symbols=chosen.max_symbols,
+        )
+    def calculate_limit(self, file_lines: int, total_symbols: int) -> int:
+        """Return the number of symbols to display for one file.
+        Args:
+            file_lines: Number of lines in the file.
+            total_symbols: Number of symbols available in the file.
+        Returns:
+            int: The category limit after clamping; never more than
+            total_symbols or max_symbols.
+        Example:
+            >>> selector = AdaptiveSymbolSelector()
+            >>> selector.calculate_limit(100, 20)  # small file, 20 symbols
+            15
+            >>> selector.calculate_limit(10000, 200)  # mega file, 200 symbols
+            150
+        """
+        size_name = self._determine_size_category(file_lines)
+        return self._apply_constraints(self.config.limits[size_name], total_symbols)
+    def _determine_size_category(self, lines: int) -> str:
+        """Classify a line count into a size category.
+        Categories are tried from smallest to largest; the first one whose
+        threshold is strictly greater than ``lines`` wins. With the default
+        thresholds: tiny <100, small 100-199, medium 200-499, large 500-999,
+        xlarge 1000-1999, huge 2000-4999, mega >=5000.
+        Args:
+            lines: Number of lines in the file.
+        Returns:
+            str: The category name.
+        Example:
+            >>> selector = AdaptiveSymbolSelector()
+            >>> selector._determine_size_category(50)
+            'tiny'
+            >>> selector._determine_size_category(150)
+            'small'
+            >>> selector._determine_size_category(8891)
+            'mega'
+        """
+        bounds = self.config.thresholds
+        for name in ("tiny", "small", "medium", "large", "xlarge", "huge"):
+            if lines < bounds[name]:
+                return name
+        # Nothing matched: the file is at or above the "huge" threshold.
+        return "mega"
+    def _apply_constraints(self, limit: int, total_symbols: int) -> int:
+        """Clamp a category limit to the configured and physical bounds.
+        Applied in order:
+        1. Cap at total_symbols (cannot show more symbols than exist).
+        2. Raise to min_symbols, but only when at least that many exist.
+        3. Cap at max_symbols (keeps README files from growing too long).
+        Args:
+            limit: Limit taken from the size category.
+            total_symbols: Number of symbols available in the file.
+        Returns:
+            int: The constrained limit.
+        Example:
+            >>> selector = AdaptiveSymbolSelector()
+            >>> selector._apply_constraints(50, 30)  # only 30 available
+            30
+            >>> selector._apply_constraints(250, 300)  # max is 200
+            200
+            >>> selector._apply_constraints(3, 100)  # min is 5
+            5
+            >>> selector._apply_constraints(10, 1)  # only 1 available
+            1
+        """
+        floor = self.config.min_symbols
+        ceiling = self.config.max_symbols
+        bounded = min(limit, total_symbols)
+        # The floor is skipped when the file has fewer symbols than the floor.
+        if total_symbols >= floor:
+            bounded = max(bounded, floor)
+        return min(bounded, ceiling)

codeindex/ai_helper.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""AI enhancement helper functions (Epic 4 Story 4.1).
+This module provides reusable functions for AI enhancement operations,
+eliminating code duplication in scan and scan-all commands.
+"""
+from pathlib import Path
+from codeindex.parser import ParseResult
+def aggregate_parse_results(
+    parse_results: list[ParseResult],
+    path: Path,
+) -> ParseResult:
+    """Merge several parse results into a single combined result.
+    The symbols of every input are concatenated in input order and the line
+    counts are summed, which is handy when a whole directory of files should
+    be analysed as one unit.
+    Args:
+        parse_results: The per-file results to combine.
+        path: Path to record on the combined result.
+    Returns:
+        ParseResult carrying all symbols and the total line count.
+    Example:
+        >>> pr1 = ParseResult(Path("a.py"), file_lines=100, symbols=[...])
+        >>> pr2 = ParseResult(Path("b.py"), file_lines=200, symbols=[...])
+        >>> aggregated = aggregate_parse_results([pr1, pr2], Path("dir"))
+        >>> aggregated.file_lines
+        300
+    """
+    merged_symbols = [symbol for result in parse_results for symbol in result.symbols]
+    line_total = sum(result.file_lines for result in parse_results)
+    return ParseResult(path=path, file_lines=line_total, symbols=merged_symbols)

codeindex/cli.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""CLI entry point for codeindex.
+This module serves as the main entry point for the codeindex CLI tool.
+It imports and registers commands from specialized modules to keep the
+codebase organized and maintainable.
+"""
+import click
+from .cli_config import init, list_dirs, status
+from .cli_docs import docs
+from .cli_hooks import hooks
+from .cli_scan import scan, scan_all
+from .cli_symbols import affected, index, symbols
+from .cli_tech_debt import tech_debt
+# The wheel is published as the distribution "ai-codeindex" while the import
+# package is "codeindex". Without package_name, Click infers the name from the
+# calling module ("codeindex") and the --version lookup fails because no
+# distribution of that name is installed, so name the distribution explicitly.
+@click.group()
+@click.version_option(package_name="ai-codeindex")
+def main():
+    """codeindex - AI-native code indexing tool for large codebases."""
+# Attach each sub-command from the specialized modules to the root group,
+# in the same order they were registered before.
+for _command in (
+    scan,
+    scan_all,
+    init,
+    status,
+    list_dirs,
+    index,
+    symbols,
+    affected,
+    tech_debt,
+    hooks,
+    docs,
+):
+    main.add_command(_command)
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    main()

codeindex/cli_common.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Common utilities for CLI modules.
+This module provides shared resources used across all CLI command modules,
+such as the Rich console instance for formatted output.
+"""
+from rich.console import Console
+# Shared console instance for all CLI commands: command modules import this
+# object rather than constructing their own Console.
+console = Console()

codeindex/cli_config.py ADDED Viewed

@@ -0,0 +1,97 @@
+"""CLI commands for configuration and project status.
+This module provides commands for initializing configuration files,
+checking indexing status, and listing indexable directories.
+"""
+from pathlib import Path
+import click
+from rich.table import Table
+from .cli_common import console
+from .config import DEFAULT_CONFIG_NAME, Config
+from .scanner import find_all_directories
+@click.command()
+@click.option("--force", "-f", is_flag=True, help="Overwrite existing config")
+def init(force: bool):
+    """Initialize .codeindex.yaml configuration file."""
+    # The config file is looked up in the current working directory.
+    target = Path.cwd() / DEFAULT_CONFIG_NAME
+    # Leave an existing config untouched unless --force was given.
+    if not force and target.exists():
+        console.print(f"[yellow]Config already exists:[/yellow] {target}")
+        console.print("Use --force to overwrite")
+        return
+    written = Config.create_default()
+    console.print(f"[green]Created:[/green] {written}")
+    # Short hint about the settings a user will most likely want to edit.
+    for hint in (
+        "\nEdit this file to configure:",
+        "  - ai_command: Your AI CLI command",
+        "  - include/exclude: Directories to scan",
+    ):
+        console.print(hint)
+@click.command()
+@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
+def status(root: Path):
+    """Show indexing status for the project."""
+    # Upper bound on how many unindexed directories are listed individually.
+    max_listed = 10
+    root = root.resolve()
+    config = Config.load()
+    console.print(f"[bold]Project:[/bold] {root}")
+    console.print(f"[bold]Config:[/bold] {DEFAULT_CONFIG_NAME}")
+    # Find all directories that should be indexed
+    dirs = find_all_directories(root, config)
+    if not dirs:
+        console.print("[yellow]No indexable directories found[/yellow]")
+        return
+    # A directory counts as indexed when its output file (config.output_file) exists.
+    indexed = []
+    not_indexed = []
+    for d in dirs:
+        if (d / config.output_file).exists():
+            indexed.append(d)
+        else:
+            not_indexed.append(d)
+    # Summary table: counts and share of indexed vs. not-yet-indexed directories.
+    table = Table(title="Indexing Status")
+    table.add_column("Status", style="bold")
+    table.add_column("Count")
+    table.add_column("Percentage")
+    total = len(dirs)
+    indexed_count = len(indexed)
+    # dirs is non-empty here (early return above), so the division is safe.
+    coverage = indexed_count / total * 100
+    table.add_row("[green]Indexed[/green]", str(indexed_count), f"{coverage:.1f}%")
+    table.add_row("[yellow]Not indexed[/yellow]", str(len(not_indexed)), f"{100-coverage:.1f}%")
+    table.add_row("Total", str(total), "100%")
+    console.print(table)
+    # Previously the list was shown only when it had at most 10 entries, which
+    # made the [:10] slice dead code and hid every entry on larger projects.
+    # Now the first entries are always listed and the remainder is summarised.
+    if not_indexed:
+        console.print("\n[dim]Not indexed:[/dim]")
+        for d in not_indexed[:max_listed]:
+            # markup=False: directory names containing [brackets] must be
+            # printed literally instead of being parsed as Rich markup tags.
+            console.print(f"  {d.relative_to(root)}", markup=False)
+        remaining = len(not_indexed) - max_listed
+        if remaining > 0:
+            console.print(f"[dim]  ... and {remaining} more[/dim]")
+@click.command()
+@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
+def list_dirs(root: Path):
+    """List all directories that would be indexed."""
+    base = root.resolve()
+    # Built-in print() is used instead of the Rich console, so each relative
+    # path is written as plain text, one per line.
+    for directory in find_all_directories(base, Config.load()):
+        print(directory.relative_to(base))

codeindex/cli_docs.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""Documentation CLI commands for codeindex."""
+from pathlib import Path
+import click
+from .cli_common import console
+# Command group; its sub-commands are attached below via @docs.command().
+@click.group()
+def docs():
+    """Show codeindex documentation."""
+@docs.command()
+def show_ai_guide():
+    """
+    Show AI integration guide for Git Hooks.
+    This command outputs the complete guide that AI Code tools can read
+    to understand codeindex Git Hooks and update user project documentation.
+    Usage:
+        codeindex docs show-ai-guide
+    """
+    # The guide is looked up three directory levels above this module, under
+    # examples/ (presumably the repository root of a source checkout).
+    # NOTE(review): the 0.7.0 wheel's file list contains no examples/ directory,
+    # so for a pip-installed package this path likely does not exist and the
+    # error branch below is taken - confirm the guide is shipped as package data.
+    package_dir = Path(__file__).parent.parent.parent
+    guide_path = package_dir / "examples" / "ai-integration-guide.md"
+    if not guide_path.exists():
+        console.print(
+            "[red]Error:[/red] AI integration guide not found.\n"
+            f"Expected at: {guide_path}\n\n"
+            "Please reinstall codeindex or check installation.",
+            style="red",
+        )
+        raise click.Abort()
+    # Read as UTF-8 explicitly rather than relying on the platform's locale
+    # encoding, which would mis-decode non-ASCII characters on some systems.
+    content = guide_path.read_text(encoding="utf-8")
+    rule = "[bold cyan]═══════════════════════════════════════════════════[/bold cyan]"
+    # Header banner.
+    console.print(f"\n{rule}")
+    console.print(
+        "[bold cyan]  AI Integration Guide: codeindex Git Hooks[/bold cyan]"
+    )
+    console.print(f"{rule}\n")
+    # The guide is Markdown, not Rich markup: print it verbatim. Otherwise
+    # bracketed text such as "[text](url)" could be consumed as a style tag (or
+    # raise on "[/x]"), and hard-wrapping would alter lines when the output is
+    # redirected to a file as the tip below suggests.
+    console.print(content, markup=False, highlight=False, soft_wrap=True)
+    # Footer banner with usage tips.
+    console.print(f"\n{rule}")
+    console.print(
+        "[dim]Tip: Your AI Code can read this output to understand Git Hooks[/dim]"
+    )
+    console.print(
+        "[dim]Run: codeindex docs show-ai-guide > guide.md (to save to file)[/dim]"
+    )
+    console.print(f"{rule}\n")