ai_codeindex-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,428 @@
+ """Bottom-up hierarchical processing for codeindex."""
+
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, List, Set, Tuple
+
+ from rich.console import Console
+
+ from .config import Config
+ from .scanner import find_all_directories, scan_directory
+ from .smart_writer import SmartWriter, determine_level
+
+ console = Console()
+
+
+ @dataclass
+ class DirectoryInfo:
+     """Information about a directory in the hierarchy."""
+
+     path: Path
+     level: int  # 0 = deepest, numbers increase upward
+     children: Set[Path]  # Directories directly contained
+     parent: Path | None
+     has_files: bool
+     readmes_below: Set[Path]  # Subdirectories whose README_AI.md feeds this level
+     scan_result: object | None = None  # Filled in once the directory is scanned
+
+
+ def build_directory_hierarchy(
+     directories: List[Path],
+ ) -> Tuple[Dict[Path, DirectoryInfo], List[Path]]:
+     """
+     Build directory hierarchy from bottom up.
+
+     Returns:
+         - dict mapping paths to DirectoryInfo
+         - list of root directories (top level)
+     """
+     # Sort by depth (deepest first)
+     sorted_dirs = sorted(directories, key=lambda p: len(p.parts), reverse=True)
+
+     dir_info = {}
+     roots = []
+
+     # First pass: create all nodes
+     for dir_path in sorted_dirs:
+         info = DirectoryInfo(
+             path=dir_path,
+             level=0,  # Will be calculated
+             children=set(),
+             parent=None,
+             has_files=False,
+             readmes_below=set(),
+         )
+         dir_info[dir_path] = info
+
+     # Second pass: establish relationships
+     for dir_path, info in dir_info.items():
+         # Find parent relationship
+         parent_path = dir_path.parent
+         if parent_path in dir_info:
+             info.parent = parent_path
+             dir_info[parent_path].children.add(dir_path)
+         else:
+             roots.append(dir_path)
+
+     # Calculate levels bottom-up
+     def calculate_level(path: Path) -> int:
+         info = dir_info[path]
+         if not info.children:
+             info.level = 0
+             return 0
+
+         max_child_level = max(calculate_level(child) for child in info.children)
+         info.level = max_child_level + 1
+         return info.level
+
+     for root in roots:
+         calculate_level(root)
+
+     return dir_info, roots
+
+
+ def create_processing_batches(
+     dir_info: Dict[Path, DirectoryInfo], max_workers: int
+ ) -> List[List[Path]]:
+     """
+     Create batches for parallel processing.
+
+     All directories at the same level can be processed in parallel.
+     """
+     level_groups = defaultdict(list)
+
+     for path, info in dir_info.items():
+         if info.has_files:  # Only include directories that need processing
+             level_groups[info.level].append(path)
+
+     # Create batches from level groups (deeper levels first)
+     batches = []
+     for level in sorted(level_groups.keys()):
+         dirs_at_level = level_groups[level]
+
+         # Split into batches of max_workers
+         for i in range(0, len(dirs_at_level), max_workers):
+             batch = dirs_at_level[i:i + max_workers]
+             batches.append(batch)
+
+     return batches
+
+
+ def process_directory_batch(
+     batch: List[Path],
+     config: Config,
+     use_fallback: bool = False,
+     quiet: bool = False,
+     timeout: int = 120,
+     root_path: Path | None = None,
+ ) -> Dict[Path, bool]:
+     """
+     Process a batch of directories in parallel.
+
+     Returns dict mapping path to success boolean.
+     """
+     import concurrent.futures
+
+     results = {}
+
+     def process_single(path: Path) -> Tuple[Path, bool]:
+         try:
+             # Use smart processing with level detection
+             return path, process_normal(path, config, use_fallback, quiet, timeout, root_path)
+         except Exception as e:
+             if not quiet:
+                 console.print(f"[yellow]⚠ Skipping {path.name}: {e}[/yellow]")
+             return path, False
+
+     with concurrent.futures.ThreadPoolExecutor(max_workers=len(batch)) as executor:
+         futures = {executor.submit(process_single, path): path for path in batch}
+
+         for future in concurrent.futures.as_completed(futures):
+             path, success = future.result()
+             results[path] = success
+
+     return results
+
+
+ # Module-level directory info, populated by scan_directories_hierarchical before
+ # any batch runs (in a cleaner design this would be passed as a parameter)
+ dir_info = None
+
+
+ def process_normal(
+     path: Path,
+     config: Config,
+     use_fallback: bool,
+     quiet: bool,
+     timeout: int,
+     root_path: Path | None = None,
+ ) -> bool:
+     """Process a single directory with smart level detection."""
+     # Scan directory
+     if not quiet:
+         console.print(f" [dim]→ {path.name}: scanning...[/dim]")
+
+     scan_result = scan_directory(path, config)
+
+     # Parse files
+     from .parallel import parse_files_parallel
+     parse_results = []
+     if scan_result.files:
+         parse_results = parse_files_parallel(scan_result.files, config, quiet)
+
+     # Check if this directory has README_AI.md from children
+     # (guarded so the function also works if the module-level dir_info is unset)
+     child_dirs = []
+     if dir_info and path in dir_info and dir_info[path].readmes_below:
+         child_dirs = list(dir_info[path].readmes_below)
+
+     # Determine appropriate level
+     has_children = bool(child_dirs)
+     if root_path is None:
+         root_path = path
+     level = determine_level(path, root_path, has_children, config.indexing)
+
+     if not quiet:
+         console.print(
+             f" [dim]→ {path.name}: generating [{level}] README "
+             f"with {len(child_dirs)} subdirs...[/dim]"
+         )
+
+     # Use smart writer
+     writer = SmartWriter(config.indexing)
+     write_result = writer.write_readme(
+         dir_path=path,
+         parse_results=parse_results,
+         level=level,
+         child_dirs=child_dirs,
+         output_file=config.output_file,
+     )
+
+     if write_result.truncated and not quiet:
+         size_kb = write_result.size_bytes // 1024
+         console.print(
+             f" [yellow]⚠ {path.name}: README truncated to {size_kb}KB[/yellow]"
+         )
+
+     return write_result.success
+
+
+ def process_with_children(
+     path: Path, config: Config, use_fallback: bool, quiet: bool, timeout: int
+ ) -> bool:
+     """Process a directory that has children, aggregating their information."""
+     # This would be similar to process_normal but with child aggregation
+     return process_normal(path, config, use_fallback, quiet, timeout)
+
+
+ def scan_directories_hierarchical(
+     root: Path,
+     config: Config,
+     max_workers: int = 8,
+     use_fallback: bool = True,
+     quiet: bool = False,
+     timeout: int = 120,
+ ) -> bool:
+     """
+     Main function for hierarchical directory scanning.
+
+     Returns True if processing was successful overall.
+     """
+     global dir_info
+
+     # Step 1: Find all directories
+     directories = find_all_directories(root, config)
+
+     if not directories:
+         if not quiet:
+             console.print("[yellow]No directories to process[/yellow]")
+         return True
+
+     # Step 2: Build hierarchy
+     if not quiet:
+         console.print("[bold]🔍 Building directory hierarchy...[/bold]")
+
+     dir_info, roots = build_directory_hierarchy(directories)
+
+     # Step 3: Scan each directory once, marking those that have files
+     for dir_path in directories:
+         scan_result = scan_directory(dir_path, config)
+         if dir_path in dir_info:
+             dir_info[dir_path].has_files = bool(scan_result.files)
+             dir_info[dir_path].scan_result = scan_result
+
+         # Update parent-child relationship for README tracking
+         parent_path = dir_path.parent
+         if parent_path in dir_info:
+             dir_info[parent_path].readmes_below.add(dir_path)
+
+     # Step 4: Create processing batches
+     if not quiet:
+         console.print("[bold]📦 Creating processing batches...[/bold]")
+
+     batches = create_processing_batches(dir_info, max_workers)
+     total_dirs = sum(len(batch) for batch in batches)
+
+     if not quiet:
+         console.print(f"[green]✓ {total_dirs} directories in {len(batches)} levels/batches[/green]")
+
+     # Step 5: Process batches
+     global_processed = 0
+     for i, batch in enumerate(batches):
+         if not quiet:
+             level = dir_info[batch[0]].level if batch else 0
+             console.print(f"\n[bold]Level {level} - Batch {i+1}/{len(batches)}[/bold]")
+
+         results = process_directory_batch(
+             batch, config, use_fallback, quiet, timeout, root_path=root
+         )
+
+         for path, success in results.items():
+             if success:
+                 global_processed += 1
+             elif not quiet:
+                 console.print(f"[yellow]⚠ Skipped: {path.name}[/yellow]")
+
+     if not quiet:
+         console.print(f"\n[green]✓ Processed {global_processed}/{total_dirs} directories[/green]")
+
+     return global_processed > 0
+
+
+ def generate_enhanced_fallback_readme(
+     dir_path: Path,
+     parse_results: list,
+     child_readmes: List[Path],
+     output_file: str = "README_AI.md",
+ ):
+     """
+     Generate enhanced fallback README that includes child directory summaries.
+     """
+     from datetime import datetime
+
+     from .writer import format_imports_for_prompt, format_symbols_for_prompt
+
+     output_path = dir_path / output_file
+
+     # Basic directory info
+     lines = [
+         f"<!-- Generated by codeindex (hierarchical) at {datetime.now().isoformat()} -->",
+         "",
+         f"# {dir_path.name}",
+         "",
+     ]
+
+     # File statistics
+     files_count = len(parse_results)
+     symbols_count = sum(len(r.symbols) for r in parse_results)
+
+     lines.extend([
+         "## Overview",
+         f"- **Files**: {files_count}",
+         f"- **Symbols**: {symbols_count}",
+         f"- **Subdirectories**: {len(child_readmes)}",
+         "",
+     ])
+
+     # Child directories section
+     if child_readmes:
+         lines.extend([
+             "## Subdirectories",
+             "",
+         ])
+
+         for child_path in sorted(child_readmes):
+             child_name = child_path.name
+             child_readme = child_path / output_file
+
+             # Extract brief description from child README if it exists
+             description = "Module directory"
+             if child_readme.exists():
+                 try:
+                     content = child_readme.read_text()
+                     # Look for first non-heading line after the generated header
+                     for line in content.split('\n')[2:10]:
+                         line = line.strip()
+                         if line and not line.startswith('#'):
+                             description = line[:100]
+                             break
+                 except Exception:
+                     pass
+
+             lines.append(f"- **{child_name}** - {description}")
+
+         lines.append("")
+
+     # Local files and symbols
+     if parse_results:
+         lines.extend([
+             "## Files",
+             "",
+         ])
+
+         # Group by subdirectory
+         files_by_subdir = defaultdict(list)
+         for result in parse_results:
+             if not result.error:
+                 rel_path = result.path.relative_to(dir_path)
+                 if rel_path.parent != Path('.'):
+                     files_by_subdir[str(rel_path.parent)].append(result)
+                 else:
+                     files_by_subdir['.'].append(result)
+
+         for subdir in sorted(files_by_subdir.keys()):
+             if subdir == '.':
+                 # Files in root
+                 for result in files_by_subdir[subdir]:
+                     lines.append(f"- {result.path.name} ({len(result.symbols)} symbols)")
+             else:
+                 # Files in subdirectory
+                 lines.append(f"- **{subdir}/**")
+                 for result in files_by_subdir[subdir]:
+                     lines.append(f" - {result.path.name} ({len(result.symbols)} symbols)")
+
+         lines.extend([
+             "",
+             "## Symbols",
+             "",
+         ])
+
+         # Add symbols
+         lines.append(format_symbols_for_prompt(parse_results))
+
+         # Add dependencies if any
+         all_imports = []
+         for result in parse_results:
+             all_imports.extend(result.imports)
+
+         if all_imports:
+             lines.extend([
+                 "",
+                 "## Dependencies",
+                 "",
+             ])
+             lines.append(format_imports_for_prompt(parse_results))
+
+     # Write file
+     try:
+         with open(output_path, "w") as f:
+             f.write("\n".join(lines))
+
+         return type('WriteResult', (), {
+             'path': output_path,
+             'success': True,
+             'error': "",
+         })()
+     except Exception as e:
+         return type('WriteResult', (), {
+             'path': output_path,
+             'success': False,
+             'error': str(e),
+         })()
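
For orientation, here is a minimal usage sketch of the hierarchical module above. It is a sketch under stated assumptions: the import paths (codeindex.hierarchical, codeindex.config) and the bare Config() construction are inferred from the relative imports in the diff, not confirmed by it.

    # Hypothetical usage sketch -- module paths and Config() are assumptions.
    from pathlib import Path

    from codeindex.config import Config          # assumed import path
    from codeindex.hierarchical import (         # assumed module name
        build_directory_hierarchy,
        scan_directories_hierarchical,
    )

    root = Path("my_project")

    # Levels are computed bottom-up: leaves are level 0 and each parent is
    # max(child levels) + 1, so deeper directories are batched first.
    dirs = [root / "pkg", root / "pkg" / "sub", root / "pkg" / "sub" / "leaf"]
    dir_info, roots = build_directory_hierarchy(dirs)
    assert dir_info[root / "pkg" / "sub" / "leaf"].level == 0
    assert dir_info[root / "pkg"].level == 2
    assert roots == [root / "pkg"]

    # End-to-end entry point: builds the hierarchy, scans each directory,
    # then writes README_AI.md files level by level from the leaves upward.
    config = Config()  # assumed constructor; real loading may differ
    ok = scan_directories_hierarchical(root, config, max_workers=4)

The leaves-first ordering is what lets create_processing_batches run every directory of a level in parallel while guaranteeing that child README_AI.md files exist before their parents are generated.
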
@@ -0,0 +1,278 @@
+ """Incremental update logic for codeindex.
+
+ This module analyzes git changes and determines which directories
+ need README_AI.md updates based on configurable thresholds.
+ """
+
+ import subprocess
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from pathlib import Path
+
+ from .config import Config
+
+
+ class UpdateLevel(Enum):
+     """Update decision levels."""
+
+     SKIP = "skip"  # Changes too small, skip update
+     CURRENT = "current"  # Update current directory only
+     AFFECTED = "affected"  # Update all affected directories
+     FULL = "full"  # Suggest full project update
+
+
+ @dataclass
+ class FileChange:
+     """Represents a changed file."""
+
+     path: Path
+     additions: int = 0
+     deletions: int = 0
+
+     @property
+     def total_lines(self) -> int:
+         return self.additions + self.deletions
+
+     @property
+     def directory(self) -> Path:
+         return self.path.parent
+
+
+ @dataclass
+ class ChangeAnalysis:
+     """Analysis result of git changes."""
+
+     files: list[FileChange] = field(default_factory=list)
+     total_additions: int = 0
+     total_deletions: int = 0
+     affected_dirs: set[Path] = field(default_factory=set)
+     level: UpdateLevel = UpdateLevel.SKIP
+     message: str = ""
+
+     @property
+     def total_lines(self) -> int:
+         return self.total_additions + self.total_deletions
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for CLI output."""
+         return {
+             "total_lines": self.total_lines,
+             "additions": self.total_additions,
+             "deletions": self.total_deletions,
+             "files_changed": len(self.files),
+             "affected_dirs": [str(d) for d in sorted(self.affected_dirs)],
+             "level": self.level.value,
+             "message": self.message,
+         }
+
+
+ def run_git_command(args: list[str], cwd: Path | None = None) -> str:
+     """Run a git command and return output (empty string on failure)."""
+     try:
+         result = subprocess.run(
+             ["git"] + args,
+             capture_output=True,
+             text=True,
+             cwd=cwd,
+             check=True,
+         )
+         return result.stdout.strip()
+     except subprocess.CalledProcessError:
+         return ""
+
+
+ def get_changed_files(
+     since: str = "HEAD~1",
+     until: str = "HEAD",
+     cwd: Path | None = None,
+ ) -> list[FileChange]:
+     """Get list of changed files with line counts.
+
+     Args:
+         since: Starting commit reference (default: HEAD~1)
+         until: Ending commit reference (default: HEAD)
+         cwd: Working directory
+
+     Returns:
+         List of FileChange objects
+     """
+     # Get numstat for line counts
+     output = run_git_command(
+         ["diff", "--numstat", since, until],
+         cwd=cwd,
+     )
+
+     if not output:
+         return []
+
+     changes = []
+     for line in output.split("\n"):
+         if not line.strip():
+             continue
+
+         parts = line.split("\t")
+         if len(parts) != 3:
+             continue
+
+         additions, deletions, filepath = parts
+
+         # Handle binary files (shown as -)
+         try:
+             add_count = int(additions) if additions != "-" else 0
+             del_count = int(deletions) if deletions != "-" else 0
+         except ValueError:
+             continue
+
+         changes.append(
+             FileChange(
+                 path=Path(filepath),
+                 additions=add_count,
+                 deletions=del_count,
+             )
+         )
+
+     return changes
+
+
+ def filter_code_files(
+     changes: list[FileChange],
+     languages: list[str],
+ ) -> list[FileChange]:
+     """Filter changes to only include code files.
+
+     Args:
+         changes: List of all file changes
+         languages: List of supported languages
+
+     Returns:
+         Filtered list of code file changes
+     """
+     extensions = {
+         "python": {".py"},
+         "javascript": {".js", ".jsx"},
+         "typescript": {".ts", ".tsx"},
+         "java": {".java"},
+         "go": {".go"},
+         "rust": {".rs"},
+     }
+
+     valid_extensions = set()
+     for lang in languages:
+         valid_extensions.update(extensions.get(lang, set()))
+
+     return [c for c in changes if c.path.suffix in valid_extensions]
+
+
+ def analyze_changes(
+     config: Config,
+     since: str = "HEAD~1",
+     until: str = "HEAD",
+     cwd: Path | None = None,
+ ) -> ChangeAnalysis:
+     """Analyze git changes and determine update strategy.
+
+     Args:
+         config: codeindex configuration
+         since: Starting commit reference
+         until: Ending commit reference
+         cwd: Working directory
+
+     Returns:
+         ChangeAnalysis with update recommendation
+     """
+     inc = config.incremental
+
+     # Get all changes
+     all_changes = get_changed_files(since, until, cwd)
+
+     # Filter to code files only
+     code_changes = filter_code_files(all_changes, config.languages)
+
+     if not code_changes:
+         return ChangeAnalysis(
+             level=UpdateLevel.SKIP,
+             message="No code files changed",
+         )
+
+     # Calculate totals
+     total_add = sum(c.additions for c in code_changes)
+     total_del = sum(c.deletions for c in code_changes)
+     total_lines = total_add + total_del
+
+     # Get affected directories
+     affected_dirs = {c.directory for c in code_changes}
+
+     # Determine update level based on thresholds
+     if total_lines < inc.skip_lines:
+         level = UpdateLevel.SKIP
+         message = f"Changes ({total_lines} lines) below skip threshold ({inc.skip_lines})"
+     elif total_lines < inc.current_only:
+         level = UpdateLevel.CURRENT
+         message = f"Small changes ({total_lines} lines), update current dirs only"
+     elif total_lines < inc.suggest_full:
+         level = UpdateLevel.AFFECTED
+         message = f"Medium changes ({total_lines} lines), update affected dirs"
+     else:
+         level = UpdateLevel.FULL
+         message = f"Large changes ({total_lines} lines), consider full update"
+
+     return ChangeAnalysis(
+         files=code_changes,
+         total_additions=total_add,
+         total_deletions=total_del,
+         affected_dirs=affected_dirs,
+         level=level,
+         message=message,
+     )
+
+
+ def get_dirs_to_update(
+     analysis: ChangeAnalysis,
+     config: Config,
+ ) -> list[Path]:
+     """Get list of directories that should be updated.
+
+     Args:
+         analysis: Change analysis result
+         config: codeindex configuration
+
+     Returns:
+         List of directory paths to update
+     """
+     if analysis.level == UpdateLevel.SKIP:
+         return []
+
+     # For CURRENT, AFFECTED, FULL - update affected dirs
+     dirs = list(analysis.affected_dirs)
+
+     # Filter to only include configured directories
+     include_patterns = config.include
+     filtered_dirs = []
+
+     for d in dirs:
+         d_str = str(d)
+         for pattern in include_patterns:
+             # Simple prefix matching (could be enhanced with glob)
+             pattern_clean = pattern.rstrip("/")
+             if d_str.startswith(pattern_clean) or d_str == pattern_clean:
+                 filtered_dirs.append(d)
+                 break
+
+     return sorted(filtered_dirs)
+
+
+ def should_update_project_index(analysis: ChangeAnalysis, config: Config) -> bool:
+     """Determine if PROJECT_INDEX.md should be updated.
+
+     Args:
+         analysis: Change analysis result
+         config: codeindex configuration
+
+     Returns:
+         True if PROJECT_INDEX.md should be updated
+     """
+     if not config.incremental.auto_project_index:
+         return False
+
+     # Update project index for large changes or multiple directories
+     return analysis.level == UpdateLevel.FULL or len(analysis.affected_dirs) > 2
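
To close, a minimal sketch of the incremental decision flow defined above. The threshold names (skip_lines, current_only, suggest_full) and the auto_project_index flag come straight from this module; the import paths and the Config() construction are again assumptions, not confirmed by the diff.

    # Hypothetical usage sketch -- import paths and Config() are assumptions.
    from codeindex.config import Config        # assumed import path
    from codeindex.incremental import (        # assumed module name
        UpdateLevel,
        analyze_changes,
        get_dirs_to_update,
        should_update_project_index,
    )

    config = Config()  # assumed; thresholds live under config.incremental

    # Classify the last commit's churn against the configured thresholds:
    # < skip_lines -> SKIP, < current_only -> CURRENT,
    # < suggest_full -> AFFECTED, otherwise FULL.
    analysis = analyze_changes(config, since="HEAD~1", until="HEAD")
    print(analysis.to_dict())

    if analysis.level is not UpdateLevel.SKIP:
        for d in get_dirs_to_update(analysis, config):
            print(f"would regenerate README_AI.md in {d}")

    if should_update_project_index(analysis, config):
        print("would also refresh PROJECT_INDEX.md")

Note that git numstat reports binary files with "-" counts; they contribute zero changed lines, so a commit touching only binaries resolves to SKIP.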