ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ """CLI commands for symbol indexing and dependency analysis.
2
+
3
+ This module provides commands for generating project-wide indices
4
+ and analyzing code dependencies and affected directories.
5
+ """
6
+
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+
11
+ import click
12
+ from rich.table import Table
13
+
14
+ from .cli_common import console
15
+ from .config import Config
16
+ from .incremental import (
17
+ UpdateLevel,
18
+ analyze_changes,
19
+ get_dirs_to_update,
20
+ should_update_project_index,
21
+ )
22
+ from .scanner import find_all_directories
23
+ from .semantic_extractor import DirectoryContext, SemanticExtractor
24
+ from .symbol_index import GlobalSymbolIndex
25
+
26
+
27
def extract_module_purpose(
    dir_path: Path,
    config: Config,
    output_file: str = "README_AI.md"
) -> str:
    """Extract a short module purpose/description for a directory.

    Resolution order:
      1. Semantic extraction via SemanticExtractor (if enabled in config).
      2. The first paragraph under a "## Purpose" (or "## 目的") heading in
         the directory's README file, truncated to ~80 characters.
      3. A generic "<name> module" fallback.

    Args:
        dir_path: Path to the directory
        config: Configuration object
        output_file: README filename to check

    Returns:
        Module purpose/description string (never empty).
    """
    # Check if semantic extraction is enabled
    if config.indexing.semantic.enabled:
        try:
            extractor = SemanticExtractor(
                use_ai=config.indexing.semantic.use_ai,
                ai_command=config.ai_command if config.indexing.semantic.use_ai else None,
            )

            # Collect directory entries in a single pass
            # (the original scanned the directory twice).
            files: list[str] = []
            subdirs: list[str] = []
            if dir_path.is_dir():
                for entry in dir_path.iterdir():
                    if entry.is_file():
                        files.append(entry.name)
                    elif entry.is_dir():
                        subdirs.append(entry.name)

            # Lightweight symbol guess: source filenames without extension.
            symbols = [
                f.rsplit('.', 1)[0]
                for f in files
                if f.endswith(('.py', '.php', '.java', '.ts', '.js'))
            ]

            context = DirectoryContext(
                path=str(dir_path),
                files=files,
                subdirs=subdirs,
                symbols=symbols,
                imports=[],
            )

            semantic = extractor.extract_directory_semantic(context)
            return semantic.description

        except Exception:
            # Best-effort: fall through to README extraction on any failure.
            pass

    # Try to extract from README_AI.md
    readme_path = dir_path / output_file
    if readme_path.exists():
        try:
            # FIX: explicit encoding — the platform default codec (e.g.
            # cp1252 on Windows) can fail on the "## 目的" heading.
            lines = readme_path.read_text(encoding="utf-8").split("\n")
            for i, line in enumerate(lines):
                if line.startswith("## Purpose") or line.startswith("## 目的"):
                    # First non-empty, non-heading line within the next few lines.
                    for j in range(i + 1, min(i + 5, len(lines))):
                        if lines[j].strip() and not lines[j].startswith("#"):
                            full_purpose = lines[j].strip()
                            if len(full_purpose) <= 80:
                                return full_purpose
                            # Smart truncate at a word boundary when one
                            # exists in the second half of the excerpt.
                            truncated = full_purpose[:80]
                            last_space = truncated.rfind(" ")
                            if last_space > 40:
                                return truncated[:last_space] + "..."
                            return truncated + "..."
                    break
        except Exception:
            # Unreadable README: ignore and use the generic fallback.
            pass

    # Fallback to generic description
    return f"{dir_path.name} module"
120
+
121
+
122
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
@click.option("--output", "-o", default="PROJECT_INDEX.md", help="Output filename")
def index(root: Path, output: str):
    """Generate PROJECT_INDEX.md - a lightweight project overview."""
    root = root.resolve()
    config = Config.load()

    console.print(f"[bold]Generating project index:[/bold] {root}")

    # Find all indexed directories (those with README_AI.md)
    dirs = find_all_directories(root, config)
    indexed_dirs = [d for d in dirs if (d / config.output_file).exists()]

    if not indexed_dirs:
        console.print("[yellow]No indexed directories found.[/yellow]")
        console.print("Run 'codeindex scan' first to generate README_AI.md files.")
        return

    # Project metadata defaults; refined from pyproject.toml when available.
    project_name = root.name
    description = ""
    entry_points = []

    pyproject = root / "pyproject.toml"
    if pyproject.exists():
        try:
            import tomllib  # Python 3.11+
        except ImportError:
            import tomli as tomllib  # backport for older interpreters

        try:
            with open(pyproject, "rb") as f:
                data = tomllib.load(f)
        except Exception:
            # FIX: a malformed pyproject.toml previously crashed the whole
            # command; fall back to directory-name metadata instead.
            data = {}
        project = data.get("project", {})
        project_name = project.get("name", project_name)
        description = project.get("description", "")
        scripts = project.get("scripts", {})
        entry_points = [f"- `{k}`: `{v}`" for k, v in scripts.items()]

    # Build module table rows.
    modules = []
    for d in sorted(indexed_dirs):
        rel_path = d.relative_to(root)
        # Purpose via semantic extraction with README fallback.
        purpose = extract_module_purpose(d, config, config.output_file)
        modules.append(f"| `{rel_path}/` | {purpose} |")

    timestamp = datetime.now().strftime("%Y-%m-%d")
    # chr(10) == "\n": backslashes are not allowed inside f-string
    # expressions before Python 3.12.
    content = f"""# Project Index: {project_name}

> Generated: {timestamp}
> {description}

## Modules

| Path | Purpose |
|------|---------|
{chr(10).join(modules)}

## Entry Points

{chr(10).join(entry_points) if entry_points else "_No CLI entry points defined_"}

---
*Generated by codeindex. See each directory's README_AI.md for details.*
"""

    output_path = root / output
    # FIX: write with explicit UTF-8 so non-ASCII purposes do not fail on
    # platforms whose default encoding is not UTF-8.
    output_path.write_text(content, encoding="utf-8")
    console.print(f"[green]✓ Created:[/green] {output_path}")
    console.print(f"[dim]Indexed {len(indexed_dirs)} modules[/dim]")
197
+
198
+
199
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
@click.option("--output", "-o", default="PROJECT_SYMBOLS.md", help="Output filename")
@click.option("--quiet", "-q", is_flag=True, help="Minimal output")
def symbols(root: Path, output: str, quiet: bool):
    """Generate PROJECT_SYMBOLS.md - a global symbol index for all classes."""
    root = root.resolve()
    config = Config.load()
    verbose = not quiet

    if verbose:
        console.print(f"[bold]Generating global symbol index:[/bold] {root}")
        console.print("[dim]→ Scanning all directories...[/dim]")

    # Walk every indexed directory and gather symbol statistics.
    index_builder = GlobalSymbolIndex(root, config)
    stats = index_builder.collect_symbols(quiet=quiet)
    sym_count = stats["symbols"]

    if verbose:
        console.print(f"[dim]→ Found {sym_count} symbols in {stats['files']} files[/dim]")

    # Nothing collected means the per-directory scan has not run yet.
    if sym_count == 0:
        console.print("[yellow]No symbols found. Run 'codeindex scan' first.[/yellow]")
        return

    if verbose:
        console.print("[dim]→ Generating index...[/dim]")

    output_path = index_builder.generate_index(output)

    console.print(f"[green]✓ Created:[/green] {output_path}")
    index_msg = f"Indexed {sym_count} symbols from {stats['directories']} directories"
    console.print(f"[dim]{index_msg}[/dim]")
230
+
231
+
232
@click.command()
@click.option("--since", default="HEAD~1", help="Starting commit reference")
@click.option("--until", default="HEAD", help="Ending commit reference")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def affected(since: str, until: str, as_json: bool):
    """Analyze git changes and show affected directories.

    Shows which directories need README_AI.md updates based on code changes.
    """
    config = Config.load()
    root = Path.cwd().resolve()

    if not as_json:
        console.print(f"[bold]Analyzing changes:[/bold] {since}..{until}")

    analysis = analyze_changes(config, since, until, root)

    # Machine-readable mode: emit JSON and nothing else.
    if as_json:
        print(json.dumps(analysis.to_dict(), indent=2))
        return

    # Nothing relevant changed — report and stop.
    if analysis.level == UpdateLevel.SKIP:
        console.print(f"[green]✓ {analysis.message}[/green]")
        return

    # Summary statistics table.
    table = Table(title="Change Analysis")
    table.add_column("Metric", style="bold")
    table.add_column("Value")
    for metric, value in (
        ("Files changed", str(len(analysis.files))),
        ("Lines added", f"+{analysis.total_additions}"),
        ("Lines deleted", f"-{analysis.total_deletions}"),
        ("Total changes", str(analysis.total_lines)),
        ("Update level", analysis.level.value.upper()),
    ):
        table.add_row(metric, value)
    console.print(table)

    # Per-directory status: ✓ README exists, ⚠ it does not.
    dirs_to_update = get_dirs_to_update(analysis, config)
    if dirs_to_update:
        console.print("\n[bold]Directories to update:[/bold]")
        for directory in dirs_to_update:
            rel = directory.relative_to(root) if directory.is_absolute() else directory
            if (root / rel / config.output_file).exists():
                status = "[green]✓[/green]"
            else:
                status = "[yellow]⚠[/yellow]"
            console.print(f" {status} {rel}/")

    # Recommendation from the analyzer.
    console.print(f"\n[dim]{analysis.message}[/dim]")

    if should_update_project_index(analysis, config):
        console.print("[yellow]→ Consider updating PROJECT_INDEX.md[/yellow]")

    # Suggested follow-up command.
    if dirs_to_update:
        console.print("\n[bold]Suggested command:[/bold]")
        if len(dirs_to_update) == 1:
            console.print(f" codeindex scan {dirs_to_update[0]}")
        else:
            console.print(" codeindex list-dirs | xargs -P 4 -I {} codeindex scan {}")
@@ -0,0 +1,238 @@
1
+ """CLI commands for technical debt analysis.
2
+
3
+ This module provides the tech-debt command for analyzing technical debt
4
+ in a directory, including file size issues, god classes, and symbol overload.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ import click
10
+
11
+ from .cli_common import console
12
+ from .config import Config
13
+ from .symbol_scorer import ScoringContext, SymbolImportanceScorer
14
+ from .tech_debt import TechDebtDetector, TechDebtReport, TechDebtReporter
15
+ from .tech_debt_formatters import ConsoleFormatter, JSONFormatter, MarkdownFormatter
16
+
17
+
18
+ def _find_source_files(
19
+ path: Path, recursive: bool, languages: list[str] | None = None
20
+ ) -> list[Path]:
21
+ """Find source files in the given directory based on language configuration.
22
+
23
+ Args:
24
+ path: Directory path to search
25
+ recursive: If True, search subdirectories recursively
26
+ languages: List of languages to include (optional, uses config if None)
27
+
28
+ Returns:
29
+ List of source file paths
30
+ """
31
+ # Load languages from config if not provided
32
+ if languages is None:
33
+ config = Config.load()
34
+ languages = config.languages
35
+
36
+ # Map languages to file extensions
37
+ extensions = {
38
+ 'python': '*.py',
39
+ 'php': '*.php',
40
+ 'javascript': '*.js',
41
+ 'typescript': '*.ts',
42
+ 'java': '*.java',
43
+ 'go': '*.go',
44
+ 'rust': '*.rs',
45
+ 'cpp': '*.cpp',
46
+ 'c': '*.c',
47
+ }
48
+
49
+ files = []
50
+ for lang in languages:
51
+ ext = extensions.get(lang)
52
+ if ext:
53
+ if recursive:
54
+ files.extend([f for f in path.rglob(ext) if f.is_file()])
55
+ else:
56
+ files.extend([f for f in path.glob(ext) if f.is_file()])
57
+
58
+ return files
59
+
60
+
61
def _analyze_files(
    files: list[Path],
    detector: TechDebtDetector,
    reporter: TechDebtReporter,
    show_progress: bool,
) -> None:
    """Analyze files and add results to reporter.

    Args:
        files: List of source files to analyze
        detector: Technical debt detector instance
        reporter: Reporter to collect results
        show_progress: Whether to show progress messages
    """
    from .parser import parse_file

    # Known extensions with a canonical language name; anything else falls
    # back to the bare extension (or 'unknown' when there is none).
    type_by_ext = {
        '.py': 'python',
        '.php': 'php',
        '.js': 'javascript',
        '.ts': 'typescript',
    }

    for file_path in files:
        try:
            parse_result = parse_file(file_path)

            # Unparseable files are skipped, optionally with a warning.
            if parse_result.error:
                if show_progress:
                    console.print(
                        f"[yellow]⚠ Skipping {file_path.name}: {parse_result.error}[/yellow]"
                    )
                continue

            ext = file_path.suffix.lower()
            file_type = type_by_ext.get(ext, ext[1:] if ext else 'unknown')

            # Importance scorer parameterized by this file's context.
            scorer = SymbolImportanceScorer(
                ScoringContext(
                    framework=None,
                    file_type=file_type,
                    total_symbols=len(parse_result.symbols),
                )
            )

            debt_analysis = detector.analyze_file(parse_result, scorer)

            # Symbol-overload findings are merged into the same analysis.
            symbol_issues, symbol_analysis = detector.analyze_symbol_overload(
                parse_result, scorer
            )
            debt_analysis.issues.extend(symbol_issues)

            reporter.add_file_result(
                file_path=file_path,
                debt_analysis=debt_analysis,
                symbol_analysis=symbol_analysis,
            )

        except Exception as e:
            # Per-file failures must not abort the whole scan.
            if show_progress:
                console.print(f"[red]✗ Error analyzing {file_path.name}: {e}[/red]")
            continue
132
+
133
+
134
def _format_and_output(
    report: TechDebtReport,
    format: str,
    output: Path | None,
    quiet: bool,
) -> None:
    """Format and output the technical debt report.

    Args:
        report: Technical debt report to format
        format: Output format (console, markdown, or json)
        output: Optional output file path
        quiet: Whether to suppress status messages
    """
    # Select formatter; any value other than console/markdown means json
    # (click's Choice option restricts the set upstream).
    if format == "console":
        formatter = ConsoleFormatter()
    elif format == "markdown":
        formatter = MarkdownFormatter()
    else:  # json
        formatter = JSONFormatter()

    formatted_output = formatter.format(report)

    # Write output
    if output:
        # FIX: explicit UTF-8 so reports containing non-ASCII symbol names
        # do not fail on platforms with a legacy default encoding.
        output.write_text(formatted_output, encoding="utf-8")
        if not quiet:
            console.print(f"[green]✓ Report written to {output}[/green]")
    else:
        # Print to stdout (kept clean for JSON piping)
        print(formatted_output)
166
+
167
+
168
@click.command()
@click.argument("path", type=click.Path(exists=True, file_okay=False, path_type=Path))
@click.option(
    "--format",
    type=click.Choice(["console", "markdown", "json"], case_sensitive=False),
    default="console",
    help="Output format",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Write output to file instead of stdout",
)
@click.option(
    "--recursive",
    "-r",
    is_flag=True,
    help="Recursively scan subdirectories",
)
@click.option(
    "--quiet",
    "-q",
    is_flag=True,
    help="Minimal output",
)
def tech_debt(path: Path, format: str, output: Path | None, recursive: bool, quiet: bool):
    """Analyze technical debt in a directory.

    Scans source files for technical debt issues including:
    - Super large files (>5000 lines)
    - Large files (>2000 lines)
    - God Classes (>50 methods)
    - Massive symbol count (>100 symbols)
    - High noise ratio (>50% low-quality symbols)

    Results can be output in console, markdown, or JSON format.
    """
    try:
        config = Config.load()
        detector = TechDebtDetector(config)
        reporter = TechDebtReporter()

        files_to_analyze = _find_source_files(path, recursive)

        if files_to_analyze:
            # JSON on stdout must stay machine-parseable, so progress
            # chatter is suppressed in that mode (and under --quiet).
            json_to_stdout = format == "json" and output is None
            show_progress = not quiet and not json_to_stdout
            if show_progress:
                console.print(f"[dim]Analyzing {len(files_to_analyze)} source files...[/dim]")
            _analyze_files(files_to_analyze, detector, reporter, show_progress)

        # An empty directory still produces a (vacuous) report.
        report = reporter.generate_report()
        _format_and_output(report, format, output, quiet)

    except Exception as e:
        console.print(f"[red]✗ Error: {e}[/red]")
        raise click.Abort()