ai-codeindex 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_codeindex-0.7.0.dist-info/METADATA +966 -0
- ai_codeindex-0.7.0.dist-info/RECORD +41 -0
- ai_codeindex-0.7.0.dist-info/WHEEL +4 -0
- ai_codeindex-0.7.0.dist-info/entry_points.txt +2 -0
- ai_codeindex-0.7.0.dist-info/licenses/LICENSE +21 -0
- codeindex/README_AI.md +767 -0
- codeindex/__init__.py +11 -0
- codeindex/adaptive_config.py +83 -0
- codeindex/adaptive_selector.py +171 -0
- codeindex/ai_helper.py +48 -0
- codeindex/cli.py +40 -0
- codeindex/cli_common.py +10 -0
- codeindex/cli_config.py +97 -0
- codeindex/cli_docs.py +66 -0
- codeindex/cli_hooks.py +765 -0
- codeindex/cli_scan.py +562 -0
- codeindex/cli_symbols.py +295 -0
- codeindex/cli_tech_debt.py +238 -0
- codeindex/config.py +479 -0
- codeindex/directory_tree.py +229 -0
- codeindex/docstring_processor.py +342 -0
- codeindex/errors.py +62 -0
- codeindex/extractors/__init__.py +9 -0
- codeindex/extractors/thinkphp.py +132 -0
- codeindex/file_classifier.py +148 -0
- codeindex/framework_detect.py +323 -0
- codeindex/hierarchical.py +428 -0
- codeindex/incremental.py +278 -0
- codeindex/invoker.py +260 -0
- codeindex/parallel.py +155 -0
- codeindex/parser.py +740 -0
- codeindex/route_extractor.py +98 -0
- codeindex/route_registry.py +77 -0
- codeindex/scanner.py +167 -0
- codeindex/semantic_extractor.py +408 -0
- codeindex/smart_writer.py +737 -0
- codeindex/symbol_index.py +199 -0
- codeindex/symbol_scorer.py +283 -0
- codeindex/tech_debt.py +619 -0
- codeindex/tech_debt_formatters.py +234 -0
- codeindex/writer.py +164 -0
|
@@ -0,0 +1,737 @@
|
|
|
1
|
+
"""Smart README writer with grouping, size limits, and hierarchical levels."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from fnmatch import fnmatch
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Literal, Optional
|
|
9
|
+
|
|
10
|
+
from .adaptive_selector import AdaptiveSymbolSelector
|
|
11
|
+
from .config import IndexingConfig
|
|
12
|
+
from .framework_detect import RouteInfo, detect_framework
|
|
13
|
+
from .parser import ParseResult, Symbol
|
|
14
|
+
from .semantic_extractor import SemanticExtractor
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class WriteResult:
    """Result of writing a README file."""
    # Destination path of the generated README.
    path: Path
    # True when the file was written without raising.
    success: bool
    # Failure reason from the caught exception; empty string on success.
    error: str = ""
    # Size of the written content in UTF-8 bytes (0 when writing failed).
    size_bytes: int = 0
    # True when the content was cut down to fit the configured size limit.
    truncated: bool = False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Level types
# Content depth of a generated README: "overview" (project root),
# "navigation" (module with children), or "detailed" (leaf directory).
LevelType = Literal["overview", "navigation", "detailed"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SmartWriter:
    """
    Smart README writer that generates appropriate content based on level.

    Levels:
    - overview: Project/root level, only module list with descriptions
    - navigation: Module level, grouped files with key classes
    - detailed: Leaf level, full symbol information
    """

    def __init__(self, config: IndexingConfig, docstring_processor=None):
        """
        Initialize SmartWriter.

        Args:
            config: Indexing configuration (can also accept full Config object)
            docstring_processor: Optional DocstringProcessor for AI-powered
                docstring extraction (Epic 9)
        """
        # Handle both IndexingConfig and full Config: a full Config object
        # carries the indexing settings on its `.indexing` attribute.
        if hasattr(config, 'indexing'):
            self.config = config.indexing
        else:
            self.config = config

        self.max_size = self.config.max_readme_size

        # Adaptive per-file symbol limits (scale with file size).
        self.adaptive_selector = AdaptiveSymbolSelector(
            self.config.symbols.adaptive_symbols
        )

        # Initialize semantic extractor if enabled.
        if self.config.semantic.enabled:
            # The AI command lives on the parent Config and is wired in
            # separately by the caller; until then the extractor runs in
            # heuristic mode. (The original expression
            # `None if not use_ai else None` always evaluated to None,
            # so pass None directly.)
            self.semantic_extractor = SemanticExtractor(
                use_ai=self.config.semantic.use_ai,
                ai_command=None,
            )
        else:
            self.semantic_extractor = None

        # Route extractor registry (Epic 6). Imported here rather than at
        # module level — NOTE(review): presumably to avoid a circular
        # import; confirm before hoisting.
        from .extractors.thinkphp import ThinkPHPRouteExtractor
        from .route_registry import RouteExtractorRegistry

        self.route_registry = RouteExtractorRegistry()
        self.route_registry.register(ThinkPHPRouteExtractor())

        # Optional AI docstring processor (Epic 9).
        self.docstring_processor = docstring_processor
|
|
85
|
+
|
|
86
|
+
def write_readme(
    self,
    dir_path: Path,
    parse_results: list[ParseResult],
    level: LevelType = "detailed",
    child_dirs: list[Path] | None = None,
    output_file: str = "README_AI.md",
) -> WriteResult:
    """
    Write README_AI.md with appropriate content based on level.

    Args:
        dir_path: Directory to write to
        parse_results: Parsed file results for this directory
        level: Content level (overview/navigation/detailed)
        child_dirs: Child directories with their own README_AI.md
        output_file: Output filename
    """
    target = dir_path / output_file
    children = child_dirs or []

    try:
        # Pick the generator matching the requested level; anything
        # unrecognized falls through to the detailed generator.
        if level == "overview":
            generator = self._generate_overview
        elif level == "navigation":
            generator = self._generate_navigation
        else:
            generator = self._generate_detailed
        content = generator(dir_path, parse_results, children)

        # Enforce the configured size cap, measured in UTF-8 bytes.
        encoded = content.encode('utf-8')
        was_truncated = False
        if len(encoded) > self.max_size:
            content, was_truncated = self._truncate_content(content, self.max_size)
            encoded = content.encode('utf-8')

        with open(target, "w", encoding="utf-8") as f:
            f.write(content)

        return WriteResult(
            path=target,
            success=True,
            size_bytes=len(encoded),
            truncated=was_truncated,
        )

    except Exception as e:
        # Failures are reported on the result object rather than raised.
        return WriteResult(path=target, success=False, error=str(e))
|
|
134
|
+
|
|
135
|
+
def _generate_overview(
    self,
    dir_path: Path,
    parse_results: list[ParseResult],
    child_dirs: list[Path],
) -> str:
    """Render the overview-level README used at the project root."""
    stamp = datetime.now().isoformat()
    out = [
        f"<!-- Generated by codeindex (overview) at {stamp} -->",
        "",
        f"# {dir_path.name}",
        "",
    ]

    # Mention the detected framework, if any.
    fw = detect_framework(dir_path)
    if fw != "unknown":
        out.append(f"**Framework**: {fw.title()}")
        out.append("")

    # High-level counts.
    out.append("## Overview")
    out.append("")
    out.append(f"- **Modules**: {len(child_dirs)}")
    out.append(f"- **Files**: {len(parse_results)}")
    out.append(f"- **Symbols**: {sum(len(r.symbols) for r in parse_results)}")
    out.append("")

    # ASCII tree of first-level modules.
    if child_dirs:
        out += ["## Module Structure", "", "```", f"{dir_path.name}/"]
        for sub in sorted(child_dirs):
            out.append(f"├── {sub.relative_to(dir_path)}/")
        out += ["```", ""]

    # One bullet per module, with a short description pulled from the
    # module's own README.
    out += ["## Modules", ""]
    for sub in sorted(child_dirs):
        desc = self._extract_module_description(sub)
        out.append(f"- **{sub.relative_to(dir_path)}** - {desc}")

    out.append("")
    return "\n".join(out)
|
|
205
|
+
|
|
206
|
+
def _generate_navigation(
    self,
    dir_path: Path,
    parse_results: list[ParseResult],
    child_dirs: list[Path],
) -> str:
    """Render the navigation-level README used for module directories."""
    stamp = datetime.now().isoformat()
    out = [
        f"<!-- Generated by codeindex (navigation) at {stamp} -->",
        "",
        f"# {dir_path.name}",
        "",
        "## Overview",
        "",
        f"- **Files**: {len(parse_results)}",
        f"- **Symbols**: {sum(len(r.symbols) for r in parse_results)}",
        f"- **Subdirectories**: {len(child_dirs)}",
        "",
    ]

    # Child modules, each with a one-line description from its README.
    if child_dirs:
        out += ["## Subdirectories", ""]
        for sub in sorted(child_dirs):
            out.append(f"- **{sub.name}/** - {self._extract_module_description(sub)}")
        out += ["", ""]

    # Files grouped by configured suffix patterns; each entry lists up to
    # three key symbols as a quick summary.
    if parse_results:
        grouped = self._group_files(parse_results)
        out += ["## Files", ""]

        for group, members in grouped.items():
            if group != "_ungrouped":
                blurb = self.config.grouping.patterns.get(group, "")
                out.append(f"### {group} ({len(members)} files)")
                if blurb:
                    out.append(f"_{blurb}_")
                out.append("")

            for res in members:
                if res.error:
                    continue  # Unparseable files are omitted at this level.
                picks = self._get_key_symbols(res.symbols)
                summary = ", ".join(
                    s.name.split("::")[-1].split(".")[-1] for s in picks[:3]
                )
                if summary:
                    out.append(f"- **{res.path.name}** - {summary}")
                else:
                    out.append(f"- {res.path.name}")

            out.append("")

    return "\n".join(out)
|
|
278
|
+
|
|
279
|
+
def _generate_detailed(
    self,
    dir_path: Path,
    parse_results: list[ParseResult],
    child_dirs: list[Path],
) -> str:
    """Generate detailed level README (leaf level).

    Emits, in order: header/stats, framework route table, subdirectory
    links, then per-file symbol listings grouped by suffix pattern, and
    finally a deduplicated dependency list.

    Args:
        dir_path: Directory being documented
        parse_results: Parsed file results for this directory
        child_dirs: Child directories with their own README_AI.md

    Returns:
        The full markdown document as a single newline-joined string.
    """
    timestamp = datetime.now().isoformat()
    lines = [
        f"<!-- Generated by codeindex (detailed) at {timestamp} -->",
        "",
        f"# {dir_path.name}",
        "",
    ]

    # Statistics
    total_files = len(parse_results)
    total_symbols = sum(len(r.symbols) for r in parse_results)

    lines.extend([
        "## Overview",
        "",
        f"- **Files**: {total_files}",
        f"- **Symbols**: {total_symbols}",
        "",
    ])

    # Framework route tables (Epic 6: using registry)
    # Try all registered extractors
    from .route_extractor import ExtractionContext

    for framework_name in self.route_registry.list_frameworks():
        extractor = self.route_registry.get(framework_name)
        if not extractor:
            continue

        # Create extraction context
        # Note: root_path is approximated from dir_path
        # In a real scenario, this would be passed from the caller
        context = ExtractionContext(
            root_path=dir_path,  # Temporary: use current dir as root
            current_dir=dir_path,
            parse_results=parse_results,
        )

        # Check if this extractor can handle this directory
        if extractor.can_extract(context):
            routes = extractor.extract_routes(context)
            if routes:
                route_lines = self._format_route_table(routes, framework_name)
                lines.extend(route_lines)
                break  # Only use first matching extractor

    # Subdirectories (brief, just references)
    if child_dirs:
        lines.extend([
            "## Subdirectories",
            "",
        ])
        for child in sorted(child_dirs):
            lines.append(f"- [{child.name}/]({child.name}/README_AI.md)")
        lines.extend(["", ""])

    # Process docstrings with AI if processor available (Epic 9)
    # NOTE: this mutates symbol.docstring in place on the parse results.
    if self.docstring_processor:
        for result in parse_results:
            if result.error or not result.symbols:
                continue

            # Get AI-enhanced docstrings for this file
            try:
                normalized = self.docstring_processor.process_file(
                    result.path, result.symbols
                )

                # Update symbol docstrings with AI-enhanced descriptions
                for symbol in result.symbols:
                    if symbol.name in normalized:
                        symbol.docstring = normalized[symbol.name]
            except Exception:
                # If AI processing fails, continue with raw docstrings
                # (backward compatible fallback)
                pass

    # Detailed file listing with symbols
    if parse_results:
        grouped = self._group_files(parse_results)

        for group_name, group_results in grouped.items():
            if group_name != "_ungrouped":
                group_desc = self.config.grouping.patterns.get(group_name, "")
                lines.append(f"## {group_name}")
                if group_desc:
                    lines.append(f"_{group_desc}_")
                lines.append("")
            else:
                lines.append("## Files")
                lines.append("")

            for result in group_results:
                # Files that failed to parse still get a heading plus an
                # error note, then are skipped.
                if result.error:
                    lines.append(f"### {result.path.name}")
                    lines.append(f"_Parse error: {result.error}_")
                    lines.append("")
                    continue

                lines.append(f"### {result.path.name}")

                # Show namespace for PHP files
                if result.namespace:
                    lines.append(f"**Namespace:** `{result.namespace}`")

                if result.module_docstring:
                    lines.append(f"_{result.module_docstring[:150]}_")
                lines.append("")

                # Filter and limit symbols
                symbols = self._filter_symbols(result.symbols)
                total_filtered_symbols = len(symbols)  # Save count after filtering

                # Calculate symbol limit: use adaptive if enabled, otherwise use max_per_file
                if self.config.symbols.adaptive_symbols.enabled:
                    limit = self.adaptive_selector.calculate_limit(
                        result.file_lines, len(symbols)
                    )
                else:
                    limit = self.config.symbols.max_per_file

                symbols = symbols[:limit]

                # Group by kind
                classes = [s for s in symbols if s.kind == "class"]
                methods = [s for s in symbols if s.kind == "method"]
                functions = [s for s in symbols if s.kind == "function"]
                properties = [s for s in symbols if s.kind == "property"]

                if classes:
                    for cls in classes:
                        lines.append(f"**class** `{cls.signature}`")
                        if cls.docstring:
                            lines.append(f"> {cls.docstring[:100]}")
                        lines.append("")

                if methods:
                    lines.append("**Methods:**")
                    for m in methods:
                        lines.append(f"- `{m.signature}`")
                    lines.append("")

                if functions:
                    lines.append("**Functions:**")
                    for f in functions:
                        lines.append(f"- `{f.signature}`")
                    lines.append("")

                if properties:
                    lines.append("**Properties:**")
                    for p in properties:
                        lines.append(f"- `{p.signature}`")
                    lines.append("")

                # Show truncation notice
                shown_symbols = len(symbols)
                if shown_symbols < total_filtered_symbols:
                    lines.append(
                        f"_... and {total_filtered_symbols - shown_symbols} more symbols_"
                    )
                    lines.append("")

    # Dependencies section
    all_imports = []
    for result in parse_results:
        all_imports.extend(result.imports)

    if all_imports:
        lines.extend([
            "## Dependencies",
            "",
        ])
        # Deduplicate and sort
        modules = sorted(set(imp.module for imp in all_imports))
        for module in modules[:20]:  # Limit to 20
            lines.append(f"- {module}")
        if len(modules) > 20:
            lines.append(f"_... and {len(modules) - 20} more_")
        lines.append("")

    return "\n".join(lines)
|
|
467
|
+
|
|
468
|
+
def _format_route_table(
    self, routes: list[RouteInfo], framework: str = "thinkphp"
) -> list[str]:
    """
    Format route information as Markdown table with line numbers.

    Args:
        routes: List of RouteInfo objects
        framework: Framework name for title (e.g., "thinkphp", "laravel")

    Returns:
        List of markdown lines for the route table

    Epic 6, P1: Line number support
    """
    if not routes:
        return []

    # Known frameworks get their canonical capitalization; anything else
    # is simply title-cased.
    display_names = {
        "thinkphp": "ThinkPHP",
        "laravel": "Laravel",
        "django": "Django",
        "fastapi": "FastAPI",
    }
    title = display_names.get(framework.lower(), framework.title())

    table = [
        f"## Routes ({title})",
        "",
        "| URL | Controller | Action | Location | Description |",
        "|-----|------------|--------|----------|-------------|",
    ]

    # Cap the table at 30 rows.
    for entry in routes[:30]:
        # route.location already carries the file:line format.
        where = f"`{entry.location}`" if entry.location else ""
        # Description is pre-truncated to 60 chars by the extractor.
        about = entry.description if entry.description else ""
        table.append(
            f"| `{entry.url}` | {entry.controller} | "
            f"{entry.action} | {where} | {about} |"
        )

    # Indicate how many routes were omitted, if any.
    hidden = len(routes) - 30
    if hidden > 0:
        table.append(f"| ... | _{hidden} more routes_ | | | |")

    table.extend(["", ""])
    return table
|
|
521
|
+
|
|
522
|
+
def _group_files(self, results: list[ParseResult]) -> dict[str, list[ParseResult]]:
    """Bucket parse results by configured filename-suffix patterns."""
    if not self.config.grouping.enabled:
        return {"_ungrouped": results}

    patterns = list(self.config.grouping.patterns.keys())
    buckets: dict[str, list[ParseResult]] = {}
    leftovers = []

    for item in results:
        stem = item.path.stem  # Filename without its extension.
        # First matching suffix wins; config order defines priority.
        match = next((p for p in patterns if stem.endswith(p)), None)
        if match is None:
            leftovers.append(item)
        else:
            buckets.setdefault(match, []).append(item)

    # Emit groups in config-declared order, with unmatched files last.
    arranged = {p: buckets[p] for p in patterns if p in buckets}
    if leftovers:
        arranged["_ungrouped"] = leftovers
    return arranged
|
|
553
|
+
|
|
554
|
+
def _filter_symbols(self, symbols: list[Symbol]) -> list[Symbol]:
    """Drop symbols that match exclusion patterns or fail visibility rules."""
    exclude_patterns = self.config.symbols.exclude_patterns
    wanted_visibility = self.config.symbols.include_visibility
    kept: list[Symbol] = []

    for sym in symbols:
        # Strip namespace/class qualifiers before pattern matching.
        short_name = sym.name.split("::")[-1].split(".")[-1]
        if any(fnmatch(short_name, pat) for pat in exclude_patterns):
            continue

        # The visibility check only applies when the signature actually
        # carries a visibility keyword; plain symbols pass through.
        signature = sym.signature.lower()
        if wanted_visibility:
            markers = ("public", "private", "protected")
            if any(marker in signature for marker in markers):
                if not any(v in signature for v in wanted_visibility):
                    continue

        kept.append(sym)

    return kept
|
|
583
|
+
|
|
584
|
+
def _get_key_symbols(self, symbols: list[Symbol]) -> list[Symbol]:
    """Pick a file's headline symbols: classes first, then public callables."""
    classes = [s for s in symbols if s.kind == "class"]
    # Free functions always count; methods only when marked public.
    callables = [
        s
        for s in symbols
        if s.kind in ("function", "method")
        and (s.kind == "function" or "public" in s.signature.lower())
    ]
    # Classes lead, callables follow; cap the result at five entries.
    return (classes + callables)[:5]
|
|
601
|
+
|
|
602
|
+
def _extract_module_description(self, dir_path: Path, output_file: str = "README_AI.md") -> str:
|
|
603
|
+
"""Extract brief description from a child module's README."""
|
|
604
|
+
readme_path = dir_path / output_file
|
|
605
|
+
if not readme_path.exists():
|
|
606
|
+
return "Module directory"
|
|
607
|
+
|
|
608
|
+
try:
|
|
609
|
+
content = readme_path.read_text(encoding="utf-8")
|
|
610
|
+
lines = content.split("\n")
|
|
611
|
+
|
|
612
|
+
# Look for first non-empty, non-header line
|
|
613
|
+
for line in lines[2:15]: # Skip header, check first 15 lines
|
|
614
|
+
line = line.strip()
|
|
615
|
+
if line and not line.startswith("#") and not line.startswith("<!--"):
|
|
616
|
+
if line.startswith("-"):
|
|
617
|
+
continue # Skip list items
|
|
618
|
+
return line[:80]
|
|
619
|
+
|
|
620
|
+
return "Module directory"
|
|
621
|
+
except Exception:
|
|
622
|
+
return "Module directory"
|
|
623
|
+
|
|
624
|
+
def _extract_module_description_semantic(
    self,
    dir_path: Path,
    parse_result: Optional[ParseResult] = None
) -> str:
    """
    Extract module description using semantic extraction.

    Args:
        dir_path: Path to the directory
        parse_result: Optional ParseResult with symbols/imports

    Returns:
        Business semantic description
    """
    if not self.semantic_extractor:
        # Semantic extraction disabled: fall back to the README scan.
        return self._extract_module_description(dir_path)

    # Build DirectoryContext from parse_result.
    from codeindex.semantic_extractor import DirectoryContext

    # Collect file and subdirectory names in a single directory scan
    # (the original listed the directory twice with iterdir()).
    files: list[str] = []
    subdirs: list[str] = []
    if dir_path.is_dir():
        for entry in dir_path.iterdir():
            if entry.is_file():
                files.append(entry.name)
            elif entry.is_dir():
                subdirs.append(entry.name)

    # Symbol and import names come from the optional parse result.
    symbols = []
    imports = []
    if parse_result:
        symbols = [s.name for s in parse_result.symbols]
        imports = [imp.module for imp in parse_result.imports]

    context = DirectoryContext(
        path=str(dir_path),
        files=files,
        subdirs=subdirs,
        symbols=symbols,
        imports=imports
    )

    try:
        semantic = self.semantic_extractor.extract_directory_semantic(context)
        return semantic.description
    except Exception:
        # On extractor failure, optionally fall back to the heuristic scan.
        if self.config.semantic.fallback_to_heuristic:
            return self._extract_module_description(dir_path)
        return "Module directory"
|
|
681
|
+
|
|
682
|
+
def _truncate_content(self, content: str, max_size: int) -> tuple[str, bool]:
|
|
683
|
+
"""Truncate content to fit within size limit."""
|
|
684
|
+
content_bytes = content.encode('utf-8')
|
|
685
|
+
if len(content_bytes) <= max_size:
|
|
686
|
+
return content, False
|
|
687
|
+
|
|
688
|
+
# Find a good truncation point
|
|
689
|
+
truncated = content_bytes[:max_size - 200].decode('utf-8', errors='ignore')
|
|
690
|
+
|
|
691
|
+
# Try to truncate at a section boundary
|
|
692
|
+
last_section = truncated.rfind("\n## ")
|
|
693
|
+
if last_section > len(truncated) // 2:
|
|
694
|
+
truncated = truncated[:last_section]
|
|
695
|
+
|
|
696
|
+
# Add truncation notice
|
|
697
|
+
truncated += (
|
|
698
|
+
"\n\n---\n"
|
|
699
|
+
"_Content truncated due to size limit. "
|
|
700
|
+
"See individual module README files for details._\n"
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
return truncated, True
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def determine_level(
    dir_path: Path,
    root_path: Path,
    has_children: bool,
    config: IndexingConfig,
) -> LevelType:
    """
    Determine the appropriate level for a directory.

    Args:
        dir_path: The directory being processed
        root_path: The project root
        has_children: Whether this directory has subdirectories with README_AI.md
        config: Indexing configuration
    """
    # Depth is the number of path components below the project root; a
    # directory outside the root tree counts as depth 0.
    try:
        depth = len(dir_path.relative_to(root_path).parts)
    except ValueError:
        depth = 0

    # Root gets the overview-style level from config.
    if depth == 0 or dir_path == root_path:
        return config.root_level
    # Directories with indexed children are modules; everything else is a leaf.
    return config.module_level if has_children else config.leaf_level
|