PyPI - codebase-digest-ai - Versions diffs - 0.1.1__py3-none-any.whl - Mend

codebase-digest-ai 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

codebase_digest/__init__.py +8 -0
codebase_digest/analyzer/__init__.py +7 -0
codebase_digest/analyzer/codebase_analyzer.py +183 -0
codebase_digest/analyzer/flow_analyzer.py +164 -0
codebase_digest/analyzer/metrics_analyzer.py +130 -0
codebase_digest/cli/__init__.py +1 -0
codebase_digest/cli/main.py +284 -0
codebase_digest/exporters/__init__.py +9 -0
codebase_digest/exporters/graph_exporter.py +1038 -0
codebase_digest/exporters/html_exporter.py +1052 -0
codebase_digest/exporters/json_exporter.py +105 -0
codebase_digest/exporters/markdown_exporter.py +273 -0
codebase_digest/exporters/readme_exporter.py +306 -0
codebase_digest/models.py +81 -0
codebase_digest/parser/__init__.py +7 -0
codebase_digest/parser/base.py +41 -0
codebase_digest/parser/javascript_parser.py +36 -0
codebase_digest/parser/python_parser.py +270 -0
codebase_digest_ai-0.1.1.dist-info/METADATA +233 -0
codebase_digest_ai-0.1.1.dist-info/RECORD +24 -0
codebase_digest_ai-0.1.1.dist-info/WHEEL +5 -0
codebase_digest_ai-0.1.1.dist-info/entry_points.txt +2 -0
codebase_digest_ai-0.1.1.dist-info/licenses/LICENSE +21 -0
codebase_digest_ai-0.1.1.dist-info/top_level.txt +1 -0

codebase_digest/models.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Core data models for codebase analysis."""
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Set
+from pathlib import Path
+@dataclass
+class Symbol:
+    """Represents a code symbol (function, class, method, etc.).
+    ``name``, ``type``, ``file_path`` and ``line_number`` are required; every
+    other field defaults to ``None`` or to a fresh empty list.
+    """
+    # Identifier of the symbol; the Python parser stores methods as
+    # 'ClassName.method_name'.
+    name: str
+    type: str  # 'function', 'class', 'method', 'variable'
+    # File the symbol was parsed from.
+    file_path: Path
+    # Line of the definition as reported by the parser (ast ``lineno``).
+    line_number: int
+    docstring: Optional[str] = None
+    # Parameter names, in declaration order.
+    parameters: List[str] = field(default_factory=list)
+    return_type: Optional[str] = None
+    # Names of the decorators applied to the definition.
+    decorators: List[str] = field(default_factory=list)
+@dataclass
+class Import:
+    """Represents an import statement.
+    The Python parser creates one record per alias of an ``import a, b``
+    statement and one record per ``from m import ...`` statement.
+    """
+    # Module named in the statement.
+    module: str
+    # Names brought in by the statement.
+    names: List[str]
+    # ``as`` alias; only filled in for plain ``import x as y`` statements.
+    alias: Optional[str] = None
+    # File containing the import.
+    file_path: Optional[Path] = None
+    line_number: Optional[int] = None
+@dataclass
+class CallRelation:
+    """Represents a function/method call relationship.
+    Links the symbol that performs a call to the textual name of what it calls.
+    """
+    # Symbol whose body contains the call.
+    caller_symbol: Symbol
+    # Name as written at the call site; dotted for attribute calls
+    # (e.g. 'obj.method').
+    callee_name: str
+    # Line of the call expression.
+    line_number: Optional[int] = None
+    callee_file: Optional[Path] = None  # For cross-file calls
+@dataclass
+class DomainEntity:
+    """Represents a domain entity (business object).
+    Only ``name``, ``type`` and ``file_path`` are required; the list fields
+    each default to a fresh empty list.
+    """
+    name: str
+    type: str  # 'class', 'dataclass', 'pydantic_model', etc.
+    file_path: Path
+    # Names assigned at class level (annotated or plain assignments).
+    fields: List[str] = field(default_factory=list)
+    # Names of the functions defined directly in the class body.
+    methods: List[str] = field(default_factory=list)
+    # NOTE(review): the three lists below are not filled in by the parsers in
+    # this package's parser module; presumably an analyzer populates them -
+    # confirm against the analyzer code.
+    creation_points: List[str] = field(default_factory=list)
+    modification_points: List[str] = field(default_factory=list)
+    validation_points: List[str] = field(default_factory=list)
+@dataclass
+class ExecutionFlow:
+    """Represents an execution flow through the system.
+    ``name`` and ``entry_point`` are required; ``steps`` and
+    ``files_involved`` default to empty containers.
+    """
+    name: str
+    # Identifier of where the flow starts.
+    entry_point: str
+    # Ordered step descriptions/names making up the flow.
+    steps: List[str] = field(default_factory=list)
+    # Distinct files touched by the flow (a set, so duplicates collapse).
+    files_involved: Set[Path] = field(default_factory=set)
+    description: Optional[str] = None
+@dataclass
+class CodebaseAnalysis:
+    """Complete analysis results for a codebase.
+    Aggregates the per-file model objects plus summary metrics. Only
+    ``root_path`` is required; every collection starts out empty and every
+    numeric metric starts at zero.
+    """
+    # Root directory of the analysed codebase.
+    root_path: Path
+    symbols: List[Symbol] = field(default_factory=list)
+    imports: List[Import] = field(default_factory=list)
+    call_relations: List[CallRelation] = field(default_factory=list)
+    domain_entities: List[DomainEntity] = field(default_factory=list)
+    execution_flows: List[ExecutionFlow] = field(default_factory=list)
+    # Files identified as entry points.
+    entry_points: List[Path] = field(default_factory=list)
+    # Metrics
+    total_files: int = 0
+    total_lines: int = 0
+    languages: Set[str] = field(default_factory=set)
+    complexity_score: float = 0.0
+    # Directory structure
+    # Untyped mapping; its key/value layout is defined by whoever fills it in.
+    directory_tree: Dict = field(default_factory=dict)

codebase_digest/parser/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Parser modules for different programming languages."""
+from .base import BaseParser
+from .python_parser import PythonParser
+from .javascript_parser import JavaScriptParser
+__all__ = ["BaseParser", "PythonParser", "JavaScriptParser"]

codebase_digest/parser/base.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Base parser interface."""
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import List
+from ..models import Symbol, Import, CallRelation, DomainEntity
+class BaseParser(ABC):
+    """Abstract base class for language-specific parsers.
+    The constructor eagerly reads the whole file into ``self.content``;
+    subclasses implement the ``parse_*`` hooks on top of that text.
+    """
+    def __init__(self, file_path: Path):
+        """Remember *file_path* and load its text into ``self.content``.
+        Raises:
+            OSError: if the file cannot be read.
+            UnicodeDecodeError: if the file is not valid UTF-8.
+        """
+        self.file_path = file_path
+        # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also strips
+        # a leading byte-order mark. A BOM left in the text makes ast.parse()
+        # reject the source, and the Python parser would then silently skip
+        # the file as if it had a syntax error.
+        self.content = file_path.read_text(encoding='utf-8-sig')
+    @abstractmethod
+    def parse_symbols(self) -> List[Symbol]:
+        """Extract symbols (functions, classes, methods) from the file."""
+        pass
+    @abstractmethod
+    def parse_imports(self) -> List[Import]:
+        """Extract import statements from the file."""
+        pass
+    @abstractmethod
+    def parse_calls(self) -> List[CallRelation]:
+        """Extract function/method calls from the file."""
+        pass
+    @abstractmethod
+    def parse_domain_entities(self) -> List[DomainEntity]:
+        """Extract domain entities (business objects) from the file."""
+        pass
+    @property
+    @abstractmethod
+    def supported_extensions(self) -> List[str]:
+        """Return list of file extensions this parser supports."""
+        pass

codebase_digest/parser/javascript_parser.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""JavaScript/TypeScript parser using tree-sitter."""
+from pathlib import Path
+from typing import List
+from .base import BaseParser
+from ..models import Symbol, Import, CallRelation, DomainEntity
+class JavaScriptParser(BaseParser):
+    """Placeholder parser for JavaScript/TypeScript files.
+    Tree-sitter based parsing is not implemented yet: every ``parse_*``
+    method currently returns an empty list.
+    """
+    @property
+    def supported_extensions(self) -> List[str]:
+        """File suffixes this parser is meant to handle."""
+        extensions = ['.js', '.jsx', '.ts', '.tsx']
+        return extensions
+    def parse_symbols(self) -> List[Symbol]:
+        """Return the file's symbols (none until parsing is implemented)."""
+        # TODO: Implement tree-sitter parsing
+        # Placeholder result for now
+        symbols: List[Symbol] = []
+        return symbols
+    def parse_imports(self) -> List[Import]:
+        """Return the file's imports (none until parsing is implemented)."""
+        # TODO: Implement tree-sitter parsing
+        imports: List[Import] = []
+        return imports
+    def parse_calls(self) -> List[CallRelation]:
+        """Return the file's calls (none until parsing is implemented)."""
+        # TODO: Implement tree-sitter parsing
+        calls: List[CallRelation] = []
+        return calls
+    def parse_domain_entities(self) -> List[DomainEntity]:
+        """Return the file's entities (none until parsing is implemented)."""
+        # TODO: Implement tree-sitter parsing
+        entities: List[DomainEntity] = []
+        return entities

codebase_digest/parser/python_parser.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""Python AST-based parser."""
+import ast
+from pathlib import Path
+from typing import List, Optional
+from .base import BaseParser
+from ..models import Symbol, Import, CallRelation, DomainEntity
+class PythonParser(BaseParser):
+    """Parser for Python files using AST."""
+    @property
+    def supported_extensions(self) -> List[str]:
+        """File suffixes handled by this parser (Python sources only)."""
+        extensions = ['.py']
+        return extensions
+    def parse_symbols(self) -> List[Symbol]:
+        """Extract function, class and method symbols using the AST.
+        Every class yields one 'class' symbol followed by one 'method' symbol
+        per function defined directly in its body. Every other function
+        definition (module-level or nested) yields one 'function' symbol.
+        Both ``def`` and ``async def`` are recognised.
+        Returns:
+            The symbols in AST walk order, or an empty list when the file
+            cannot be parsed.
+        """
+        try:
+            tree = ast.parse(self.content)
+        except (SyntaxError, ValueError):
+            # Skip files that cannot be parsed (ValueError: NUL bytes in source)
+            return []
+        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
+        symbols = []
+        # ast.walk yields a class before the functions in its body, so methods
+        # can be remembered here and skipped when the walk reaches them again;
+        # otherwise each method is also reported as a bare-name 'function'.
+        method_ids = set()
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                symbols.append(self._create_class_symbol(node))
+                # Add methods
+                for item in node.body:
+                    if isinstance(item, function_types):
+                        method_ids.add(id(item))
+                        symbols.append(self._create_method_symbol(item, node.name))
+            elif isinstance(node, function_types) and id(node) not in method_ids:
+                symbols.append(self._create_function_symbol(node))
+        return symbols
+    def parse_imports(self) -> List[Import]:
+        """Extract import statements.
+        ``import a, b as c`` produces one record per alias (with ``alias`` set
+        when ``as`` is used). ``from m import x, y`` produces one record whose
+        ``names`` lists every imported name.
+        Returns:
+            The imports in AST walk order, or an empty list when the file
+            cannot be parsed.
+        """
+        try:
+            tree = ast.parse(self.content)
+        except (SyntaxError, ValueError):
+            # Skip files that cannot be parsed (ValueError: NUL bytes in source)
+            return []
+        imports = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                for alias in node.names:
+                    imports.append(Import(
+                        module=alias.name,
+                        names=[alias.name],
+                        alias=alias.asname,
+                        file_path=self.file_path,
+                        line_number=node.lineno
+                    ))
+            elif isinstance(node, ast.ImportFrom):
+                # ``from . import x`` has no module name; record it under the
+                # relative dots ('.', '..') instead of dropping the import.
+                # Statements that do name a module keep the same value as before.
+                module = node.module or '.' * node.level
+                imports.append(Import(
+                    module=module,
+                    names=[alias.name for alias in node.names],
+                    file_path=self.file_path,
+                    line_number=node.lineno
+                ))
+        return imports
+    def parse_calls(self) -> List[CallRelation]:
+        """Extract function calls with symbol context.
+        Each function or method definition is paired with its own Symbol, and
+        every call expression found anywhere inside that definition becomes a
+        CallRelation with that symbol as caller. Calls rejected by
+        ``_extract_call_name`` (builtins, unresolvable targets) are skipped.
+        Calls inside a nested function are reported for the nested function
+        and for each enclosing definition.
+        Returns:
+            The call relations, or an empty list when the file cannot be
+            parsed.
+        """
+        try:
+            tree = ast.parse(self.content)
+        except (SyntaxError, ValueError):
+            # Skip files that cannot be parsed (ValueError: NUL bytes in source)
+            return []
+        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
+        # First pass: pair every definition node with its symbol. Pairing by
+        # node rather than by bare name keeps same-named definitions (e.g.
+        # several classes' __init__) from being attributed to one another.
+        callers = []
+        # ast.walk yields a class before the functions in its body, so methods
+        # are registered here and skipped when the walk reaches them again;
+        # otherwise their calls would be reported twice.
+        method_ids = set()
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                for item in node.body:
+                    if isinstance(item, function_types):
+                        method_ids.add(id(item))
+                        callers.append((item, self._create_method_symbol(item, node.name)))
+            elif isinstance(node, function_types) and id(node) not in method_ids:
+                callers.append((node, self._create_function_symbol(node)))
+        # Second pass: extract calls with symbol context
+        calls = []
+        for definition, caller_symbol in callers:
+            for child in ast.walk(definition):
+                if not isinstance(child, ast.Call):
+                    continue
+                callee_name = self._extract_call_name(child)
+                if callee_name:
+                    calls.append(CallRelation(
+                        caller_symbol=caller_symbol,
+                        callee_name=callee_name,
+                        line_number=child.lineno
+                    ))
+        return calls
+    def _extract_call_name(self, call_node: ast.Call) -> Optional[str]:
+        """Extract the called function/method name from a Call node.
+        Args:
+            call_node: the call expression to inspect.
+        Returns:
+            The bare name for ``f()``, the dotted name for ``a.b.f()``, or
+            ``None`` when the call targets a filtered builtin, a common
+            container/clock method, or an expression that has no dotted name
+            (e.g. ``f()()`` or ``get().run()``).
+        """
+        func = call_node.func
+        if isinstance(func, ast.Name):
+            # Filter out builtin functions
+            builtin_names = {
+                'print', 'len', 'str', 'int', 'float', 'bool', 'list', 'dict',
+                'set', 'tuple', 'range', 'enumerate', 'zip', 'map', 'filter',
+                'sorted', 'reversed', 'sum', 'min', 'max', 'abs', 'round',
+                'type', 'isinstance', 'hasattr', 'getattr', 'setattr', 'delattr',
+            }
+            return None if func.id in builtin_names else func.id
+        if isinstance(func, ast.Attribute):
+            attr_name = self._get_attribute_name_from_node(func)
+            if not attr_name:
+                return None
+            lowered = attr_name.lower()
+            # Filter out common builtin methods. Whole dotted segments are
+            # compared: a substring test would also discard unrelated calls
+            # such as ``self.populate()`` ('pop') or ``repo.remove_user()``.
+            container_methods = {'append', 'extend', 'pop', 'remove', 'insert', 'sort', 'reverse'}
+            if lowered.rsplit('.', 1)[-1] in container_methods:
+                return None
+            for clock_call in ('datetime.now', 'time.time'):
+                if lowered == clock_call or lowered.endswith('.' + clock_call):
+                    return None
+            return attr_name
+        return None
+    def parse_domain_entities(self) -> List[DomainEntity]:
+        """Collect the classes that look like business objects.
+        Every class in the file (nested ones included) accepted by
+        ``_is_domain_entity`` becomes a DomainEntity of type 'class' carrying
+        its class-level field names and method names.
+        Returns:
+            The entities in AST walk order, or an empty list when the file
+            has a syntax error.
+        """
+        try:
+            module = ast.parse(self.content)
+        except SyntaxError:
+            return []
+        # Look for common domain entity patterns
+        return [
+            DomainEntity(
+                name=class_node.name,
+                type='class',
+                file_path=self.file_path,
+                fields=self._extract_class_fields(class_node),
+                methods=self._extract_class_methods(class_node)
+            )
+            for class_node in ast.walk(module)
+            if isinstance(class_node, ast.ClassDef) and self._is_domain_entity(class_node)
+        ]
+    def _create_function_symbol(self, node: ast.FunctionDef) -> Symbol:
+        """Create a Symbol from a function AST node.
+        Args:
+            node: a ``def`` (or ``async def``) node.
+        Returns:
+            A 'function' Symbol carrying the definition line, docstring,
+            decorator names, the full parameter list and, when the function
+            is annotated, the source text of its return annotation.
+        """
+        signature = node.args
+        # Positional-only and keyword-only parameters, *args and **kwargs are
+        # all part of the signature, not just the plain positional ones.
+        parameters = [arg.arg for arg in (*signature.posonlyargs, *signature.args)]
+        if signature.vararg is not None:
+            parameters.append(f"*{signature.vararg.arg}")
+        parameters.extend(arg.arg for arg in signature.kwonlyargs)
+        if signature.kwarg is not None:
+            parameters.append(f"**{signature.kwarg.arg}")
+        return Symbol(
+            name=node.name,
+            type='function',
+            file_path=self.file_path,
+            line_number=node.lineno,
+            docstring=ast.get_docstring(node),
+            parameters=parameters,
+            return_type=ast.unparse(node.returns) if node.returns is not None else None,
+            decorators=[self._get_decorator_name(dec) for dec in node.decorator_list]
+        )
+    def _create_class_symbol(self, node: ast.ClassDef) -> Symbol:
+        """Build the 'class' Symbol describing *node*.
+        The symbol records the class name, its definition line, its docstring
+        and the names of its decorators; parameters are left empty.
+        """
+        decorator_names = []
+        for dec in node.decorator_list:
+            decorator_names.append(self._get_decorator_name(dec))
+        class_doc = ast.get_docstring(node)
+        return Symbol(
+            name=node.name,
+            type='class',
+            file_path=self.file_path,
+            line_number=node.lineno,
+            docstring=class_doc,
+            decorators=decorator_names
+        )
+    def _create_method_symbol(self, node: ast.FunctionDef, class_name: str) -> Symbol:
+        """Create a Symbol from a method AST node.
+        Args:
+            node: a ``def`` (or ``async def``) node found in a class body.
+            class_name: name of the enclosing class.
+        Returns:
+            A 'method' Symbol named ``'<class_name>.<method>'`` carrying the
+            definition line, docstring, decorator names, the full parameter
+            list (including ``self``/``cls`` as written) and, when the method
+            is annotated, the source text of its return annotation.
+        """
+        signature = node.args
+        # Positional-only and keyword-only parameters, *args and **kwargs are
+        # all part of the signature, not just the plain positional ones.
+        parameters = [arg.arg for arg in (*signature.posonlyargs, *signature.args)]
+        if signature.vararg is not None:
+            parameters.append(f"*{signature.vararg.arg}")
+        parameters.extend(arg.arg for arg in signature.kwonlyargs)
+        if signature.kwarg is not None:
+            parameters.append(f"**{signature.kwarg.arg}")
+        return Symbol(
+            name=f"{class_name}.{node.name}",
+            type='method',
+            file_path=self.file_path,
+            line_number=node.lineno,
+            docstring=ast.get_docstring(node),
+            parameters=parameters,
+            return_type=ast.unparse(node.returns) if node.returns is not None else None,
+            decorators=[self._get_decorator_name(dec) for dec in node.decorator_list]
+        )
+    def _get_decorator_name(self, decorator) -> str:
+        """Extract decorator name from AST node.
+        Args:
+            decorator: an expression taken from a ``decorator_list``.
+        Returns:
+            ``'name'`` for ``@name``, the dotted path for ``@pkg.name``, and
+            the same for the called forms ``@name(...)`` / ``@pkg.name(...)``.
+            An empty string is returned for any other expression.
+        """
+        # ``@decorator(...)`` is a Call wrapping the real decorator expression;
+        # without unwrapping it, every parameterised decorator is named "".
+        target = decorator.func if isinstance(decorator, ast.Call) else decorator
+        if isinstance(target, ast.Name):
+            return target.id
+        if isinstance(target, ast.Attribute):
+            return self._get_attribute_name_from_node(target) or ""
+        return ""
+    def _get_attribute_name_from_node(self, node: ast.Attribute) -> Optional[str]:
+        """Return the dotted path of an attribute chain, e.g. 'obj.attr.method'.
+        The chain must bottom out in a plain name; when it is rooted in any
+        other expression (a call, a subscript, ...) ``None`` is returned.
+        """
+        # Collect attribute names from the outermost one inwards
+        segments = [node.attr]
+        current = node.value
+        while isinstance(current, ast.Attribute):
+            segments.append(current.attr)
+            current = current.value
+        if not isinstance(current, ast.Name):
+            return None
+        segments.append(current.id)
+        return ".".join(reversed(segments))
+    def _is_domain_entity(self, node: ast.ClassDef) -> bool:
+        """Determine if a class represents a domain entity.
+        A class qualifies when any of these simple heuristics holds:
+        its lower-cased name contains a domain keyword; it is decorated as a
+        dataclass (``@dataclass``, ``@dataclass(...)``,
+        ``@dataclasses.dataclass``); or it lists ``BaseModel`` (optionally
+        module-qualified) among its base classes.
+        """
+        class_name = node.name.lower()
+        # Common domain entity names
+        domain_keywords = [
+            'user', 'account', 'profile', 'customer', 'client',
+            'order', 'payment', 'transaction', 'invoice', 'billing',
+            'product', 'item', 'catalog', 'inventory',
+            'wallet', 'balance', 'credit', 'debit',
+            'session', 'token', 'auth', 'permission',
+            'notification', 'message', 'email', 'sms',
+            'address', 'location', 'contact', 'phone'
+        ]
+        # Check if class name contains domain keywords
+        if any(keyword in class_name for keyword in domain_keywords):
+            return True
+        def short_name(expr):
+            # Last identifier of ``name``, ``pkg.name`` or a call to either
+            if isinstance(expr, ast.Call):
+                expr = expr.func
+            if isinstance(expr, ast.Name):
+                return expr.id
+            if isinstance(expr, ast.Attribute):
+                return expr.attr
+            return None
+        # Dataclass decorators in every spelling ('BaseModel' as a decorator
+        # is kept only so previously accepted classes are still accepted)
+        if any(short_name(dec) in ('dataclass', 'BaseModel') for dec in node.decorator_list):
+            return True
+        # Pydantic models inherit from BaseModel rather than being decorated
+        return any(short_name(base) == 'BaseModel' for base in node.bases)
+    def _extract_class_fields(self, node: ast.ClassDef) -> List[str]:
+        """List the names assigned directly in the class body.
+        Annotated assignments (``x: int``) and plain assignments (``x = 1``,
+        ``a = b = 1``) to simple names are reported in source order; targets
+        that are not plain names (tuples, attributes) are ignored.
+        """
+        def assigned_names(statement):
+            # Simple names bound by one class-body statement
+            if isinstance(statement, ast.AnnAssign):
+                target = statement.target
+                return [target.id] if isinstance(target, ast.Name) else []
+            if isinstance(statement, ast.Assign):
+                return [t.id for t in statement.targets if isinstance(t, ast.Name)]
+            return []
+        return [name for statement in node.body for name in assigned_names(statement)]
+    def _extract_class_methods(self, node: ast.ClassDef) -> List[str]:
+        """List the names of the ``def`` functions written directly in the class body.
+        Only ``ast.FunctionDef`` members count; the result is in source order.
+        """
+        return [member.name for member in node.body if isinstance(member, ast.FunctionDef)]

codebase_digest_ai-0.1.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,233 @@
+Metadata-Version: 2.4
+Name: codebase-digest-ai
+Version: 0.1.1
+Summary: AI-native code intelligence engine for semantic codebase analysis
+Author: Harsh Bothara
+License: MIT
+Project-URL: Homepage, https://github.com/codebase-digest/codebase-digest
+Project-URL: Documentation, https://github.com/codebase-digest/codebase-digest#readme
+Project-URL: Repository, https://github.com/codebase-digest/codebase-digest
+Project-URL: Issues, https://github.com/codebase-digest/codebase-digest/issues
+Keywords: code-analysis,ast,static-analysis,documentation,ai,developer-tools
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Documentation
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: typer>=0.9.0
+Requires-Dist: networkx>=3.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: jinja2>=3.1.0
+Requires-Dist: pathspec>=0.11.0
+Requires-Dist: pyvis>=0.3.2
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: isort>=5.12.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: build>=0.10.0; extra == "dev"
+Requires-Dist: twine>=4.0.0; extra == "dev"
+Dynamic: license-file
+# codebase-digest
+🚀 **AI-Native Code Intelligence Engine**
+Transform any codebase into semantic architectural understanding, execution flows, and human-readable engineering reports.
+## 🧱 What It Does
+This is NOT a repo summarizer. This is a code intelligence engine that explains:
+- **What this system does** - Infers project purpose from domain entities
+- **How data flows** - Maps execution paths and call relationships
+- **Where logic lives** - Identifies core components and their responsibilities
+- **What domains exist** - Detects business entities (User, Payment, Wallet, etc.)
+- **What files matter** - Highlights entry points and key modules
+## ✨ Features
+- **🔍 Semantic Analysis**: Extract functions, classes, methods, and imports with full context
+- **📊 Interactive Call Graphs**: Visualize function relationships and execution flows
+- **🏗️ Domain Entity Detection**: Automatically identify core business objects
+- **🔄 Execution Flow Mapping**: Trace request paths through the system
+- **📋 Project README Generation**: Auto-generate documentation for new developers
+- **📈 Multi-format Output**: HTML dashboards + Markdown reports + JSON data + Interactive graphs
+## 🚀 Quick Start
+```bash
+# Install
+pip install codebase-digest-ai
+# Analyze current directory
+codebase-digest build
+# Analyze specific directory
+codebase-digest build /path/to/project
+# Generate with interactive call graph
+codebase-digest build --graph
+# Quick stats
+codebase-digest stats
+# Search for patterns
+codebase-digest query "wallet"
+```
+## 📁 Output Structure
+Generates `.digest/` directory with comprehensive analysis:
+```
+.digest/
+├── README.md          # Project documentation for developers
+├── callgraph.html     # Interactive call graph visualization
+├── report.html        # Comprehensive HTML dashboard
+├── architecture.md    # Technical architecture breakdown
+├── flows.md           # Execution flow documentation
+├── ai-context.md      # AI-optimized context file
+└── entities.json      # Structured analysis data
+```
+## 📊 Example Output
+For a Python financial services project:
+```
+📊 Codebase Statistics
+┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
+┃ Total Files      ┃ 4      ┃
+┃ Lines of Code    ┃ 189    ┃
+┃ Languages        ┃ Python ┃
+┃ Functions        ┃ 24     ┃
+┃ Classes          ┃ 8      ┃
+┃ Domain Entities  ┃ 7      ┃
+┃ Execution Flows  ┃ 4      ┃
+┃ Complexity Score ┃ 1.8    ┃
+┗━━━━━━━━━━━━━━━━━━┻━━━━━━━━┛
+Graph Stats: 29 nodes, 27 edges, 7 components
+```
+**Generated README.md excerpt:**
+```markdown
+# Project Overview
+This is a financial services application that provides user management,
+payment processing, and digital wallet functionality. The system is built
+with a service-oriented architecture using Python dataclasses for domain
+modeling and separate service layers for business logic.
+## Architecture
+The application follows a layered architecture with clear separation of concerns:
+- **Domain Layer**: Contains core business entities (User, Payment, Wallet)
+- **Service Layer**: Implements business logic (UserService, PaymentService)
+- **Application Layer**: Handles bootstrapping and orchestration
+```
+## 💡 Commands
+```bash
+# Full analysis with all outputs
+codebase-digest build [PATH]
+# Specific formats
+codebase-digest build --format html       # HTML dashboard only
+codebase-digest build --format markdown   # Markdown reports only
+codebase-digest build --format json       # JSON data only
+# Interactive call graph with depth filtering
+codebase-digest build --graph --graph-depth 3
+# Quick metrics and search
+codebase-digest stats [PATH]              # Project statistics
+codebase-digest query "search term" [PATH] # Search patterns
+```
+## 🎯 Key Features
+### 🕸️ Interactive Call Graph
+- **Probabilistic entrypoint detection** - Finds real execution starting points
+- **Noise filtering** - Removes builtin calls and isolated nodes
+- **Depth filtering** - Focus on core execution spine
+- **Professional UI** - GitHub/Linear/Notion inspired design
+### 📝 Smart README Generation
+- **Project type inference** - Detects financial, e-commerce, CMS patterns
+- **Architecture analysis** - Service-oriented vs modular detection
+- **Run instructions** - Inferred from entry points
+- **Future improvements** - Realistic enhancement suggestions
+### 🔍 Semantic Understanding
+- **Symbol-aware analysis** - True function-level relationships
+- **Domain entity detection** - Business object identification
+- **Execution flow mapping** - Startup and runtime sequences
+- **Cross-file analysis** - Import and dependency tracking
+## 🛠️ Tech Stack
+- **Python 3.10+** - Core language
+- **AST parsing** - Deep Python code analysis
+- **NetworkX** - Call graph analysis and visualization
+- **vis.js** - Interactive graph rendering
+- **Typer** - CLI interface
+- **Rich** - Beautiful terminal output
+## 📋 Supported Languages
+- ✅ **Python** - Full AST analysis with call graphs
+- 🚧 **JavaScript/TypeScript** - Parser stub only (tree-sitter parsing not yet implemented)
+- 🚧 **Java** - Planned
+- 🚧 **Go** - Planned
+## 🎯 Use Cases
+- **New Developer Onboarding** - Understand unfamiliar codebases quickly
+- **Code Reviews** - Architectural overview and impact analysis
+- **Documentation Generation** - Auto-generate project documentation
+- **Refactoring Planning** - Identify core components and dependencies
+- **AI-Assisted Development** - Provide context for LLM code assistance
+## 🔧 Development
+```bash
+# Install development dependencies
+pip install -e ".[dev]"
+# Run tests
+pytest
+# Format code
+black .
+isort .
+# Type checking
+mypy codebase_digest/
+```
+## 🤝 Contributing
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+3. Make your changes
+4. Add tests if applicable
+5. Submit a pull request
+## 📄 License
+MIT License - see LICENSE file for details.
+## 🙏 Acknowledgments
+- Built with modern Python tooling and best practices
+- Inspired by professional developer tools (JetBrains, Sourcegraph)
+- Designed for AI-native development workflows