codebase-digest-ai 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
+ """
+ Codebase Digest - AI-native code intelligence engine.
+
+ Transform any codebase into semantic architectural understanding,
+ execution flows, and human-readable engineering reports.
+ """
+
+ __version__ = "0.1.0"
@@ -0,0 +1,7 @@
+ """Analysis modules for codebase intelligence."""
+
+ from .codebase_analyzer import CodebaseAnalyzer
+ from .flow_analyzer import FlowAnalyzer
+ from .metrics_analyzer import MetricsAnalyzer
+
+ __all__ = ["CodebaseAnalyzer", "FlowAnalyzer", "MetricsAnalyzer"]
@@ -0,0 +1,183 @@
+ """Main codebase analyzer that orchestrates parsing and analysis."""
+
+ import os
+ from pathlib import Path
+ from typing import Dict, List, Type
+
+ from ..models import CodebaseAnalysis
+ from ..parser import PythonParser, JavaScriptParser, BaseParser
+ from .flow_analyzer import FlowAnalyzer
+ from .metrics_analyzer import MetricsAnalyzer
+
+
+ class CodebaseAnalyzer:
+     """Main analyzer that coordinates parsing and analysis of a codebase."""
+
+     def __init__(self, root_path: Path):
+         self.root_path = Path(root_path)
+         self.parsers: Dict[str, Type[BaseParser]] = {}
+         self._register_parsers()
+
+         # Ignore patterns
+         self.ignore_patterns = {
+             '__pycache__', '.git', '.svn', '.hg', 'node_modules',
+             '.pytest_cache', '.mypy_cache', '.tox', 'venv', 'env',
+             '.venv', 'dist', 'build', '*.egg-info', '.DS_Store'
+         }
+
+     def _register_parsers(self):
+         """Register available parser classes by file extension."""
+         # Register parsers by extension without instantiating
+         python_extensions = ['.py']
+         js_extensions = ['.js', '.jsx', '.ts', '.tsx']
+
+         for ext in python_extensions:
+             self.parsers[ext] = PythonParser
+
+         for ext in js_extensions:
+             self.parsers[ext] = JavaScriptParser
+
+     def analyze(self) -> CodebaseAnalysis:
+         """Perform complete codebase analysis."""
+         analysis = CodebaseAnalysis(root_path=self.root_path)
+
+         # Find all relevant files
+         files = self._find_source_files()
+         analysis.total_files = len(files)
+
+         # Parse each file
+         for file_path in files:
+             self._parse_file(file_path, analysis)
+
+         # Detect entry points
+         analysis.entry_points = self._detect_entry_points(files)
+
+         # Analyze execution flows
+         flow_analyzer = FlowAnalyzer(analysis)
+         analysis.execution_flows = flow_analyzer.analyze_flows()
+
+         # Calculate metrics
+         metrics_analyzer = MetricsAnalyzer(analysis)
+         analysis.total_lines = metrics_analyzer.count_total_lines(files)
+         analysis.languages = metrics_analyzer.detect_languages(files)
+         analysis.complexity_score = metrics_analyzer.calculate_complexity()
+
+         # Build directory tree
+         analysis.directory_tree = self._build_directory_tree()
+
+         return analysis
+
+     def _find_source_files(self) -> List[Path]:
+         """Find all source files in the codebase."""
+         files = []
+
+         for root, dirs, filenames in os.walk(self.root_path):
+             # Filter out ignored directories (in place, so os.walk skips them)
+             dirs[:] = [d for d in dirs if not self._should_ignore(d)]
+
+             for filename in filenames:
+                 file_path = Path(root) / filename
+                 if self._is_source_file(file_path) and not self._should_ignore(filename):
+                     files.append(file_path)
+
+         return files
+
+     def _is_source_file(self, file_path: Path) -> bool:
+         """Check if file is a source code file."""
+         return file_path.suffix in self.parsers
+
+     def _should_ignore(self, name: str) -> bool:
+         """Check if a file/directory should be ignored."""
+         for pattern in self.ignore_patterns:
+             if pattern.startswith('*'):
+                 if name.endswith(pattern[1:]):
+                     return True
+             elif name == pattern or name.startswith(pattern):
+                 return True
+         return False
+
+     def _parse_file(self, file_path: Path, analysis: CodebaseAnalysis):
+         """Parse a single file and add results to analysis."""
+         parser_class = self.parsers.get(file_path.suffix)
+         if not parser_class:
+             return
+
+         try:
+             parser = parser_class(file_path)
+
+             # Parse symbols
+             symbols = parser.parse_symbols()
+             analysis.symbols.extend(symbols)
+
+             # Parse imports
+             imports = parser.parse_imports()
+             analysis.imports.extend(imports)
+
+             # Parse calls
+             calls = parser.parse_calls()
+             analysis.call_relations.extend(calls)
+
+             # Parse domain entities
+             entities = parser.parse_domain_entities()
+             analysis.domain_entities.extend(entities)
+
+         except Exception as e:
+             # Log error but continue processing
+             print(f"Error parsing {file_path}: {e}")
+
+     def _detect_entry_points(self, files: List[Path]) -> List[Path]:
+         """Detect likely entry points in the codebase."""
+         entry_points = []
+
+         # Common entry point patterns
+         entry_patterns = [
+             'main.py', 'app.py', 'server.py', 'run.py', 'start.py',
+             'manage.py', '__main__.py', 'wsgi.py', 'asgi.py',
+             'index.js', 'server.js', 'app.js', 'main.js'
+         ]
+
+         for file_path in files:
+             if file_path.name in entry_patterns:
+                 entry_points.append(file_path)
+             elif file_path.name == '__init__.py':
+                 # Check if it's a package entry point
+                 if self._is_package_entry_point(file_path):
+                     entry_points.append(file_path)
+
+         return entry_points
+
+     def _is_package_entry_point(self, init_file: Path) -> bool:
+         """Check if an __init__.py file is a package entry point."""
+         try:
+             content = init_file.read_text(encoding='utf-8')
+             # Simple heuristic: contains main execution logic
+             return 'if __name__' in content or 'main(' in content
+         except (OSError, UnicodeDecodeError):
+             return False
+
+     def _build_directory_tree(self) -> Dict:
+         """Build a directory tree structure."""
+         tree = {}
+
+         for root, dirs, files in os.walk(self.root_path):
+             # Filter ignored directories
+             dirs[:] = [d for d in dirs if not self._should_ignore(d)]
+
+             rel_path = Path(root).relative_to(self.root_path)
+
+             # Build nested structure
+             current = tree
+             for part in rel_path.parts:
+                 if part not in current:
+                     current[part] = {}
+                 current = current[part]
+
+             # Add files
+             source_files = [f for f in files
+                             if self._is_source_file(Path(root) / f)
+                             and not self._should_ignore(f)]
+
+             if source_files:
+                 current['_files'] = source_files
+
+         return tree
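
As a quick orientation for the file above, here is a minimal usage sketch. It is hypothetical: the import path `codebase_digest.analysis` is an assumption inferred from the wheel name and the package's relative imports, and `./my_project` is an invented target. The attributes printed are exactly the ones `analyze()` populates above.

from pathlib import Path

from codebase_digest.analysis import CodebaseAnalyzer  # assumed import path

analyzer = CodebaseAnalyzer(Path("./my_project"))      # hypothetical project dir
analysis = analyzer.analyze()

# Every attribute below is set by analyze() in the diff above.
print(f"Files analyzed:   {analysis.total_files}")
print(f"Non-blank lines:  {analysis.total_lines}")
print(f"Languages:        {', '.join(sorted(analysis.languages))}")
print(f"Complexity score: {analysis.complexity_score:.1f}/100")
for entry in analysis.entry_points:
    print(f"Entry point: {entry}")
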
@@ -0,0 +1,163 @@
+ """Execution flow analysis."""
+
+ import networkx as nx
+ from typing import List, Optional
+
+ from ..models import CodebaseAnalysis, ExecutionFlow
+
+
+ class FlowAnalyzer:
+     """Analyzes execution flows through the codebase."""
+
+     def __init__(self, analysis: CodebaseAnalysis):
+         self.analysis = analysis
+         self.call_graph = self._build_call_graph()
+
+     def _build_call_graph(self) -> nx.DiGraph:
+         """Build a directed graph of function calls."""
+         graph = nx.DiGraph()
+
+         for call in self.analysis.call_relations:
+             caller_name = call.caller_symbol.name
+             graph.add_edge(caller_name, call.callee_name,
+                            caller_file=call.caller_symbol.file_path,
+                            callee_file=call.callee_file,
+                            line_number=call.line_number)
+
+         return graph
+
+     def analyze_flows(self) -> List[ExecutionFlow]:
+         """Analyze execution flows starting from entry points."""
+         flows = []
+
+         # Find entry point functions
+         entry_functions = self._find_entry_functions()
+
+         for entry_func in entry_functions:
+             flow = self._trace_execution_flow(entry_func)
+             if flow:
+                 flows.append(flow)
+
+         # Detect common patterns
+         flows.extend(self._detect_common_patterns())
+
+         return flows
+
+     def _find_entry_functions(self) -> List[str]:
+         """Find functions that are likely entry points."""
+         entry_functions = []
+
+         # Look for main functions
+         for symbol in self.analysis.symbols:
+             if symbol.name in ['main', '__main__', 'app', 'run', 'start']:
+                 entry_functions.append(symbol.name)
+
+         # Look for HTTP route handlers
+         for symbol in self.analysis.symbols:
+             if any(decorator in ['@app.route', '@router.get', '@router.post',
+                                  '@api.route', '@bp.route']
+                    for decorator in symbol.decorators):
+                 entry_functions.append(symbol.name)
+
+         return list(dict.fromkeys(entry_functions))  # de-duplicate, keep order
+
+     def _trace_execution_flow(self, entry_function: str) -> Optional[ExecutionFlow]:
+         """Trace execution flow from an entry function."""
+         if entry_function not in self.call_graph:
+             return None
+
+         # Use DFS to trace the flow
+         visited = set()
+         flow_steps = []
+         files_involved = set()
+
+         def dfs(func_name: str, depth: int = 0):
+             if depth > 10 or func_name in visited:  # Prevent infinite recursion
+                 return
+
+             visited.add(func_name)
+             flow_steps.append(func_name)
+
+             # Add file information
+             for call in self.analysis.call_relations:
+                 if call.caller_symbol.name == func_name:
+                     files_involved.add(call.caller_symbol.file_path)
+                     if call.callee_file:
+                         files_involved.add(call.callee_file)
+
+             # Continue tracing
+             if func_name in self.call_graph:
+                 for successor in self.call_graph.successors(func_name):
+                     dfs(successor, depth + 1)
+
+         dfs(entry_function)
+
+         if len(flow_steps) > 1:
+             return ExecutionFlow(
+                 name=f"{entry_function}_flow",
+                 entry_point=entry_function,
+                 steps=flow_steps,
+                 files_involved=files_involved,
+                 description=f"Execution flow starting from {entry_function}"
+             )
+
+         return None
+
+     def _detect_common_patterns(self) -> List[ExecutionFlow]:
+         """Detect common execution patterns."""
+         patterns = []
+
+         # Detect CRUD patterns
+         crud_flow = self._detect_crud_pattern()
+         if crud_flow:
+             patterns.append(crud_flow)
+
+         # Detect authentication patterns
+         auth_flow = self._detect_auth_pattern()
+         if auth_flow:
+             patterns.append(auth_flow)
+
+         return patterns
+
+     def _detect_crud_pattern(self) -> Optional[ExecutionFlow]:
+         """Detect CRUD (Create, Read, Update, Delete) patterns."""
+         crud_functions = []
+
+         for symbol in self.analysis.symbols:
+             name_lower = symbol.name.lower()
+             if any(crud_word in name_lower for crud_word in
+                    ['create', 'read', 'get', 'update', 'delete', 'save', 'find']):
+                 crud_functions.append(symbol.name)
+
+         if len(crud_functions) >= 3:  # At least 3 CRUD operations
+             return ExecutionFlow(
+                 name="crud_operations",
+                 entry_point="CRUD Operations",
+                 steps=crud_functions,
+                 files_involved=set(),
+                 description="CRUD operations detected in the codebase"
+             )
+
+         return None
+
+     def _detect_auth_pattern(self) -> Optional[ExecutionFlow]:
+         """Detect authentication/authorization patterns."""
+         auth_functions = []
+
+         for symbol in self.analysis.symbols:
+             name_lower = symbol.name.lower()
+             if any(auth_word in name_lower for auth_word in
+                    ['login', 'logout', 'authenticate', 'authorize', 'verify',
+                     'validate', 'token', 'session', 'permission']):
+                 auth_functions.append(symbol.name)
+
+         if len(auth_functions) >= 2:
+             return ExecutionFlow(
+                 name="authentication_flow",
+                 entry_point="Authentication System",
+                 steps=auth_functions,
+                 files_involved=set(),
+                 description="Authentication and authorization flow"
+             )
+
+         return None
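
The heart of `_trace_execution_flow` above is a depth-capped DFS over the call graph. This standalone sketch reproduces the same traversal against a toy `networkx` graph (function names invented for illustration) to show how the shared `visited` set breaks cycles and the depth cap bounds the walk:

import networkx as nx

graph = nx.DiGraph()
graph.add_edge("main", "load_config")
graph.add_edge("main", "run_server")
graph.add_edge("run_server", "handle_request")
graph.add_edge("handle_request", "run_server")  # cycle: guarded by `visited`

def trace(entry, max_depth=10):
    visited, steps = set(), []
    def dfs(func, depth=0):
        if depth > max_depth or func in visited:
            return
        visited.add(func)
        steps.append(func)
        for successor in graph.successors(func):
            dfs(successor, depth + 1)
    dfs(entry)
    return steps

print(trace("main"))  # ['main', 'load_config', 'run_server', 'handle_request']

As in the analyzer, the `visited` set is shared across branches, so a function reached on one branch is not re-expanded on another.
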
@@ -0,0 +1,130 @@
+ """Metrics and statistics analyzer."""
+
+ from pathlib import Path
+ from typing import List, Set, Dict
+
+ from ..models import CodebaseAnalysis
+
+
+ class MetricsAnalyzer:
+     """Analyzes codebase metrics and statistics."""
+
+     def __init__(self, analysis: CodebaseAnalysis):
+         self.analysis = analysis
+
+     def count_total_lines(self, files: List[Path]) -> int:
+         """Count total non-blank lines of code."""
+         total_lines = 0
+
+         for file_path in files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8') as f:
+                     total_lines += sum(1 for line in f if line.strip())
+             except (OSError, UnicodeDecodeError):
+                 continue
+
+         return total_lines
+
+     def detect_languages(self, files: List[Path]) -> Set[str]:
+         """Detect programming languages in the codebase."""
+         languages = set()
+
+         language_map = {
+             '.py': 'Python',
+             '.js': 'JavaScript',
+             '.jsx': 'JavaScript',
+             '.ts': 'TypeScript',
+             '.tsx': 'TypeScript',
+             '.java': 'Java',
+             '.cpp': 'C++',
+             '.c': 'C',
+             '.cs': 'C#',
+             '.go': 'Go',
+             '.rs': 'Rust',
+             '.php': 'PHP',
+             '.rb': 'Ruby',
+             '.swift': 'Swift',
+             '.kt': 'Kotlin',
+             '.scala': 'Scala'
+         }
+
+         for file_path in files:
+             lang = language_map.get(file_path.suffix)
+             if lang:
+                 languages.add(lang)
+
+         return languages
+
+     def calculate_complexity(self) -> float:
+         """Calculate a simple complexity score."""
+         if not self.analysis.symbols:
+             return 0.0
+
+         # Simple complexity based on:
+         # - Number of functions/classes
+         # - Number of call relationships
+         # - Depth of call chains
+
+         symbol_count = len(self.analysis.symbols)
+         call_count = len(self.analysis.call_relations)
+
+         # Normalize to 0-100 scale
+         base_complexity = min(100, (symbol_count + call_count) / 10)
+
+         # Adjust for call chain depth
+         max_chain_depth = self._calculate_max_call_depth()
+         depth_factor = min(2.0, max_chain_depth / 5)
+
+         return min(100.0, base_complexity * depth_factor)
+
+     def _calculate_max_call_depth(self) -> int:
+         """Calculate the maximum depth of call chains."""
+         # Build a simple call graph
+         call_graph = {}
+         for call in self.analysis.call_relations:
+             caller_name = call.caller_symbol.name
+             if caller_name not in call_graph:
+                 call_graph[caller_name] = []
+             call_graph[caller_name].append(call.callee_name)
+
+         # Find maximum depth using DFS
+         max_depth = 0
+
+         def dfs(func: str, visited: Set[str], depth: int) -> int:
+             if func in visited or func not in call_graph:
+                 return depth
+
+             visited.add(func)
+             local_max = depth
+
+             for callee in call_graph[func]:
+                 local_max = max(local_max, dfs(callee, visited.copy(), depth + 1))
+
+             return local_max
+
+         for func in call_graph:
+             max_depth = max(max_depth, dfs(func, set(), 0))
+
+         return max_depth
+
+     def get_file_statistics(self, files: List[Path]) -> Dict[str, int]:
+         """Get detailed file statistics."""
+         stats = {
+             'total_files': len(files),
+             'python_files': 0,
+             'javascript_files': 0,
+             'typescript_files': 0,
+             'other_files': 0
+         }
+
+         for file_path in files:
+             if file_path.suffix == '.py':
+                 stats['python_files'] += 1
+             elif file_path.suffix in ['.js', '.jsx']:
+                 stats['javascript_files'] += 1
+             elif file_path.suffix in ['.ts', '.tsx']:
+                 stats['typescript_files'] += 1
+             else:
+                 stats['other_files'] += 1
+
+         return stats
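
The scoring in `calculate_complexity` above is plain arithmetic, so a worked example with invented counts makes the scale concrete: 40 symbols and 110 call relations give a base of min(100, 150 / 10) = 15.0, and a maximum call depth of 4 gives a factor of min(2.0, 4 / 5) = 0.8, for a final score of 12.0.

# Worked example of the arithmetic in calculate_complexity; counts are invented.
symbol_count, call_count, max_chain_depth = 40, 110, 4

base_complexity = min(100, (symbol_count + call_count) / 10)  # 15.0
depth_factor = min(2.0, max_chain_depth / 5)                  # 0.8
score = min(100.0, base_complexity * depth_factor)            # 12.0
print(score)

Note that with no call relations at all, depth_factor is 0 and the score collapses to 0 regardless of symbol count; that behavior follows directly from the formula above.
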
@@ -0,0 +1 @@
+ """CLI module for codebase-digest."""