shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,169 @@
1
+ """Python language analyzer"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from collections import Counter
6
+ from typing import List, Optional
7
+
8
+ from .base import BaseScanner
9
+ from ..models import FileMetrics
10
+ from ..config import AnalysisSettings
11
+ from ..exceptions import FileAccessError
12
+ from ..logging_config import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class PythonScanner(BaseScanner):
    """Scanner optimized for Python codebases.

    All metrics come from lightweight regex heuristics rather than a real
    AST parse, so counts are approximations intended for relative
    comparison between files, not exact program analysis.
    """

    # Path components that indicate environment / generated / vendored code.
    _SKIP_DIRS = frozenset({
        "venv", ".venv", "__pycache__", ".git", ".tox",
        ".mypy_cache", ".pytest_cache", "node_modules",
        "dist", "build", ".eggs",
    })
    # Individual files that are tooling boilerplate, not project logic.
    _SKIP_FILES = frozenset({"setup.py", "conftest.py"})

    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
        super().__init__(root_dir, extensions=[".py"], settings=settings)

    def _should_skip(self, filepath: Path) -> bool:
        """Return True for test files, virtualenvs, and other non-project paths.

        Matches whole path components instead of raw substrings so that e.g.
        ``src/distribution.py`` is not skipped merely because its path
        contains the letters "dist".  ``*.egg-info`` directories are handled
        with a suffix check — the previous literal ``"*.egg-info"`` could
        never match as a plain substring.
        """
        name = filepath.name
        if (
            name in self._SKIP_FILES
            or name.startswith("test_")
            or name.endswith("_test.py")
        ):
            return True
        return any(
            part in self._SKIP_DIRS or part.endswith(".egg-info")
            for part in filepath.parts
        )

    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """Extract all metrics from a Python file.

        Raises:
            FileAccessError: if the file cannot be read or stat'ed.
        """
        try:
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            # stat() inside the try so a racing delete/permission change is
            # also reported as FileAccessError instead of a raw OSError.
            last_modified = filepath.stat().st_mtime
        except OSError as e:
            raise FileAccessError(filepath, f"Cannot read file: {e}") from e
        except Exception as e:
            raise FileAccessError(filepath, f"Unexpected error: {e}") from e

        lines = content.split("\n")

        return FileMetrics(
            path=str(filepath.relative_to(self.root_dir)),
            lines=len(lines),
            tokens=self._count_tokens(content),
            imports=self._extract_imports(content),
            exports=self._extract_exports(content),
            functions=self._count_functions(content),
            interfaces=0,  # Python doesn't have interfaces
            structs=self._count_classes(content),
            complexity_score=self._estimate_complexity(content),
            nesting_depth=self._max_nesting_depth_python(content),
            ast_node_types=self._extract_ast_node_types(content),
            last_modified=last_modified,
        )

    def _count_tokens(self, content: str) -> int:
        """Approximate token count for Python (comments and strings excluded)."""
        # Strip string literals before comments so a '#' inside a string
        # does not truncate the rest of the line.
        content = re.sub(r'""".*?"""', "", content, flags=re.DOTALL)
        content = re.sub(r"'''.*?'''", "", content, flags=re.DOTALL)
        content = re.sub(r'"[^"]*"', "", content)
        content = re.sub(r"'[^']*'", "", content)
        content = re.sub(r"#.*", "", content)

        tokens = re.findall(r"\w+|[{}()\[\];,.:@]", content)
        return len(tokens)

    def _extract_imports(self, content: str) -> List[str]:
        """Extract top-level Python import targets (module names only)."""
        imports = []

        # import X
        for match in re.finditer(r"^import\s+(\S+)", content, re.MULTILINE):
            imports.append(match.group(1))

        # from X import Y
        for match in re.finditer(r"^from\s+(\S+)\s+import", content, re.MULTILINE):
            imports.append(match.group(1))

        return imports

    def _extract_exports(self, content: str) -> List[str]:
        """Extract public identifiers (no leading underscore).

        Collects top-level (async) function names, class names, and names
        listed in ``__all__``.  A name defined at top level and repeated in
        ``__all__`` appears twice, matching the original behavior.
        """
        exports = []

        # Public functions: def name( / async def name(
        exports.extend(
            re.findall(
                r"^(?:async\s+)?def\s+([a-zA-Z]\w*)\s*\(", content, re.MULTILINE
            )
        )

        # Public classes: class Name
        exports.extend(
            re.findall(r"^class\s+([a-zA-Z]\w*)", content, re.MULTILINE)
        )

        # __all__ list
        match = re.search(r"__all__\s*=\s*\[([^\]]+)\]", content, re.DOTALL)
        if match:
            exports.extend(re.findall(r'["\'](\w+)["\']', match.group(1)))

        return exports

    def _count_functions(self, content: str) -> int:
        """Count function and method definitions, including ``async def``."""
        return len(
            re.findall(r"^\s*(?:async\s+)?def\s+\w+\s*\(", content, re.MULTILINE)
        )

    def _count_classes(self, content: str) -> int:
        """Count top-level class definitions."""
        return len(re.findall(r"^class\s+\w+", content, re.MULTILINE))

    def _estimate_complexity(self, content: str) -> float:
        """Estimate cyclomatic complexity from branch keywords.

        Each decision point adds 1 to a base complexity of 1.  Matching is
        purely textual, so keywords inside string literals still count —
        acceptable for a relative heuristic.
        """
        complexity = 1

        complexity += len(re.findall(r"\bif\s+", content))
        complexity += len(re.findall(r"\belif\s+", content))
        complexity += len(re.findall(r"\belse\s*:", content))
        complexity += len(re.findall(r"\bfor\s+", content))
        complexity += len(re.findall(r"\bwhile\s+", content))
        # \bexcept\b (not \bexcept\s*) so identifiers such as
        # "exceptional" are not miscounted as handlers.
        complexity += len(re.findall(r"\bexcept\b", content))
        complexity += len(re.findall(r"\band\b", content))
        complexity += len(re.findall(r"\bor\b", content))
        complexity += len(re.findall(r"\bwith\s+", content))

        return float(complexity)

    def _max_nesting_depth_python(self, content: str) -> int:
        """Calculate max indentation depth (4 spaces = one level).

        Tabs are expanded to 4 spaces first so tab-indented files do not
        report a depth of zero.
        """
        max_depth = 0
        for raw in content.split("\n"):
            line = raw.expandtabs(4)
            stripped = line.lstrip()
            if not stripped or stripped.startswith("#"):
                continue
            indent = len(line) - len(stripped)
            # Python standard is 4 spaces per level
            max_depth = max(max_depth, indent // 4)
        return max_depth

    def _extract_ast_node_types(self, content: str) -> Counter:
        """Extract an approximate distribution of AST node types for Python."""
        node_types = Counter()

        node_types["function"] = self._count_functions(content)
        node_types["class"] = self._count_classes(content)
        node_types["import"] = len(self._extract_imports(content))
        node_types["export"] = len(self._extract_exports(content))
        node_types["if"] = len(re.findall(r"\bif\s+", content))
        node_types["for"] = len(re.findall(r"\bfor\s+", content))
        node_types["while"] = len(re.findall(r"\bwhile\s+", content))
        node_types["return"] = len(re.findall(r"\breturn\b", content))
        node_types["yield"] = len(re.findall(r"\byield\b", content))
        node_types["with"] = len(re.findall(r"\bwith\s+", content))
        node_types["try"] = len(re.findall(r"\btry\s*:", content))
        node_types["decorator"] = len(re.findall(r"^\s*@\w+", content, re.MULTILINE))

        return node_types
@@ -0,0 +1,162 @@
1
+ """TypeScript/React analyzer"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from collections import Counter
6
+ from typing import List, Optional
7
+
8
+ from .base import BaseScanner
9
+ from ..models import FileMetrics
10
+ from ..config import AnalysisSettings
11
+ from ..exceptions import FileAccessError
12
+ from ..logging_config import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class TypeScriptScanner(BaseScanner):
    """Scanner optimized for TypeScript and React codebases.

    Metrics are produced with regex heuristics (no real parser), so counts
    are approximations suited for relative comparison between files.
    """

    # Path components that indicate dependency / build / environment trees.
    _SKIP_DIRS = frozenset({
        "node_modules", "dist", "build", "venv", ".venv",
        "__pycache__", ".git", ".tox", ".mypy_cache",
    })

    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
        super().__init__(
            root_dir, extensions=[".ts", ".tsx", ".js", ".jsx"], settings=settings
        )

    def _should_skip(self, filepath: Path) -> bool:
        """Return True for paths inside dependency/build directories.

        Matches whole path components rather than substrings so that e.g.
        ``src/distribute.ts`` is not skipped because it contains "dist".
        """
        return any(part in self._SKIP_DIRS for part in filepath.parts)

    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """Extract all metrics from a TypeScript/React file.

        Raises:
            FileAccessError: if the file cannot be read or stat'ed.
        """
        try:
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            # stat() inside the try so a racing delete/permission change is
            # also reported as FileAccessError instead of a raw OSError.
            last_modified = filepath.stat().st_mtime
        except OSError as e:
            raise FileAccessError(filepath, f"Cannot read file: {e}") from e
        except Exception as e:
            raise FileAccessError(filepath, f"Unexpected error: {e}") from e

        lines = content.split("\n")

        return FileMetrics(
            path=str(filepath.relative_to(self.root_dir)),
            lines=len(lines),
            tokens=self._count_tokens(content),
            imports=self._extract_imports(content),
            exports=self._extract_exports(content),
            functions=self._count_functions(content),
            interfaces=self._count_classes(content),  # Use classes for TypeScript
            structs=self._count_react_components(content),  # Repurpose for components
            complexity_score=self._estimate_complexity(content),
            nesting_depth=self._max_nesting_depth(content),
            ast_node_types=self._extract_ast_node_types(content),
            last_modified=last_modified,
        )

    def _count_tokens(self, content: str) -> int:
        """Approximate token count for TypeScript (comments/strings removed)."""
        # Heuristic: comment removal runs first, so a '//' inside a string
        # literal (e.g. a URL) truncates that line's tokens slightly.
        content = re.sub(r"//.*", "", content)
        content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)
        content = re.sub(r'["\'].*?["\']', "", content)

        # Split on identifiers/keywords and common punctuation
        tokens = re.findall(r"\w+|[{}()\[\];,.]", content)
        return len(tokens)

    def _extract_imports(self, content: str) -> List[str]:
        """Extract imported module specifiers."""
        imports = []

        # Match: import X from 'Y'
        for match in re.finditer(
            r'import\s+.*?\s+from\s+["\']([^"\']+)["\']', content
        ):
            imports.append(match.group(1))

        # Match: import 'Y' (side-effect import)
        for match in re.finditer(r'import\s+["\']([^"\']+)["\']', content):
            imports.append(match.group(1))

        return imports

    def _extract_exports(self, content: str) -> List[str]:
        """Extract exported identifiers.

        Handles ``export const/function/class X`` plus ``export { ... }``
        lists, including ``X as Y`` renames (the exported name is the alias
        ``Y``) and trailing commas (``export { X, }`` previously raised
        IndexError on the empty item).
        """
        exports = []

        # export const/function/class X
        exports.extend(
            re.findall(r"export\s+(?:const|function|class)\s+(\w+)", content)
        )

        # export { X, Y as Z, }
        for match in re.finditer(r"export\s+\{([^}]+)\}", content):
            for item in match.group(1).split(","):
                words = item.strip().split()
                if not words:
                    continue  # trailing comma yields an empty item
                exports.append(words[-1] if "as" in words else words[0])

        return exports

    def _count_functions(self, content: str) -> int:
        """Count function declarations: ``function X``, arrow consts, function consts."""
        count = len(re.findall(r"\bfunction\s+\w+", content))
        count += len(
            re.findall(
                r"const\s+\w+\s*=\s*(?:\([^)]*\)|[a-zA-Z_]\w*)\s*=>", content
            )
        )
        count += len(re.findall(r"const\s+\w+\s*=\s*function", content))
        return count

    def _count_classes(self, content: str) -> int:
        """Count class declarations."""
        return len(re.findall(r"\bclass\s+\w+", content))

    def _count_react_components(self, content: str) -> int:
        """Count React component definitions (heuristic).

        Detects ``const X: React.FC`` and capitalized functions that return
        JSX; the DOTALL pattern may occasionally span across definitions.
        """
        count = len(re.findall(r"const\s+[A-Z]\w+\s*:\s*React\.FC", content))
        count += len(
            re.findall(r"function\s+[A-Z]\w+.*?return\s*\(?\s*<", content, re.DOTALL)
        )
        return count

    def _count_react_hooks(self, content: str) -> int:
        """Count React hook call sites (useState, useEffect, custom useX...)."""
        return len(re.findall(r"\buse[A-Z]\w+\s*\(", content))

    def _estimate_complexity(self, content: str) -> float:
        """Estimate cyclomatic complexity for TypeScript.

        Counts decision points: if, else, case, while, for, &&, ||, ??,
        and ternary '?'.  Matching is textual, so operators inside string
        literals still count — acceptable for a relative heuristic.
        """
        complexity = 1  # Base complexity

        complexity += len(re.findall(r"\bif\s*\(", content))
        complexity += len(re.findall(r"\belse\b", content))
        complexity += len(re.findall(r"\bcase\s+", content))
        complexity += len(re.findall(r"\bwhile\s*\(", content))
        complexity += len(re.findall(r"\bfor\s*\(", content))
        complexity += len(re.findall(r"&&", content))
        complexity += len(re.findall(r"\|\|", content))
        # '??' is one decision point like '||' (a bare \? counted it twice
        # and also caught optional chaining '?.').
        complexity += len(re.findall(r"\?\?", content))
        # Ternary '?': exclude '?.' and '??'; TS optional markers such as
        # 'x?: T' are still overcounted — this remains a heuristic.
        complexity += len(re.findall(r"(?<!\?)\?(?![.?])", content))

        return float(complexity)

    def _extract_ast_node_types(self, content: str) -> Counter:
        """Extract an approximate distribution of AST node types for TypeScript."""
        node_types = Counter()

        # TypeScript/React-specific node types
        node_types["function"] = self._count_functions(content)
        node_types["class"] = self._count_classes(content)
        node_types["component"] = self._count_react_components(content)
        node_types["hook"] = self._count_react_hooks(content)
        node_types["import"] = len(self._extract_imports(content))
        node_types["export"] = len(self._extract_exports(content))
        node_types["if"] = len(re.findall(r"\bif\s*\(", content))
        node_types["for"] = len(re.findall(r"\bfor\s*\(", content))
        node_types["while"] = len(re.findall(r"\bwhile\s*\(", content))
        node_types["return"] = len(re.findall(r"\breturn\b", content))
        node_types["jsx"] = len(re.findall(r"<[A-Z]\w+", content))

        return node_types
@@ -0,0 +1,214 @@
1
+ """
2
+ Caching system for Shannon Insight.
3
+
4
+ Uses diskcache for SQLite-based persistent caching.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ from functools import wraps
10
+ from pathlib import Path
11
+ from typing import Any, Callable, Optional
12
+
13
+ from diskcache import Cache
14
+
15
+ from .logging_config import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class AnalysisCache:
    """
    SQLite-based cache for analysis results.

    Wraps diskcache's ``Cache`` with TTL-based expiration, cache keys
    derived from file metadata, and a memoization decorator.  Every
    operation degrades to a no-op when caching is disabled, and cache
    backend failures are logged rather than raised.
    """

    def __init__(
        self,
        cache_dir: str = ".shannon-cache",
        ttl_hours: int = 24,
        enabled: bool = True
    ):
        """
        Initialize cache.

        Args:
            cache_dir: Directory for cache storage
            ttl_hours: Time-to-live in hours
            enabled: Whether caching is enabled
        """
        self.enabled = enabled
        self.ttl_seconds = ttl_hours * 3600
        self.cache = Cache(cache_dir) if enabled else None
        if enabled:
            logger.debug(f"Cache initialized at {cache_dir} with TTL={ttl_hours}h")
        else:
            logger.debug("Cache disabled")

    def _get_file_key(self, filepath: Path, config_hash: str) -> str:
        """
        Build a cache key from file metadata plus the configuration hash.

        The key incorporates path, mtime, size, and config hash so that any
        file change or configuration change invalidates the entry.  If the
        file cannot be stat'ed, the key degrades to path + config hash only.

        Args:
            filepath: File path
            config_hash: Hash of configuration settings

        Returns:
            Cache key string (SHA256 hex digest)
        """
        try:
            meta = filepath.stat()
        except OSError:
            raw = f"{filepath}:{config_hash}"
        else:
            raw = f"{filepath}:{meta.st_mtime}:{meta.st_size}:{config_hash}"
        return hashlib.sha256(raw.encode()).hexdigest()

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value, or None when disabled, missing, or expired
        """
        if not self.enabled or self.cache is None:
            return None

        try:
            hit = self.cache.get(key)
        except Exception as e:
            logger.warning(f"Cache get failed: {e}")
            return None
        if hit is None:
            return None
        logger.debug(f"Cache hit: {key[:16]}...")
        return hit

    def set(self, key: str, value: Any) -> None:
        """
        Store *value* under *key* with the configured TTL (no-op when disabled).

        Args:
            key: Cache key
            value: Value to cache
        """
        if not self.enabled or self.cache is None:
            return

        try:
            self.cache.set(key, value, expire=self.ttl_seconds)
        except Exception as e:
            logger.warning(f"Cache set failed: {e}")
        else:
            logger.debug(f"Cache set: {key[:16]}...")

    def clear(self) -> None:
        """Remove every cache entry (no-op when disabled)."""
        if not self.enabled or self.cache is None:
            return

        try:
            self.cache.clear()
        except Exception as e:
            logger.warning(f"Cache clear failed: {e}")
        else:
            logger.info("Cache cleared")

    def stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dict with ``enabled`` plus size/directory/volume when active,
            or an ``error`` entry if the backend query failed.
        """
        if not self.enabled or self.cache is None:
            return {"enabled": False}

        try:
            return {
                "enabled": True,
                "size": len(self.cache),
                "directory": self.cache.directory,
                "volume": self.cache.volume()
            }
        except Exception as e:
            logger.warning(f"Cache stats failed: {e}")
            return {"enabled": True, "error": str(e)}

    def memoize(
        self,
        config_hash: Optional[str] = None
    ) -> Callable:
        """
        Decorator that caches a file-analysis function's results.

        The wrapped function must take the file path as its first argument;
        the key is derived from that file's metadata plus *config_hash*
        (falling back to a ``config_hash`` keyword argument of the call).

        Usage:
            cache = AnalysisCache()

            @cache.memoize(config_hash="abc123")
            def analyze_file(filepath: Path) -> FileMetrics:
                ...

        Args:
            config_hash: Hash of configuration (for cache invalidation)

        Returns:
            Decorator function
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(filepath: Path, *args, **kwargs):
                if not self.enabled or self.cache is None:
                    return func(filepath, *args, **kwargs)

                key = self._get_file_key(
                    filepath, config_hash or kwargs.get('config_hash', '')
                )

                cached = self.get(key)
                if cached is not None:
                    return cached

                # NOTE(review): a result that is literally None is never
                # cached, so such calls recompute every time.
                fresh = func(filepath, *args, **kwargs)
                self.set(key, fresh)
                return fresh

            return wrapper
        return decorator

    def close(self) -> None:
        """Release the underlying cache resources (safe to call when disabled)."""
        if self.cache is not None:
            self.cache.close()
200
+
201
+
202
def compute_config_hash(config: dict) -> str:
    """
    Compute a short, stable hash of a configuration dict.

    Keys are sorted before serialization so two dicts with equal contents
    always hash identically regardless of insertion order.

    Args:
        config: Configuration dictionary

    Returns:
        First 16 hex digits of the SHA256 of the canonical JSON form
    """
    canonical = json.dumps(config, sort_keys=True)
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()[:16]