shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,169 @@
1
+ """Python language analyzer"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from collections import Counter
6
+ from typing import List, Optional
7
+
8
+ from .base import BaseScanner
9
+ from ..models import FileMetrics
10
+ from ..config import AnalysisSettings
11
+ from ..exceptions import FileAccessError
12
+ from ..logging_config import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class PythonScanner(BaseScanner):
    """Scanner optimized for Python codebases.

    All metrics come from lightweight regex heuristics rather than a real
    AST parse, so counts are approximations intended for relative
    comparison between files, not exact program analysis.
    """

    # Path components that indicate environment / generated / vendored code.
    _SKIP_DIRS = frozenset({
        "venv", ".venv", "__pycache__", ".git", ".tox",
        ".mypy_cache", ".pytest_cache", "node_modules",
        "dist", "build", ".eggs",
    })
    # Individual files that are tooling boilerplate, not project logic.
    _SKIP_FILES = frozenset({"setup.py", "conftest.py"})

    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
        super().__init__(root_dir, extensions=[".py"], settings=settings)

    def _should_skip(self, filepath: Path) -> bool:
        """Return True for test files, virtualenvs, and other non-project paths.

        Matches whole path components instead of raw substrings so that e.g.
        ``src/distribution.py`` is not skipped merely because its path
        contains the letters "dist".  ``*.egg-info`` directories are handled
        with a suffix check — the previous literal ``"*.egg-info"`` could
        never match as a plain substring.
        """
        name = filepath.name
        if (
            name in self._SKIP_FILES
            or name.startswith("test_")
            or name.endswith("_test.py")
        ):
            return True
        return any(
            part in self._SKIP_DIRS or part.endswith(".egg-info")
            for part in filepath.parts
        )

    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """Extract all metrics from a Python file.

        Raises:
            FileAccessError: if the file cannot be read or stat'ed.
        """
        try:
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            # stat() inside the try so a racing delete/permission change is
            # also reported as FileAccessError instead of a raw OSError.
            last_modified = filepath.stat().st_mtime
        except OSError as e:
            raise FileAccessError(filepath, f"Cannot read file: {e}") from e
        except Exception as e:
            raise FileAccessError(filepath, f"Unexpected error: {e}") from e

        lines = content.split("\n")

        return FileMetrics(
            path=str(filepath.relative_to(self.root_dir)),
            lines=len(lines),
            tokens=self._count_tokens(content),
            imports=self._extract_imports(content),
            exports=self._extract_exports(content),
            functions=self._count_functions(content),
            interfaces=0,  # Python doesn't have interfaces
            structs=self._count_classes(content),
            complexity_score=self._estimate_complexity(content),
            nesting_depth=self._max_nesting_depth_python(content),
            ast_node_types=self._extract_ast_node_types(content),
            last_modified=last_modified,
        )

    def _count_tokens(self, content: str) -> int:
        """Approximate token count for Python (comments and strings excluded)."""
        # Strip string literals before comments so a '#' inside a string
        # does not truncate the rest of the line.
        content = re.sub(r'""".*?"""', "", content, flags=re.DOTALL)
        content = re.sub(r"'''.*?'''", "", content, flags=re.DOTALL)
        content = re.sub(r'"[^"]*"', "", content)
        content = re.sub(r"'[^']*'", "", content)
        content = re.sub(r"#.*", "", content)

        tokens = re.findall(r"\w+|[{}()\[\];,.:@]", content)
        return len(tokens)

    def _extract_imports(self, content: str) -> List[str]:
        """Extract top-level Python import targets (module names only)."""
        imports = []

        # import X
        for match in re.finditer(r"^import\s+(\S+)", content, re.MULTILINE):
            imports.append(match.group(1))

        # from X import Y
        for match in re.finditer(r"^from\s+(\S+)\s+import", content, re.MULTILINE):
            imports.append(match.group(1))

        return imports

    def _extract_exports(self, content: str) -> List[str]:
        """Extract public identifiers (no leading underscore).

        Collects top-level (async) function names, class names, and names
        listed in ``__all__``.  A name defined at top level and repeated in
        ``__all__`` appears twice, matching the original behavior.
        """
        exports = []

        # Public functions: def name( / async def name(
        exports.extend(
            re.findall(
                r"^(?:async\s+)?def\s+([a-zA-Z]\w*)\s*\(", content, re.MULTILINE
            )
        )

        # Public classes: class Name
        exports.extend(
            re.findall(r"^class\s+([a-zA-Z]\w*)", content, re.MULTILINE)
        )

        # __all__ list
        match = re.search(r"__all__\s*=\s*\[([^\]]+)\]", content, re.DOTALL)
        if match:
            exports.extend(re.findall(r'["\'](\w+)["\']', match.group(1)))

        return exports

    def _count_functions(self, content: str) -> int:
        """Count function and method definitions, including ``async def``."""
        return len(
            re.findall(r"^\s*(?:async\s+)?def\s+\w+\s*\(", content, re.MULTILINE)
        )

    def _count_classes(self, content: str) -> int:
        """Count top-level class definitions."""
        return len(re.findall(r"^class\s+\w+", content, re.MULTILINE))

    def _estimate_complexity(self, content: str) -> float:
        """Estimate cyclomatic complexity from branch keywords.

        Each decision point adds 1 to a base complexity of 1.  Matching is
        purely textual, so keywords inside string literals still count —
        acceptable for a relative heuristic.
        """
        complexity = 1

        complexity += len(re.findall(r"\bif\s+", content))
        complexity += len(re.findall(r"\belif\s+", content))
        complexity += len(re.findall(r"\belse\s*:", content))
        complexity += len(re.findall(r"\bfor\s+", content))
        complexity += len(re.findall(r"\bwhile\s+", content))
        # \bexcept\b (not \bexcept\s*) so identifiers such as
        # "exceptional" are not miscounted as handlers.
        complexity += len(re.findall(r"\bexcept\b", content))
        complexity += len(re.findall(r"\band\b", content))
        complexity += len(re.findall(r"\bor\b", content))
        complexity += len(re.findall(r"\bwith\s+", content))

        return float(complexity)

    def _max_nesting_depth_python(self, content: str) -> int:
        """Calculate max indentation depth (4 spaces = one level).

        Tabs are expanded to 4 spaces first so tab-indented files do not
        report a depth of zero.
        """
        max_depth = 0
        for raw in content.split("\n"):
            line = raw.expandtabs(4)
            stripped = line.lstrip()
            if not stripped or stripped.startswith("#"):
                continue
            indent = len(line) - len(stripped)
            # Python standard is 4 spaces per level
            max_depth = max(max_depth, indent // 4)
        return max_depth

    def _extract_ast_node_types(self, content: str) -> Counter:
        """Extract an approximate distribution of AST node types for Python."""
        node_types = Counter()

        node_types["function"] = self._count_functions(content)
        node_types["class"] = self._count_classes(content)
        node_types["import"] = len(self._extract_imports(content))
        node_types["export"] = len(self._extract_exports(content))
        node_types["if"] = len(re.findall(r"\bif\s+", content))
        node_types["for"] = len(re.findall(r"\bfor\s+", content))
        node_types["while"] = len(re.findall(r"\bwhile\s+", content))
        node_types["return"] = len(re.findall(r"\breturn\b", content))
        node_types["yield"] = len(re.findall(r"\byield\b", content))
        node_types["with"] = len(re.findall(r"\bwith\s+", content))
        node_types["try"] = len(re.findall(r"\btry\s*:", content))
        node_types["decorator"] = len(re.findall(r"^\s*@\w+", content, re.MULTILINE))

        return node_types
@@ -0,0 +1,162 @@
1
+ """TypeScript/React analyzer"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from collections import Counter
6
+ from typing import List, Optional
7
+
8
+ from .base import BaseScanner
9
+ from ..models import FileMetrics
10
+ from ..config import AnalysisSettings
11
+ from ..exceptions import FileAccessError
12
+ from ..logging_config import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class TypeScriptScanner(BaseScanner):
    """Scanner optimized for TypeScript and React codebases.

    Metrics are produced with regex heuristics (no real parser), so counts
    are approximations suited for relative comparison between files.
    """

    # Path components that indicate dependency / build / environment trees.
    _SKIP_DIRS = frozenset({
        "node_modules", "dist", "build", "venv", ".venv",
        "__pycache__", ".git", ".tox", ".mypy_cache",
    })

    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
        super().__init__(
            root_dir, extensions=[".ts", ".tsx", ".js", ".jsx"], settings=settings
        )

    def _should_skip(self, filepath: Path) -> bool:
        """Return True for paths inside dependency/build directories.

        Matches whole path components rather than substrings so that e.g.
        ``src/distribute.ts`` is not skipped because it contains "dist".
        """
        return any(part in self._SKIP_DIRS for part in filepath.parts)

    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """Extract all metrics from a TypeScript/React file.

        Raises:
            FileAccessError: if the file cannot be read or stat'ed.
        """
        try:
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            # stat() inside the try so a racing delete/permission change is
            # also reported as FileAccessError instead of a raw OSError.
            last_modified = filepath.stat().st_mtime
        except OSError as e:
            raise FileAccessError(filepath, f"Cannot read file: {e}") from e
        except Exception as e:
            raise FileAccessError(filepath, f"Unexpected error: {e}") from e

        lines = content.split("\n")

        return FileMetrics(
            path=str(filepath.relative_to(self.root_dir)),
            lines=len(lines),
            tokens=self._count_tokens(content),
            imports=self._extract_imports(content),
            exports=self._extract_exports(content),
            functions=self._count_functions(content),
            interfaces=self._count_classes(content),  # Use classes for TypeScript
            structs=self._count_react_components(content),  # Repurpose for components
            complexity_score=self._estimate_complexity(content),
            nesting_depth=self._max_nesting_depth(content),
            ast_node_types=self._extract_ast_node_types(content),
            last_modified=last_modified,
        )

    def _count_tokens(self, content: str) -> int:
        """Approximate token count for TypeScript (comments/strings removed)."""
        # Heuristic: comment removal runs first, so a '//' inside a string
        # literal (e.g. a URL) truncates that line's tokens slightly.
        content = re.sub(r"//.*", "", content)
        content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)
        content = re.sub(r'["\'].*?["\']', "", content)

        # Split on identifiers/keywords and common punctuation
        tokens = re.findall(r"\w+|[{}()\[\];,.]", content)
        return len(tokens)

    def _extract_imports(self, content: str) -> List[str]:
        """Extract imported module specifiers."""
        imports = []

        # Match: import X from 'Y'
        for match in re.finditer(
            r'import\s+.*?\s+from\s+["\']([^"\']+)["\']', content
        ):
            imports.append(match.group(1))

        # Match: import 'Y' (side-effect import)
        for match in re.finditer(r'import\s+["\']([^"\']+)["\']', content):
            imports.append(match.group(1))

        return imports

    def _extract_exports(self, content: str) -> List[str]:
        """Extract exported identifiers.

        Handles ``export const/function/class X`` plus ``export { ... }``
        lists, including ``X as Y`` renames (the exported name is the alias
        ``Y``) and trailing commas (``export { X, }`` previously raised
        IndexError on the empty item).
        """
        exports = []

        # export const/function/class X
        exports.extend(
            re.findall(r"export\s+(?:const|function|class)\s+(\w+)", content)
        )

        # export { X, Y as Z, }
        for match in re.finditer(r"export\s+\{([^}]+)\}", content):
            for item in match.group(1).split(","):
                words = item.strip().split()
                if not words:
                    continue  # trailing comma yields an empty item
                exports.append(words[-1] if "as" in words else words[0])

        return exports

    def _count_functions(self, content: str) -> int:
        """Count function declarations: ``function X``, arrow consts, function consts."""
        count = len(re.findall(r"\bfunction\s+\w+", content))
        count += len(
            re.findall(
                r"const\s+\w+\s*=\s*(?:\([^)]*\)|[a-zA-Z_]\w*)\s*=>", content
            )
        )
        count += len(re.findall(r"const\s+\w+\s*=\s*function", content))
        return count

    def _count_classes(self, content: str) -> int:
        """Count class declarations."""
        return len(re.findall(r"\bclass\s+\w+", content))

    def _count_react_components(self, content: str) -> int:
        """Count React component definitions (heuristic).

        Detects ``const X: React.FC`` and capitalized functions that return
        JSX; the DOTALL pattern may occasionally span across definitions.
        """
        count = len(re.findall(r"const\s+[A-Z]\w+\s*:\s*React\.FC", content))
        count += len(
            re.findall(r"function\s+[A-Z]\w+.*?return\s*\(?\s*<", content, re.DOTALL)
        )
        return count

    def _count_react_hooks(self, content: str) -> int:
        """Count React hook call sites (useState, useEffect, custom useX...)."""
        return len(re.findall(r"\buse[A-Z]\w+\s*\(", content))

    def _estimate_complexity(self, content: str) -> float:
        """Estimate cyclomatic complexity for TypeScript.

        Counts decision points: if, else, case, while, for, &&, ||, ??,
        and ternary '?'.  Matching is textual, so operators inside string
        literals still count — acceptable for a relative heuristic.
        """
        complexity = 1  # Base complexity

        complexity += len(re.findall(r"\bif\s*\(", content))
        complexity += len(re.findall(r"\belse\b", content))
        complexity += len(re.findall(r"\bcase\s+", content))
        complexity += len(re.findall(r"\bwhile\s*\(", content))
        complexity += len(re.findall(r"\bfor\s*\(", content))
        complexity += len(re.findall(r"&&", content))
        complexity += len(re.findall(r"\|\|", content))
        # '??' is one decision point like '||' (a bare \? counted it twice
        # and also caught optional chaining '?.').
        complexity += len(re.findall(r"\?\?", content))
        # Ternary '?': exclude '?.' and '??'; TS optional markers such as
        # 'x?: T' are still overcounted — this remains a heuristic.
        complexity += len(re.findall(r"(?<!\?)\?(?![.?])", content))

        return float(complexity)

    def _extract_ast_node_types(self, content: str) -> Counter:
        """Extract an approximate distribution of AST node types for TypeScript."""
        node_types = Counter()

        # TypeScript/React-specific node types
        node_types["function"] = self._count_functions(content)
        node_types["class"] = self._count_classes(content)
        node_types["component"] = self._count_react_components(content)
        node_types["hook"] = self._count_react_hooks(content)
        node_types["import"] = len(self._extract_imports(content))
        node_types["export"] = len(self._extract_exports(content))
        node_types["if"] = len(re.findall(r"\bif\s*\(", content))
        node_types["for"] = len(re.findall(r"\bfor\s*\(", content))
        node_types["while"] = len(re.findall(r"\bwhile\s*\(", content))
        node_types["return"] = len(re.findall(r"\breturn\b", content))
        node_types["jsx"] = len(re.findall(r"<[A-Z]\w+", content))

        return node_types
@@ -0,0 +1,214 @@
1
+ """
2
+ Caching system for Shannon Insight.
3
+
4
+ Uses diskcache for SQLite-based persistent caching.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ from functools import wraps
10
+ from pathlib import Path
11
+ from typing import Any, Callable, Optional
12
+
13
+ from diskcache import Cache
14
+
15
+ from .logging_config import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class AnalysisCache:
    """
    SQLite-based cache for analysis results.

    Wraps diskcache's ``Cache`` with TTL-based expiration, cache keys
    derived from file metadata, and a memoization decorator.  Every
    operation degrades to a no-op when caching is disabled, and cache
    backend failures are logged rather than raised.
    """

    def __init__(
        self,
        cache_dir: str = ".shannon-cache",
        ttl_hours: int = 24,
        enabled: bool = True
    ):
        """
        Initialize cache.

        Args:
            cache_dir: Directory for cache storage
            ttl_hours: Time-to-live in hours
            enabled: Whether caching is enabled
        """
        self.enabled = enabled
        self.ttl_seconds = ttl_hours * 3600
        self.cache = Cache(cache_dir) if enabled else None
        if enabled:
            logger.debug(f"Cache initialized at {cache_dir} with TTL={ttl_hours}h")
        else:
            logger.debug("Cache disabled")

    def _get_file_key(self, filepath: Path, config_hash: str) -> str:
        """
        Build a cache key from file metadata plus the configuration hash.

        The key incorporates path, mtime, size, and config hash so that any
        file change or configuration change invalidates the entry.  If the
        file cannot be stat'ed, the key degrades to path + config hash only.

        Args:
            filepath: File path
            config_hash: Hash of configuration settings

        Returns:
            Cache key string (SHA256 hex digest)
        """
        try:
            meta = filepath.stat()
        except OSError:
            raw = f"{filepath}:{config_hash}"
        else:
            raw = f"{filepath}:{meta.st_mtime}:{meta.st_size}:{config_hash}"
        return hashlib.sha256(raw.encode()).hexdigest()

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value, or None when disabled, missing, or expired
        """
        if not self.enabled or self.cache is None:
            return None

        try:
            hit = self.cache.get(key)
        except Exception as e:
            logger.warning(f"Cache get failed: {e}")
            return None
        if hit is None:
            return None
        logger.debug(f"Cache hit: {key[:16]}...")
        return hit

    def set(self, key: str, value: Any) -> None:
        """
        Store *value* under *key* with the configured TTL (no-op when disabled).

        Args:
            key: Cache key
            value: Value to cache
        """
        if not self.enabled or self.cache is None:
            return

        try:
            self.cache.set(key, value, expire=self.ttl_seconds)
        except Exception as e:
            logger.warning(f"Cache set failed: {e}")
        else:
            logger.debug(f"Cache set: {key[:16]}...")

    def clear(self) -> None:
        """Remove every cache entry (no-op when disabled)."""
        if not self.enabled or self.cache is None:
            return

        try:
            self.cache.clear()
        except Exception as e:
            logger.warning(f"Cache clear failed: {e}")
        else:
            logger.info("Cache cleared")

    def stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dict with ``enabled`` plus size/directory/volume when active,
            or an ``error`` entry if the backend query failed.
        """
        if not self.enabled or self.cache is None:
            return {"enabled": False}

        try:
            return {
                "enabled": True,
                "size": len(self.cache),
                "directory": self.cache.directory,
                "volume": self.cache.volume()
            }
        except Exception as e:
            logger.warning(f"Cache stats failed: {e}")
            return {"enabled": True, "error": str(e)}

    def memoize(
        self,
        config_hash: Optional[str] = None
    ) -> Callable:
        """
        Decorator that caches a file-analysis function's results.

        The wrapped function must take the file path as its first argument;
        the key is derived from that file's metadata plus *config_hash*
        (falling back to a ``config_hash`` keyword argument of the call).

        Usage:
            cache = AnalysisCache()

            @cache.memoize(config_hash="abc123")
            def analyze_file(filepath: Path) -> FileMetrics:
                ...

        Args:
            config_hash: Hash of configuration (for cache invalidation)

        Returns:
            Decorator function
        """
        def decorator(func: Callable) -> Callable:
            @wraps(func)
            def wrapper(filepath: Path, *args, **kwargs):
                if not self.enabled or self.cache is None:
                    return func(filepath, *args, **kwargs)

                key = self._get_file_key(
                    filepath, config_hash or kwargs.get('config_hash', '')
                )

                cached = self.get(key)
                if cached is not None:
                    return cached

                # NOTE(review): a result that is literally None is never
                # cached, so such calls recompute every time.
                fresh = func(filepath, *args, **kwargs)
                self.set(key, fresh)
                return fresh

            return wrapper
        return decorator

    def close(self) -> None:
        """Release the underlying cache resources (safe to call when disabled)."""
        if self.cache is not None:
            self.cache.close()
200
+
201
+
202
def compute_config_hash(config: dict) -> str:
    """
    Compute a short, stable hash of a configuration dict.

    Keys are sorted before serialization so two dicts with equal contents
    always hash identically regardless of insertion order.

    Args:
        config: Configuration dictionary

    Returns:
        First 16 hex digits of the SHA256 of the canonical JSON form
    """
    canonical = json.dumps(config, sort_keys=True)
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()[:16]