shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,48 @@
1
+ """Configuration and security exceptions: paths, settings, access control."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional, Any
5
+
6
+ from .base import ShannonInsightError
7
+
8
+
9
class ConfigurationError(ShannonInsightError):
    """Base class for configuration-related errors.

    Concrete subclasses in this module: InvalidPathError, InvalidConfigError,
    SecurityError.
    """
    # The docstring alone forms a valid class body; the original trailing
    # `pass` was redundant and has been removed.
12
+
13
+
14
class InvalidPathError(ConfigurationError):
    """Raised when a provided path is invalid.

    Attributes:
        path: The offending path.
        reason: Human-readable explanation of why the path was rejected.
    """

    def __init__(self, path: Path, reason: str):
        # Expose both fields in the structured `details` payload so callers
        # can log or serialize the failure without parsing the message text.
        detail_payload = {"path": str(path), "reason": reason}
        super().__init__(f"Invalid path: {path}", details=detail_payload)
        self.path = path
        self.reason = reason
23
+
24
+
25
class InvalidConfigError(ConfigurationError):
    """Raised when configuration values are invalid.

    Attributes:
        key: Name of the configuration entry.
        value: The rejected value (stringified for the details payload).
        reason: Why the value was rejected.
    """

    def __init__(self, key: str, value: Any, reason: str):
        detail_payload = {"key": key, "value": str(value), "reason": reason}
        super().__init__(
            f"Invalid configuration for {key}: {value}",
            details=detail_payload,
        )
        self.key = key
        self.value = value
        self.reason = reason
36
+
37
+
38
class SecurityError(ConfigurationError):
    """Raised when a security violation is detected.

    Attributes:
        reason: Description of the violation.
        filepath: The offending file, when one is involved.
    """

    def __init__(self, reason: str, filepath: Optional[Path] = None):
        detail_payload = {"reason": reason}
        # Only record a filepath entry when a path was actually supplied.
        if filepath:
            detail_payload["filepath"] = str(filepath)
        super().__init__(f"Security violation: {reason}", details=detail_payload)
        self.reason = reason
        self.filepath = filepath
@@ -0,0 +1,218 @@
1
+ """
2
+ Safe file operations for Shannon Insight.
3
+
4
+ Provides timeout-protected and size-limited file operations.
5
+ """
6
+
7
+ import signal
8
+ from contextlib import contextmanager
9
+ from pathlib import Path
10
+ from typing import Generator, List, Optional
11
+
12
+ from .exceptions import FileAccessError, SecurityError
13
+ from .security import PathValidator, ResourceLimiter
14
+
15
+
16
class TimeoutError(Exception):
    """Raised when an operation times out.

    NOTE: this intentionally shadows the builtin ``TimeoutError`` within this
    module — `except TimeoutError` here catches this class, not the builtin
    OSError-based one. The original redundant `pass` after the docstring has
    been removed.
    """
19
+
20
+
21
def _timeout_handler(signum, frame):
    """Signal handler for timeout.

    Installed by timeout() for SIGALRM; converts the alarm delivery into a
    TimeoutError so the protected operation is aborted.
    """
    raise TimeoutError("Operation timed out")
24
+
25
+
26
@contextmanager
def timeout(seconds: int):
    """
    Bound the wall-clock time of the enclosed block using SIGALRM.

    Args:
        seconds: Number of seconds before the alarm fires

    Raises:
        TimeoutError: If the block does not finish within ``seconds``

    NOTE(review): relies on signal.SIGALRM, so this works only on Unix and
    only in the main thread; nested uses clobber each other's alarms —
    confirm callers respect both constraints.
    """
    previous_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Cancel any pending alarm first, then reinstate the prior handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous_handler)
47
+
48
+
49
def safe_read_file(
    filepath: Path,
    validator: Optional[PathValidator] = None,
    limiter: Optional[ResourceLimiter] = None,
    timeout_seconds: int = 10,
    encoding: str = 'utf-8',
    errors: str = 'replace'
) -> str:
    """
    Read a file defensively: optional path validation, optional size
    limiting, and a wall-clock timeout around the actual read.

    Args:
        filepath: File to read
        validator: Path validator (validation skipped when None)
        limiter: Resource limiter (size check skipped when None)
        timeout_seconds: Maximum seconds allowed for the read
        encoding: Text encoding
        errors: Encoding-error strategy passed to open()

    Returns:
        The file's contents as a single string

    Raises:
        FileAccessError: On timeout, decode failure, or any other read error
        SecurityError: If the validation or size checks fail
    """
    # Security checks happen before the read, outside the try, so failures
    # there surface as SecurityError rather than FileAccessError.
    if validator:
        filepath = validator.validate_path(filepath)
    if limiter:
        limiter.check_file_size(filepath)

    try:
        with timeout(timeout_seconds):
            with open(filepath, 'r', encoding=encoding, errors=errors) as handle:
                return handle.read()
    except TimeoutError:
        raise FileAccessError(filepath, f"Read operation timed out after {timeout_seconds}s")
    except UnicodeDecodeError as e:
        raise FileAccessError(filepath, f"Encoding error: {e}")
    except OSError as e:
        raise FileAccessError(filepath, f"OS error: {e}")
    except Exception as e:
        # Catch-all so callers only ever see FileAccessError from this path.
        raise FileAccessError(filepath, f"Unexpected error: {e}")
97
+
98
+
99
def safe_scan_directory(
    root_dir: Path,
    pattern: str = "**/*",
    validator: Optional[PathValidator] = None,
    limiter: Optional[ResourceLimiter] = None,
    follow_symlinks: bool = False
) -> Generator[Path, None, None]:
    """
    Safely scan a directory with security checks.

    Args:
        root_dir: Directory to scan
        pattern: Glob pattern
        validator: Path validator (skipped when None)
        limiter: Resource limiter (skipped when None)
        follow_symlinks: Whether to follow symbolic links

    Yields:
        File paths that passed every enabled check

    Raises:
        SecurityError: If resource limits are exceeded
        FileAccessError: If the directory itself cannot be scanned
    """
    # Validate root directory
    if validator:
        root_dir = validator.validate_path(root_dir)

    try:
        for path in root_dir.glob(pattern):
            # Skip symlinks if not following them
            if path.is_symlink() and not follow_symlinks:
                continue

            # Skip directories — only files are yielded
            if path.is_dir():
                continue

            # NOTE(review): this counts every candidate file, including ones
            # skipped by the checks below, so the limit applies to files
            # *inspected*, not files yielded — confirm that is intended.
            if limiter:
                limiter.increment_file_count()

            if validator:
                try:
                    path = validator.validate_path(path)
                except Exception:
                    # Best-effort: skip any file that fails validation.
                    # (Was `except (SecurityError, Exception)` — the tuple was
                    # redundant since Exception already subsumes SecurityError.)
                    continue

            if limiter:
                try:
                    limiter.check_file_size(path)
                except SecurityError:
                    # Skip files that exceed the size limit
                    continue

            yield path

    except OSError as e:
        # Chain the cause so the original OS error stays in the traceback.
        raise FileAccessError(root_dir, f"Directory scan failed: {e}") from e
160
+
161
+
162
def safe_write_file(
    filepath: Path,
    content: str,
    validator: Optional[PathValidator] = None,
    encoding: str = 'utf-8'
) -> None:
    """
    Write text to a file, optionally validating its parent directory first.

    Missing parent directories are created automatically.

    Args:
        filepath: File to write
        content: Content to write
        validator: Path validator (skipped when None)
        encoding: Text encoding

    Raises:
        FileAccessError: If the file cannot be written
        SecurityError: If the parent-directory validation fails
    """
    # Only the parent directory can be validated: the target file itself may
    # not exist yet. Validation of a not-yet-existing parent is skipped too.
    if validator:
        parent = filepath.parent
        if parent.exists():
            validator.validate_path(parent)

    try:
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding=encoding) as handle:
            handle.write(content)
    except OSError as e:
        raise FileAccessError(filepath, f"Write failed: {e}")
    except Exception as e:
        # Normalize anything unexpected into the module's error type.
        raise FileAccessError(filepath, f"Unexpected error: {e}")
199
+
200
+
201
def should_skip_file(
    filepath: Path,
    exclude_patterns: List[str]
) -> bool:
    """
    Decide whether a file matches any exclusion pattern.

    Args:
        filepath: File to check
        exclude_patterns: Glob patterns; matching any one excludes the file

    Returns:
        True if the file should be skipped
    """
    # Path.match handles glob semantics; any() short-circuits on first hit.
    return any(filepath.match(pattern) for pattern in exclude_patterns)
@@ -0,0 +1,98 @@
1
+ """
2
+ Logging configuration for Shannon Insight.
3
+
4
+ Provides structured logging with rich formatting for beautiful terminal output.
5
+ """
6
+
7
+ import logging
8
+ import sys
9
+ from typing import Optional
10
+
11
+ from rich.logging import RichHandler
12
+ from rich.console import Console
13
+
14
+
15
def setup_logging(
    verbose: bool = False,
    quiet: bool = False,
    log_file: Optional[str] = None
) -> logging.Logger:
    """
    Configure logging with a rich handler for colored terminal output.

    Args:
        verbose: Enable DEBUG level, local-variable tracebacks and paths
        quiet: Restrict output to ERROR level (takes precedence over verbose)
        log_file: Optional file path to also append logs to

    Returns:
        The configured ``shannon_insight`` logger
    """
    # quiet wins over verbose.
    level = logging.ERROR if quiet else logging.DEBUG if verbose else logging.INFO

    # Rich output goes to stderr so stdout stays clean for program output.
    rich_handler = RichHandler(
        console=Console(stderr=True),
        rich_tracebacks=True,
        tracebacks_show_locals=verbose,
        markup=True,
        show_time=True,
        show_path=verbose,
    )
    handlers = [rich_handler]

    if log_file:
        file_handler = logging.FileHandler(log_file, mode='a')
        file_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )
        )
        handlers.append(file_handler)

    # NOTE(review): basicConfig is a no-op when the root logger already has
    # handlers, so a second call will not reconfigure — confirm this function
    # is only called once per process.
    logging.basicConfig(
        level=level,
        format="%(message)s",
        datefmt="[%X]",
        handlers=handlers
    )

    logger = logging.getLogger("shannon_insight")
    logger.setLevel(level)
    return logger
78
+
79
+
80
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Return a logger scoped under the ``shannon_insight`` namespace.

    Args:
        name: Module name (e.g. 'shannon_insight.core'). Names outside the
            namespace are prefixed with 'shannon_insight.'. None returns the
            package root logger.

    Returns:
        Logger instance
    """
    if name is None:
        return logging.getLogger("shannon_insight")

    # Qualify bare module names so everything hangs off the package logger.
    qualified = name if name.startswith("shannon_insight") else f"shannon_insight.{name}"
    return logging.getLogger(qualified)
@@ -0,0 +1,15 @@
1
+ """Mathematical utilities for codebase analysis."""
2
+
3
+ from .entropy import Entropy
4
+ from .graph import GraphMetrics
5
+ from .statistics import Statistics
6
+ from .robust import RobustStatistics
7
+ from .fusion import SignalFusion
8
+
9
+ __all__ = [
10
+ "Entropy",
11
+ "GraphMetrics",
12
+ "Statistics",
13
+ "SignalFusion",
14
+ "RobustStatistics",
15
+ ]
@@ -0,0 +1,133 @@
1
+ """Information theory: Shannon entropy, KL divergence, joint entropy."""
2
+
3
+ import math
4
+ from typing import List, Mapping, Union
5
+
6
+
7
class Entropy:
    """Information entropy calculations (all quantities in bits)."""

    @staticmethod
    def shannon(distribution: Mapping[str, Union[int, float]]) -> float:
        """
        Compute Shannon entropy H(X) = -Σ p(x) log₂ p(x).

        Args:
            distribution: Dictionary with event -> count mapping

        Returns:
            Entropy in bits (0.0 for an empty or all-zero distribution)
        """
        total = sum(distribution.values())
        if total == 0:
            return 0.0

        entropy = 0.0
        for count in distribution.values():
            p = count / total
            # Zero-probability events contribute nothing (lim p·log p = 0).
            if p > 0:
                entropy -= p * math.log2(p)

        return entropy

    @staticmethod
    def normalized(distribution: Mapping[str, Union[int, float]]) -> float:
        """
        Normalize entropy by the maximum possible entropy.

        H_norm = H / log₂(N) where N is the number of unique events.

        Returns:
            Normalized entropy in [0, 1]; 0.0 for fewer than two events
        """
        h = Entropy.shannon(distribution)
        n = len(distribution)
        if n <= 1:
            return 0.0
        max_h = math.log2(n)
        return h / max_h if max_h > 0 else 0.0

    @staticmethod
    def kl_divergence(
        p: Mapping[str, Union[int, float]], q: Mapping[str, Union[int, float]]
    ) -> float:
        """
        Compute Kullback-Leibler divergence D_KL(P || Q).

        D_KL(P || Q) = Σ P(x) log₂(P(x) / Q(x))

        Args:
            p: Observed distribution
            q: Expected distribution

        Returns:
            KL divergence in bits (lower = more similar); inf when P has
            support where Q does not
        """
        total_p = sum(p.values())
        total_q = sum(q.values())

        if total_p == 0 or total_q == 0:
            return 0.0

        kl_div = 0.0
        for key, count_p in p.items():
            prob_p = count_p / total_p
            # .get with a 0 default already covers missing keys; the original
            # `... if key in q else 0` conditional was redundant.
            prob_q = q.get(key, 0) / total_q

            if prob_p > 0 and prob_q == 0:
                # D_KL is undefined (infinite) when P(x)>0 but Q(x)=0
                return float("inf")
            if prob_p > 0 and prob_q > 0:
                kl_div += prob_p * math.log2(prob_p / prob_q)

        return kl_div

    @staticmethod
    def joint_entropy(
        joint_distribution: Mapping[tuple, Union[int, float]]
    ) -> float:
        """
        Compute joint entropy H(X, Y, ...) from a joint distribution.

        H(X,Y) = -Σ_x Σ_y p(x,y) log₂ p(x,y)

        The joint distribution must be keyed by tuples representing
        joint outcomes, e.g. {("a", "b"): 5, ("a", "c"): 3, ...}.

        Args:
            joint_distribution: Dictionary mapping outcome tuples to counts

        Returns:
            Joint entropy in bits

        Reference:
            Cover & Thomas, *Elements of Information Theory*, 2nd ed.,
            Chapter 2 (Theorem 2.6.6).
        """
        # Delegate to shannon() — the formula is identical, only the
        # sample space changes from singleton events to joint events.
        return Entropy.shannon(joint_distribution)

    @staticmethod
    def pooled_entropy(*distributions: Mapping[str, Union[int, float]]) -> float:
        """
        Compute entropy of the pooled (merged) sample from multiple distributions.

        This is NOT the same as joint entropy. It merges all counts into a
        single distribution and computes H of the mixture. Useful when you
        want the entropy of the combined observation set.

        H_pooled = H(merge(X₁, X₂, ...))

        Args:
            *distributions: Multiple count distributions to pool

        Returns:
            Entropy of the pooled distribution in bits
        """
        merged: dict = {}
        for dist in distributions:
            for key, count in dist.items():
                merged[key] = merged.get(key, 0) + count

        return Entropy.shannon(merged)
@@ -0,0 +1,109 @@
1
+ """Evidence fusion: Bayesian combination, Dempster-Shafer theory."""
2
+
3
+ from typing import Dict, List, Tuple
4
+
5
+ import numpy as np
6
+
7
+
8
class SignalFusion:
    """Evidence-theoretic signal fusion methods."""

    @staticmethod
    def bayesian_fusion(
        priors: List[float], likelihoods: List[float]
    ) -> Tuple[float, float]:
        """
        Bayesian evidence combination: P(H|E) = P(E|H) * P(H) / P(E).

        Normalizes the per-hypothesis posteriors by the total evidence and
        returns the maximum posterior together with a confidence score
        derived from the posterior distribution's entropy.

        Args:
            priors: Prior probabilities for each hypothesis (should sum to 1)
            likelihoods: Likelihoods P(E|H_i) for each hypothesis

        Returns:
            Tuple of (max_posterior, confidence); confidence is
            1 - normalized entropy of the posteriors, bounded in [0, 1]

        Raises:
            ValueError: If the two input lists differ in length

        Reference:
            Bayes' theorem; Bishop, "Pattern Recognition and Machine Learning"
            (2006), Chapter 1.2.
        """
        import math

        if len(priors) != len(likelihoods):
            raise ValueError("priors and likelihoods must have the same length")

        # Unnormalized posteriors P(E|H_i) * P(H_i) and their sum P(E).
        joint = [prior * lik for prior, lik in zip(priors, likelihoods)]
        total = sum(joint)

        # Degenerate case: no evidence mass — fall back to a uniform guess
        # with zero confidence.
        if total <= 0:
            count = len(priors)
            return (1.0 / count, 0.0) if count > 0 else (0.0, 0.0)

        posteriors = [j / total for j in joint]
        best = max(posteriors)

        # Confidence = 1 - normalized entropy: all mass on one hypothesis
        # gives confidence 1; a uniform posterior gives confidence 0.
        count = len(posteriors)
        if count <= 1:
            certainty = 1.0
        else:
            h = -sum(prob * math.log2(prob) for prob in posteriors if prob > 0)
            h_max = math.log2(count)
            certainty = 1.0 - (h / h_max) if h_max > 0 else 1.0

        return float(best), float(certainty)

    @staticmethod
    def dempster_shafer_combination(
        mass_functions: List[Dict[frozenset, float]]
    ) -> Dict[frozenset, float]:
        """
        Combine evidence using Dempster-Shafer theory.

        m(A) = Σ(B∩C=A) m1(B) * m2(C) / (1 - K)

        where K is the conflict coefficient. Keys must be frozensets
        representing hypothesis sets.

        Args:
            mass_functions: List of mass functions {frozenset(hypotheses): mass}

        Returns:
            Combined mass function (empty dict for empty input)
        """
        if not mass_functions:
            return {}

        result = dict(mass_functions[0])

        # Fold the remaining mass functions into the running combination
        # one at a time using Dempster's rule.
        for m2 in mass_functions[1:]:
            merged: Dict[frozenset, float] = {}
            conflict = 0.0

            for set_a, mass_a in result.items():
                for set_b, mass_b in m2.items():
                    common = set_a & set_b  # proper set intersection
                    if common:
                        merged[common] = merged.get(common, 0.0) + mass_a * mass_b
                    else:
                        # Disjoint hypothesis sets contribute to conflict K.
                        conflict += mass_a * mass_b

            scale = 1.0 - conflict
            if scale > 0:
                merged = {hypothesis: mass / scale for hypothesis, mass in merged.items()}

            result = merged

        return result