shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,373 @@
+ """
+ Advanced signal fusion with mathematical rigor.
+ 
+ Implements:
+ - Dempster-Shafer evidence theory
+ - Bayesian evidence combination
+ - Consistency-weighted fusion using statistical methods
+ - Confidence quantification
+ """
+ 
+ import math
+ import statistics
+ from typing import Dict, List, Tuple, Optional
+ import numpy as np
+ 
+ from ..models import Primitives
+ from ..logging_config import get_logger
+ 
+ logger = get_logger(__name__)
+ 
+ 
+ class SignalFusion:
+     """
+     Fuse multiple signals with statistical confidence quantification.
+ 
+     Uses advanced methods from evidence theory and statistics.
+     """
+ 
+     def __init__(
+         self,
+         primitives: Dict[str, Primitives],
+         normalized: Dict[str, Primitives],
+         weights: Optional[List[float]] = None,
+     ):
+         """
+         Initialize signal fusion.
+ 
+         Args:
+             primitives: Raw primitives
+             normalized: Normalized primitives (z-scores)
+             weights: Fusion weights [entropy, centrality, churn, coherence, cognitive]
+         """
+         self.primitives = primitives
+         self.normalized = normalized
+         self.weights = weights or [0.2, 0.25, 0.2, 0.15, 0.2]
+         logger.debug(f"Initialized SignalFusion with weights={self.weights}")
+ 
+     def fuse(self) -> Dict[str, Tuple[float, float]]:
+         """
+         Fuse signals with consistency weighting.
+ 
+         Uses coefficient of variation on absolute values for consistency,
+         and applies confidence-based weighting.
+ 
+         Returns:
+             Dictionary mapping paths to (score, confidence) tuples
+         """
+         results = {}
+ 
+         for path in self.primitives.keys():
+             norm = self.normalized[path]
+ 
+             # Extract z-scores
+             scores = [
+                 norm.structural_entropy,
+                 norm.network_centrality,
+                 norm.churn_volatility,
+                 norm.semantic_coherence,
+                 norm.cognitive_load,
+             ]
+ 
+             # Compute multiple consistency measures
+             cv_consistency = self._coefficient_of_variation_consistency(scores)
+             correlation_consistency = self._correlation_consistency(scores)
+             entropy_consistency = self._entropy_consistency(scores)
+ 
+             # Combine consistency measures (weighted geometric mean)
+             overall_consistency = (
+                 cv_consistency ** 0.4
+                 * correlation_consistency ** 0.3
+                 * entropy_consistency ** 0.3
+             )
+ 
+             # Weighted average of signals
+             fused_score = sum(s * w for s, w in zip(scores, self.weights))
+ 
+             # Final score = consistency * |weighted_average|
+             # This penalizes inconsistent signals
+             final_score = overall_consistency * abs(fused_score)
+ 
+             results[path] = (final_score, overall_consistency)
+ 
+         return results
+ 
+     def _coefficient_of_variation_consistency(self, z_scores: List[float]) -> float:
+         """
+         Compute consistency using coefficient of variation on absolute values.
+ 
+         CV = σ/μ; a lower CV means more consistent signals.
+ 
+         Consistency = 1 / (1 + CV)
+         """
+         abs_scores = [abs(s) for s in z_scores]
+         mean_abs = statistics.mean(abs_scores)
+ 
+         if mean_abs == 0:
+             return 1.0
+ 
+         std_abs = statistics.stdev(abs_scores) if len(abs_scores) > 1 else 0
+ 
+         # Coefficient of variation
+         cv = std_abs / mean_abs if mean_abs > 0 else 0
+ 
+         # Convert to consistency: CV=0 -> 1.0, CV=inf -> 0
+         consistency = 1.0 / (1.0 + cv)
+ 
+         return consistency
+ 
+     def _correlation_consistency(self, z_scores: List[float]) -> float:
+         """
+         Compute consistency from sign agreement among the signals.
+ 
+         For a truly anomalous file, we expect most signals to point in the same direction.
+         """
+         # Convert to signs (+1 or -1), ignoring zero scores
+         signs = [1 if s > 0 else -1 for s in z_scores if s != 0]
+ 
+         if len(signs) < 2:
+             return 1.0
+ 
+         # Count how many agree with the majority
+         majority_sign = statistics.mode(signs)
+         agreement_count = sum(1 for s in signs if s == majority_sign)
+ 
+         # Consistency = proportion of signals that agree
+         consistency = agreement_count / len(signs)
+ 
+         return consistency
+ 
+     def _entropy_consistency(self, z_scores: List[float]) -> float:
+         """
+         Compute consistency using entropy of signal distribution.
+ 
+         Lower entropy means signals are more clustered (more consistent).
+         """
+         # Convert to probability distribution
+         abs_scores = [abs(s) for s in z_scores]
+         total = sum(abs_scores)
+ 
+         if total == 0:
+             return 1.0
+ 
+         # Normalize to probabilities
+         probs = [s / total for s in abs_scores]
+ 
+         # Compute Shannon entropy
+         entropy_val = 0.0
+         for p in probs:
+             if p > 0:
+                 entropy_val -= p * math.log2(p)
+ 
+         # Maximum possible entropy
+         max_entropy = math.log2(len(probs)) if len(probs) > 1 else 1.0
+ 
+         # Normalize entropy to [0, 1]
+         normalized_entropy_val = entropy_val / max_entropy if max_entropy > 0 else 0
+ 
+         # Consistency = 1 - normalized_entropy
+         # Low entropy (clustered signals) = high consistency
+         consistency = 1.0 - normalized_entropy_val
+ 
+         return consistency
+ 
+     def bayesian_fusion(
+         self, priors: List[float], likelihoods: List[float]
+     ) -> Tuple[float, float]:
+         """
+         Bayesian evidence combination.
+ 
+         P(H|E) = P(E|H) * P(H) / P(E)
+ 
+         Computes the posterior for each hypothesis, normalizes by total
+         evidence, and returns the maximum posterior along with an
+         entropy-based confidence measure.
+ 
+         Args:
+             priors: Prior probabilities for each hypothesis (should sum to 1)
+             likelihoods: Likelihoods P(E|H_i) for each hypothesis
+ 
+         Returns:
+             Tuple of (max_posterior, confidence)
+             confidence is 1 - normalized_entropy of the posterior distribution,
+             bounded in [0, 1].
+ 
+         Reference:
+             Bayes' theorem; Bishop, "Pattern Recognition and Machine Learning"
+             (2006), Chapter 1.2.
+         """
+         if len(priors) != len(likelihoods):
+             raise ValueError("priors and likelihoods must have the same length")
+ 
+         # Unnormalized posteriors: P(E|H_i) * P(H_i)
+         unnormalized = [p * l for p, l in zip(priors, likelihoods)]
+         evidence = sum(unnormalized)
+ 
+         if evidence <= 0:
+             n = len(priors)
+             return (1.0 / n if n > 0 else 0.0), 0.0
+ 
+         posteriors = [u / evidence for u in unnormalized]
+         max_posterior = max(posteriors)
+ 
+         # Confidence = 1 - normalized entropy of the posterior distribution.
+         n = len(posteriors)
+         if n <= 1:
+             confidence = 1.0
+         else:
+             entropy = -sum(p * math.log2(p) for p in posteriors if p > 0)
+             max_entropy = math.log2(n)
+             confidence = 1.0 - (entropy / max_entropy) if max_entropy > 0 else 1.0
+ 
+         return float(max_posterior), float(confidence)
+ 
+     def dempster_shafer_combine(
+         self, mass_functions: List[Dict[frozenset, float]]
+     ) -> Dict[frozenset, float]:
+         """
+         Combine evidence using Dempster-Shafer theory.
+ 
+         m(A) = Σ(B∩C=A) m1(B) * m2(C) / (1 - K)
+ 
+         where K is the conflict coefficient.
+ 
+         Keys must be frozensets representing hypothesis sets.
+ 
+         Args:
+             mass_functions: List of mass functions {frozenset(hypotheses): mass}
+ 
+         Returns:
+             Combined mass function
+         """
+         if not mass_functions:
+             return {}
+ 
+         combined = mass_functions[0].copy()
+ 
+         for i in range(1, len(mass_functions)):
+             m2 = mass_functions[i]
+             new_combined: Dict[frozenset, float] = {}
+             total_conflict = 0.0
+ 
+             for a, ma in combined.items():
+                 for b, mb in m2.items():
+                     intersection = a & b  # proper set intersection
+                     if intersection:
+                         new_combined[intersection] = (
+                             new_combined.get(intersection, 0.0) + ma * mb
+                         )
+                     else:
+                         total_conflict += ma * mb
+ 
+             normalization = 1.0 - total_conflict
+             if normalization > 0:
+                 new_combined = {k: v / normalization for k, v in new_combined.items()}
+ 
+             combined = new_combined
+ 
+         return combined
+ 
+     def multivariate_fusion(
+         self, z_scores: np.ndarray, covariance: np.ndarray
+     ) -> float:
+         """
+         Multivariate fusion considering signal correlations.
+ 
+         Uses Mahalanobis distance to account for correlations between signals.
+ 
+         Args:
+             z_scores: Z-score vector (1D array)
+             covariance: Covariance matrix of signals
+ 
+         Returns:
+             Fused score (z-equivalent via chi-squared conversion)
+         """
+         z = np.atleast_1d(z_scores).astype(float)
+         k = len(z)
+ 
+         # Check if covariance is invertible via condition number
+         try:
+             cond = np.linalg.cond(covariance)
+             if cond > 1e12:
+                 return float(np.linalg.norm(z))
+             inv_cov = np.linalg.inv(covariance)
+         except np.linalg.LinAlgError:
+             return float(np.linalg.norm(z))
+ 
+         # D^2 = z^T Sigma^-1 z
+         md_squared = float(z @ inv_cov @ z)
+ 
+         from scipy import stats
+ 
+         p_value = 1 - stats.chi2.cdf(md_squared, k)
+ 
+         # p ≈ 0 means maximally significant: map to a large z-score.
+         # p ≈ 1 means no anomaly: map to z = 0.
+         if p_value <= 0:
+             z_equiv = 10.0  # practical upper bound for extreme significance
+         elif p_value >= 1:
+             z_equiv = 0.0
+         else:
+             z_equiv = stats.norm.ppf(1 - p_value)
+ 
+         return float(z_equiv)
+ 
+     def adaptive_fusion(
+         self, z_scores: List[float], signal_reliabilities: List[float]
+     ) -> float:
+         """
+         Adaptive fusion that weights signals by reliability.
+ 
+         Final score = Σ (reliability_i * score_i) / Σ reliability_i
+ 
+         Args:
+             z_scores: Z-scores of signals
+             signal_reliabilities: Reliability of each signal [0, 1]
+ 
+         Returns:
+             Fused score
+         """
+         if len(z_scores) != len(signal_reliabilities):
+             raise ValueError("z_scores and signal_reliabilities must have same length")
+ 
+         total_reliability = sum(signal_reliabilities)
+ 
+         if total_reliability == 0:
+             return statistics.mean(z_scores)
+ 
+         weighted_sum = sum(
+             rel * score for rel, score in zip(signal_reliabilities, z_scores)
+         )
+ 
+         return weighted_sum / total_reliability
+ 
+     def confidence_weighted_fusion(
+         self, z_scores: List[float], confidences: List[float]
+     ) -> Tuple[float, float]:
+         """
+         Fusion with explicit confidence weighting.
+ 
+         Args:
+             z_scores: Z-scores of signals
+             confidences: Confidence in each signal [0, 1]
+ 
+         Returns:
+             Tuple of (fused_score, overall_confidence)
+         """
+         if len(z_scores) != len(confidences):
+             raise ValueError("z_scores and confidences must have same length")
+ 
+         # Weighted average
+         total_weight = sum(confidences)
+         if total_weight == 0:
+             return statistics.mean(z_scores), 0.5
+ 
+         weighted_score = sum(
+             conf * score for conf, score in zip(confidences, z_scores)
+         ) / total_weight
+ 
+         # Overall confidence using geometric mean
+         # More sensitive to low confidence values
+         overall_confidence = math.prod(confidences) ** (1.0 / len(confidences))
+ 
+         return float(weighted_score), float(overall_confidence)
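The combination methods above (bayesian_fusion, dempster_shafer_combine, confidence_weighted_fusion) read only their arguments, so they can be exercised in isolation. The sketch below is illustrative and not part of the package contents; it assumes that constructing SignalFusion with empty primitives/normalized dicts is acceptable, which holds for the __init__ shown above because the dicts are only stored. The values in the comments follow directly from the formulas in the docstrings.

    from shannon_insight.primitives.fusion import SignalFusion

    # Empty dicts suffice here because __init__ only stores them; the methods
    # below use nothing but their own arguments.
    fusion = SignalFusion(primitives={}, normalized={})

    # Bayesian combination over two hypotheses ("anomalous" vs. "normal").
    # Posteriors become [0.8, 0.2], so max_posterior = 0.8 and the
    # entropy-based confidence is roughly 0.28.
    posterior, confidence = fusion.bayesian_fusion(
        priors=[0.5, 0.5], likelihoods=[0.8, 0.2]
    )

    # Dempster-Shafer combination of two mass functions over {"a", "b"};
    # the conflicting mass (0.6 * 0.3 = 0.18) is redistributed by normalization.
    m1 = {frozenset({"a"}): 0.6, frozenset({"a", "b"}): 0.4}
    m2 = {frozenset({"a"}): 0.7, frozenset({"b"}): 0.3}
    combined = fusion.dempster_shafer_combine([m1, m2])

    # Confidence-weighted averaging of five z-scores; the second return value
    # is the geometric mean of the confidences.
    score, overall = fusion.confidence_weighted_fusion(
        z_scores=[2.1, 1.8, 0.4, 2.5, 1.9],
        confidences=[0.9, 0.8, 0.5, 0.9, 0.7],
    )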
@@ -0,0 +1,158 @@
+ """Generate actionable recommendations from fused signals"""
+ 
+ from typing import Dict, List, Tuple
+ from ..models import FileMetrics, Primitives, AnomalyReport
+ 
+ 
+ class RecommendationEngine:
+     """Generate actionable recommendations from fused signals"""
+ 
+     def __init__(
+         self,
+         files: List[FileMetrics],
+         primitives: Dict[str, Primitives],
+         normalized: Dict[str, Primitives],
+         anomalies: Dict[str, List[str]],
+         fused_scores: Dict[str, Tuple[float, float]],
+     ):
+         self.files = files
+         self.file_map = {f.path: f for f in files}
+         self.primitives = primitives
+         self.normalized = normalized
+         self.anomalies = anomalies
+         self.fused_scores = fused_scores
+ 
+     def generate(self) -> List[AnomalyReport]:
+         """Generate comprehensive analysis reports"""
+         reports = []
+ 
+         # Sort by fused score (descending)
+         sorted_files = sorted(
+             self.fused_scores.items(), key=lambda x: x[1][0], reverse=True
+         )
+ 
+         for path, (score, confidence) in sorted_files:
+             if path not in self.anomalies:
+                 continue  # Skip non-anomalous files
+ 
+             flags = self.anomalies[path]
+             root_causes = self._identify_root_causes(path, flags)
+             recommendations = self._generate_recommendations(path, flags, root_causes)
+ 
+             report = AnomalyReport(
+                 file=path,
+                 overall_score=score,
+                 confidence=confidence,
+                 primitives=self.primitives[path],
+                 normalized_primitives=self.normalized[path],
+                 anomaly_flags=flags,
+                 root_causes=root_causes,
+                 recommendations=recommendations,
+             )
+ 
+             reports.append(report)
+ 
+         return reports
+ 
+     def _identify_root_causes(self, path: str, flags: List[str]) -> List[str]:
+         """Identify root causes from anomaly flags"""
+         causes = []
+         file = self.file_map.get(path)
+ 
+         if "high_centrality" in flags and "high_volatility" in flags:
+             causes.append("Critical hub with unstable interface")
+ 
+         if "high_cognitive_load" in flags and "structural_entropy_high" in flags:
+             causes.append("Complex file with chaotic organization")
+ 
+         if "semantic_coherence_low" in flags:
+             if file and len(file.imports) > 10:
+                 causes.append(
+                     f"Too many imports ({len(file.imports)}) - file handles unrelated concerns"
+                 )
+             else:
+                 causes.append("Low cohesion - file handles multiple unrelated concerns")
+ 
+         if "high_centrality" in flags:
+             causes.append("High coupling - many files depend on this")
+ 
+         if "structural_entropy_low" in flags:
+             causes.append("Overly uniform structure - possible code duplication")
+ 
+         if "high_cognitive_load" in flags:
+             if file:
+                 causes.append(
+                     f"High cognitive load: {file.functions} functions, "
+                     f"complexity={file.complexity_score}, nesting={file.nesting_depth}"
+                 )
+             else:
+                 causes.append("High cognitive load - too many concepts")
+ 
+         if not causes:
+             causes.append("General code quality concern")
+ 
+         return causes
+ 
+     def _generate_recommendations(
+         self, path: str, flags: List[str], causes: List[str]
+     ) -> List[str]:
+         """Generate actionable recommendations"""
+         recs = []
+         file = self.file_map.get(path)
+ 
+         if "high_cognitive_load" in flags:
+             if file and file.nesting_depth > 5:
+                 recs.append(
+                     f"Reduce nesting depth (currently {file.nesting_depth}) - flatten deeply nested conditionals"
+                 )
+             if file and file.complexity_score > 10:
+                 recs.append(
+                     f"Reduce cyclomatic complexity (currently {file.complexity_score}) - extract guard clauses"
+                 )
+             recs.append("Split file into smaller, focused modules")
+             recs.append("Extract helper functions to reduce complexity")
+ 
+         if "high_centrality" in flags:
+             recs.append("Implement dependency injection to reduce coupling")
+             recs.append("Extract interface to isolate dependents")
+             recs.append("Consider moving shared types to separate module")
+ 
+         if "semantic_coherence_low" in flags:
+             if file and len(file.imports) > 10:
+                 recs.append(
+                     f"Reduce import count from {len(file.imports)} to <10 - group related imports"
+                 )
+             recs.append("Separate concerns into different files")
+             recs.append("Group related functions into cohesive modules")
+             recs.append("Consider extracting unrelated functionality to separate files")
+ 
+         if "high_volatility" in flags:
+             recs.append("Stabilize interface - add integration tests")
+             recs.append("Consider feature flags for experimental changes")
+             recs.append("Review commit history for thrashing patterns")
+ 
+         if "structural_entropy_high" in flags:
+             recs.append("Refactor to follow consistent patterns")
+             recs.append("Standardize code structure across file")
+ 
+         if "structural_entropy_low" in flags:
+             recs.append("Review for code duplication - extract common patterns")
+             recs.append("Consider DRY principle - eliminate copy-paste code")
+ 
+         # Add file-specific recommendations
+         if file:
+             if file.functions > 10:
+                 recs.append(
+                     f"Extract business logic from {file.functions} functions into separate modules"
+                 )
+             if file.structs > 5:
+                 recs.append(
+                     f"Consider consolidating {file.structs} struct types into related modules"
+                 )
+             if file.interfaces > 5:
+                 recs.append(f"Group {file.interfaces} interfaces by responsibility")
+ 
+         if not recs:
+             recs.append("Review file manually for code quality improvements")
+ 
+         return recs
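For orientation, a hedged wiring sketch (not from the package) of how RecommendationEngine consumes the output of SignalFusion.fuse(). The variables files, primitives, normalized, and anomalies are placeholders assumed to be produced elsewhere in the package, for example by its analyzers and primitive detectors; only the constructor and method signatures are taken from the diffs above, and AnomalyReport is assumed to expose its constructor arguments as attributes.

    from shannon_insight.primitives.fusion import SignalFusion
    from shannon_insight.primitives.recommendations import RecommendationEngine

    # fused_scores maps each file path to a (score, confidence) tuple.
    fused_scores = SignalFusion(primitives, normalized).fuse()

    engine = RecommendationEngine(
        files=files,            # List[FileMetrics]
        primitives=primitives,  # Dict[str, Primitives] (raw)
        normalized=normalized,  # Dict[str, Primitives] (z-scores)
        anomalies=anomalies,    # Dict[str, List[str]] of anomaly flags per path
        fused_scores=fused_scores,
    )

    # Reports come back sorted by fused score and cover anomalous files only.
    for report in engine.generate():
        print(report.file, report.overall_score, report.recommendations)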
@@ -0,0 +1,2 @@
+ # PEP 561 marker file
+ # This indicates that the package supports type checking