shannon-codebase-insight 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
shannon_insight/math/graph.py
@@ -0,0 +1,209 @@
+ """Graph theory: PageRank, betweenness centrality, eigenvector centrality."""
+
+ import math
+ from typing import Dict, List
+
+
+ class GraphMetrics:
+     """Graph theory calculations for dependency graphs."""
+
+     @staticmethod
+     def pagerank(
+         adjacency: Dict[str, List[str]],
+         damping: float = 0.85,
+         iterations: int = 100,
+         tolerance: float = 1e-6,
+     ) -> Dict[str, float]:
+         """
+         Compute PageRank using power iteration.
+
+         PR(A) = (1 - d) / N + d * Σ (PR(Ti) / C(Ti))
+
+         Args:
+             adjacency: Node -> list of neighbors
+             damping: Damping factor (0.85 is standard)
+             iterations: Maximum iterations
+             tolerance: Convergence tolerance
+
+         Returns:
+             Dictionary mapping nodes to PageRank scores
+         """
+         # Work on a copy to avoid mutating the caller's data structure.
+         adj: Dict[str, List[str]] = {k: list(v) for k, v in adjacency.items()}
+
+         nodes = set(adj.keys())
+         for neighbors in adj.values():
+             nodes.update(neighbors)
+
+         if not nodes:
+             return {}
+
+         N = len(nodes)
+         rank = {node: 1.0 / N for node in nodes}
+
+         # Identify dangling nodes (no outgoing edges).
+         # Standard treatment: redistribute their mass uniformly to all nodes.
+         # Reference: Langville & Meyer, "Google's PageRank and Beyond" (2006), Ch. 3.
+         dangling = [node for node in nodes if node not in adj or len(adj[node]) == 0]
+
+         # Ensure every node has an adjacency entry (possibly empty).
+         for node in nodes:
+             if node not in adj:
+                 adj[node] = []
+
+         out_degree = {node: len(neighbors) for node, neighbors in adj.items()}
+
+         reverse: Dict[str, List[str]] = {node: [] for node in nodes}
+         for src, neighbors in adj.items():
+             for tgt in neighbors:
+                 if tgt in reverse:
+                     reverse[tgt].append(src)
+
+         for _ in range(iterations):
+             new_rank = {}
+             max_diff = 0.0
+
+             # Sum of rank mass sitting on dangling nodes.
+             dangling_sum = sum(rank[node] for node in dangling)
+
+             for node in nodes:
+                 # Teleportation + dangling-node redistribution.
+                 new_rank[node] = (1 - damping) / N + damping * dangling_sum / N
+
+                 for src in reverse[node]:
+                     if out_degree[src] > 0:
+                         new_rank[node] += damping * (rank[src] / out_degree[src])
+
+                 diff = abs(new_rank[node] - rank[node])
+                 max_diff = max(max_diff, diff)
+
+             rank = new_rank
+
+             if max_diff < tolerance:
+                 break
+
+         return rank
+
+     @staticmethod
+     def betweenness_centrality(
+         adjacency: Dict[str, List[str]], normalize: bool = True
+     ) -> Dict[str, float]:
+         """
+         Compute betweenness centrality using Brandes' algorithm.
+
+         C_B(v) = Σ (σ_st(v) / σ_st) where s != v != t
+
+         Args:
+             adjacency: Node -> list of neighbors
+             normalize: Normalize by (n-1)(n-2) (directed graphs)
+
+         Returns:
+             Dictionary mapping nodes to betweenness centrality
+         """
+         nodes = set(adjacency.keys())
+         for neighbors in adjacency.values():
+             nodes.update(neighbors)
+
+         betweenness = {node: 0.0 for node in nodes}
+
+         for s in nodes:
+             stack: List[str] = []
+             predecessors: Dict[str, List[str]] = {v: [] for v in nodes}
+             sigma = {v: 0 for v in nodes}
+             sigma[s] = 1
+
+             dist = {v: -1 for v in nodes}
+             dist[s] = 0
+
+             queue = [s]
+
+             while queue:
+                 v = queue.pop(0)
+                 stack.append(v)
+
+                 for w in adjacency.get(v, []):
+                     if dist[w] < 0:
+                         dist[w] = dist[v] + 1
+                         queue.append(w)
+
+                     if dist[w] == dist[v] + 1:
+                         sigma[w] += sigma[v]
+                         predecessors[w].append(v)
+
+             delta = {v: 0.0 for v in nodes}
+
+             while stack:
+                 w = stack.pop()
+                 for v in predecessors[w]:
+                     delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w])
+                 if w != s:
+                     betweenness[w] += delta[w]
+
+         if normalize:
+             n = len(nodes)
+             if n > 2:
+                 # Directed graph: normalize by (n-1)(n-2).
+                 # The BFS follows directed edges, so the factor-of-2 used
+                 # for undirected graphs does not apply here.
+                 # Reference: Brandes (2001), Section 4.
+                 scale = 1.0 / ((n - 1) * (n - 2))
+                 betweenness = {k: v * scale for k, v in betweenness.items()}
+
+         return betweenness
+
+     @staticmethod
+     def eigenvector_centrality(
+         adjacency: Dict[str, List[str]], iterations: int = 100, tolerance: float = 1e-6
+     ) -> Dict[str, float]:
+         """
+         Compute eigenvector centrality using power iteration.
+
+         x_i = (1/lambda) Σ A_ij x_j
+
+         Args:
+             adjacency: Node -> list of neighbors
+             iterations: Maximum iterations
+             tolerance: Convergence tolerance
+
+         Returns:
+             Dictionary mapping nodes to eigenvector centrality
+         """
+         # Collect ALL nodes, including those that appear only as targets.
+         nodes_set = set(adjacency.keys())
+         for neighbors in adjacency.values():
+             nodes_set.update(neighbors)
+         nodes = list(nodes_set)
+
+         if not nodes:
+             return {}
+
+         # TODO: Eigenvector centrality is ill-defined for disconnected graphs.
+         # The Perron-Frobenius theorem guarantees a unique positive leading
+         # eigenvector only for strongly connected (or irreducible) graphs.
+         # For disconnected graphs, smaller components may converge to zero.
+         # Consider falling back to PageRank or warning the caller.
+         # Reference: Newman, "Networks: An Introduction" (2010), Section 7.2.
+
+         x = {node: 1.0 for node in nodes}
+
+         for _ in range(iterations):
+             new_x = {}
+             max_diff = 0.0
+
+             for node in nodes:
+                 sum_neighbors = sum(x.get(nbr, 0.0) for nbr in adjacency.get(node, []))
+                 new_x[node] = sum_neighbors
+
+             norm = math.sqrt(sum(v * v for v in new_x.values()))
+             if norm > 0:
+                 new_x = {k: v / norm for k, v in new_x.items()}
+
+             for node in nodes:
+                 diff = abs(new_x[node] - x[node])
+                 max_diff = max(max_diff, diff)
+
+             x = new_x
+             if max_diff < tolerance:
+                 break
+
+         return x
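All three GraphMetrics methods take the same node -> neighbors mapping. A minimal usage sketch, not part of the wheel (the dependency graph below is invented; the import path follows the file layout above):

    from shannon_insight.math.graph import GraphMetrics

    # Hypothetical dependency graph: each key lists the modules it imports.
    deps = {"a": ["b"], "d": ["b"], "b": ["c"], "c": []}

    ranks = GraphMetrics.pagerank(deps)                  # importance via incoming links
    bridges = GraphMetrics.betweenness_centrality(deps)  # "b" mediates the a->c and d->c paths
    hubs = GraphMetrics.eigenvector_centrality(deps)     # may degenerate on DAGs, per the TODO

    for node in sorted(ranks):
        print(node, round(ranks[node], 3), round(bridges[node], 3), round(hubs[node], 3))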
shannon_insight/math/robust.py
@@ -0,0 +1,106 @@
+ """Robust statistics: MAD, modified z-scores, IQR, isolation forest."""
+
+ from typing import List, Optional, Union
+
+ import numpy as np
+
+
+ class RobustStatistics:
+     """Robust statistical methods resistant to outliers."""
+
+     @staticmethod
+     def median_absolute_deviation(values: Union[List[float], np.ndarray]) -> float:
+         """
+         Median Absolute Deviation: MAD = median(|x_i - median(x)|).
+
+         Args:
+             values: List or array of values
+
+         Returns:
+             MAD value
+         """
+         median_val = np.median(values)
+
+         if isinstance(values, np.ndarray):
+             deviations = np.abs(values - median_val)
+         else:
+             deviations = [abs(x - median_val) for x in values]
+
+         return float(np.median(deviations))
+
+     @staticmethod
+     def modified_z_score(
+         values: Union[List[float], np.ndarray], threshold: float = 3.5
+     ) -> List[float]:
+         """
+         Modified z-scores using MAD (robust to outliers).
+
+         M_i = 0.6745 * (x_i - median) / MAD
+
+         Args:
+             values: List of values
+             threshold: Outlier threshold (default 3.5)
+
+         Returns:
+             List of modified z-scores
+         """
+         median_val = float(np.median(values))
+         mad = RobustStatistics.median_absolute_deviation(values)
+
+         if mad == 0:
+             return [0.0] * len(values)
+
+         constant = 0.6745  # Normal distribution consistency constant
+         return [constant * (x - median_val) / mad for x in values]
+
+     @staticmethod
+     def iqr_outliers(values: List[float], multiplier: float = 1.5) -> List[bool]:
+         """
+         Detect outliers using the interquartile range.
+
+         Outlier if x < Q1 - k*IQR or x > Q3 + k*IQR
+
+         Args:
+             values: List of values
+             multiplier: IQR multiplier (default 1.5)
+
+         Returns:
+             List of booleans indicating outliers
+         """
+         q1 = float(np.percentile(values, 25))
+         q3 = float(np.percentile(values, 75))
+         iqr = q3 - q1
+
+         lower_bound = q1 - multiplier * iqr
+         upper_bound = q3 + multiplier * iqr
+
+         return [(x < lower_bound or x > upper_bound) for x in values]
+
+     @staticmethod
+     def isolation_forest_outliers(
+         values: np.ndarray, contamination: Optional[float] = 0.1
+     ) -> np.ndarray:
+         """
+         Detect outliers using an isolation forest.
+
+         Args:
+             values: Array of values
+             contamination: Expected proportion of outliers
+
+         Returns:
+             Boolean array indicating outliers
+         """
+         try:
+             from sklearn.ensemble import IsolationForest
+
+             contamination_val = "auto" if contamination is None else contamination
+             clf = IsolationForest(contamination=contamination_val, random_state=42)
+             outliers = clf.fit_predict(values.reshape(-1, 1))
+             return np.array([o == -1 for o in outliers])
+
+         except Exception:  # sklearn unavailable or fit failed; fall back to IQR
+             return np.array(
+                 RobustStatistics.iqr_outliers(
+                     values.tolist() if hasattr(values, "tolist") else list(values)
+                 )
+             )
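The MAD- and IQR-based detectors need no third-party dependency, while isolation_forest_outliers uses scikit-learn when available and otherwise falls back to the IQR rule. A small sketch with invented scores (not taken from the package):

    import numpy as np

    from shannon_insight.math.robust import RobustStatistics

    # Hypothetical complexity scores; 12.5 is the obvious outlier.
    scores = [3.0, 3.2, 2.9, 3.1, 3.0, 12.5]

    mad = RobustStatistics.median_absolute_deviation(scores)
    z = RobustStatistics.modified_z_score(scores)      # |z| > 3.5 is the conventional cut-off
    iqr_flags = RobustStatistics.iqr_outliers(scores)  # only the 12.5 entry is flagged
    forest_flags = RobustStatistics.isolation_forest_outliers(np.array(scores))

    print(mad, [round(v, 1) for v in z], iqr_flags, forest_flags)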
shannon_insight/math/statistics.py
@@ -0,0 +1,159 @@
+ """Descriptive and inferential statistics: z-scores, Mahalanobis, Grubbs' test."""
+
+ import math
+ import statistics as stdlib_stats
+ from typing import List, Optional, Tuple
+
+ import numpy as np
+
+
+ class Statistics:
+     """Statistical analysis methods."""
+
+     @staticmethod
+     def mean(values: List[float]) -> float:
+         """Compute arithmetic mean."""
+         if not values:
+             return 0.0
+         return stdlib_stats.mean(values)
+
+     @staticmethod
+     def stdev(values: List[float]) -> float:
+         """Compute sample standard deviation."""
+         if len(values) < 2:
+             return 0.0
+         return stdlib_stats.stdev(values)
+
+     @staticmethod
+     def z_scores(values: List[float]) -> List[float]:
+         """
+         Compute z-scores: z = (x - mu) / sigma.
+
+         Args:
+             values: List of values
+
+         Returns:
+             List of z-scores
+         """
+         if not values or len(values) < 2:
+             return [0.0] * len(values)
+
+         mean_val = Statistics.mean(values)
+         stdev_val = Statistics.stdev(values)
+
+         if stdev_val == 0:
+             return [0.0] * len(values)
+
+         return [(x - mean_val) / stdev_val for x in values]
+
+     @staticmethod
+     def z_score(x: float, mean: float, std: float) -> float:
+         """Compute single z-score: z = (x - mu) / sigma."""
+         if std == 0:
+             return 0.0
+         return (x - mean) / std
+
+     @staticmethod
+     def mahalanobis_distance(
+         point: np.ndarray, mean: np.ndarray, cov_matrix: np.ndarray
+     ) -> float:
+         """
+         Compute Mahalanobis distance: D^2 = (x - mu)^T Sigma^-1 (x - mu).
+
+         Args:
+             point: Observation vector
+             mean: Mean vector
+             cov_matrix: Covariance matrix
+
+         Returns:
+             Mahalanobis distance (squared)
+         """
+         diff = point - mean
+
+         try:
+             inv_cov = np.linalg.inv(cov_matrix)
+         except np.linalg.LinAlgError:
+             inv_cov = np.linalg.pinv(cov_matrix)
+
+         distance = diff.T @ inv_cov @ diff
+         return float(distance)
+
+     @staticmethod
+     def grubbs_test(
+         values: List[float], alpha: float = 0.05
+     ) -> Optional[Tuple[int, float]]:
+         """
+         Grubbs' test for detecting a single outlier.
+
+         G = (max|x_i - x_bar|) / s
+
+         Args:
+             values: List of values
+             alpha: Significance level (default 0.05)
+
+         Returns:
+             Tuple of (outlier_index, G_statistic) if outlier found, None otherwise
+         """
+         n = len(values)
+         if n < 3:
+             return None
+
+         mean_val = float(np.mean(values))
+         std_val = float(np.std(values, ddof=1))
+
+         if std_val == 0:
+             return None
+
+         deviations = [abs(x - mean_val) for x in values]
+         max_deviation = max(deviations)
+         outlier_index = deviations.index(max_deviation)
+
+         G = max_deviation / std_val
+
+         t_critical = Statistics._t_critical_value(alpha / (2 * n), n - 2)
+         G_critical = ((n - 1) / math.sqrt(n)) * math.sqrt(
+             t_critical**2 / (n - 2 + t_critical**2)
+         )
+
+         if G > G_critical:
+             return outlier_index, float(G)
+
+         return None
+
+     @staticmethod
+     def _t_critical_value(alpha: float, df: int) -> float:
+         """Inverse t-distribution critical value."""
+         from scipy import stats as sp_stats
+
+         return float(sp_stats.t.ppf(1 - alpha, df))
+
+     @staticmethod
+     def confidence_interval(
+         values: List[float], confidence: float = 0.95
+     ) -> Tuple[float, float]:
+         """
+         Confidence interval for the mean.
+
+         CI = x_bar +/- t_(alpha/2, n-1) * s / sqrt(n)
+
+         Args:
+             values: Sample values
+             confidence: Confidence level (default 0.95)
+
+         Returns:
+             Tuple of (lower_bound, upper_bound)
+         """
+         n = len(values)
+         if n < 2:
+             return (values[0], values[0]) if values else (0.0, 0.0)
+
+         mean_val = float(np.mean(values))
+         std_val = float(np.std(values, ddof=1))
+         alpha = 1 - confidence
+
+         from scipy import stats as sp_stats
+
+         t_critical = sp_stats.t.ppf(1 - alpha / 2, n - 1)
+         margin = t_critical * std_val / math.sqrt(n)
+
+         return (mean_val - margin, mean_val + margin)
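grubbs_test and confidence_interval lean on scipy for the t-distribution (imported lazily inside the functions), while the z-score helpers are pure stdlib/NumPy. A sketch with invented token counts, assuming scipy is installed:

    import numpy as np

    from shannon_insight.math.statistics import Statistics

    # Hypothetical per-file token counts; 540 is far from the rest.
    tokens = [120.0, 135.0, 118.0, 122.0, 131.0, 540.0]

    z = Statistics.z_scores(tokens)
    lo, hi = Statistics.confidence_interval(tokens, confidence=0.95)
    outlier = Statistics.grubbs_test(tokens, alpha=0.05)  # likely (5, G) for the 540.0 entry

    # Squared Mahalanobis distance for a 2-dimensional point (made-up covariance).
    d2 = Statistics.mahalanobis_distance(
        np.array([120.0, 10.0]),
        np.array([125.0, 12.0]),
        np.array([[44.0, 16.0], [16.0, 6.5]]),
    )

    print([round(v, 2) for v in z], (round(lo, 1), round(hi, 1)), outlier, round(d2, 2))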
shannon_insight/models.py
@@ -0,0 +1,48 @@
+ """Data models for Shannon Insight"""
+
+ from dataclasses import dataclass
+ from collections import Counter
+ from typing import List
+
+
+ @dataclass
+ class FileMetrics:
+     """Raw observations for a single file"""
+
+     path: str
+     lines: int
+     tokens: int
+     imports: List[str]
+     exports: List[str]
+     functions: int
+     interfaces: int
+     structs: int
+     complexity_score: float
+     nesting_depth: int
+     ast_node_types: Counter
+     last_modified: float
+
+
+ @dataclass
+ class Primitives:
+     """Five orthogonal quality primitives"""
+
+     structural_entropy: float
+     network_centrality: float
+     churn_volatility: float
+     semantic_coherence: float
+     cognitive_load: float
+
+
+ @dataclass
+ class AnomalyReport:
+     """Final analysis output"""
+
+     file: str
+     overall_score: float
+     confidence: float
+     primitives: Primitives
+     normalized_primitives: Primitives
+     anomaly_flags: List[str]
+     root_causes: List[str]
+     recommendations: List[str]
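The three dataclasses read as a pipeline: FileMetrics holds raw per-file observations, Primitives the five derived scores, and AnomalyReport the final output. A construction sketch with invented values (only the field names come from the definitions above):

    from collections import Counter

    from shannon_insight.models import AnomalyReport, FileMetrics, Primitives

    metrics = FileMetrics(
        path="src/example_module.py",  # hypothetical file
        lines=180,
        tokens=1400,
        imports=["os", "pathlib"],
        exports=["run"],
        functions=9,
        interfaces=0,
        structs=2,
        complexity_score=6.5,
        nesting_depth=4,
        ast_node_types=Counter({"FunctionDef": 9, "ClassDef": 2}),
        last_modified=1700000000.0,
    )

    scores = Primitives(
        structural_entropy=0.62,
        network_centrality=0.18,
        churn_volatility=0.40,
        semantic_coherence=0.75,
        cognitive_load=0.55,
    )

    report = AnomalyReport(
        file=metrics.path,
        overall_score=0.71,
        confidence=0.80,
        primitives=scores,
        normalized_primitives=scores,
        anomaly_flags=["high_entropy"],
        root_causes=["deep nesting"],
        recommendations=["split the module"],
    )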
shannon_insight/primitives/__init__.py
@@ -0,0 +1,13 @@
+ """Primitive extractors for the five quality dimensions"""
+
+ from .extractor import PrimitiveExtractor
+ from .fusion import SignalFusion
+ from .detector import AnomalyDetector
+ from .recommendations import RecommendationEngine
+
+ __all__ = [
+     "PrimitiveExtractor",
+     "SignalFusion",
+     "AnomalyDetector",
+     "RecommendationEngine",
+ ]
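These re-exports define the subpackage's public surface, so callers import the four classes from shannon_insight.primitives rather than from the individual modules; their constructor signatures live in the other files of this release and are not shown in this hunk:

    from shannon_insight.primitives import (
        AnomalyDetector,
        PrimitiveExtractor,
        RecommendationEngine,
        SignalFusion,
    )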