shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
shannon_insight/cli.py ADDED
@@ -0,0 +1,333 @@
1
+ """Command-line interface for Shannon Insight"""
2
+
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ from . import __version__
12
+ from .core import CodebaseAnalyzer
13
+ from .config import load_settings, AnalysisSettings
14
+ from .logging_config import setup_logging
15
+ from .exceptions import ShannonInsightError
16
+
# Root Typer application. The analysis entry point is registered on it as the
# group callback and the cache utilities as subcommands.
app = typer.Typer(
    name="shannon-insight",
    help="Shannon Insight - Multi-Signal Codebase Quality Analyzer",
    rich_markup_mode="rich",
    add_completion=False,
)

# Single Rich console shared by every command in this module for styled output.
console = Console()
25
+
26
+
# Group callback: runs the analysis when no subcommand is given.
#
# Flow: validate flag combinations -> configure logging -> build settings from
# the optional config file plus CLI overrides -> run CodebaseAnalyzer -> render
# the reports in the requested format -> optionally export JSON -> apply the
# --fail-above gate. Every failure path exits through typer.Exit with a
# non-zero code (130 on Ctrl-C).
#
# NOTE(review): `path` is a positional argument declared on the group callback
# while the app also has subcommands; verify that `shannon-insight cache-info`
# dispatches to the subcommand rather than binding "cache-info" to `path`.
@app.callback(invoke_without_command=True, no_args_is_help=True)
def main(
    ctx: typer.Context,
    path: Path = typer.Argument(
        Path("."),
        help="Path to the codebase directory to analyze",
        exists=False,
        file_okay=False,
        dir_okay=True,
        readable=True,
    ),
    language: str = typer.Option(
        "auto",
        "--language",
        "-l",
        help="Programming language (auto, go, typescript, react, javascript, python)",
    ),
    top: int = typer.Option(
        15,
        "--top",
        "-t",
        help="Number of top files to display",
        min=1,
        max=1000,
    ),
    output: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Output JSON file path",
        dir_okay=False,
    ),
    fmt: str = typer.Option(
        "rich",
        "--format",
        "-f",
        help="Output format: rich (default), json, csv, quiet",
    ),
    explain: Optional[str] = typer.Option(
        None,
        "--explain",
        "-e",
        help="Deep-dive explanation for a specific file (substring match)",
    ),
    fail_above: Optional[float] = typer.Option(
        None,
        "--fail-above",
        help="Exit 1 if any file's score exceeds this threshold (for CI gating)",
        min=0.0,
    ),
    threshold: Optional[float] = typer.Option(
        None,
        "--threshold",
        help="Z-score threshold for anomaly detection (0.0 - 10.0)",
        min=0.0,
        max=10.0,
    ),
    config: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        help="Configuration file path (TOML format)",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Enable verbose (DEBUG) logging",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Suppress all but ERROR logging",
    ),
    no_cache: bool = typer.Option(
        False,
        "--no-cache",
        help="Disable caching",
    ),
    clear_cache: bool = typer.Option(
        False,
        "--clear-cache",
        help="Clear cache before running",
    ),
    workers: Optional[int] = typer.Option(
        None,
        "--workers",
        "-w",
        help="Number of parallel workers (default: auto-detect)",
        min=1,
        max=32,
    ),
    version: bool = typer.Option(
        False,
        "--version",
        help="Show version and exit",
    ),
):
    """
    Analyze codebase quality using mathematical primitives.

    Named after Claude Shannon, father of information theory.

    [bold cyan]Examples:[/bold cyan]

    shannon-insight /path/to/codebase

    shannon-insight /path/to/codebase --language go

    shannon-insight /path/to/codebase --top 20 --output results.json

    shannon-insight . --format json | jq .

    shannon-insight . --fail-above 2.0 --format quiet

    shannon-insight . --explain complex.go
    """
    # Exception messages are arbitrary text; escape them before interpolating
    # into Rich markup so bracketed fragments are shown literally instead of
    # being parsed as style tags.
    from rich.markup import escape

    # If subcommand is invoked, don't run main analysis
    if ctx.invoked_subcommand is not None:
        return

    # Handle version
    if version:
        console.print(
            f"[bold cyan]Shannon Insight[/bold cyan] version [green]{__version__}[/green]"
        )
        raise typer.Exit(0)

    # Validate mutually exclusive options
    if verbose and quiet:
        console.print("[red]Error:[/red] --verbose and --quiet are mutually exclusive")
        raise typer.Exit(1)

    # Validate format
    valid_formats = {"rich", "json", "csv", "quiet"}
    if fmt not in valid_formats:
        console.print(
            f"[red]Error:[/red] --format must be one of: {', '.join(sorted(valid_formats))}"
        )
        raise typer.Exit(1)

    # Setup logging first so everything below can report through the logger
    logger = setup_logging(verbose=verbose, quiet=quiet)

    try:
        # Only options the user actually supplied become overrides, so values
        # from the config file / environment survive for everything else.
        overrides = {}

        # CLI overrides (highest priority)
        if threshold is not None:
            overrides["z_score_threshold"] = threshold
        if no_cache:
            overrides["enable_cache"] = False
        if workers is not None:
            overrides["parallel_workers"] = workers
        if verbose:
            overrides["verbose"] = True
        if quiet:
            overrides["quiet"] = True

        settings = load_settings(config_file=config, **overrides)

        # Log configuration
        logger.debug(f"Loaded settings: {settings.model_dump()}")

        # Create analyzer
        analyzer = CodebaseAnalyzer(root_dir=path, language=language, settings=settings)

        # Clear cache if requested (only when the analyzer exposes a usable cache)
        if clear_cache:
            if hasattr(analyzer, "cache") and analyzer.cache:
                analyzer.cache.clear()
                console.print("[yellow]Cache cleared[/yellow]")

        # Run analysis
        reports = analyzer.analyze()

        # Handle results based on format
        if reports:
            if explain:
                # --explain mode: deep-dive on matching file(s)
                analyzer.print_explain(reports, explain)
            elif fmt == "json":
                # JSON to stdout
                print(analyzer.format_json(reports))
            elif fmt == "csv":
                # CSV to stdout
                print(analyzer.format_csv(reports), end="")
            elif fmt == "quiet":
                # Just file paths
                print(analyzer.format_quiet(reports))
            else:
                # Default rich output: summary + detailed report
                analyzer.print_summary(reports, top_n=top)
                analyzer.print_report(reports, top_n=top)

            # Export JSON report
            if output is not None:
                analyzer.export_json(reports, filename=str(output))
            elif fmt == "rich":
                # Auto-save with timestamp when using rich output
                ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                auto_path = f"analysis_report_{ts}.json"
                analyzer.export_json(reports, filename=auto_path)

            # --fail-above CI gating: the exit code is set for every format,
            # the explanatory message only in rich mode so machine-readable
            # output stays clean.
            if fail_above is not None:
                max_score = max(r.overall_score for r in reports)
                if max_score > fail_above:
                    if fmt == "rich":
                        console.print(
                            f"\n[red]FAIL:[/red] Max score {max_score:.3f} "
                            f"exceeds threshold {fail_above:.3f}"
                        )
                    raise typer.Exit(1)

            if fmt == "rich":
                console.print()
                console.print("[bold green]ANALYSIS COMPLETE[/bold green]")
                console.print()
        else:
            # Nothing flagged: still emit a well-formed empty document for the
            # machine-readable formats.
            if fmt == "json":
                print("[]")
            elif fmt == "csv":
                print("file,overall_score,confidence,structural_entropy,"
                      "network_centrality,churn_volatility,semantic_coherence,"
                      "cognitive_load,anomaly_flags")
            elif fmt == "rich":
                console.print(
                    "[bold green]No anomalies detected - codebase looks clean![/bold green]"
                )
            raise typer.Exit(0)

    except typer.Exit:
        raise  # Re-raise typer exits (--fail-above, no anomalies, etc.)

    except ShannonInsightError as e:
        # Handle known errors
        logger.error(f"{e.__class__.__name__}: {e}")
        console.print(f"[red]Error:[/red] {escape(str(e))}")
        raise typer.Exit(1)

    except KeyboardInterrupt:
        logger.info("Analysis interrupted by user")
        console.print("\n[yellow]Analysis interrupted[/yellow]")
        raise typer.Exit(130)

    except Exception as e:
        # Handle unexpected errors
        logger.exception("Unexpected error during analysis")
        console.print(f"[red]Unexpected error:[/red] {escape(str(e))}")
        if verbose:
            console.print_exception()
        raise typer.Exit(1)
284
+
285
+
@app.command()
def cache_info():
    """Show cache information and statistics."""
    from .config import default_settings
    from .cache import AnalysisCache

    # Statistics come from a cache handle built on the package-level default
    # settings; no CLI options are consulted by this command.
    stats = AnalysisCache(
        cache_dir=default_settings.cache_dir,
        ttl_hours=default_settings.cache_ttl_hours,
        enabled=default_settings.enable_cache,
    ).stats()

    console.print("[bold cyan]Shannon Insight Cache Info[/bold cyan]")
    console.print()

    # Disabled cache: report the status and stop, there is nothing else to show.
    if not stats.get("enabled"):
        console.print("Status: [red]Disabled[/red]")
        return

    console.print("Status: [green]Enabled[/green]")

    directory = stats.get("directory", "N/A")
    console.print(f"Directory: [blue]{directory}[/blue]")

    entry_count = stats.get("size", 0)
    console.print(f"Entries: [yellow]{entry_count}[/yellow]")

    byte_volume = stats.get("volume", 0)
    console.print(f"Size: [yellow]{byte_volume} bytes[/yellow]")
310
+
311
+
@app.command()
def cache_clear():
    """Clear the analysis cache."""
    from .config import default_settings
    from .cache import AnalysisCache

    # The cache handle is created up front from the package-level defaults,
    # before the enabled flag is inspected.
    analysis_cache = AnalysisCache(
        cache_dir=default_settings.cache_dir,
        ttl_hours=default_settings.cache_ttl_hours,
        enabled=default_settings.enable_cache,
    )

    if default_settings.enable_cache:
        analysis_cache.clear()
        console.print("[green]Cache cleared successfully[/green]")
        return

    # Caching is switched off: say so and exit successfully without clearing.
    console.print("[yellow]Cache is disabled[/yellow]")
    raise typer.Exit(0)
330
+
331
+
332
+ if __name__ == "__main__":
333
+ app()
shannon_insight/config.py ADDED
@@ -0,0 +1,235 @@
1
+ """
2
+ Configuration management for Shannon Insight.
3
+
4
+ Uses pydantic-settings for type-safe configuration with automatic validation.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import List, Optional
9
+
10
+ from pydantic import Field, field_validator
11
+ from pydantic_settings import BaseSettings, SettingsConfigDict
12
+
13
+
class AnalysisSettings(BaseSettings):
    """
    Type-safe configuration with automatic validation.

    Value sources, highest to lowest priority:
    1. Keyword arguments passed to the constructor. ``load_settings`` uses
       these for CLI overrides and for values read from an explicitly given
       TOML file.
    2. Environment variables prefixed with ``SHANNON_`` (case-insensitive).
    3. A ``.env`` file in the working directory.
    4. Defaults defined in ``Field()``.

    Unknown keys are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_prefix="SHANNON_",
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ==================== Anomaly Detection ====================

    # Bounds are inclusive so that every value the CLI accepts for
    # --threshold (0.0 - 10.0) also validates here.
    z_score_threshold: float = Field(
        default=1.5,
        ge=0.0,
        le=10.0,
        description="Z-score threshold for anomaly detection",
    )

    # ==================== PageRank ====================

    pagerank_damping: float = Field(
        default=0.85, ge=0.0, le=1.0, description="PageRank damping factor"
    )

    pagerank_iterations: int = Field(
        default=20, ge=1, le=100, description="Maximum PageRank iterations"
    )

    pagerank_tolerance: float = Field(
        default=1e-6, gt=0.0, description="PageRank convergence tolerance"
    )

    # ==================== Signal Fusion ====================

    fusion_weights: List[float] = Field(
        default=[0.2, 0.25, 0.2, 0.15, 0.2],
        description="Signal fusion weights [entropy, centrality, churn, coherence, cognitive]",
    )

    # NOTE(review): validate_fusion_weights always normalizes the weights; this
    # flag is not consulted anywhere in this class.
    fusion_normalize: bool = Field(
        default=True, description="Normalize weights to sum to 1.0"
    )

    # ==================== File Filtering ====================

    exclude_patterns: List[str] = Field(
        default=[
            "*_test.go",
            "*_test.ts",
            "*.test.ts",
            "*.spec.ts",
            "vendor/*",
            "node_modules/*",
            "dist/*",
            "build/*",
            ".git/*",
            "venv/*",
            ".venv/*",
            "__pycache__/*",
            ".tox/*",
            ".mypy_cache/*",
        ],
        description="File patterns to exclude from analysis",
    )

    max_file_size_mb: float = Field(
        default=10.0, gt=0.0, le=100.0, description="Maximum file size in MB"
    )

    max_files: int = Field(
        default=10000, gt=0, le=100000, description="Maximum number of files to analyze"
    )

    # ==================== Performance ====================

    parallel_workers: Optional[int] = Field(
        default=None,
        ge=1,
        le=32,
        description="Number of parallel workers (None = auto-detect)",
    )

    timeout_seconds: int = Field(
        default=10, ge=1, le=300, description="Timeout for file operations in seconds"
    )

    # ==================== Cache ====================

    enable_cache: bool = Field(
        default=True, description="Enable caching for faster repeated analysis"
    )

    cache_dir: str = Field(default=".shannon-cache", description="Cache directory path")

    cache_ttl_hours: int = Field(
        default=24,
        ge=0,
        le=720,  # 30 days max
        description="Cache time-to-live in hours",
    )

    # ==================== Logging ====================

    verbose: bool = Field(default=False, description="Enable verbose (DEBUG) logging")

    quiet: bool = Field(default=False, description="Suppress all but ERROR logging")

    log_file: Optional[str] = Field(
        default=None, description="Log file path (optional)"
    )

    # ==================== Security ====================

    allow_hidden_files: bool = Field(
        default=False, description="Allow analysis of hidden files (starting with .)"
    )

    block_system_dirs: bool = Field(
        default=True, description="Block access to system directories"
    )

    follow_symlinks: bool = Field(
        default=False, description="Follow symbolic links during scanning"
    )

    # ==================== Validators ====================

    @field_validator("fusion_weights")
    @classmethod
    def validate_fusion_weights(cls, v: List[float]) -> List[float]:
        """Check the weight vector and return it rescaled to sum to 1.0.

        Raises:
            ValueError: if there are not exactly 5 weights, any weight is
                negative, or all weights are zero.
        """
        if len(v) != 5:
            raise ValueError("fusion_weights must have exactly 5 values")

        if any(w < 0 for w in v):
            raise ValueError("fusion_weights must be non-negative")

        weight_sum = sum(v)
        if weight_sum == 0:
            raise ValueError("fusion_weights cannot all be zero")

        # Normalize to sum to 1.0
        return [w / weight_sum for w in v]

    @field_validator("cache_dir")
    @classmethod
    def validate_cache_dir(cls, v: str) -> str:
        """Return the cache directory as an absolute path string.

        A leading ``~`` is expanded to the user's home directory; any other
        relative path is resolved against the current working directory.
        """
        cache_path = Path(v)
        try:
            # Without this, "~/x" would be treated as the relative path
            # "<cwd>/~/x" below.
            cache_path = cache_path.expanduser()
        except RuntimeError:
            # Home directory cannot be determined; keep the path as given.
            pass
        if not cache_path.is_absolute():
            cache_path = Path.cwd() / cache_path
        return str(cache_path)

    @field_validator("parallel_workers")
    @classmethod
    def validate_parallel_workers(cls, v: Optional[int]) -> Optional[int]:
        """Reject worker counts below 1; ``None`` (auto-detect) passes through."""
        if v is not None and v < 1:
            raise ValueError("parallel_workers must be at least 1")
        return v

    # ==================== Computed Properties ====================

    @property
    def max_file_size_bytes(self) -> int:
        """Get max file size in bytes (``max_file_size_mb`` * 1024 * 1024, truncated)."""
        return int(self.max_file_size_mb * 1024 * 1024)

    @property
    def cache_ttl_seconds(self) -> int:
        """Get cache TTL in seconds."""
        return self.cache_ttl_hours * 3600
197
+
198
+
def load_settings(config_file: Optional[Path] = None, **overrides) -> AnalysisSettings:
    """
    Load settings from an optional TOML file, environment variables and overrides.

    Args:
        config_file: Optional TOML config file path. Ignored when it is None
            or does not exist.
        **overrides: Manual overrides (typically from CLI args). These win
            over values read from the TOML file.

    Returns:
        Loaded settings with all overrides applied. If the config file cannot
        be read, parsed or validated, a warning is issued and the settings are
        built from environment variables and overrides only.
    """
    if config_file and config_file.exists():
        import importlib
        import warnings

        # pydantic-settings is not configured with a TOML source here, so the
        # file is parsed manually. Prefer the stdlib parser (Python 3.11+) and
        # fall back to the third-party backport with the same API.
        toml_parser = None
        for module_name in ("tomllib", "tomli"):
            try:
                toml_parser = importlib.import_module(module_name)
                break
            except ImportError:
                continue

        if toml_parser is None:
            # Previously this case was silent, leaving the user's config file
            # ignored without any indication.
            warnings.warn(
                f"Cannot read config file {config_file}: no TOML parser available "
                "(requires Python 3.11+ or the 'tomli' package)"
            )
        else:
            try:
                with open(config_file, "rb") as f:
                    toml_data = toml_parser.load(f)
                # Merge TOML data with overrides (overrides take precedence).
                # NOTE(review): both are passed as constructor arguments, which
                # pydantic-settings ranks above environment variables, so TOML
                # values also override SHANNON_* env vars - confirm intended.
                merged = {**toml_data, **overrides}
                return AnalysisSettings(**merged)
            except Exception as e:
                # Best effort: a broken config file must not abort the run.
                warnings.warn(f"Failed to load config file {config_file}: {e}")

    # Load from environment variables + overrides
    return AnalysisSettings(**overrides)
232
+
233
+
234
+ # Default settings instance
235
+ default_settings = AnalysisSettings()