shannon-codebase-insight 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
- shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
- shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
- shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
- shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
- shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
- shannon_insight/__init__.py +25 -0
- shannon_insight/analyzers/__init__.py +8 -0
- shannon_insight/analyzers/base.py +215 -0
- shannon_insight/analyzers/go_analyzer.py +150 -0
- shannon_insight/analyzers/python_analyzer.py +169 -0
- shannon_insight/analyzers/typescript_analyzer.py +162 -0
- shannon_insight/cache.py +214 -0
- shannon_insight/cli.py +333 -0
- shannon_insight/config.py +235 -0
- shannon_insight/core.py +546 -0
- shannon_insight/exceptions/__init__.py +31 -0
- shannon_insight/exceptions/analysis.py +78 -0
- shannon_insight/exceptions/base.py +18 -0
- shannon_insight/exceptions/config.py +48 -0
- shannon_insight/file_ops.py +218 -0
- shannon_insight/logging_config.py +98 -0
- shannon_insight/math/__init__.py +15 -0
- shannon_insight/math/entropy.py +133 -0
- shannon_insight/math/fusion.py +109 -0
- shannon_insight/math/graph.py +209 -0
- shannon_insight/math/robust.py +106 -0
- shannon_insight/math/statistics.py +159 -0
- shannon_insight/models.py +48 -0
- shannon_insight/primitives/__init__.py +13 -0
- shannon_insight/primitives/detector.py +318 -0
- shannon_insight/primitives/extractor.py +278 -0
- shannon_insight/primitives/fusion.py +373 -0
- shannon_insight/primitives/recommendations.py +158 -0
- shannon_insight/py.typed +2 -0
- shannon_insight/security.py +284 -0
- shannon_insight/utils/__init__.py +1 -0
shannon_insight/core.py
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
"""Main pipeline orchestrator for Shannon Insight"""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import io
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import asdict
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.panel import Panel
|
|
12
|
+
from rich.progress import (
|
|
13
|
+
Progress,
|
|
14
|
+
SpinnerColumn,
|
|
15
|
+
BarColumn,
|
|
16
|
+
TaskProgressColumn,
|
|
17
|
+
TimeElapsedColumn,
|
|
18
|
+
TextColumn,
|
|
19
|
+
)
|
|
20
|
+
from rich.table import Table
|
|
21
|
+
|
|
22
|
+
from .models import AnomalyReport
|
|
23
|
+
from .analyzers import GoScanner, TypeScriptScanner, PythonScanner
|
|
24
|
+
from .primitives import (
|
|
25
|
+
PrimitiveExtractor,
|
|
26
|
+
AnomalyDetector,
|
|
27
|
+
SignalFusion,
|
|
28
|
+
RecommendationEngine,
|
|
29
|
+
)
|
|
30
|
+
from .config import AnalysisSettings, default_settings
|
|
31
|
+
from .cache import AnalysisCache, compute_config_hash
|
|
32
|
+
from .logging_config import get_logger
|
|
33
|
+
from .security import validate_root_directory
|
|
34
|
+
from .exceptions import (
|
|
35
|
+
InvalidPathError,
|
|
36
|
+
UnsupportedLanguageError,
|
|
37
|
+
InsufficientDataError,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
import sys as _sys
|
|
41
|
+
|
|
42
|
+
logger = get_logger(__name__)
|
|
43
|
+
console = Console(stderr=True)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CodebaseAnalyzer:
|
|
47
|
+
"""Main pipeline orchestrator with enterprise features"""
|
|
48
|
+
|
|
49
|
+
SUPPORTED_LANGUAGES = {"auto", "go", "typescript", "react", "javascript", "python"}
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
root_dir: "Path | str",
|
|
54
|
+
language: str = "auto",
|
|
55
|
+
settings: Optional[AnalysisSettings] = None,
|
|
56
|
+
):
|
|
57
|
+
"""
|
|
58
|
+
Initialize codebase analyzer.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
root_dir: Root directory of codebase to analyze
|
|
62
|
+
language: Programming language (auto, go, typescript, react, javascript)
|
|
63
|
+
settings: Analysis settings (uses defaults if not provided)
|
|
64
|
+
|
|
65
|
+
Raises:
|
|
66
|
+
InvalidPathError: If root_dir is invalid
|
|
67
|
+
UnsupportedLanguageError: If language is not supported
|
|
68
|
+
"""
|
|
69
|
+
# Validate root directory
|
|
70
|
+
self.root_dir = validate_root_directory(Path(root_dir))
|
|
71
|
+
logger.info(f"Analyzing codebase at: {self.root_dir}")
|
|
72
|
+
|
|
73
|
+
# Validate language
|
|
74
|
+
if language not in self.SUPPORTED_LANGUAGES:
|
|
75
|
+
raise UnsupportedLanguageError(language, list(self.SUPPORTED_LANGUAGES))
|
|
76
|
+
self.language = language
|
|
77
|
+
|
|
78
|
+
# Load settings
|
|
79
|
+
self.settings = settings or default_settings
|
|
80
|
+
logger.debug(
|
|
81
|
+
f"Settings: cache={self.settings.enable_cache}, "
|
|
82
|
+
f"workers={self.settings.parallel_workers}, "
|
|
83
|
+
f"threshold={self.settings.z_score_threshold}"
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# Initialize cache
|
|
87
|
+
self.cache = None
|
|
88
|
+
if self.settings.enable_cache:
|
|
89
|
+
self.cache = AnalysisCache(
|
|
90
|
+
cache_dir=self.settings.cache_dir,
|
|
91
|
+
ttl_hours=self.settings.cache_ttl_hours,
|
|
92
|
+
enabled=True,
|
|
93
|
+
)
|
|
94
|
+
logger.debug(f"Cache enabled at {self.settings.cache_dir}")
|
|
95
|
+
|
|
96
|
+
# Compute config hash for cache invalidation
|
|
97
|
+
self.config_hash = compute_config_hash(self.settings.model_dump())
|
|
98
|
+
|
|
99
|
+
# Track analysis metadata
|
|
100
|
+
self._total_files_scanned = 0
|
|
101
|
+
self._detected_language = language
|
|
102
|
+
|
|
103
|
+
    def analyze(self) -> List[AnomalyReport]:
        """
        Run full analysis pipeline with progress tracking.

        Runs five sequential layers: (1) scan the codebase for source files,
        (2) extract raw primitives per file, (3) normalize primitives and flag
        statistical anomalies, (4) fuse signals into consensus scores, and
        (5) generate actionable reports. Banner and progress output go to the
        module-level `console` (constructed with stderr=True), keeping stdout
        free for machine-readable output.

        Returns:
            List of anomaly reports sorted by severity

        Raises:
            InsufficientDataError: If no files found to analyze
        """
        console.print()
        console.print("[bold cyan]=" * 40)
        console.print(
            "[bold cyan]SHANNON INSIGHT - Multi-Signal Codebase Quality Analyzer"
        )
        console.print("[bold cyan]=" * 40)
        console.print()

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
            console=console,
            # Keep the per-layer lines on screen after each task finishes.
            transient=False,
        ) as progress:
            # Layer 1: Data Collection (total=None -> indeterminate spinner)
            scan_task = progress.add_task(
                "[cyan]Layer 1: Scanning codebase...", total=None
            )

            scanner = self._get_scanner()
            files = scanner.scan()

            if not files:
                logger.warning("No files found to analyze")
                raise InsufficientDataError(
                    "No source files found in the specified directory",
                    minimum_required=1,
                )

            self._total_files_scanned = len(files)

            # NOTE(review): completed=True relies on bool->1 coercion with an
            # indeterminate total — confirm this renders as intended in Rich.
            progress.update(
                scan_task,
                completed=True,
                description=f"[green]Layer 1: Found {len(files)} source files",
            )
            logger.info(f"Scanned {len(files)} files")

            # Layer 2: Primitive Extraction (cache-aware via config_hash)
            extract_task = progress.add_task(
                "[cyan]Layer 2: Extracting primitives...", total=100
            )

            extractor = PrimitiveExtractor(
                files, cache=self.cache, config_hash=self.config_hash
            )
            primitives = extractor.extract_all()

            progress.update(
                extract_task,
                completed=100,
                description=f"[green]Layer 2: Extracted 5 primitives for {len(primitives)} files",
            )
            logger.info(f"Extracted primitives for {len(primitives)} files")

            # Layer 3: Normalization & Anomaly Detection (z-score threshold
            # comes from settings)
            detect_task = progress.add_task(
                "[cyan]Layer 3: Normalizing and detecting anomalies...", total=100
            )

            detector = AnomalyDetector(
                primitives, threshold=self.settings.z_score_threshold
            )
            normalized = detector.normalize()
            anomalies = detector.detect_anomalies(normalized)

            progress.update(
                detect_task,
                completed=100,
                description=f"[green]Layer 3: Detected {len(anomalies)} anomalous files",
            )
            logger.info(f"Detected {len(anomalies)} anomalies")

            # Layer 4: Signal Fusion (weighted consensus across primitives)
            fusion_task = progress.add_task(
                "[cyan]Layer 4: Fusing signals with consistency check...", total=100
            )

            fusion = SignalFusion(
                primitives, normalized, weights=self.settings.fusion_weights
            )
            fused_scores = fusion.fuse()

            progress.update(
                fusion_task,
                completed=100,
                description=f"[green]Layer 4: Computed consensus scores for {len(fused_scores)} files",
            )
            logger.info(f"Fused signals for {len(fused_scores)} files")

            # Layer 5: Recommendations (turns scores into actionable reports)
            rec_task = progress.add_task(
                "[cyan]Layer 5: Generating recommendations...", total=100
            )

            engine = RecommendationEngine(
                files, primitives, normalized, anomalies, fused_scores
            )
            reports = engine.generate()

            progress.update(
                rec_task,
                completed=100,
                description=f"[green]Layer 5: Generated {len(reports)} actionable reports",
            )
            logger.info(f"Generated {len(reports)} reports")

        console.print()

        # Show cache stats if enabled (debug-level only; no user output)
        if self.cache:
            stats = self.cache.stats()
            logger.debug(f"Cache stats: {stats}")

        return reports
|
232
|
+
def _get_scanner(self):
|
|
233
|
+
"""
|
|
234
|
+
Get appropriate scanner based on language.
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
Language-specific scanner instance
|
|
238
|
+
|
|
239
|
+
Raises:
|
|
240
|
+
UnsupportedLanguageError: If no scanner available for detected language
|
|
241
|
+
"""
|
|
242
|
+
if self.language == "go":
|
|
243
|
+
logger.debug("Using Go scanner")
|
|
244
|
+
self._detected_language = "go"
|
|
245
|
+
return GoScanner(str(self.root_dir), settings=self.settings)
|
|
246
|
+
elif self.language in ["typescript", "react", "javascript"]:
|
|
247
|
+
logger.debug(f"Using TypeScript scanner for {self.language}")
|
|
248
|
+
self._detected_language = self.language
|
|
249
|
+
return TypeScriptScanner(str(self.root_dir), settings=self.settings)
|
|
250
|
+
elif self.language == "python":
|
|
251
|
+
logger.debug("Using Python scanner")
|
|
252
|
+
self._detected_language = "python"
|
|
253
|
+
return PythonScanner(str(self.root_dir), settings=self.settings)
|
|
254
|
+
else:
|
|
255
|
+
# Auto-detect
|
|
256
|
+
logger.debug("Auto-detecting language...")
|
|
257
|
+
|
|
258
|
+
skip_dirs = {"venv", ".venv", "node_modules", "__pycache__", ".git", "dist", "build"}
|
|
259
|
+
|
|
260
|
+
def _has_ext(ext: str) -> bool:
|
|
261
|
+
for p in self.root_dir.rglob(f"*{ext}"):
|
|
262
|
+
if not any(part in skip_dirs for part in p.parts):
|
|
263
|
+
return True
|
|
264
|
+
return False
|
|
265
|
+
|
|
266
|
+
has_go = _has_ext(".go")
|
|
267
|
+
has_ts = _has_ext(".ts") or _has_ext(".tsx")
|
|
268
|
+
has_py = _has_ext(".py")
|
|
269
|
+
|
|
270
|
+
if has_go:
|
|
271
|
+
logger.info("Auto-detected: Go codebase")
|
|
272
|
+
console.print("[yellow]Auto-detected: Go codebase[/yellow]\n")
|
|
273
|
+
self._detected_language = "go"
|
|
274
|
+
return GoScanner(str(self.root_dir), settings=self.settings)
|
|
275
|
+
elif has_ts:
|
|
276
|
+
logger.info("Auto-detected: TypeScript/React codebase")
|
|
277
|
+
console.print(
|
|
278
|
+
"[yellow]Auto-detected: TypeScript/React codebase[/yellow]\n"
|
|
279
|
+
)
|
|
280
|
+
self._detected_language = "typescript"
|
|
281
|
+
return TypeScriptScanner(str(self.root_dir), settings=self.settings)
|
|
282
|
+
elif has_py:
|
|
283
|
+
logger.info("Auto-detected: Python codebase")
|
|
284
|
+
console.print("[yellow]Auto-detected: Python codebase[/yellow]\n")
|
|
285
|
+
self._detected_language = "python"
|
|
286
|
+
return PythonScanner(str(self.root_dir), settings=self.settings)
|
|
287
|
+
else:
|
|
288
|
+
logger.warning("Could not auto-detect language, defaulting to Python")
|
|
289
|
+
console.print(
|
|
290
|
+
"[yellow]Could not auto-detect language. Defaulting to Python.[/yellow]\n"
|
|
291
|
+
)
|
|
292
|
+
self._detected_language = "python"
|
|
293
|
+
return PythonScanner(str(self.root_dir), settings=self.settings)
|
|
294
|
+
|
|
295
|
+
def print_summary(self, reports: List[AnomalyReport], top_n: int = 10):
|
|
296
|
+
"""
|
|
297
|
+
Print a compact summary dashboard using Rich Panel and Table.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
reports: List of anomaly reports
|
|
301
|
+
top_n: Number of top files to display in summary
|
|
302
|
+
"""
|
|
303
|
+
num_anomalies = len(reports)
|
|
304
|
+
pct = (num_anomalies / self._total_files_scanned * 100) if self._total_files_scanned > 0 else 0
|
|
305
|
+
avg_confidence = (
|
|
306
|
+
sum(r.confidence for r in reports) / num_anomalies
|
|
307
|
+
if num_anomalies > 0
|
|
308
|
+
else 0.0
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
summary_text = (
|
|
312
|
+
f"Scanned [bold]{self._total_files_scanned}[/bold] files "
|
|
313
|
+
f"([cyan]{self._detected_language}[/cyan]) | "
|
|
314
|
+
f"[yellow]{num_anomalies}[/yellow] anomalies "
|
|
315
|
+
f"([yellow]{pct:.0f}%[/yellow]) | "
|
|
316
|
+
f"Avg confidence: [blue]{avg_confidence:.2f}[/blue]"
|
|
317
|
+
)
|
|
318
|
+
console.print(Panel(summary_text, title="[bold cyan]Summary[/bold cyan]", expand=False))
|
|
319
|
+
console.print()
|
|
320
|
+
|
|
321
|
+
if not reports:
|
|
322
|
+
return
|
|
323
|
+
|
|
324
|
+
table = Table(title=f"Top {min(top_n, len(reports))} Files Requiring Attention", expand=True)
|
|
325
|
+
table.add_column("#", style="dim", width=4)
|
|
326
|
+
table.add_column("File", style="yellow", no_wrap=False, ratio=3)
|
|
327
|
+
table.add_column("Score", style="red", justify="right", width=8)
|
|
328
|
+
table.add_column("Confidence", style="blue", justify="right", width=12)
|
|
329
|
+
table.add_column("Primary Issue", style="white", ratio=2)
|
|
330
|
+
|
|
331
|
+
# Shorten flag names for table display
|
|
332
|
+
_short = {
|
|
333
|
+
"structural_entropy_high": "entropy high",
|
|
334
|
+
"structural_entropy_low": "entropy low",
|
|
335
|
+
"high_centrality": "high centrality",
|
|
336
|
+
"high_volatility": "high volatility",
|
|
337
|
+
"semantic_coherence_low": "coherence low",
|
|
338
|
+
"semantic_coherence_high": "coherence high",
|
|
339
|
+
"high_cognitive_load": "high cog. load",
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
for i, report in enumerate(reports[:top_n], 1):
|
|
343
|
+
flags = [_short.get(f, f) for f in report.anomaly_flags]
|
|
344
|
+
primary = ", ".join(flags) if flags else "-"
|
|
345
|
+
table.add_row(
|
|
346
|
+
str(i),
|
|
347
|
+
report.file,
|
|
348
|
+
f"{report.overall_score:.3f}",
|
|
349
|
+
f"{report.confidence:.2f}",
|
|
350
|
+
primary,
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
console.print(table)
|
|
354
|
+
console.print()
|
|
355
|
+
|
|
356
|
+
    def print_report(self, reports: List[AnomalyReport], top_n: int = 10):
        """
        Print human-readable analysis report with rich formatting.

        For each of the top files, prints overall score/confidence, the raw
        primitive values, their normalized z-scores, then any root causes and
        recommendations, separated by a dashed rule.

        Args:
            reports: List of anomaly reports
            top_n: Number of top files to display
        """
        console.print("[bold cyan]=" * 40)
        console.print(
            f"[bold cyan]TOP {min(top_n, len(reports))} FILES REQUIRING ATTENTION"
        )
        console.print("[bold cyan]=" * 40)
        console.print()

        for i, report in enumerate(reports[:top_n], 1):
            console.print(f"[bold yellow]{i}. {report.file}[/bold yellow]")
            console.print(
                f"   Overall Score: [red]{report.overall_score:.3f}[/red] "
                f"(Confidence: [blue]{report.confidence:.2f}[/blue])"
            )
            console.print()

            console.print("   [dim]Raw Primitives:[/dim]")
            console.print(
                f"     - Structural Entropy: {report.primitives.structural_entropy:.3f}"
            )
            console.print(
                f"     - Network Centrality: {report.primitives.network_centrality:.3f}"
            )
            console.print(
                f"     - Churn Volatility: {report.primitives.churn_volatility:.3f}"
            )
            console.print(
                f"     - Semantic Coherence: {report.primitives.semantic_coherence:.3f}"
            )
            console.print(
                f"     - Cognitive Load: {report.primitives.cognitive_load:.3f}"
            )
            console.print()

            # z-scores; trailing "s" presumably denotes sigma units — confirm.
            console.print("   [dim]Normalized (Z-Scores):[/dim]")
            console.print(
                f"     - Structural Entropy: {report.normalized_primitives.structural_entropy:+.2f}s"
            )
            console.print(
                f"     - Network Centrality: {report.normalized_primitives.network_centrality:+.2f}s"
            )
            console.print(
                f"     - Churn Volatility: {report.normalized_primitives.churn_volatility:+.2f}s"
            )
            console.print(
                f"     - Semantic Coherence: {report.normalized_primitives.semantic_coherence:+.2f}s"
            )
            console.print(
                f"     - Cognitive Load: {report.normalized_primitives.cognitive_load:+.2f}s"
            )
            console.print()

            if report.root_causes:
                console.print("   [dim]Root Causes:[/dim]")
                for cause in report.root_causes:
                    console.print(f"     [red]![/red] {cause}")
                console.print()

            if report.recommendations:
                console.print("   [dim]Recommendations:[/dim]")
                for rec in report.recommendations:
                    console.print(f"     [green]->[/green] {rec}")
                console.print()

            console.print("[dim]" + "-" * 80 + "[/dim]")
            console.print()

        logger.info(f"Printed report for top {min(top_n, len(reports))} files")
|
432
|
+
    def print_explain(self, reports: List[AnomalyReport], pattern: str):
        """
        Print a deep-dive explanation for file(s) matching a pattern.

        Matching is a plain substring test against each report's file path;
        every match gets its own Panel with raw primitives, z-scores marked
        against the configured threshold, flags, causes and recommendations.

        Args:
            reports: List of anomaly reports
            pattern: File name or pattern to match
        """
        # Simple substring match (not glob/regex) against the report paths.
        matching = [r for r in reports if pattern in r.file]

        if not matching:
            console.print(f"[yellow]No files matching '{pattern}' found in analysis results.[/yellow]")
            return

        for report in matching:
            console.print(Panel(
                f"[bold]{report.file}[/bold]",
                title="[bold cyan]Deep Dive[/bold cyan]",
                expand=False,
            ))
            console.print()

            console.print("[bold]Raw Primitives:[/bold]")
            console.print(f"  Structural Entropy: {report.primitives.structural_entropy:.4f}")
            console.print(f"  Network Centrality: {report.primitives.network_centrality:.4f}")
            console.print(f"  Churn Volatility: {report.primitives.churn_volatility:.4f}")
            console.print(f"  Semantic Coherence: {report.primitives.semantic_coherence:.4f}")
            console.print(f"  Cognitive Load: {report.primitives.cognitive_load:.4f}")
            console.print()

            threshold = self.settings.z_score_threshold
            console.print(f"[bold]Normalized Z-Scores[/bold] (threshold: {threshold:.1f}):")
            for name, val in [
                ("Structural Entropy", report.normalized_primitives.structural_entropy),
                ("Network Centrality", report.normalized_primitives.network_centrality),
                ("Churn Volatility", report.normalized_primitives.churn_volatility),
                ("Semantic Coherence", report.normalized_primitives.semantic_coherence),
                ("Cognitive Load", report.normalized_primitives.cognitive_load),
            ]:
                # |z| beyond the threshold is flagged inline as an anomaly.
                marker = " [red]<< ANOMALY[/red]" if abs(val) > threshold else ""
                # Trailing "s" presumably denotes sigma units — confirm.
                console.print(f"  {name:22s} {val:+.3f}s{marker}")
            console.print()

            console.print(f"[bold]Overall Score:[/bold] [red]{report.overall_score:.4f}[/red]")
            console.print(f"[bold]Confidence:[/bold] [blue]{report.confidence:.4f}[/blue]")
            console.print()

            if report.anomaly_flags:
                console.print("[bold]Anomaly Flags:[/bold]")
                for flag in report.anomaly_flags:
                    console.print(f"  [red]-[/red] {flag}")
                console.print()

            if report.root_causes:
                console.print("[bold]Root Causes:[/bold]")
                for cause in report.root_causes:
                    console.print(f"  [red]![/red] {cause}")
                console.print()

            if report.recommendations:
                console.print("[bold]Recommendations:[/bold]")
                for rec in report.recommendations:
                    console.print(f"  [green]->[/green] {rec}")
                console.print()

            console.print("[dim]" + "-" * 80 + "[/dim]")
            console.print()
|
500
|
+
def format_json(self, reports: List[AnomalyReport]) -> str:
|
|
501
|
+
"""Format reports as JSON string."""
|
|
502
|
+
data = [asdict(r) for r in reports]
|
|
503
|
+
return json.dumps(data, indent=2)
|
|
504
|
+
|
|
505
|
+
def format_csv(self, reports: List[AnomalyReport]) -> str:
|
|
506
|
+
"""Format reports as CSV string."""
|
|
507
|
+
output = io.StringIO()
|
|
508
|
+
writer = csv.writer(output)
|
|
509
|
+
writer.writerow([
|
|
510
|
+
"file", "overall_score", "confidence",
|
|
511
|
+
"structural_entropy", "network_centrality",
|
|
512
|
+
"churn_volatility", "semantic_coherence", "cognitive_load",
|
|
513
|
+
"anomaly_flags",
|
|
514
|
+
])
|
|
515
|
+
for r in reports:
|
|
516
|
+
writer.writerow([
|
|
517
|
+
r.file, f"{r.overall_score:.4f}", f"{r.confidence:.4f}",
|
|
518
|
+
f"{r.primitives.structural_entropy:.4f}",
|
|
519
|
+
f"{r.primitives.network_centrality:.4f}",
|
|
520
|
+
f"{r.primitives.churn_volatility:.4f}",
|
|
521
|
+
f"{r.primitives.semantic_coherence:.4f}",
|
|
522
|
+
f"{r.primitives.cognitive_load:.4f}",
|
|
523
|
+
";".join(r.anomaly_flags),
|
|
524
|
+
])
|
|
525
|
+
return output.getvalue()
|
|
526
|
+
|
|
527
|
+
def format_quiet(self, reports: List[AnomalyReport]) -> str:
|
|
528
|
+
"""Format reports as one file path per line."""
|
|
529
|
+
return "\n".join(r.file for r in reports)
|
|
530
|
+
|
|
531
|
+
def export_json(
|
|
532
|
+
self, reports: List[AnomalyReport], filename: str = "analysis_report.json"
|
|
533
|
+
):
|
|
534
|
+
"""
|
|
535
|
+
Export analysis to JSON file.
|
|
536
|
+
|
|
537
|
+
Args:
|
|
538
|
+
reports: List of anomaly reports
|
|
539
|
+
filename: Output filename
|
|
540
|
+
"""
|
|
541
|
+
output_path = Path(filename)
|
|
542
|
+
with open(output_path, "w") as f:
|
|
543
|
+
f.write(self.format_json(reports))
|
|
544
|
+
|
|
545
|
+
console.print(f"[green]Exported detailed report to {output_path}[/green]")
|
|
546
|
+
logger.info(f"Exported {len(reports)} reports to {output_path}")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Exception hierarchy for Shannon Insight."""
|
|
2
|
+
|
|
3
|
+
from .base import ShannonInsightError
|
|
4
|
+
from .analysis import (
|
|
5
|
+
AnalysisError,
|
|
6
|
+
FileAccessError,
|
|
7
|
+
ParsingError,
|
|
8
|
+
UnsupportedLanguageError,
|
|
9
|
+
InsufficientDataError,
|
|
10
|
+
PrimitiveExtractionError,
|
|
11
|
+
)
|
|
12
|
+
from .config import (
|
|
13
|
+
ConfigurationError,
|
|
14
|
+
InvalidPathError,
|
|
15
|
+
InvalidConfigError,
|
|
16
|
+
SecurityError,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"ShannonInsightError",
|
|
21
|
+
"AnalysisError",
|
|
22
|
+
"FileAccessError",
|
|
23
|
+
"ParsingError",
|
|
24
|
+
"UnsupportedLanguageError",
|
|
25
|
+
"InsufficientDataError",
|
|
26
|
+
"PrimitiveExtractionError",
|
|
27
|
+
"ConfigurationError",
|
|
28
|
+
"InvalidPathError",
|
|
29
|
+
"InvalidConfigError",
|
|
30
|
+
"SecurityError",
|
|
31
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Analysis-related exceptions: file access, parsing, data issues."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, List, Dict
|
|
5
|
+
|
|
6
|
+
from .base import ShannonInsightError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnalysisError(ShannonInsightError):
    """Base class for analysis-related errors."""
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FileAccessError(AnalysisError):
    """Raised when a file cannot be accessed or read."""

    def __init__(self, filepath: Path, reason: str):
        detail_map = {"filepath": str(filepath), "reason": reason}
        super().__init__(f"Cannot access file: {filepath}", details=detail_map)
        # Keep the structured fields available to callers as attributes.
        self.filepath = filepath
        self.reason = reason
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ParsingError(AnalysisError):
    """Raised when file content cannot be parsed."""

    def __init__(self, filepath: Path, language: str, reason: str):
        detail_map = {
            "filepath": str(filepath),
            "language": language,
            "reason": reason,
        }
        super().__init__(f"Failed to parse {language} file: {filepath}", details=detail_map)
        # Structured fields mirrored as attributes for programmatic handling.
        self.filepath = filepath
        self.language = language
        self.reason = reason
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class UnsupportedLanguageError(AnalysisError):
    """Raised when attempting to analyze an unsupported language."""

    def __init__(self, language: str, supported_languages: List[str]):
        detail_map = {
            "language": language,
            "supported": ", ".join(supported_languages),
        }
        super().__init__(f"Unsupported language: {language}", details=detail_map)
        # Kept so callers can render their own "supported languages" message.
        self.language = language
        self.supported_languages = supported_languages
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class InsufficientDataError(AnalysisError):
    """Raised when there's not enough data for analysis."""

    def __init__(self, reason: str, minimum_required: Optional[int] = None):
        # minimum_required is optional; only include it in details when given.
        detail_map: Dict[str, str] = {"reason": reason}
        if minimum_required is not None:
            detail_map["minimum_required"] = str(minimum_required)
        super().__init__(f"Insufficient data for analysis: {reason}", details=detail_map)
        self.reason = reason
        self.minimum_required = minimum_required
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class PrimitiveExtractionError(AnalysisError):
    """Raised when primitive extraction fails."""

    def __init__(self, primitive_name: str, filepath: Path, reason: str):
        detail_map = {
            "primitive": primitive_name,
            "filepath": str(filepath),
            "reason": reason,
        }
        super().__init__(
            f"Failed to extract {primitive_name} from {filepath}",
            details=detail_map,
        )
        # Structured fields mirrored as attributes for programmatic handling.
        self.primitive_name = primitive_name
        self.filepath = filepath
        self.reason = reason
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Base exception for Shannon Insight."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ShannonInsightError(Exception):
    """Base exception for all Shannon Insight errors.

    Carries a human-readable message plus an optional string-to-string
    details mapping that is appended to str() output in parentheses.
    """

    def __init__(self, message: str, details: Optional[Dict[str, str]] = None):
        super().__init__(message)
        self.message = message
        self.details = {} if details is None else details

    def __str__(self) -> str:
        if not self.details:
            return self.message
        rendered = ", ".join(f"{key}={value}" for key, value in self.details.items())
        return f"{self.message} ({rendered})"