shannon-codebase-insight 0.4.0 (shannon_codebase_insight-0.4.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
shannon_insight/core.py
@@ -0,0 +1,546 @@
+ """Main pipeline orchestrator for Shannon Insight"""
+
+ import csv
+ import io
+ import json
+ from dataclasses import asdict
+ from pathlib import Path
+ from typing import List, Optional
+
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich.progress import (
+     Progress,
+     SpinnerColumn,
+     BarColumn,
+     TaskProgressColumn,
+     TimeElapsedColumn,
+     TextColumn,
+ )
+ from rich.table import Table
+
+ from .models import AnomalyReport
+ from .analyzers import GoScanner, TypeScriptScanner, PythonScanner
+ from .primitives import (
+     PrimitiveExtractor,
+     AnomalyDetector,
+     SignalFusion,
+     RecommendationEngine,
+ )
+ from .config import AnalysisSettings, default_settings
+ from .cache import AnalysisCache, compute_config_hash
+ from .logging_config import get_logger
+ from .security import validate_root_directory
+ from .exceptions import (
+     InvalidPathError,
+     UnsupportedLanguageError,
+     InsufficientDataError,
+ )
+
+ import sys as _sys
+
+ logger = get_logger(__name__)
+ console = Console(stderr=True)
+
+
+ class CodebaseAnalyzer:
+     """Main pipeline orchestrator with enterprise features"""
+
+     SUPPORTED_LANGUAGES = {"auto", "go", "typescript", "react", "javascript", "python"}
+
+     def __init__(
+         self,
+         root_dir: "Path | str",
+         language: str = "auto",
+         settings: Optional[AnalysisSettings] = None,
+     ):
+         """
+         Initialize codebase analyzer.
+
+         Args:
+             root_dir: Root directory of codebase to analyze
+             language: Programming language (auto, go, typescript, react, javascript, python)
+             settings: Analysis settings (uses defaults if not provided)
+
+         Raises:
+             InvalidPathError: If root_dir is invalid
+             UnsupportedLanguageError: If language is not supported
+         """
+         # Validate root directory
+         self.root_dir = validate_root_directory(Path(root_dir))
+         logger.info(f"Analyzing codebase at: {self.root_dir}")
+
+         # Validate language
+         if language not in self.SUPPORTED_LANGUAGES:
+             raise UnsupportedLanguageError(language, list(self.SUPPORTED_LANGUAGES))
+         self.language = language
+
+         # Load settings
+         self.settings = settings or default_settings
+         logger.debug(
+             f"Settings: cache={self.settings.enable_cache}, "
+             f"workers={self.settings.parallel_workers}, "
+             f"threshold={self.settings.z_score_threshold}"
+         )
+
+         # Initialize cache
+         self.cache = None
+         if self.settings.enable_cache:
+             self.cache = AnalysisCache(
+                 cache_dir=self.settings.cache_dir,
+                 ttl_hours=self.settings.cache_ttl_hours,
+                 enabled=True,
+             )
+             logger.debug(f"Cache enabled at {self.settings.cache_dir}")
+
+         # Compute config hash for cache invalidation
+         self.config_hash = compute_config_hash(self.settings.model_dump())
+
+         # Track analysis metadata
+         self._total_files_scanned = 0
+         self._detected_language = language
+
+     def analyze(self) -> List[AnomalyReport]:
+         """
+         Run full analysis pipeline with progress tracking.
+
+         Returns:
+             List of anomaly reports sorted by severity
+
+         Raises:
+             InsufficientDataError: If no files found to analyze
+         """
+         console.print()
+         console.print("[bold cyan]" + "=" * 40)
+         console.print(
+             "[bold cyan]SHANNON INSIGHT - Multi-Signal Codebase Quality Analyzer"
+         )
+         console.print("[bold cyan]" + "=" * 40)
+         console.print()
+
+         with Progress(
+             SpinnerColumn(),
+             TextColumn("[progress.description]{task.description}"),
+             BarColumn(),
+             TaskProgressColumn(),
+             TimeElapsedColumn(),
+             console=console,
+             transient=False,
+         ) as progress:
+             # Layer 1: Data Collection
+             scan_task = progress.add_task(
+                 "[cyan]Layer 1: Scanning codebase...", total=None
+             )
+
+             scanner = self._get_scanner()
+             files = scanner.scan()
+
+             if not files:
+                 logger.warning("No files found to analyze")
+                 raise InsufficientDataError(
+                     "No source files found in the specified directory",
+                     minimum_required=1,
+                 )
+
+             self._total_files_scanned = len(files)
+
+             progress.update(
+                 scan_task,
+                 total=1, completed=1,
+                 description=f"[green]Layer 1: Found {len(files)} source files",
+             )
+             logger.info(f"Scanned {len(files)} files")
+
+             # Layer 2: Primitive Extraction
+             extract_task = progress.add_task(
+                 "[cyan]Layer 2: Extracting primitives...", total=100
+             )
+
+             extractor = PrimitiveExtractor(
+                 files, cache=self.cache, config_hash=self.config_hash
+             )
+             primitives = extractor.extract_all()
+
+             progress.update(
+                 extract_task,
+                 completed=100,
+                 description=f"[green]Layer 2: Extracted 5 primitives for {len(primitives)} files",
+             )
+             logger.info(f"Extracted primitives for {len(primitives)} files")
+
+             # Layer 3: Normalization & Anomaly Detection
+             detect_task = progress.add_task(
+                 "[cyan]Layer 3: Normalizing and detecting anomalies...", total=100
+             )
+
+             detector = AnomalyDetector(
+                 primitives, threshold=self.settings.z_score_threshold
+             )
+             normalized = detector.normalize()
+             anomalies = detector.detect_anomalies(normalized)
+
+             progress.update(
+                 detect_task,
+                 completed=100,
+                 description=f"[green]Layer 3: Detected {len(anomalies)} anomalous files",
+             )
+             logger.info(f"Detected {len(anomalies)} anomalies")
+
+             # Layer 4: Signal Fusion
+             fusion_task = progress.add_task(
+                 "[cyan]Layer 4: Fusing signals with consistency check...", total=100
+             )
+
+             fusion = SignalFusion(
+                 primitives, normalized, weights=self.settings.fusion_weights
+             )
+             fused_scores = fusion.fuse()
+
+             progress.update(
+                 fusion_task,
+                 completed=100,
+                 description=f"[green]Layer 4: Computed consensus scores for {len(fused_scores)} files",
+             )
+             logger.info(f"Fused signals for {len(fused_scores)} files")
+
+             # Layer 5: Recommendations
+             rec_task = progress.add_task(
+                 "[cyan]Layer 5: Generating recommendations...", total=100
+             )
+
+             engine = RecommendationEngine(
+                 files, primitives, normalized, anomalies, fused_scores
+             )
+             reports = engine.generate()
+
+             progress.update(
+                 rec_task,
+                 completed=100,
+                 description=f"[green]Layer 5: Generated {len(reports)} actionable reports",
+             )
+             logger.info(f"Generated {len(reports)} reports")
+
+         console.print()
+
+         # Show cache stats if enabled
+         if self.cache:
+             stats = self.cache.stats()
+             logger.debug(f"Cache stats: {stats}")
+
+         return reports
+
+     def _get_scanner(self):
+         """
+         Get appropriate scanner based on language.
+
+         Returns:
+             Language-specific scanner instance
+
+         Note:
+             Auto-detection falls back to the Python scanner when detection fails.
+         """
+         if self.language == "go":
+             logger.debug("Using Go scanner")
+             self._detected_language = "go"
+             return GoScanner(str(self.root_dir), settings=self.settings)
+         elif self.language in ["typescript", "react", "javascript"]:
+             logger.debug(f"Using TypeScript scanner for {self.language}")
+             self._detected_language = self.language
+             return TypeScriptScanner(str(self.root_dir), settings=self.settings)
+         elif self.language == "python":
+             logger.debug("Using Python scanner")
+             self._detected_language = "python"
+             return PythonScanner(str(self.root_dir), settings=self.settings)
+         else:
+             # Auto-detect
+             logger.debug("Auto-detecting language...")
+
+             skip_dirs = {"venv", ".venv", "node_modules", "__pycache__", ".git", "dist", "build"}
+
+             def _has_ext(ext: str) -> bool:
+                 for p in self.root_dir.rglob(f"*{ext}"):
+                     if not any(part in skip_dirs for part in p.parts):
+                         return True
+                 return False
+
+             has_go = _has_ext(".go")
+             has_ts = _has_ext(".ts") or _has_ext(".tsx")
+             has_py = _has_ext(".py")
+
+             if has_go:
+                 logger.info("Auto-detected: Go codebase")
+                 console.print("[yellow]Auto-detected: Go codebase[/yellow]\n")
+                 self._detected_language = "go"
+                 return GoScanner(str(self.root_dir), settings=self.settings)
+             elif has_ts:
+                 logger.info("Auto-detected: TypeScript/React codebase")
+                 console.print(
+                     "[yellow]Auto-detected: TypeScript/React codebase[/yellow]\n"
+                 )
+                 self._detected_language = "typescript"
+                 return TypeScriptScanner(str(self.root_dir), settings=self.settings)
+             elif has_py:
+                 logger.info("Auto-detected: Python codebase")
+                 console.print("[yellow]Auto-detected: Python codebase[/yellow]\n")
+                 self._detected_language = "python"
+                 return PythonScanner(str(self.root_dir), settings=self.settings)
+             else:
+                 logger.warning("Could not auto-detect language, defaulting to Python")
+                 console.print(
+                     "[yellow]Could not auto-detect language. Defaulting to Python.[/yellow]\n"
+                 )
+                 self._detected_language = "python"
+                 return PythonScanner(str(self.root_dir), settings=self.settings)
+
+     def print_summary(self, reports: List[AnomalyReport], top_n: int = 10):
+         """
+         Print a compact summary dashboard using Rich Panel and Table.
+
+         Args:
+             reports: List of anomaly reports
+             top_n: Number of top files to display in summary
+         """
+         num_anomalies = len(reports)
+         pct = (num_anomalies / self._total_files_scanned * 100) if self._total_files_scanned > 0 else 0
+         avg_confidence = (
+             sum(r.confidence for r in reports) / num_anomalies
+             if num_anomalies > 0
+             else 0.0
+         )
+
+         summary_text = (
+             f"Scanned [bold]{self._total_files_scanned}[/bold] files "
+             f"([cyan]{self._detected_language}[/cyan]) | "
+             f"[yellow]{num_anomalies}[/yellow] anomalies "
+             f"([yellow]{pct:.0f}%[/yellow]) | "
+             f"Avg confidence: [blue]{avg_confidence:.2f}[/blue]"
+         )
+         console.print(Panel(summary_text, title="[bold cyan]Summary[/bold cyan]", expand=False))
+         console.print()
+
+         if not reports:
+             return
+
+         table = Table(title=f"Top {min(top_n, len(reports))} Files Requiring Attention", expand=True)
+         table.add_column("#", style="dim", width=4)
+         table.add_column("File", style="yellow", no_wrap=False, ratio=3)
+         table.add_column("Score", style="red", justify="right", width=8)
+         table.add_column("Confidence", style="blue", justify="right", width=12)
+         table.add_column("Primary Issue", style="white", ratio=2)
+
+         # Shorten flag names for table display
+         _short = {
+             "structural_entropy_high": "entropy high",
+             "structural_entropy_low": "entropy low",
+             "high_centrality": "high centrality",
+             "high_volatility": "high volatility",
+             "semantic_coherence_low": "coherence low",
+             "semantic_coherence_high": "coherence high",
+             "high_cognitive_load": "high cog. load",
+         }
+
+         for i, report in enumerate(reports[:top_n], 1):
+             flags = [_short.get(f, f) for f in report.anomaly_flags]
+             primary = ", ".join(flags) if flags else "-"
+             table.add_row(
+                 str(i),
+                 report.file,
+                 f"{report.overall_score:.3f}",
+                 f"{report.confidence:.2f}",
+                 primary,
+             )
+
+         console.print(table)
+         console.print()
+
+     def print_report(self, reports: List[AnomalyReport], top_n: int = 10):
+         """
+         Print human-readable analysis report with rich formatting.
+
+         Args:
+             reports: List of anomaly reports
+             top_n: Number of top files to display
+         """
+         console.print("[bold cyan]" + "=" * 40)
+         console.print(
+             f"[bold cyan]TOP {min(top_n, len(reports))} FILES REQUIRING ATTENTION"
+         )
+         console.print("[bold cyan]" + "=" * 40)
+         console.print()
+
+         for i, report in enumerate(reports[:top_n], 1):
+             console.print(f"[bold yellow]{i}. {report.file}[/bold yellow]")
+             console.print(
+                 f"  Overall Score: [red]{report.overall_score:.3f}[/red] "
+                 f"(Confidence: [blue]{report.confidence:.2f}[/blue])"
+             )
+             console.print()
+
+             console.print("  [dim]Raw Primitives:[/dim]")
+             console.print(
+                 f"    - Structural Entropy: {report.primitives.structural_entropy:.3f}"
+             )
+             console.print(
+                 f"    - Network Centrality: {report.primitives.network_centrality:.3f}"
+             )
+             console.print(
+                 f"    - Churn Volatility: {report.primitives.churn_volatility:.3f}"
+             )
+             console.print(
+                 f"    - Semantic Coherence: {report.primitives.semantic_coherence:.3f}"
+             )
+             console.print(
+                 f"    - Cognitive Load: {report.primitives.cognitive_load:.3f}"
+             )
+             console.print()
+
+             console.print("  [dim]Normalized (Z-Scores):[/dim]")
+             console.print(
+                 f"    - Structural Entropy: {report.normalized_primitives.structural_entropy:+.2f}σ"
+             )
+             console.print(
+                 f"    - Network Centrality: {report.normalized_primitives.network_centrality:+.2f}σ"
+             )
+             console.print(
+                 f"    - Churn Volatility: {report.normalized_primitives.churn_volatility:+.2f}σ"
+             )
+             console.print(
+                 f"    - Semantic Coherence: {report.normalized_primitives.semantic_coherence:+.2f}σ"
+             )
+             console.print(
+                 f"    - Cognitive Load: {report.normalized_primitives.cognitive_load:+.2f}σ"
+             )
+             console.print()
+
+             if report.root_causes:
+                 console.print("  [dim]Root Causes:[/dim]")
+                 for cause in report.root_causes:
+                     console.print(f"    [red]![/red] {cause}")
+                 console.print()
+
+             if report.recommendations:
+                 console.print("  [dim]Recommendations:[/dim]")
+                 for rec in report.recommendations:
+                     console.print(f"    [green]->[/green] {rec}")
+                 console.print()
+
+             console.print("[dim]" + "-" * 80 + "[/dim]")
+             console.print()
+
+         logger.info(f"Printed report for top {min(top_n, len(reports))} files")
+
+     def print_explain(self, reports: List[AnomalyReport], pattern: str):
+         """
+         Print a deep-dive explanation for file(s) matching a pattern.
+
+         Args:
+             reports: List of anomaly reports
+             pattern: File name or pattern to match
+         """
+         matching = [r for r in reports if pattern in r.file]
+
+         if not matching:
+             console.print(f"[yellow]No files matching '{pattern}' found in analysis results.[/yellow]")
+             return
+
+         for report in matching:
+             console.print(Panel(
+                 f"[bold]{report.file}[/bold]",
+                 title="[bold cyan]Deep Dive[/bold cyan]",
+                 expand=False,
+             ))
+             console.print()
+
+             console.print("[bold]Raw Primitives:[/bold]")
+             console.print(f"  Structural Entropy: {report.primitives.structural_entropy:.4f}")
+             console.print(f"  Network Centrality: {report.primitives.network_centrality:.4f}")
+             console.print(f"  Churn Volatility: {report.primitives.churn_volatility:.4f}")
+             console.print(f"  Semantic Coherence: {report.primitives.semantic_coherence:.4f}")
+             console.print(f"  Cognitive Load: {report.primitives.cognitive_load:.4f}")
+             console.print()
+
+             threshold = self.settings.z_score_threshold
+             console.print(f"[bold]Normalized Z-Scores[/bold] (threshold: {threshold:.1f}):")
+             for name, val in [
+                 ("Structural Entropy", report.normalized_primitives.structural_entropy),
+                 ("Network Centrality", report.normalized_primitives.network_centrality),
+                 ("Churn Volatility", report.normalized_primitives.churn_volatility),
+                 ("Semantic Coherence", report.normalized_primitives.semantic_coherence),
+                 ("Cognitive Load", report.normalized_primitives.cognitive_load),
+             ]:
+                 marker = " [red]<< ANOMALY[/red]" if abs(val) > threshold else ""
+                 console.print(f"  {name:22s} {val:+.3f}σ{marker}")
+             console.print()
+
+             console.print(f"[bold]Overall Score:[/bold] [red]{report.overall_score:.4f}[/red]")
+             console.print(f"[bold]Confidence:[/bold] [blue]{report.confidence:.4f}[/blue]")
+             console.print()
+
+             if report.anomaly_flags:
+                 console.print("[bold]Anomaly Flags:[/bold]")
+                 for flag in report.anomaly_flags:
+                     console.print(f"  [red]-[/red] {flag}")
+                 console.print()
+
+             if report.root_causes:
+                 console.print("[bold]Root Causes:[/bold]")
+                 for cause in report.root_causes:
+                     console.print(f"  [red]![/red] {cause}")
+                 console.print()
+
+             if report.recommendations:
+                 console.print("[bold]Recommendations:[/bold]")
+                 for rec in report.recommendations:
+                     console.print(f"  [green]->[/green] {rec}")
+                 console.print()
+
+             console.print("[dim]" + "-" * 80 + "[/dim]")
+             console.print()
+
+     def format_json(self, reports: List[AnomalyReport]) -> str:
+         """Format reports as JSON string."""
+         data = [asdict(r) for r in reports]
+         return json.dumps(data, indent=2)
+
+     def format_csv(self, reports: List[AnomalyReport]) -> str:
+         """Format reports as CSV string."""
+         output = io.StringIO()
+         writer = csv.writer(output)
+         writer.writerow([
+             "file", "overall_score", "confidence",
+             "structural_entropy", "network_centrality",
+             "churn_volatility", "semantic_coherence", "cognitive_load",
+             "anomaly_flags",
+         ])
+         for r in reports:
+             writer.writerow([
+                 r.file, f"{r.overall_score:.4f}", f"{r.confidence:.4f}",
+                 f"{r.primitives.structural_entropy:.4f}",
+                 f"{r.primitives.network_centrality:.4f}",
+                 f"{r.primitives.churn_volatility:.4f}",
+                 f"{r.primitives.semantic_coherence:.4f}",
+                 f"{r.primitives.cognitive_load:.4f}",
+                 ";".join(r.anomaly_flags),
+             ])
+         return output.getvalue()
+
+     def format_quiet(self, reports: List[AnomalyReport]) -> str:
+         """Format reports as one file path per line."""
+         return "\n".join(r.file for r in reports)
+
+     def export_json(
+         self, reports: List[AnomalyReport], filename: str = "analysis_report.json"
+     ):
+         """
+         Export analysis to JSON file.
+
+         Args:
+             reports: List of anomaly reports
+             filename: Output filename
+         """
+         output_path = Path(filename)
+         with open(output_path, "w", encoding="utf-8") as f:
+             f.write(self.format_json(reports))
+
+         console.print(f"[green]Exported detailed report to {output_path}[/green]")
+         logger.info(f"Exported {len(reports)} reports to {output_path}")
shannon_insight/exceptions/__init__.py
@@ -0,0 +1,31 @@
+ """Exception hierarchy for Shannon Insight."""
+
+ from .base import ShannonInsightError
+ from .analysis import (
+     AnalysisError,
+     FileAccessError,
+     ParsingError,
+     UnsupportedLanguageError,
+     InsufficientDataError,
+     PrimitiveExtractionError,
+ )
+ from .config import (
+     ConfigurationError,
+     InvalidPathError,
+     InvalidConfigError,
+     SecurityError,
+ )
+
+ __all__ = [
+     "ShannonInsightError",
+     "AnalysisError",
+     "FileAccessError",
+     "ParsingError",
+     "UnsupportedLanguageError",
+     "InsufficientDataError",
+     "PrimitiveExtractionError",
+     "ConfigurationError",
+     "InvalidPathError",
+     "InvalidConfigError",
+     "SecurityError",
+ ]
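
Every class exported here ultimately derives from ShannonInsightError, so callers can handle specific failures first and fall back to one broad except clause. A hedged sketch of that pattern (the target path and language are illustrative):

    from shannon_insight.core import CodebaseAnalyzer
    from shannon_insight.exceptions import (
        ShannonInsightError,
        UnsupportedLanguageError,
        InsufficientDataError,
    )

    try:
        reports = CodebaseAnalyzer("path/to/repo", language="rust").analyze()
    except UnsupportedLanguageError as e:
        print(f"Supported languages: {e.supported_languages}")
    except InsufficientDataError as e:
        print(f"Nothing to analyze: {e.reason}")
    except ShannonInsightError as e:
        print(e)  # __str__ folds the details dict into the message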
shannon_insight/exceptions/analysis.py
@@ -0,0 +1,78 @@
+ """Analysis-related exceptions: file access, parsing, data issues."""
+
+ from pathlib import Path
+ from typing import Optional, List, Dict
+
+ from .base import ShannonInsightError
+
+
+ class AnalysisError(ShannonInsightError):
+     """Base class for analysis-related errors."""
+     pass
+
+
+ class FileAccessError(AnalysisError):
+     """Raised when a file cannot be accessed or read."""
+
+     def __init__(self, filepath: Path, reason: str):
+         super().__init__(
+             f"Cannot access file: {filepath}",
+             details={"filepath": str(filepath), "reason": reason},
+         )
+         self.filepath = filepath
+         self.reason = reason
+
+
+ class ParsingError(AnalysisError):
+     """Raised when file content cannot be parsed."""
+
+     def __init__(self, filepath: Path, language: str, reason: str):
+         super().__init__(
+             f"Failed to parse {language} file: {filepath}",
+             details={"filepath": str(filepath), "language": language, "reason": reason},
+         )
+         self.filepath = filepath
+         self.language = language
+         self.reason = reason
+
+
+ class UnsupportedLanguageError(AnalysisError):
+     """Raised when attempting to analyze an unsupported language."""
+
+     def __init__(self, language: str, supported_languages: List[str]):
+         super().__init__(
+             f"Unsupported language: {language}",
+             details={"language": language, "supported": ", ".join(supported_languages)},
+         )
+         self.language = language
+         self.supported_languages = supported_languages
+
+
+ class InsufficientDataError(AnalysisError):
+     """Raised when there's not enough data for analysis."""
+
+     def __init__(self, reason: str, minimum_required: Optional[int] = None):
+         details: Dict[str, str] = {"reason": reason}
+         if minimum_required is not None:
+             details["minimum_required"] = str(minimum_required)
+
+         super().__init__(f"Insufficient data for analysis: {reason}", details=details)
+         self.reason = reason
+         self.minimum_required = minimum_required
+
+
+ class PrimitiveExtractionError(AnalysisError):
+     """Raised when primitive extraction fails."""
+
+     def __init__(self, primitive_name: str, filepath: Path, reason: str):
+         super().__init__(
+             f"Failed to extract {primitive_name} from {filepath}",
+             details={
+                 "primitive": primitive_name,
+                 "filepath": str(filepath),
+                 "reason": reason,
+             },
+         )
+         self.primitive_name = primitive_name
+         self.filepath = filepath
+         self.reason = reason
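
These constructors all funnel structured context into the base class details dict, so a caller can wrap a low-level failure without losing information. A hedged illustration (read_source is a hypothetical helper, not part of the package):

    from pathlib import Path
    from shannon_insight.exceptions import FileAccessError

    def read_source(path: Path) -> str:
        # Hypothetical helper: convert the raw OSError into a
        # FileAccessError carrying the path and reason as details.
        try:
            return path.read_text(encoding="utf-8")
        except OSError as exc:
            raise FileAccessError(path, reason=str(exc)) from exc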
shannon_insight/exceptions/base.py
@@ -0,0 +1,18 @@
+ """Base exception for Shannon Insight."""
+
+ from typing import Optional, Dict
+
+
+ class ShannonInsightError(Exception):
+     """Base exception for all Shannon Insight errors."""
+
+     def __init__(self, message: str, details: Optional[Dict[str, str]] = None):
+         super().__init__(message)
+         self.message = message
+         self.details = details or {}
+
+     def __str__(self) -> str:
+         if self.details:
+             details_str = ", ".join(f"{k}={v}" for k, v in self.details.items())
+             return f"{self.message} ({details_str})"
+         return self.message
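
A small sketch of the behavior defined above: when a details dict is present, __str__ appends it to the message as comma-separated key=value pairs (the message and keys here are illustrative):

    from shannon_insight.exceptions import ShannonInsightError

    err = ShannonInsightError(
        "cache write failed",
        details={"path": "/tmp/insight.db", "ttl_hours": "24"},
    )
    print(err)  # cache write failed (path=/tmp/insight.db, ttl_hours=24)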