shannon-codebase-insight 0.4.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
- shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
- shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
- shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
- shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
- shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
- shannon_insight/__init__.py +25 -0
- shannon_insight/analyzers/__init__.py +8 -0
- shannon_insight/analyzers/base.py +215 -0
- shannon_insight/analyzers/go_analyzer.py +150 -0
- shannon_insight/analyzers/python_analyzer.py +169 -0
- shannon_insight/analyzers/typescript_analyzer.py +162 -0
- shannon_insight/cache.py +214 -0
- shannon_insight/cli.py +333 -0
- shannon_insight/config.py +235 -0
- shannon_insight/core.py +546 -0
- shannon_insight/exceptions/__init__.py +31 -0
- shannon_insight/exceptions/analysis.py +78 -0
- shannon_insight/exceptions/base.py +18 -0
- shannon_insight/exceptions/config.py +48 -0
- shannon_insight/file_ops.py +218 -0
- shannon_insight/logging_config.py +98 -0
- shannon_insight/math/__init__.py +15 -0
- shannon_insight/math/entropy.py +133 -0
- shannon_insight/math/fusion.py +109 -0
- shannon_insight/math/graph.py +209 -0
- shannon_insight/math/robust.py +106 -0
- shannon_insight/math/statistics.py +159 -0
- shannon_insight/models.py +48 -0
- shannon_insight/primitives/__init__.py +13 -0
- shannon_insight/primitives/detector.py +318 -0
- shannon_insight/primitives/extractor.py +278 -0
- shannon_insight/primitives/fusion.py +373 -0
- shannon_insight/primitives/recommendations.py +158 -0
- shannon_insight/py.typed +2 -0
- shannon_insight/security.py +284 -0
- shannon_insight/utils/__init__.py +1 -0
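Beyond the shannon-insight console script, the module layout above suggests the analyzer can also be driven programmatically. The sketch below is illustrative only: CodebaseAnalyzer, load_settings, and overall_score are taken from cli.py further down, while the file attribute on a report is an assumption based on that file's CSV header.

from pathlib import Path

from shannon_insight.config import load_settings
from shannon_insight.core import CodebaseAnalyzer

# Defaults plus any SHANNON_* environment variables (see config.py below).
settings = load_settings()
analyzer = CodebaseAnalyzer(root_dir=Path("."), language="auto", settings=settings)

reports = analyzer.analyze()
for report in reports[:10]:
    # "file" is assumed from the CSV header in cli.py; overall_score is the fused
    # anomaly score that --fail-above gates on.
    print(report.file, report.overall_score)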
shannon_insight/cli.py
ADDED
@@ -0,0 +1,333 @@
+"""Command-line interface for Shannon Insight"""
+
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+import typer
+from rich.console import Console
+
+from . import __version__
+from .core import CodebaseAnalyzer
+from .config import load_settings, AnalysisSettings
+from .logging_config import setup_logging
+from .exceptions import ShannonInsightError
+
+app = typer.Typer(
+    name="shannon-insight",
+    help="Shannon Insight - Multi-Signal Codebase Quality Analyzer",
+    add_completion=False,
+    rich_markup_mode="rich",
+)
+
+console = Console()
+
+
+@app.callback(invoke_without_command=True, no_args_is_help=True)
+def main(
+    ctx: typer.Context,
+    path: Path = typer.Argument(
+        Path("."),
+        help="Path to the codebase directory to analyze",
+        exists=False,
+        file_okay=False,
+        dir_okay=True,
+        readable=True,
+    ),
+    language: str = typer.Option(
+        "auto",
+        "--language",
+        "-l",
+        help="Programming language (auto, go, typescript, react, javascript, python)",
+    ),
+    top: int = typer.Option(
+        15,
+        "--top",
+        "-t",
+        help="Number of top files to display",
+        min=1,
+        max=1000,
+    ),
+    output: Optional[Path] = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Output JSON file path",
+        dir_okay=False,
+    ),
+    fmt: str = typer.Option(
+        "rich",
+        "--format",
+        "-f",
+        help="Output format: rich (default), json, csv, quiet",
+    ),
+    explain: Optional[str] = typer.Option(
+        None,
+        "--explain",
+        "-e",
+        help="Deep-dive explanation for a specific file (substring match)",
+    ),
+    fail_above: Optional[float] = typer.Option(
+        None,
+        "--fail-above",
+        help="Exit 1 if any file's score exceeds this threshold (for CI gating)",
+        min=0.0,
+    ),
+    threshold: Optional[float] = typer.Option(
+        None,
+        "--threshold",
+        help="Z-score threshold for anomaly detection (0.0 - 10.0)",
+        min=0.0,
+        max=10.0,
+    ),
+    config: Optional[Path] = typer.Option(
+        None,
+        "--config",
+        "-c",
+        help="Configuration file path (TOML format)",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose (DEBUG) logging",
+    ),
+    quiet: bool = typer.Option(
+        False,
+        "--quiet",
+        "-q",
+        help="Suppress all but ERROR logging",
+    ),
+    no_cache: bool = typer.Option(
+        False,
+        "--no-cache",
+        help="Disable caching",
+    ),
+    clear_cache: bool = typer.Option(
+        False,
+        "--clear-cache",
+        help="Clear cache before running",
+    ),
+    workers: Optional[int] = typer.Option(
+        None,
+        "--workers",
+        "-w",
+        help="Number of parallel workers (default: auto-detect)",
+        min=1,
+        max=32,
+    ),
+    version: bool = typer.Option(
+        False,
+        "--version",
+        help="Show version and exit",
+    ),
+):
+    """
+    Analyze codebase quality using mathematical primitives.
+
+    Named after Claude Shannon, father of information theory.
+
+    [bold cyan]Examples:[/bold cyan]
+
+        shannon-insight /path/to/codebase
+
+        shannon-insight /path/to/codebase --language go
+
+        shannon-insight /path/to/codebase --top 20 --output results.json
+
+        shannon-insight . --format json | jq .
+
+        shannon-insight . --fail-above 2.0 --format quiet
+
+        shannon-insight . --explain complex.go
+    """
+    # If subcommand is invoked, don't run main analysis
+    if ctx.invoked_subcommand is not None:
+        return
+
+    # Handle version
+    if version:
+        console.print(
+            f"[bold cyan]Shannon Insight[/bold cyan] version [green]{__version__}[/green]"
+        )
+        raise typer.Exit(0)
+
+    # Validate mutually exclusive options
+    if verbose and quiet:
+        console.print("[red]Error:[/red] --verbose and --quiet are mutually exclusive")
+        raise typer.Exit(1)
+
+    # Validate format
+    valid_formats = {"rich", "json", "csv", "quiet"}
+    if fmt not in valid_formats:
+        console.print(f"[red]Error:[/red] --format must be one of: {', '.join(sorted(valid_formats))}")
+        raise typer.Exit(1)
+
+    # Setup logging first
+    logger = setup_logging(verbose=verbose, quiet=quiet)
+
+    try:
+        # Load settings from config file and environment
+        overrides = {}
+
+        # CLI overrides (highest priority)
+        if threshold is not None:
+            overrides["z_score_threshold"] = threshold
+        if no_cache:
+            overrides["enable_cache"] = False
+        if workers is not None:
+            overrides["parallel_workers"] = workers
+        if verbose:
+            overrides["verbose"] = True
+        if quiet:
+            overrides["quiet"] = True
+
+        settings = load_settings(config_file=config, **overrides)
+
+        # Log configuration
+        logger.debug(f"Loaded settings: {settings.model_dump()}")
+
+        # Create analyzer
+        analyzer = CodebaseAnalyzer(root_dir=path, language=language, settings=settings)
+
+        # Clear cache if requested
+        if clear_cache:
+            if hasattr(analyzer, "cache") and analyzer.cache:
+                analyzer.cache.clear()
+                console.print("[yellow]Cache cleared[/yellow]")
+
+        # Run analysis
+        reports = analyzer.analyze()
+
+        # Handle results based on format
+        if reports:
+            if explain:
+                # --explain mode: deep-dive on matching file(s)
+                analyzer.print_explain(reports, explain)
+            elif fmt == "json":
+                # JSON to stdout
+                print(analyzer.format_json(reports))
+            elif fmt == "csv":
+                # CSV to stdout
+                print(analyzer.format_csv(reports), end="")
+            elif fmt == "quiet":
+                # Just file paths
+                print(analyzer.format_quiet(reports))
+            else:
+                # Default rich output: summary + detailed report
+                analyzer.print_summary(reports, top_n=top)
+                analyzer.print_report(reports, top_n=top)
+
+            # Export JSON report
+            if output is not None:
+                analyzer.export_json(reports, filename=str(output))
+            elif fmt == "rich":
+                # Auto-save with timestamp when using rich output
+                ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+                auto_path = f"analysis_report_{ts}.json"
+                analyzer.export_json(reports, filename=auto_path)
+
+            # --fail-above CI gating
+            if fail_above is not None:
+                max_score = max(r.overall_score for r in reports)
+                if max_score > fail_above:
+                    if fmt == "rich":
+                        console.print(
+                            f"\n[red]FAIL:[/red] Max score {max_score:.3f} "
+                            f"exceeds threshold {fail_above:.3f}"
+                        )
+                    raise typer.Exit(1)
+
+            if fmt == "rich":
+                console.print()
+                console.print("[bold green]ANALYSIS COMPLETE[/bold green]")
+                console.print()
+        else:
+            if fmt == "json":
+                print("[]")
+            elif fmt == "csv":
+                print("file,overall_score,confidence,structural_entropy,"
+                      "network_centrality,churn_volatility,semantic_coherence,"
+                      "cognitive_load,anomaly_flags")
+            elif fmt == "rich":
+                console.print(
+                    "[bold green]No anomalies detected - codebase looks clean![/bold green]"
+                )
+            raise typer.Exit(0)
+
+    except typer.Exit:
+        raise  # Re-raise typer exits (--fail-above, no anomalies, etc.)
+
+    except ShannonInsightError as e:
+        # Handle known errors
+        logger.error(f"{e.__class__.__name__}: {e}")
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except KeyboardInterrupt:
+        logger.info("Analysis interrupted by user")
+        console.print("\n[yellow]Analysis interrupted[/yellow]")
+        raise typer.Exit(130)
+
+    except Exception as e:
+        # Handle unexpected errors
+        logger.exception("Unexpected error during analysis")
+        console.print(f"[red]Unexpected error:[/red] {e}")
+        if verbose:
+            console.print_exception()
+        raise typer.Exit(1)
+
+
+@app.command()
+def cache_info():
+    """Show cache information and statistics."""
+    from .config import default_settings
+    from .cache import AnalysisCache
+
+    cache = AnalysisCache(
+        cache_dir=default_settings.cache_dir,
+        ttl_hours=default_settings.cache_ttl_hours,
+        enabled=default_settings.enable_cache,
+    )
+
+    stats = cache.stats()
+
+    console.print("[bold cyan]Shannon Insight Cache Info[/bold cyan]")
+    console.print()
+
+    if stats.get("enabled"):
+        console.print(f"Status: [green]Enabled[/green]")
+        console.print(f"Directory: [blue]{stats.get('directory', 'N/A')}[/blue]")
+        console.print(f"Entries: [yellow]{stats.get('size', 0)}[/yellow]")
+        console.print(f"Size: [yellow]{stats.get('volume', 0)} bytes[/yellow]")
+    else:
+        console.print(f"Status: [red]Disabled[/red]")
+
+
+@app.command()
+def cache_clear():
+    """Clear the analysis cache."""
+    from .config import default_settings
+    from .cache import AnalysisCache
+
+    cache = AnalysisCache(
+        cache_dir=default_settings.cache_dir,
+        ttl_hours=default_settings.cache_ttl_hours,
+        enabled=default_settings.enable_cache,
+    )
+
+    if not default_settings.enable_cache:
+        console.print("[yellow]Cache is disabled[/yellow]")
+        raise typer.Exit(0)
+
+    cache.clear()
+    console.print("[green]Cache cleared successfully[/green]")
+
+
+if __name__ == "__main__":
+    app()
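Since the whole command is implemented as a single Typer callback, it can be exercised end to end with Typer's bundled test runner (typer.testing.CliRunner). The sketch below is illustrative and relies only on flags defined above; both invocations exit before a full analysis would run.

from typer.testing import CliRunner

from shannon_insight.cli import app

runner = CliRunner()

def test_version_flag() -> None:
    # --version prints the version banner and exits 0 before any analysis starts.
    result = runner.invoke(app, ["--version"])
    assert result.exit_code == 0

def test_conflicting_verbosity_flags() -> None:
    # --verbose and --quiet are validated as mutually exclusive and exit 1.
    result = runner.invoke(app, [".", "--verbose", "--quiet"])
    assert result.exit_code == 1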
shannon_insight/config.py
ADDED
@@ -0,0 +1,235 @@
+"""
+Configuration management for Shannon Insight.
+
+Uses pydantic-settings for type-safe configuration with automatic validation.
+"""
+
+from pathlib import Path
+from typing import List, Optional
+
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class AnalysisSettings(BaseSettings):
+    """
+    Type-safe configuration with automatic validation.
+
+    Configuration hierarchy (highest to lowest priority):
+    1. CLI arguments (merged manually in CLI)
+    2. Environment variables (SHANNON_*)
+    3. TOML file (./shannon-insight.toml or ~/.shannon-insight.toml)
+    4. Defaults defined in Field()
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="SHANNON_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+    # ==================== Anomaly Detection ====================
+
+    z_score_threshold: float = Field(
+        default=1.5,
+        gt=0.0,
+        lt=10.0,
+        description="Z-score threshold for anomaly detection",
+    )
+
+    # ==================== PageRank ====================
+
+    pagerank_damping: float = Field(
+        default=0.85, ge=0.0, le=1.0, description="PageRank damping factor"
+    )
+
+    pagerank_iterations: int = Field(
+        default=20, ge=1, le=100, description="Maximum PageRank iterations"
+    )
+
+    pagerank_tolerance: float = Field(
+        default=1e-6, gt=0.0, description="PageRank convergence tolerance"
+    )
+
+    # ==================== Signal Fusion ====================
+
+    fusion_weights: List[float] = Field(
+        default=[0.2, 0.25, 0.2, 0.15, 0.2],
+        description="Signal fusion weights [entropy, centrality, churn, coherence, cognitive]",
+    )
+
+    fusion_normalize: bool = Field(
+        default=True, description="Normalize weights to sum to 1.0"
+    )
+
+    # ==================== File Filtering ====================
+
+    exclude_patterns: List[str] = Field(
+        default=[
+            "*_test.go",
+            "*_test.ts",
+            "*.test.ts",
+            "*.spec.ts",
+            "vendor/*",
+            "node_modules/*",
+            "dist/*",
+            "build/*",
+            ".git/*",
+            "venv/*",
+            ".venv/*",
+            "__pycache__/*",
+            ".tox/*",
+            ".mypy_cache/*",
+        ],
+        description="File patterns to exclude from analysis",
+    )
+
+    max_file_size_mb: float = Field(
+        default=10.0, gt=0.0, le=100.0, description="Maximum file size in MB"
+    )
+
+    max_files: int = Field(
+        default=10000, gt=0, le=100000, description="Maximum number of files to analyze"
+    )
+
+    # ==================== Performance ====================
+
+    parallel_workers: Optional[int] = Field(
+        default=None,
+        ge=1,
+        le=32,
+        description="Number of parallel workers (None = auto-detect)",
+    )
+
+    timeout_seconds: int = Field(
+        default=10, ge=1, le=300, description="Timeout for file operations in seconds"
+    )
+
+    # ==================== Cache ====================
+
+    enable_cache: bool = Field(
+        default=True, description="Enable caching for faster repeated analysis"
+    )
+
+    cache_dir: str = Field(default=".shannon-cache", description="Cache directory path")
+
+    cache_ttl_hours: int = Field(
+        default=24,
+        ge=0,
+        le=720,  # 30 days max
+        description="Cache time-to-live in hours",
+    )
+
+    # ==================== Logging ====================
+
+    verbose: bool = Field(default=False, description="Enable verbose (DEBUG) logging")
+
+    quiet: bool = Field(default=False, description="Suppress all but ERROR logging")
+
+    log_file: Optional[str] = Field(
+        default=None, description="Log file path (optional)"
+    )
+
+    # ==================== Security ====================
+
+    allow_hidden_files: bool = Field(
+        default=False, description="Allow analysis of hidden files (starting with .)"
+    )
+
+    block_system_dirs: bool = Field(
+        default=True, description="Block access to system directories"
+    )
+
+    follow_symlinks: bool = Field(
+        default=False, description="Follow symbolic links during scanning"
+    )
+
+    # ==================== Validators ====================
+
+    @field_validator("fusion_weights")
+    @classmethod
+    def validate_fusion_weights(cls, v: List[float]) -> List[float]:
+        """Validate fusion weights."""
+        if len(v) != 5:
+            raise ValueError("fusion_weights must have exactly 5 values")
+
+        if any(w < 0 for w in v):
+            raise ValueError("fusion_weights must be non-negative")
+
+        weight_sum = sum(v)
+        if weight_sum == 0:
+            raise ValueError("fusion_weights cannot all be zero")
+
+        # Normalize to sum to 1.0
+        return [w / weight_sum for w in v]
+
+    @field_validator("cache_dir")
+    @classmethod
+    def validate_cache_dir(cls, v: str) -> str:
+        """Validate cache directory."""
+        # Convert to absolute path
+        cache_path = Path(v)
+        if not cache_path.is_absolute():
+            cache_path = Path.cwd() / cache_path
+        return str(cache_path)
+
+    @field_validator("parallel_workers")
+    @classmethod
+    def validate_parallel_workers(cls, v: Optional[int]) -> Optional[int]:
+        """Validate parallel workers count."""
+        if v is not None and v < 1:
+            raise ValueError("parallel_workers must be at least 1")
+        return v
+
+    # ==================== Computed Properties ====================
+
+    @property
+    def max_file_size_bytes(self) -> int:
+        """Get max file size in bytes."""
+        return int(self.max_file_size_mb * 1024 * 1024)
+
+    @property
+    def cache_ttl_seconds(self) -> int:
+        """Get cache TTL in seconds."""
+        return self.cache_ttl_hours * 3600
+
+
+def load_settings(config_file: Optional[Path] = None, **overrides) -> AnalysisSettings:
+    """
+    Load settings from config file and environment variables.
+
+    Args:
+        config_file: Optional TOML config file path
+        **overrides: Manual overrides (typically from CLI args)
+
+    Returns:
+        Loaded settings with all overrides applied
+    """
+    # Try to load from TOML file
+    if config_file and config_file.exists():
+        # pydantic-settings doesn't natively support TOML
+        # We'll load it manually and pass as overrides
+        try:
+            tomli = __import__("tomli")
+            with open(config_file, "rb") as f:
+                toml_data = tomli.load(f)
+            # Merge TOML data with overrides (overrides take precedence)
+            merged = {**toml_data, **overrides}
+            return AnalysisSettings(**merged)
+        except ImportError:
+            # tomli not available, fall back to env vars + overrides
+            pass
+        except Exception as e:
+            # TOML parsing failed, warn and continue
+            import warnings
+
+            warnings.warn(f"Failed to load config file {config_file}: {e}")
+
+    # Load from environment variables + overrides
+    return AnalysisSettings(**overrides)
+
+
+# Default settings instance
+default_settings = AnalysisSettings()
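To make the precedence in the AnalysisSettings docstring concrete, the short sketch below walks through the layers; the environment variable name follows the SHANNON_ prefix configured in model_config, and the numeric values are arbitrary examples.

import os

from shannon_insight.config import AnalysisSettings, load_settings

# Field() default, with no SHANNON_* variables set.
assert AnalysisSettings().z_score_threshold == 1.5

# Environment layer: pydantic-settings maps SHANNON_Z_SCORE_THRESHOLD to z_score_threshold.
os.environ["SHANNON_Z_SCORE_THRESHOLD"] = "2.5"
assert load_settings().z_score_threshold == 2.5

# CLI-style keyword override (what cli.py passes) wins over the environment.
assert load_settings(z_score_threshold=3.0).z_score_threshold == 3.0

# fusion_weights are re-normalized by the validator, so any positive weights end up summing to 1.0.
assert load_settings(fusion_weights=[2, 2, 2, 2, 2]).fusion_weights == [0.2, 0.2, 0.2, 0.2, 0.2]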