shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: shannon-codebase-insight
3
+ Version: 0.4.0
4
+ Summary: Multi-signal codebase quality analyzer using mathematical primitives
5
+ Author: Naman Agarwal
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/namanagarwal/shannon-insight
8
+ Project-URL: Documentation, https://github.com/namanagarwal/shannon-insight#readme
9
+ Project-URL: Repository, https://github.com/namanagarwal/shannon-insight
10
+ Project-URL: Bug Tracker, https://github.com/namanagarwal/shannon-insight/issues
11
+ Keywords: code-quality,static-analysis,codebase-analysis,metrics,entropy,mathematics
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Software Development :: Quality Assurance
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Environment :: Console
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: numpy>=1.22.0
28
+ Requires-Dist: scikit-learn>=1.0.0
29
+ Requires-Dist: scipy>=1.7.0
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: pydantic>=2.0.0
32
+ Requires-Dist: pydantic-settings>=2.0.0
33
+ Requires-Dist: diskcache>=5.6.0
34
+ Requires-Dist: typer>=0.9.0
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
37
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
38
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
39
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
40
+ Requires-Dist: build>=0.10.0; extra == "dev"
41
+ Requires-Dist: twine>=4.0.0; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Shannon Insight
45
+
46
+ [![CI](https://github.com/namanagarwal/shannon-insight/actions/workflows/ci.yml/badge.svg)](https://github.com/namanagarwal/shannon-insight/actions/workflows/ci.yml)
47
+ [![PyPI](https://img.shields.io/pypi/v/shannon-codebase-insight)](https://pypi.org/project/shannon-codebase-insight/)
48
+ [![Python](https://img.shields.io/pypi/pyversions/shannon-codebase-insight)](https://pypi.org/project/shannon-codebase-insight/)
49
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
50
+
51
+ Multi-signal codebase quality analyzer using information-theoretic primitives. Named after Claude Shannon, father of information theory.
52
+
53
+ ## Quick Start
54
+
55
+ ```bash
56
+ pip install shannon-codebase-insight
57
+ shannon-insight /path/to/codebase
58
+ shannon-insight . --format json | jq .
59
+ ```
60
+
61
+ ## What It Does
62
+
63
+ Shannon Insight scans your codebase and computes **5 orthogonal quality primitives** per file, then fuses them with consistency-weighted scoring to surface files that need attention:
64
+
65
+ | Primitive | What it measures | High means |
66
+ |-----------|-----------------|------------|
67
+ | **Structural Entropy** | AST node type distribution | Chaotic organization |
68
+ | **Network Centrality** | PageRank on dependency graph | Critical hub |
69
+ | **Churn Volatility** | File modification recency | Recently changed / unstable |
70
+ | **Semantic Coherence** | Import/export focus | Focused module (low = too many unrelated concerns) |
71
+ | **Cognitive Load** | Functions x complexity x nesting | Overloaded file |
72
+
73
+ ## Output Formats
74
+
75
+ ```bash
76
+ # Rich terminal output (default) with summary dashboard
77
+ shannon-insight .
78
+
79
+ # Machine-readable JSON
80
+ shannon-insight . --format json
81
+
82
+ # Pipe-friendly CSV
83
+ shannon-insight . --format csv
84
+
85
+ # Just file paths (one per line)
86
+ shannon-insight . --format quiet
87
+
88
+ # Deep-dive on a specific file
89
+ shannon-insight . --explain complex.go
90
+
91
+ # Export to file
92
+ shannon-insight . --output report.json
93
+ ```
94
+
95
+ ## CI Integration
96
+
97
+ Use `--fail-above` to gate CI pipelines on code quality:
98
+
99
+ ```bash
100
+ # Fail if any file scores above 2.0
101
+ shannon-insight . --format quiet --fail-above 2.0
102
+ ```
103
+
104
+ Example GitHub Actions step:
105
+
106
+ ```yaml
107
+ - name: Code quality gate
108
+ run: shannon-insight . --fail-above 2.0 --format quiet
109
+ ```
110
+
111
+ ## Configuration
112
+
113
+ Create `shannon-insight.toml` in your project root:
114
+
115
+ ```toml
116
+ z_score_threshold = 1.5
117
+ fusion_weights = [0.2, 0.25, 0.2, 0.15, 0.2]
118
+ exclude_patterns = ["*_test.go", "vendor/*", "node_modules/*"]
119
+ max_file_size_mb = 10.0
120
+ enable_cache = true
121
+ ```
122
+
123
+ Or use environment variables with `SHANNON_` prefix:
124
+
125
+ ```bash
126
+ export SHANNON_Z_SCORE_THRESHOLD=2.0
127
+ export SHANNON_ENABLE_CACHE=false
128
+ ```
129
+
130
+ ## CLI Options
131
+
132
+ ```
133
+ Options:
134
+ PATH Path to codebase directory [default: .]
135
+ -l, --language TEXT Language (auto, python, go, typescript, react, javascript)
136
+ -t, --top INTEGER Number of top files to display [1-1000]
137
+ -o, --output FILE Export JSON report to file
138
+ -f, --format TEXT Output format: rich, json, csv, quiet
139
+ -e, --explain TEXT Deep-dive on matching file(s)
140
+ --fail-above FLOAT CI gate: exit 1 if max score exceeds threshold
141
+ --threshold FLOAT Z-score threshold for anomaly detection
142
+ -c, --config FILE TOML configuration file
143
+ -v, --verbose Enable DEBUG logging
144
+ -q, --quiet Suppress all but ERROR logging
145
+ --no-cache Disable caching
146
+ --clear-cache Clear cache before running
147
+ -w, --workers INTEGER Parallel workers [1-32]
148
+ --version Show version and exit
149
+
150
+ Commands:
151
+ cache-info Show cache statistics
152
+ cache-clear Clear analysis cache
153
+ ```
154
+
155
+ ## Supported Languages
156
+
157
+ - **Python** - `.py` files
158
+ - **Go** - `.go` files
159
+ - **TypeScript/React** - `.ts`, `.tsx` files
160
+ - **JavaScript** - `.js`, `.jsx` files (uses TypeScript scanner)
161
+
162
+ Language is auto-detected by default. Override with `--language`.
163
+
164
+ ## How It Works
165
+
166
+ ```
167
+ CodebaseAnalyzer
168
+ Layer 1: Scanning - Language-specific file parsing
169
+ Layer 2: Extraction - Compute 5 orthogonal primitives per file
170
+ Layer 3: Detection - Z-score normalization + anomaly thresholding
171
+ Layer 4: Fusion - Consistency-weighted signal combination
172
+ Layer 5: Recommendations - Root cause attribution + actionable advice
173
+ ```
174
+
175
+ Signal fusion uses coefficient of variation to penalize inconsistent signals:
176
+
177
+ ```
178
+ consistency = 1 / (1 + CV)
179
+ final_score = consistency * |weighted_average|
180
+ ```
181
+
182
+ See [docs/MATHEMATICAL_FOUNDATION.md](docs/MATHEMATICAL_FOUNDATION.md) for the full mathematical framework.
183
+
184
+ ## Development
185
+
186
+ ```bash
187
+ git clone https://github.com/namanagarwal/shannon-insight.git
188
+ cd shannon-insight
189
+ python -m venv .venv && source .venv/bin/activate
190
+ pip install -e ".[dev]"
191
+
192
+ make test # Run tests with coverage
193
+ make lint # Run ruff linter
194
+ make format # Format with ruff
195
+ make type-check # Run mypy
196
+ make all # Format + lint + type-check + test
197
+ ```
198
+
199
+ ## Contributing
200
+
201
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
202
+
203
+ ## License
204
+
205
+ MIT License - see [LICENSE](LICENSE)
206
+
207
+ ## Credits
208
+
209
+ Created by Naman Agarwal. Inspired by Claude Shannon's information theory, PageRank (Page & Brin), and cyclomatic complexity (McCabe).
@@ -0,0 +1,37 @@
1
+ shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE,sha256=PVdI9e_PoIiAyouwB69rVtOD0WEGAFn-G5z84L4YXko,1070
2
+ shannon_insight/__init__.py,sha256=cVuT2YFKbs2ouJ_Dhd9IZlaHQSxcECrSssiN1MH2IkE,703
3
+ shannon_insight/cache.py,sha256=GxbG0GiqqKpCSwlolwBonx19UqcPK6m9-37oHSfRxm4,5858
4
+ shannon_insight/cli.py,sha256=msSzObpLKb3yOzLW5afPc04TwNzxbSi-_uI2WX5QGi4,10058
5
+ shannon_insight/config.py,sha256=WPnUJ-iuM1Uh2Hy1OMauedIaW2hOIf76o43zwLcaxVQ,7140
6
+ shannon_insight/core.py,sha256=pX5pNXNUZ4sWNGuHN6fFyreN2YDw9zzsJBIuWBCPKVc,20728
7
+ shannon_insight/file_ops.py,sha256=P4F-mpEYCRO-tyfOtJNXcyCk9gVgnJ1ovEZm50Z8ETU,5831
8
+ shannon_insight/logging_config.py,sha256=qG6f2VyDeMtE8VCoRFOjPeAbC11fVJXKMpxicwyk0zg,2389
9
+ shannon_insight/models.py,sha256=hNd5qmHcVtd7y4DN1vsN0lKHZi91alQ44ikXkicpBoQ,959
10
+ shannon_insight/py.typed,sha256=g4nn5g02a75-ZjbL9uICetnLeAGCH1GCSLgvU_YxCAQ,79
11
+ shannon_insight/security.py,sha256=6bLkjc1Af_rvaGwHJrKGicyfr4Mpmo3ylozsvub4h6E,8053
12
+ shannon_insight/analyzers/__init__.py,sha256=M5c7yP_EQ8UGPgvndHdyzaQBwyPGa7swOeNBlr3I9o0,272
13
+ shannon_insight/analyzers/base.py,sha256=OPhneFHU0uYG7wE6X3BtKAsHbxzUfauZss2sNBPrY4k,5996
14
+ shannon_insight/analyzers/go_analyzer.py,sha256=SJ2hWtx9Tn80M7NLUo5OGJyjTTP_NXibwf8MJfn4Nvk,6161
15
+ shannon_insight/analyzers/python_analyzer.py,sha256=7IIjAbZOUaktprQjONE5HY5c9WF_nHLI_3Qp4rrPdJI,6616
16
+ shannon_insight/analyzers/typescript_analyzer.py,sha256=C4SZxBFD2Edj727ge1WnJu5Jeavla6DjDRRg2V0Ci28,6576
17
+ shannon_insight/exceptions/__init__.py,sha256=sxPsbQZHsXEEgeMZNEyM6NAuGYiNQXpHRB_Mu3jW9rM,666
18
+ shannon_insight/exceptions/analysis.py,sha256=hmJkELAWmVltbXFNLH9h9JyBTEtDWYqdnarz8rbY1t0,2583
19
+ shannon_insight/exceptions/base.py,sha256=9A53D2OZX_2Dc93UqWyzIu3V3M7eb_MqM-No5YHywWw,575
20
+ shannon_insight/exceptions/config.py,sha256=Vo-k7Fg8Zk6sL7_6RgXuIdjF6bK9EMsHT1AdGvmR0ME,1431
21
+ shannon_insight/math/__init__.py,sha256=eGYJRFh-QF_w7DK5QsqORoaK5K5Gh97EydSo-slfDjw,331
22
+ shannon_insight/math/entropy.py,sha256=_V4tNyPB3mRB1fbm9L3ub0LfMPdl8rhmwBJjER1V72g,3947
23
+ shannon_insight/math/fusion.py,sha256=NCvhk1RdRyM2RwAUr2Mhv24QSPcrcWoNgmbZuK1lPY0,3616
24
+ shannon_insight/math/graph.py,sha256=2649DhTtsodmQiPsU5piYcT8AU0uKSpMtl1NOpWyPec,6800
25
+ shannon_insight/math/robust.py,sha256=xMtRmK--WZH0xlMU6WuSqB2FsFF72gY4cMvR8pegaSM,3157
26
+ shannon_insight/math/statistics.py,sha256=hnfUwoQ8iD1eUm_DwA59CuoyzcXC6fm2qqjOmQRQW40,4297
27
+ shannon_insight/primitives/__init__.py,sha256=rgCX9YpgvCYM2mvQYkxKgeWnLnrrGCK7f-E1OvVSzRE,335
28
+ shannon_insight/primitives/detector.py,sha256=GeSb_opcKdm8E7KBYjK3trl0NI8MaVVLN5ISwZgWWNc,11679
29
+ shannon_insight/primitives/extractor.py,sha256=7zcOWTwC91mevyQfSc6UhCXynEUp2tTyjZi-dU0-Bhs,10285
30
+ shannon_insight/primitives/fusion.py,sha256=axDi5WWK4UWmd-MPA5BAy_X1PHoIET49MBboGUXn0fA,11895
31
+ shannon_insight/primitives/recommendations.py,sha256=fe-tQiYwPmSoLh75Bu7_poM0TAKpibxbuDnd_rntYSk,6221
32
+ shannon_insight/utils/__init__.py,sha256=UOdKOJkQ6Nleq5cb4QELuSNb4mJZOp7Zuym1cg4rWnE,24
33
+ shannon_codebase_insight-0.4.0.dist-info/METADATA,sha256=JX1ink0O5xSaciFj2kttsB6vAkCtiRAxTHd4Ildhk9M,6955
34
+ shannon_codebase_insight-0.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
35
+ shannon_codebase_insight-0.4.0.dist-info/entry_points.txt,sha256=WMKvN0qPS7nAUTGhAR8f9Or_3VfwJDLn3NjFPoy3DhE,284
36
+ shannon_codebase_insight-0.4.0.dist-info/top_level.txt,sha256=to3Vz1EmonG4LYEKYtTbqI7gqdH9_fHgEKbYdMUyDp4,16
37
+ shannon_codebase_insight-0.4.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,7 @@
1
+ [console_scripts]
2
+ shannon-insight = shannon_insight.cli:app
3
+
4
+ [shannon_insight.languages]
5
+ go = shannon_insight.analyzers.go_analyzer:GoScanner
6
+ python = shannon_insight.analyzers.python_analyzer:PythonScanner
7
+ typescript = shannon_insight.analyzers.typescript_analyzer:TypeScriptScanner
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Naman Agarwal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ shannon_insight
@@ -0,0 +1,25 @@
1
+ """
2
+ Shannon Insight - Multi-Signal Codebase Quality Analyzer
3
+
4
+ A mathematical approach to code quality analysis using five orthogonal primitives:
5
+ 1. Structural Entropy - Disorder in code organization
6
+ 2. Network Centrality - Importance in dependency graph
7
+ 3. Churn Volatility - Instability of change patterns
8
+ 4. Semantic Coherence - Conceptual focus
9
+ 5. Cognitive Load - Mental effort to understand
10
+
11
+ Named after Claude Shannon, father of information theory.
12
+ """
13
+
14
+ __version__ = "0.4.0"
15
+ __author__ = "Naman Agarwal"
16
+
17
+ from .core import CodebaseAnalyzer
18
+ from .models import FileMetrics, AnomalyReport, Primitives
19
+
20
+ __all__ = [
21
+ "CodebaseAnalyzer",
22
+ "Primitives",
23
+ "FileMetrics",
24
+ "AnomalyReport",
25
+ ]
@@ -0,0 +1,8 @@
1
+ """Language-specific analyzers"""
2
+
3
+ from .base import BaseScanner
4
+ from .go_analyzer import GoScanner
5
+ from .typescript_analyzer import TypeScriptScanner
6
+ from .python_analyzer import PythonScanner
7
+
8
+ __all__ = ["BaseScanner", "GoScanner", "TypeScriptScanner", "PythonScanner"]
@@ -0,0 +1,215 @@
1
+ """Base scanner class for language-agnostic functionality"""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from ..models import FileMetrics
8
+ from ..config import AnalysisSettings, default_settings
9
+ from ..logging_config import get_logger
10
+ from ..exceptions import FileAccessError, ParsingError
11
+ from ..file_ops import should_skip_file
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
class BaseScanner(ABC):
    """Abstract base class for language-specific scanners.

    This class owns the directory walk: it globs for the configured
    extensions under ``root_dir``, applies the skip rules and the size and
    count limits from ``settings``, and collects one ``FileMetrics`` per
    successfully analyzed file. Subclasses implement the per-language hooks
    (``_should_skip``, ``_analyze_file`` and the ``_count_*`` /
    ``_extract_*`` / ``_estimate_*`` helpers).
    """

    def __init__(
        self,
        root_dir: str,
        extensions: List[str],
        settings: Optional[AnalysisSettings] = None
    ):
        """
        Initialize scanner.

        Args:
            root_dir: Root directory to scan
            extensions: File extensions to include (e.g., ['.go', '.py'])
            settings: Analysis settings; the module-level ``default_settings``
                is used when omitted
        """
        self.root_dir = Path(root_dir)
        self.extensions = extensions
        self.settings = settings or default_settings
        logger.debug(f"Initialized {self.__class__.__name__} for {self.root_dir}")

    def scan(self) -> List[FileMetrics]:
        """
        Scan all source files and extract metrics.

        Files are visited extension by extension via a recursive glob. A file
        is skipped when the subclass hook ``_should_skip`` or the configured
        exclusion patterns reject it, or when it is larger than
        ``settings.max_file_size_bytes``. Per-file failures are logged and
        counted but never abort the scan.

        Scanning stops completely once ``settings.max_files`` files have been
        analyzed successfully (skipped and errored files do not count toward
        the limit).

        Returns:
            List of file metrics for analyzed files
        """
        files = []
        files_scanned = 0
        files_skipped = 0
        files_errored = 0
        limit_reached = False

        for ext in self.extensions:
            # Once the limit is hit there is nothing left to do for the
            # remaining extensions: starting another recursive walk would
            # only repeat the warning below.
            if limit_reached:
                break

            for filepath in self.root_dir.rglob(f"*{ext}"):
                # Check file count limit
                if files_scanned >= self.settings.max_files:
                    logger.warning(f"Reached max files limit ({self.settings.max_files})")
                    limit_reached = True
                    break

                # Skip based on custom logic
                if self._should_skip(filepath):
                    files_skipped += 1
                    logger.debug(f"Skipped (custom): {filepath}")
                    continue

                # Skip based on exclusion patterns
                if should_skip_file(filepath, self.settings.exclude_patterns):
                    files_skipped += 1
                    logger.debug(f"Skipped (pattern): {filepath}")
                    continue

                # Check file size
                try:
                    size = filepath.stat().st_size
                except OSError as e:
                    files_errored += 1
                    logger.warning(f"Cannot stat {filepath}: {e}")
                    continue
                if size > self.settings.max_file_size_bytes:
                    files_skipped += 1
                    logger.debug(f"Skipped (size): {filepath} ({size} bytes)")
                    continue

                # Analyze file
                try:
                    metrics = self._analyze_file(filepath)
                except FileAccessError as e:
                    files_errored += 1
                    logger.warning(f"Access error for {filepath}: {e.reason}")
                except ParsingError as e:
                    files_errored += 1
                    logger.warning(f"Parse error for {filepath}: {e.reason}")
                except Exception as e:
                    # Deliberate best-effort: one bad file must not abort
                    # the whole scan.
                    files_errored += 1
                    logger.error(f"Unexpected error analyzing {filepath}: {e}")
                else:
                    files.append(metrics)
                    files_scanned += 1
                    logger.debug(f"Analyzed: {filepath}")

        logger.info(f"Scan complete: {files_scanned} analyzed, {files_skipped} skipped, {files_errored} errors")
        return files

    @abstractmethod
    def _should_skip(self, filepath: Path) -> bool:
        """
        Determine if file should be skipped (e.g., tests, vendor).

        Args:
            filepath: File to check

        Returns:
            True if file should be skipped
        """
        pass

    @abstractmethod
    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """
        Extract all metrics from a single file.

        Args:
            filepath: File to analyze

        Returns:
            File metrics

        Raises:
            FileAccessError: If file cannot be read
            ParsingError: If file cannot be parsed
        """
        pass

    @abstractmethod
    def _count_tokens(self, content: str) -> int:
        """
        Count tokens in source code.

        Args:
            content: File content

        Returns:
            Token count
        """
        pass

    @abstractmethod
    def _extract_imports(self, content: str) -> List[str]:
        """
        Extract import statements.

        Args:
            content: File content

        Returns:
            List of import paths/names
        """
        pass

    @abstractmethod
    def _extract_exports(self, content: str) -> List[str]:
        """
        Extract exported identifiers.

        Args:
            content: File content

        Returns:
            List of exported names
        """
        pass

    @abstractmethod
    def _count_functions(self, content: str) -> int:
        """
        Count function declarations.

        Args:
            content: File content

        Returns:
            Function count
        """
        pass

    @abstractmethod
    def _estimate_complexity(self, content: str) -> float:
        """
        Estimate cyclomatic complexity.

        Args:
            content: File content

        Returns:
            Complexity score
        """
        pass

    def _max_nesting_depth(self, content: str) -> int:
        """
        Calculate maximum brace nesting depth.

        Every ``{`` opens a level and every ``}`` closes one; the deepest
        level reached is returned. Braces are counted wherever they appear,
        including inside string literals and comments, so the result is an
        approximation. Content without braces yields 0.

        Args:
            content: File content

        Returns:
            Maximum nesting depth
        """
        max_depth = 0
        current_depth = 0

        for char in content:
            if char == "{":
                current_depth += 1
                if current_depth > max_depth:
                    max_depth = current_depth
            elif char == "}" and current_depth > 0:
                # Never go below zero: an unmatched closing brace would
                # otherwise make all later nesting look shallower than it is.
                current_depth -= 1

        return max_depth
@@ -0,0 +1,150 @@
1
+ """Go language analyzer"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from collections import Counter
6
+ from typing import List, Optional
7
+
8
+ from .base import BaseScanner
9
+ from ..models import FileMetrics
10
+ from ..config import AnalysisSettings
11
+ from ..exceptions import FileAccessError
12
+ from ..logging_config import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class GoScanner(BaseScanner):
    """Scanner optimized for Go codebases.

    All metrics are regex-based approximations computed on the raw file
    text; no real Go parser is involved, so keywords that appear inside
    comments or string literals are counted as well (except in
    ``_count_tokens``, which strips them first).
    """

    # Directory names whose contents are never analyzed.
    _SKIP_DIRS = frozenset(
        ("vendor", "venv", ".venv", "__pycache__", ".git", ".tox", ".mypy_cache")
    )

    # Comments and literals, matched in a single left-to-right pass so that
    # a "//" inside a string is consumed as part of the string and a quote
    # inside a comment is consumed as part of the comment.
    _NON_CODE = re.compile(
        r"//[^\n]*"                 # line comment
        r"|/\*[\s\S]*?\*/"          # block comment
        r"|`[^`]*`"                 # raw string literal
        r'|"(?:\\.|[^"\\\n])*"'     # interpreted string literal
        r"|'(?:\\.|[^'\\\n])*'"     # rune literal
    )

    # "func Name(", "func Name[T any](" and "func (recv T) Name(".
    # Whitespace after "func" is required so that function literals and
    # function types ("func(") are not counted.
    _FUNC_DECL = re.compile(r"\bfunc\s+(?:\([^)]*\)\s*)?\w+\s*[(\[]")

    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
        """
        Initialize a scanner that only looks at ``.go`` files.

        Args:
            root_dir: Root directory to scan
            settings: Analysis settings (forwarded to the base class)
        """
        super().__init__(root_dir, extensions=[".go"], settings=settings)

    def _should_skip(self, filepath: Path) -> bool:
        """
        Skip test files, vendor, venv, and other non-project directories.

        Matching is done on whole path components below the scan root, so a
        file such as ``inventory/vendors.go`` is kept, and a scan root that
        itself lives under a directory called ``vendor`` or ``venv`` is not
        skipped wholesale.

        Args:
            filepath: File to check

        Returns:
            True if the file is a ``*_test.go`` file or lives in one of the
            excluded directories
        """
        try:
            relative = filepath.relative_to(self.root_dir)
        except ValueError:
            # Not located under the root; fall back to the path as given.
            relative = filepath

        if relative.name.endswith("_test.go"):
            return True
        return any(part in self._SKIP_DIRS for part in relative.parent.parts)

    def _analyze_file(self, filepath: Path) -> FileMetrics:
        """
        Extract all metrics from a Go file.

        Undecodable bytes are replaced rather than rejected, so any readable
        file yields metrics.

        Args:
            filepath: File to analyze (must be located under ``root_dir``)

        Returns:
            File metrics, with ``path`` expressed relative to ``root_dir``

        Raises:
            FileAccessError: If the file cannot be read or stat'ed
        """
        try:
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
            last_modified = filepath.stat().st_mtime
        except OSError as e:
            raise FileAccessError(filepath, f"Cannot read file: {e}") from e
        except Exception as e:
            raise FileAccessError(filepath, f"Unexpected error: {e}") from e

        lines = content.split("\n")

        return FileMetrics(
            path=str(filepath.relative_to(self.root_dir)),
            lines=len(lines),
            tokens=self._count_tokens(content),
            imports=self._extract_imports(content),
            exports=self._extract_exports(content),
            functions=self._count_functions(content),
            interfaces=self._count_interfaces(content),
            structs=self._count_structs(content),
            complexity_score=self._estimate_complexity(content),
            nesting_depth=self._max_nesting_depth(content),
            ast_node_types=self._extract_ast_node_types(content),
            last_modified=last_modified,
        )

    def _count_tokens(self, content: str) -> int:
        """
        Approximate token count for Go.

        Comments, string literals and rune literals are removed first; what
        remains is counted as identifier/number words plus the punctuation
        characters ``{ } ( ) [ ] ; , .``. Other operators are not counted.

        Args:
            content: File content

        Returns:
            Token count
        """
        # Replace with a space (not "") so neighbours such as "a/**/b" do
        # not fuse into a single word.
        code_only = self._NON_CODE.sub(" ", content)
        return len(re.findall(r"\w+|[{}()\[\];,.]", code_only))

    def _extract_imports(self, content: str) -> List[str]:
        """
        Extract Go import statements.

        Handles single imports, optionally aliased (``import "fmt"``,
        ``import f "fmt"``, ``import _ "pkg"``), and parenthesized import
        groups. Single imports are listed first, then grouped ones.

        Args:
            content: File content

        Returns:
            List of import paths
        """
        # Single-line form, with an optional alias / "_" / "." before the path.
        imports = [
            match.group(1)
            for match in re.finditer(r'\bimport\s+(?:[\w.]+\s+)?"([^"]+)"', content)
        ]

        # Grouped form: import (\n "foo"\n alias "bar"\n)
        for match in re.finditer(r"\bimport\s*\([^)]+\)", content):
            imports.extend(re.findall(r'"([^"]+)"', match.group(0)))

        return imports

    def _extract_exports(self, content: str) -> List[str]:
        """
        Extract exported identifiers (capitalized names in Go).

        Only declarations that start at the beginning of a line are
        considered: functions without a receiver, types, and single-line
        ``const`` / ``var`` declarations.

        Args:
            content: File content

        Returns:
            List of exported names, grouped by kind in the order
            functions, types, constants, variables
        """
        declaration_patterns = (
            r"^func\s+([A-Z]\w*)\s*\(",      # func ExportedName(...)
            r"^type\s+([A-Z]\w*)\s+",        # type ExportedName ...
            r"^const\s+([A-Z]\w*)\s*[=\n]",  # const ExportedName
            r"^var\s+([A-Z]\w*)\s*[=\n]",    # var ExportedName
        )

        exports = []
        for pattern in declaration_patterns:
            exports.extend(re.findall(pattern, content, re.MULTILINE))
        return exports

    def _count_functions(self, content: str) -> int:
        """
        Count function declarations (including methods with receivers).

        Function literals and function types (``func(...)``) are not counted.

        Args:
            content: File content

        Returns:
            Function count
        """
        return len(self._FUNC_DECL.findall(content))

    def _count_interfaces(self, content: str) -> int:
        """Count ``type Name interface {`` declarations."""
        return len(re.findall(r"\btype\s+\w+\s+interface\s*\{", content))

    def _count_structs(self, content: str) -> int:
        """Count ``type Name struct {`` declarations."""
        return len(re.findall(r"\btype\s+\w+\s+struct\s*\{", content))

    def _estimate_complexity(self, content: str) -> float:
        """
        Estimate cyclomatic complexity for Go.

        Starts from a base of 1 and adds one for every occurrence of
        ``if``, ``else``, ``case``, ``for``, ``range``, ``select {``,
        ``&&`` and ``||`` in the file text.

        Args:
            content: File content

        Returns:
            Complexity score for the whole file
        """
        decision_patterns = (
            r"\bif\s+",
            r"\belse\b",
            r"\bcase\s+",
            r"\bfor\s+",
            r"\brange\s+",
            r"\bselect\s*\{",
            r"&&",
            r"\|\|",
        )
        # 1 is the base complexity of straight-line code.
        return 1 + sum(len(re.findall(pattern, content)) for pattern in decision_patterns)

    def _extract_ast_node_types(self, content: str) -> Counter:
        """
        Extract distribution of AST node types for Go.

        The "AST" is approximated with the same regex counts used by the
        other helpers of this class.

        Args:
            content: File content

        Returns:
            Counter mapping node-type name to number of occurrences
        """
        node_types = Counter()

        # Counts shared with the dedicated helpers
        node_types["function"] = self._count_functions(content)
        node_types["struct"] = self._count_structs(content)
        node_types["interface"] = self._count_interfaces(content)
        node_types["import"] = len(self._extract_imports(content))
        node_types["export"] = len(self._extract_exports(content))

        # Statement-level keywords
        keyword_patterns = {
            "if": r"\bif\s+",
            "for": r"\bfor\s+",
            "range": r"\brange\s+",
            "return": r"\breturn\b",
            "defer": r"\bdefer\b",
            "go": r"\bgo\s+\w+\s*\(",
            "chan": r"\bchan\s+\w+",
        }
        for name, pattern in keyword_patterns.items():
            node_types[name] = len(re.findall(pattern, content))

        return node_types