shannon_codebase_insight-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
- shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
- shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
- shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
- shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
- shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
- shannon_insight/__init__.py +25 -0
- shannon_insight/analyzers/__init__.py +8 -0
- shannon_insight/analyzers/base.py +215 -0
- shannon_insight/analyzers/go_analyzer.py +150 -0
- shannon_insight/analyzers/python_analyzer.py +169 -0
- shannon_insight/analyzers/typescript_analyzer.py +162 -0
- shannon_insight/cache.py +214 -0
- shannon_insight/cli.py +333 -0
- shannon_insight/config.py +235 -0
- shannon_insight/core.py +546 -0
- shannon_insight/exceptions/__init__.py +31 -0
- shannon_insight/exceptions/analysis.py +78 -0
- shannon_insight/exceptions/base.py +18 -0
- shannon_insight/exceptions/config.py +48 -0
- shannon_insight/file_ops.py +218 -0
- shannon_insight/logging_config.py +98 -0
- shannon_insight/math/__init__.py +15 -0
- shannon_insight/math/entropy.py +133 -0
- shannon_insight/math/fusion.py +109 -0
- shannon_insight/math/graph.py +209 -0
- shannon_insight/math/robust.py +106 -0
- shannon_insight/math/statistics.py +159 -0
- shannon_insight/models.py +48 -0
- shannon_insight/primitives/__init__.py +13 -0
- shannon_insight/primitives/detector.py +318 -0
- shannon_insight/primitives/extractor.py +278 -0
- shannon_insight/primitives/fusion.py +373 -0
- shannon_insight/primitives/recommendations.py +158 -0
- shannon_insight/py.typed +2 -0
- shannon_insight/security.py +284 -0
- shannon_insight/utils/__init__.py +1 -0
shannon_codebase_insight-0.4.0.dist-info/METADATA
@@ -0,0 +1,209 @@
+Metadata-Version: 2.4
+Name: shannon-codebase-insight
+Version: 0.4.0
+Summary: Multi-signal codebase quality analyzer using mathematical primitives
+Author: Naman Agarwal
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/namanagarwal/shannon-insight
+Project-URL: Documentation, https://github.com/namanagarwal/shannon-insight#readme
+Project-URL: Repository, https://github.com/namanagarwal/shannon-insight
+Project-URL: Bug Tracker, https://github.com/namanagarwal/shannon-insight/issues
+Keywords: code-quality,static-analysis,codebase-analysis,metrics,entropy,mathematics
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Quality Assurance
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Environment :: Console
+Classifier: Typing :: Typed
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.22.0
+Requires-Dist: scikit-learn>=1.0.0
+Requires-Dist: scipy>=1.7.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pydantic-settings>=2.0.0
+Requires-Dist: diskcache>=5.6.0
+Requires-Dist: typer>=0.9.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: build>=0.10.0; extra == "dev"
+Requires-Dist: twine>=4.0.0; extra == "dev"
+Dynamic: license-file
+
+# Shannon Insight
+
+[](https://github.com/namanagarwal/shannon-insight/actions/workflows/ci.yml)
+[](https://pypi.org/project/shannon-insight/)
+[](https://pypi.org/project/shannon-insight/)
+[](LICENSE)
+
+Multi-signal codebase quality analyzer using information-theoretic primitives. Named after Claude Shannon, father of information theory.
+
+## Quick Start
+
+```bash
+pip install shannon-insight
+shannon-insight /path/to/codebase
+shannon-insight . --format json | jq .
+```
+
+## What It Does
+
+Shannon Insight scans your codebase and computes **5 orthogonal quality primitives** per file, then fuses them with consistency-weighted scoring to surface files that need attention:
+
+| Primitive | What it measures | High means |
+|-----------|-----------------|------------|
+| **Structural Entropy** | AST node type distribution | Chaotic organization |
+| **Network Centrality** | PageRank on dependency graph | Critical hub |
+| **Churn Volatility** | File modification recency | Recently changed / unstable |
+| **Semantic Coherence** | Import/export focus | Low: too many unrelated concerns |
+| **Cognitive Load** | Functions x complexity x nesting | Overloaded file |
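For intuition on the first primitive: structural entropy is the Shannon entropy of a file's AST node-type distribution. A minimal sketch follows (illustrative only; the packaged implementation lives in `shannon_insight/math/entropy.py`, which is not expanded in this diff):

```python
# Sketch of structural entropy over AST node-type counts (illustrative, not the package's exact code).
import math
from collections import Counter


def structural_entropy(node_types: Counter) -> float:
    """Shannon entropy (in bits) of the node-type distribution."""
    total = sum(node_types.values())
    if total == 0:
        return 0.0
    entropy = 0.0
    for count in node_types.values():
        if count:
            p = count / total
            entropy -= p * math.log2(p)
    return entropy


# A file dominated by one node type scores low; an even mix scores high.
structural_entropy(Counter({"function": 8, "if": 2}))  # ~0.72 bits
structural_entropy(Counter({"function": 5, "if": 5}))  # 1.0 bit
```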
+
+## Output Formats
+
+```bash
+# Rich terminal output (default) with summary dashboard
+shannon-insight .
+
+# Machine-readable JSON
+shannon-insight . --format json
+
+# Pipe-friendly CSV
+shannon-insight . --format csv
+
+# Just file paths (one per line)
+shannon-insight . --format quiet
+
+# Deep-dive on a specific file
+shannon-insight . --explain complex.go
+
+# Export to file
+shannon-insight . --output report.json
+```
+
+## CI Integration
+
+Use `--fail-above` to gate CI pipelines on code quality:
+
+```bash
+# Fail if any file scores above 2.0
+shannon-insight . --format quiet --fail-above 2.0
+```
+
+Example GitHub Actions step:
+
+```yaml
+- name: Code quality gate
+  run: shannon-insight . --fail-above 2.0 --format quiet
+```
+
+## Configuration
+
+Create `shannon-insight.toml` in your project root:
+
+```toml
+z_score_threshold = 1.5
+fusion_weights = [0.2, 0.25, 0.2, 0.15, 0.2]
+exclude_patterns = ["*_test.go", "vendor/*", "node_modules/*"]
+max_file_size_mb = 10.0
+enable_cache = true
+```
+
+Or use environment variables with `SHANNON_` prefix:
+
+```bash
+export SHANNON_Z_SCORE_THRESHOLD=2.0
+export SHANNON_ENABLE_CACHE=false
+```
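The `SHANNON_` prefix is what you would expect from a pydantic-settings model configured with `env_prefix="SHANNON_"`. A minimal sketch of such a model is below; the field names simply mirror the TOML keys above, and the package's real `AnalysisSettings` class (in `config.py`, not expanded in this diff) may differ:

```python
# Illustrative settings model; not necessarily the package's actual AnalysisSettings.
from typing import List

from pydantic_settings import BaseSettings, SettingsConfigDict


class ExampleSettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="SHANNON_")

    z_score_threshold: float = 1.5
    fusion_weights: List[float] = [0.2, 0.25, 0.2, 0.15, 0.2]
    exclude_patterns: List[str] = ["*_test.go", "vendor/*", "node_modules/*"]
    max_file_size_mb: float = 10.0
    enable_cache: bool = True


# With SHANNON_Z_SCORE_THRESHOLD=2.0 exported, this loads 2.0 instead of the default.
settings = ExampleSettings()
```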
+
+## CLI Options
+
+```
+Options:
+  PATH                    Path to codebase directory [default: .]
+  -l, --language TEXT     Language (auto, python, go, typescript, react, javascript)
+  -t, --top INTEGER       Number of top files to display [1-1000]
+  -o, --output FILE       Export JSON report to file
+  -f, --format TEXT       Output format: rich, json, csv, quiet
+  -e, --explain TEXT      Deep-dive on matching file(s)
+  --fail-above FLOAT      CI gate: exit 1 if max score exceeds threshold
+  --threshold FLOAT       Z-score threshold for anomaly detection
+  -c, --config FILE       TOML configuration file
+  -v, --verbose           Enable DEBUG logging
+  -q, --quiet             Suppress all but ERROR logging
+  --no-cache              Disable caching
+  --clear-cache           Clear cache before running
+  -w, --workers INTEGER   Parallel workers [1-32]
+  --version               Show version and exit
+
+Commands:
+  cache-info    Show cache statistics
+  cache-clear   Clear analysis cache
+```
+
+## Supported Languages
+
+- **Python** - `.py` files
+- **Go** - `.go` files
+- **TypeScript/React** - `.ts`, `.tsx` files
+- **JavaScript** - `.js`, `.jsx` files (uses TypeScript scanner)
+
+Language is auto-detected by default. Override with `--language`.
+
+## How It Works
+
+```
+CodebaseAnalyzer
+  Layer 1: Scanning        - Language-specific file parsing
+  Layer 2: Extraction      - Compute 5 orthogonal primitives per file
+  Layer 3: Detection       - Z-score normalization + anomaly thresholding
+  Layer 4: Fusion          - Consistency-weighted signal combination
+  Layer 5: Recommendations - Root cause attribution + actionable advice
+```
+
+Signal fusion uses coefficient of variation to penalize inconsistent signals:
+
+```
+consistency = 1 / (1 + CV)
+final_score = consistency * |weighted_average|
+```
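Read as code, that fusion rule looks roughly like the sketch below (illustrative; the packaged implementation lives in `shannon_insight/primitives/fusion.py` and `shannon_insight/math/fusion.py`, neither expanded in this diff, and may define CV over a different vector):

```python
# Illustrative consistency-weighted fusion of per-primitive z-scores.
import numpy as np


def fuse(z_scores: np.ndarray, weights: np.ndarray) -> float:
    """Combine the five primitive z-scores into a single anomaly score."""
    weighted_average = float(np.dot(weights, z_scores) / weights.sum())

    # Coefficient of variation across the signals: disagreement lowers consistency.
    mean = float(np.mean(z_scores))
    cv = float(np.std(z_scores)) / abs(mean) if mean != 0 else 0.0
    consistency = 1.0 / (1.0 + cv)

    return consistency * abs(weighted_average)


# Example with the default fusion_weights from the configuration section.
score = fuse(np.array([2.1, 1.8, 0.4, 1.2, 2.5]),
             np.array([0.2, 0.25, 0.2, 0.15, 0.2]))
```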
+
+See [docs/MATHEMATICAL_FOUNDATION.md](docs/MATHEMATICAL_FOUNDATION.md) for the full mathematical framework.
+
+## Development
+
+```bash
+git clone https://github.com/namanagarwal/shannon-insight.git
+cd shannon-insight
+python -m venv .venv && source .venv/bin/activate
+pip install -e ".[dev]"
+
+make test        # Run tests with coverage
+make lint        # Run ruff linter
+make format      # Format with ruff
+make type-check  # Run mypy
+make all         # Format + lint + type-check + test
+```
+
+## Contributing
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+
+## License
+
+MIT License - see [LICENSE](LICENSE)
+
+## Credits
+
+Created by Naman Agarwal. Inspired by Claude Shannon's information theory, PageRank (Page & Brin), and cyclomatic complexity (McCabe).
shannon_codebase_insight-0.4.0.dist-info/RECORD
@@ -0,0 +1,37 @@
+shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE,sha256=PVdI9e_PoIiAyouwB69rVtOD0WEGAFn-G5z84L4YXko,1070
+shannon_insight/__init__.py,sha256=cVuT2YFKbs2ouJ_Dhd9IZlaHQSxcECrSssiN1MH2IkE,703
+shannon_insight/cache.py,sha256=GxbG0GiqqKpCSwlolwBonx19UqcPK6m9-37oHSfRxm4,5858
+shannon_insight/cli.py,sha256=msSzObpLKb3yOzLW5afPc04TwNzxbSi-_uI2WX5QGi4,10058
+shannon_insight/config.py,sha256=WPnUJ-iuM1Uh2Hy1OMauedIaW2hOIf76o43zwLcaxVQ,7140
+shannon_insight/core.py,sha256=pX5pNXNUZ4sWNGuHN6fFyreN2YDw9zzsJBIuWBCPKVc,20728
+shannon_insight/file_ops.py,sha256=P4F-mpEYCRO-tyfOtJNXcyCk9gVgnJ1ovEZm50Z8ETU,5831
+shannon_insight/logging_config.py,sha256=qG6f2VyDeMtE8VCoRFOjPeAbC11fVJXKMpxicwyk0zg,2389
+shannon_insight/models.py,sha256=hNd5qmHcVtd7y4DN1vsN0lKHZi91alQ44ikXkicpBoQ,959
+shannon_insight/py.typed,sha256=g4nn5g02a75-ZjbL9uICetnLeAGCH1GCSLgvU_YxCAQ,79
+shannon_insight/security.py,sha256=6bLkjc1Af_rvaGwHJrKGicyfr4Mpmo3ylozsvub4h6E,8053
+shannon_insight/analyzers/__init__.py,sha256=M5c7yP_EQ8UGPgvndHdyzaQBwyPGa7swOeNBlr3I9o0,272
+shannon_insight/analyzers/base.py,sha256=OPhneFHU0uYG7wE6X3BtKAsHbxzUfauZss2sNBPrY4k,5996
+shannon_insight/analyzers/go_analyzer.py,sha256=SJ2hWtx9Tn80M7NLUo5OGJyjTTP_NXibwf8MJfn4Nvk,6161
+shannon_insight/analyzers/python_analyzer.py,sha256=7IIjAbZOUaktprQjONE5HY5c9WF_nHLI_3Qp4rrPdJI,6616
+shannon_insight/analyzers/typescript_analyzer.py,sha256=C4SZxBFD2Edj727ge1WnJu5Jeavla6DjDRRg2V0Ci28,6576
+shannon_insight/exceptions/__init__.py,sha256=sxPsbQZHsXEEgeMZNEyM6NAuGYiNQXpHRB_Mu3jW9rM,666
+shannon_insight/exceptions/analysis.py,sha256=hmJkELAWmVltbXFNLH9h9JyBTEtDWYqdnarz8rbY1t0,2583
+shannon_insight/exceptions/base.py,sha256=9A53D2OZX_2Dc93UqWyzIu3V3M7eb_MqM-No5YHywWw,575
+shannon_insight/exceptions/config.py,sha256=Vo-k7Fg8Zk6sL7_6RgXuIdjF6bK9EMsHT1AdGvmR0ME,1431
+shannon_insight/math/__init__.py,sha256=eGYJRFh-QF_w7DK5QsqORoaK5K5Gh97EydSo-slfDjw,331
+shannon_insight/math/entropy.py,sha256=_V4tNyPB3mRB1fbm9L3ub0LfMPdl8rhmwBJjER1V72g,3947
+shannon_insight/math/fusion.py,sha256=NCvhk1RdRyM2RwAUr2Mhv24QSPcrcWoNgmbZuK1lPY0,3616
+shannon_insight/math/graph.py,sha256=2649DhTtsodmQiPsU5piYcT8AU0uKSpMtl1NOpWyPec,6800
+shannon_insight/math/robust.py,sha256=xMtRmK--WZH0xlMU6WuSqB2FsFF72gY4cMvR8pegaSM,3157
+shannon_insight/math/statistics.py,sha256=hnfUwoQ8iD1eUm_DwA59CuoyzcXC6fm2qqjOmQRQW40,4297
+shannon_insight/primitives/__init__.py,sha256=rgCX9YpgvCYM2mvQYkxKgeWnLnrrGCK7f-E1OvVSzRE,335
+shannon_insight/primitives/detector.py,sha256=GeSb_opcKdm8E7KBYjK3trl0NI8MaVVLN5ISwZgWWNc,11679
+shannon_insight/primitives/extractor.py,sha256=7zcOWTwC91mevyQfSc6UhCXynEUp2tTyjZi-dU0-Bhs,10285
+shannon_insight/primitives/fusion.py,sha256=axDi5WWK4UWmd-MPA5BAy_X1PHoIET49MBboGUXn0fA,11895
+shannon_insight/primitives/recommendations.py,sha256=fe-tQiYwPmSoLh75Bu7_poM0TAKpibxbuDnd_rntYSk,6221
+shannon_insight/utils/__init__.py,sha256=UOdKOJkQ6Nleq5cb4QELuSNb4mJZOp7Zuym1cg4rWnE,24
+shannon_codebase_insight-0.4.0.dist-info/METADATA,sha256=JX1ink0O5xSaciFj2kttsB6vAkCtiRAxTHd4Ildhk9M,6955
+shannon_codebase_insight-0.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+shannon_codebase_insight-0.4.0.dist-info/entry_points.txt,sha256=WMKvN0qPS7nAUTGhAR8f9Or_3VfwJDLn3NjFPoy3DhE,284
+shannon_codebase_insight-0.4.0.dist-info/top_level.txt,sha256=to3Vz1EmonG4LYEKYtTbqI7gqdH9_fHgEKbYdMUyDp4,16
+shannon_codebase_insight-0.4.0.dist-info/RECORD,,
shannon_codebase_insight-0.4.0.dist-info/entry_points.txt
@@ -0,0 +1,7 @@
+[console_scripts]
+shannon-insight = shannon_insight.cli:app
+
+[shannon_insight.languages]
+go = shannon_insight.analyzers.go_analyzer:GoScanner
+python = shannon_insight.analyzers.python_analyzer:PythonScanner
+typescript = shannon_insight.analyzers.typescript_analyzer:TypeScriptScanner
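The `[shannon_insight.languages]` group exposes the language scanners as standard entry points. A short sketch of how such a registry can be discovered (illustrative; whether `cli.py` actually loads scanners this way is not shown in this diff):

```python
# Discover registered language scanners via the standard library.
# Note: entry_points(group=...) requires Python 3.10+; on 3.9 use the
# importlib_metadata backport or filter the full entry-point mapping.
from importlib.metadata import entry_points

scanners = {ep.name: ep.load() for ep in entry_points(group="shannon_insight.languages")}
go_scanner = scanners["go"](root_dir=".")  # GoScanner rooted at the current directory
```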
shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Naman Agarwal
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
shannon_codebase_insight-0.4.0.dist-info/top_level.txt
@@ -0,0 +1 @@
+shannon_insight
shannon_insight/__init__.py
@@ -0,0 +1,25 @@
+"""
+Shannon Insight - Multi-Signal Codebase Quality Analyzer
+
+A mathematical approach to code quality analysis using five orthogonal primitives:
+1. Structural Entropy - Disorder in code organization
+2. Network Centrality - Importance in dependency graph
+3. Churn Volatility - Instability of change patterns
+4. Semantic Coherence - Conceptual focus
+5. Cognitive Load - Mental effort to understand
+
+Named after Claude Shannon, father of information theory.
+"""
+
+__version__ = "0.4.0"
+__author__ = "Naman Agarwal"
+
+from .core import CodebaseAnalyzer
+from .models import FileMetrics, AnomalyReport, Primitives
+
+__all__ = [
+    "CodebaseAnalyzer",
+    "Primitives",
+    "FileMetrics",
+    "AnomalyReport",
+]
shannon_insight/analyzers/__init__.py
@@ -0,0 +1,8 @@
+"""Language-specific analyzers"""
+
+from .base import BaseScanner
+from .go_analyzer import GoScanner
+from .typescript_analyzer import TypeScriptScanner
+from .python_analyzer import PythonScanner
+
+__all__ = ["BaseScanner", "GoScanner", "TypeScriptScanner", "PythonScanner"]
shannon_insight/analyzers/base.py
@@ -0,0 +1,215 @@
+"""Base scanner class for language-agnostic functionality"""
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import List, Optional
+
+from ..models import FileMetrics
+from ..config import AnalysisSettings, default_settings
+from ..logging_config import get_logger
+from ..exceptions import FileAccessError, ParsingError
+from ..file_ops import should_skip_file
+
+logger = get_logger(__name__)
+
+
+class BaseScanner(ABC):
+    """Abstract base class for language-specific scanners"""
+
+    def __init__(
+        self,
+        root_dir: str,
+        extensions: List[str],
+        settings: Optional[AnalysisSettings] = None
+    ):
+        """
+        Initialize scanner.
+
+        Args:
+            root_dir: Root directory to scan
+            extensions: File extensions to include (e.g., ['.go', '.py'])
+            settings: Analysis settings
+        """
+        self.root_dir = Path(root_dir)
+        self.extensions = extensions
+        self.settings = settings or default_settings
+        logger.debug(f"Initialized {self.__class__.__name__} for {self.root_dir}")
+
+    def scan(self) -> List[FileMetrics]:
+        """
+        Scan all source files and extract metrics.
+
+        Returns:
+            List of file metrics for analyzed files
+        """
+        files = []
+        files_scanned = 0
+        files_skipped = 0
+        files_errored = 0
+
+        for ext in self.extensions:
+            for filepath in self.root_dir.rglob(f"*{ext}"):
+                # Check file count limit
+                if files_scanned >= self.settings.max_files:
+                    logger.warning(f"Reached max files limit ({self.settings.max_files})")
+                    break
+
+                # Skip based on custom logic
+                if self._should_skip(filepath):
+                    files_skipped += 1
+                    logger.debug(f"Skipped (custom): {filepath}")
+                    continue
+
+                # Skip based on exclusion patterns
+                if should_skip_file(filepath, self.settings.exclude_patterns):
+                    files_skipped += 1
+                    logger.debug(f"Skipped (pattern): {filepath}")
+                    continue
+
+                # Check file size
+                try:
+                    size = filepath.stat().st_size
+                    if size > self.settings.max_file_size_bytes:
+                        files_skipped += 1
+                        logger.debug(f"Skipped (size): {filepath} ({size} bytes)")
+                        continue
+                except OSError as e:
+                    files_errored += 1
+                    logger.warning(f"Cannot stat {filepath}: {e}")
+                    continue
+
+                # Analyze file
+                try:
+                    metrics = self._analyze_file(filepath)
+                    files.append(metrics)
+                    files_scanned += 1
+                    logger.debug(f"Analyzed: {filepath}")
+                except FileAccessError as e:
+                    files_errored += 1
+                    logger.warning(f"Access error for {filepath}: {e.reason}")
+                except ParsingError as e:
+                    files_errored += 1
+                    logger.warning(f"Parse error for {filepath}: {e.reason}")
+                except Exception as e:
+                    files_errored += 1
+                    logger.error(f"Unexpected error analyzing {filepath}: {e}")
+
+        logger.info(f"Scan complete: {files_scanned} analyzed, {files_skipped} skipped, {files_errored} errors")
+        return files
+
+    @abstractmethod
+    def _should_skip(self, filepath: Path) -> bool:
+        """
+        Determine if file should be skipped (e.g., tests, vendor).
+
+        Args:
+            filepath: File to check
+
+        Returns:
+            True if file should be skipped
+        """
+        pass
+
+    @abstractmethod
+    def _analyze_file(self, filepath: Path) -> FileMetrics:
+        """
+        Extract all metrics from a single file.
+
+        Args:
+            filepath: File to analyze
+
+        Returns:
+            File metrics
+
+        Raises:
+            FileAccessError: If file cannot be read
+            ParsingError: If file cannot be parsed
+        """
+        pass
+
+    @abstractmethod
+    def _count_tokens(self, content: str) -> int:
+        """
+        Count tokens in source code.
+
+        Args:
+            content: File content
+
+        Returns:
+            Token count
+        """
+        pass
+
+    @abstractmethod
+    def _extract_imports(self, content: str) -> List[str]:
+        """
+        Extract import statements.
+
+        Args:
+            content: File content
+
+        Returns:
+            List of import paths/names
+        """
+        pass
+
+    @abstractmethod
+    def _extract_exports(self, content: str) -> List[str]:
+        """
+        Extract exported identifiers.
+
+        Args:
+            content: File content
+
+        Returns:
+            List of exported names
+        """
+        pass
+
+    @abstractmethod
+    def _count_functions(self, content: str) -> int:
+        """
+        Count function declarations.
+
+        Args:
+            content: File content
+
+        Returns:
+            Function count
+        """
+        pass
+
+    @abstractmethod
+    def _estimate_complexity(self, content: str) -> float:
+        """
+        Estimate cyclomatic complexity.
+
+        Args:
+            content: File content
+
+        Returns:
+            Complexity score
+        """
+        pass
+
+    def _max_nesting_depth(self, content: str) -> int:
+        """
+        Calculate maximum nesting depth (language-agnostic).
+
+        Args:
+            content: File content
+
+        Returns:
+            Maximum nesting depth
+        """
+        max_depth = 0
+        current_depth = 0
+
+        for char in content:
+            if char == "{":
+                current_depth += 1
+                max_depth = max(max_depth, current_depth)
+            elif char == "}":
+                current_depth -= 1
+
+        return max_depth
shannon_insight/analyzers/go_analyzer.py
@@ -0,0 +1,150 @@
+"""Go language analyzer"""
+
+import re
+from pathlib import Path
+from collections import Counter
+from typing import List, Optional
+
+from .base import BaseScanner
+from ..models import FileMetrics
+from ..config import AnalysisSettings
+from ..exceptions import FileAccessError
+from ..logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class GoScanner(BaseScanner):
+    """Scanner optimized for Go codebases"""
+
+    def __init__(self, root_dir: str, settings: Optional[AnalysisSettings] = None):
+        super().__init__(root_dir, extensions=[".go"], settings=settings)
+
+    def _should_skip(self, filepath: Path) -> bool:
+        """Skip test files, vendor, venv, and other non-project directories"""
+        path_str = str(filepath)
+        skip_dirs = ("vendor", "venv", ".venv", "__pycache__", ".git", ".tox", ".mypy_cache")
+        return "_test.go" in path_str or any(d in path_str for d in skip_dirs)
+
+    def _analyze_file(self, filepath: Path) -> FileMetrics:
+        """Extract all metrics from a Go file"""
+        try:
+            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except OSError as e:
+            raise FileAccessError(filepath, f"Cannot read file: {e}")
+        except Exception as e:
+            raise FileAccessError(filepath, f"Unexpected error: {e}")
+
+        lines = content.split("\n")
+
+        return FileMetrics(
+            path=str(filepath.relative_to(self.root_dir)),
+            lines=len(lines),
+            tokens=self._count_tokens(content),
+            imports=self._extract_imports(content),
+            exports=self._extract_exports(content),
+            functions=self._count_functions(content),
+            interfaces=self._count_interfaces(content),
+            structs=self._count_structs(content),
+            complexity_score=self._estimate_complexity(content),
+            nesting_depth=self._max_nesting_depth(content),
+            ast_node_types=self._extract_ast_node_types(content),
+            last_modified=filepath.stat().st_mtime,
+        )
+
+    def _count_tokens(self, content: str) -> int:
+        """Approximate token count for Go"""
+        # Remove comments and strings
+        content = re.sub(r"//.*", "", content)
+        content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)
+        content = re.sub(r"`[^`]*`", "", content)
+        content = re.sub(r'"[^"]*"', "", content)
+
+        # Split on whitespace and common operators
+        tokens = re.findall(r"\w+|[{}()\[\];,.]", content)
+        return len(tokens)
+
+    def _extract_imports(self, content: str) -> List[str]:
+        """Extract Go import statements"""
+        imports = []
+
+        # Match: import "github.com/..."
+        for match in re.finditer(r'import\s+"([^"]+)"', content):
+            imports.append(match.group(1))
+
+        # Match grouped imports: import (\n "foo"\n "bar"\n)
+        for match in re.finditer(r"import\s*\([^)]+\)", content, re.DOTALL):
+            group = match.group(0)
+            for imp in re.findall(r'"([^"]+)"', group):
+                imports.append(imp)
+
+        return imports
+
+    def _extract_exports(self, content: str) -> List[str]:
+        """Extract exported identifiers (capitalized names in Go)"""
+        exports = []
+
+        # Exported functions: func ExportedName(...)
+        exports.extend(re.findall(r"^func\s+([A-Z]\w*)\s*\(", content, re.MULTILINE))
+
+        # Exported types: type ExportedName ...
+        exports.extend(re.findall(r"^type\s+([A-Z]\w*)\s+", content, re.MULTILINE))
+
+        # Exported constants: const ExportedName
+        exports.extend(
+            re.findall(r"^const\s+([A-Z]\w*)\s*[=\n]", content, re.MULTILINE)
+        )
+
+        # Exported variables: var ExportedName
+        exports.extend(re.findall(r"^var\s+([A-Z]\w*)\s*[=\n]", content, re.MULTILINE))
+
+        return exports
+
+    def _count_functions(self, content: str) -> int:
+        """Count function declarations (including methods with receivers)"""
+        return len(re.findall(r"\bfunc\s+\w+\s*\(", content))
+
+    def _count_interfaces(self, content: str) -> int:
+        """Count interface declarations"""
+        return len(re.findall(r"\btype\s+\w+\s+interface\s*\{", content))
+
+    def _count_structs(self, content: str) -> int:
+        """Count struct declarations"""
+        return len(re.findall(r"\btype\s+\w+\s+struct\s*\{", content))
+
+    def _estimate_complexity(self, content: str) -> float:
+        """Estimate cyclomatic complexity for Go"""
+        # Count decision points: if, else, case, for, range, select, &&, ||
+        complexity = 1  # Base complexity
+
+        complexity += len(re.findall(r"\bif\s+", content))
+        complexity += len(re.findall(r"\belse\b", content))
+        complexity += len(re.findall(r"\bcase\s+", content))
+        complexity += len(re.findall(r"\bfor\s+", content))
+        complexity += len(re.findall(r"\brange\s+", content))
+        complexity += len(re.findall(r"\bselect\s*\{", content))
+        complexity += len(re.findall(r"&&", content))
+        complexity += len(re.findall(r"\|\|", content))
+
+        return complexity
+
+    def _extract_ast_node_types(self, content: str) -> Counter:
+        """Extract distribution of AST node types for Go"""
+        node_types = Counter()
+
+        # Go-specific node types
+        node_types["function"] = self._count_functions(content)
+        node_types["struct"] = self._count_structs(content)
+        node_types["interface"] = self._count_interfaces(content)
+        node_types["import"] = len(self._extract_imports(content))
+        node_types["export"] = len(self._extract_exports(content))
+        node_types["if"] = len(re.findall(r"\bif\s+", content))
+        node_types["for"] = len(re.findall(r"\bfor\s+", content))
+        node_types["range"] = len(re.findall(r"\brange\s+", content))
+        node_types["return"] = len(re.findall(r"\breturn\b", content))
+        node_types["defer"] = len(re.findall(r"\bdefer\b", content))
+        node_types["go"] = len(re.findall(r"\bgo\s+\w+\s*\(", content))
+        node_types["chan"] = len(re.findall(r"\bchan\s+\w+", content))
+
+        return node_types
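As a quick illustration of the regex heuristics above, the scanner's helpers can be exercised directly on an in-memory Go snippet (illustrative usage of private helpers; the expected values in the comments follow from the patterns defined in this file):

```python
# Exercise GoScanner's heuristics on a small Go snippet held in a string.
from shannon_insight.analyzers.go_analyzer import GoScanner

snippet = '''
package main

import "fmt"

func Greet(name string) {
    if name == "" {
        name = "world"
    }
    fmt.Println("hello", name)
}
'''

scanner = GoScanner(root_dir=".")
print(scanner._count_functions(snippet))      # 1
print(scanner._extract_exports(snippet))      # ['Greet']
print(scanner._estimate_complexity(snippet))  # 2 (base 1 + one `if`)
```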